# CST Renamer Walkthrough


This Python notebook breaks down the functionality of the CST Renamer from our [GitHub repository](https://github.com/BryanFrederickson/CYBER1-CodeVersion/blob/main/CSTRenamer/CSTRename.py), walking through the general flow and the individual sections involved in generating semantic clones.

## Prerequisites
The below code blocks (library installation, imports, and file upload) are all required for any later code blocks to be run.

In [None]:
# install libCST
!pip install libcst

In [None]:
import ast as ast # for testing validity of Python file
import libcst as cst # LibCST library for editing source code
from libcst.metadata import MetadataWrapper, ParentNodeProvider # for in-depth cst node parsing
import random # used for randomly choosing placeholder names
import re # for regular expressions
import google.generativeai as genai # for generating synonyms

In [None]:
# Upload one Python file through Google Colab and validate it.
# NOTE: only one Python file is expected; if several are uploaded, only the
# data of the last one is kept.
try:
    # The import lives inside the try so that a missing google.colab package
    # (i.e. running outside Colab) is actually caught by the handler below.
    from google.colab import files
    uploaded = files.upload()
except ImportError:
    uploaded = {}
    print("google.colab is not available; this cell must be run in Google Colab.")

# Reset the results on every run so later cells never pick up the name, data
# or CST of a previous (possibly different) upload.
filename = None
data = None
original_cst = None

filenames = uploaded.keys()

for file in filenames:
    filename = file
    data = uploaded[file]

if data is None:
    print("No file was uploaded. Please run this block again and upload a Python file.")
else:
    try:
        # ast.parse is used purely as a syntax check before building the CST.
        ast.parse(data)
        print(f"{filename} is syntactically valid.")
        original_cst = cst.parse_module(data)
    except SyntaxError as error:
        print(f"Syntax error in {filename}: {error}")
        print("Please run this block again and upload a syntatically correct file.")

## 1. Define Parameters
Define the parameters of the clone generation. Note that for the notebook implementation, the parameter for where to save the output has been removed. Instead, after all generation is completed, in Google Colab look in the files section on the left to view or download outputs.

In [None]:
def _prompt_until_valid(prompt, convert, is_valid, invalid_message, not_parsable_message):
    """Keep asking the user until the input converts and validates.

    Args:
        prompt: Text shown by ``input``.
        convert: Callable turning the typed string into a value; a
            ``ValueError`` from it is reported with ``not_parsable_message``.
        is_valid: Predicate applied to the converted value.
        invalid_message: Printed when the value converts but is rejected.
        not_parsable_message: Printed when conversion raises ``ValueError``.

    Returns:
        The first converted value accepted by ``is_valid``.
    """
    while True:
        try:
            value = convert(input(prompt))
        except ValueError:
            print(not_parsable_message)
            continue
        if is_valid(value):
            return value
        print(invalid_message)


def _prompt_probability(prompt):
    """Ask for a float in the closed interval [0, 1] and return it."""
    return _prompt_until_valid(
        prompt,
        float,
        lambda probability: 0.0 <= probability <= 1.0,
        "Probability must be between 0 and 1!",
        "Not a number!",
    )


# Get N-Param: number of clones to generate (strictly positive integer)
n_param = _prompt_until_valid(
    "Number of clones to generate: ",
    int,
    lambda count: count > 0,
    "Number of clones must be positive!",
    "Not an integer!",
)


# Get Renaming Probabilities
print("Define the probabilities of either a variable or function getting renamed. Must be a float between 0 and 1 inclusive, such as 0.5 for 50%.")
# get variable renaming probability
VAR_RENAME_PROBABILITY = _prompt_probability("Variable Rename Probability: ")

# get function renaming probability
FUNC_RENAME_PROBABILITY = _prompt_probability("Function Rename Probability: ")


## 2. Define Renaming Functionality
Below are the classes used to rename variables and functions.

#### Variable Renamer

In [None]:
import os  # needed in this cell only, to read the API key from the environment

# Counts how many synonym API calls have failed so that, once
# `api_fail_notif_cap` failures have been reported, the user is no longer
# notified (keeps the output from being flooded).
call_fails = 0
api_fail_notif_cap = 5

# animal wordbank list from: https://gist.github.com/CheeseCake87/c1d222c387ff1342cf3b910456f4865a
# Starts empty here; the clone-generation loop assigns the actual word list.
animals = list()

## API key necessary for authentication and prompt generation ##
# SECURITY: the key is read from the GOOGLE_API_KEY environment variable
# instead of being hard-coded in the notebook. When it is missing, synonym
# requests fail and VarRename falls back to placeholder names.
# NOTE(review): any key that was previously committed to this file should be
# treated as leaked and revoked.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

## Utilizing Google Gemini 1.5-flash model (created after the client is configured) ##
model = genai.GenerativeModel("gemini-1.5-flash")

## Dictionary to keep track of original variable names and their changed values ##
existing_vars = dict()

## Creation of subclass derived from CSTTransformer which allows modified traversal attributes ##
class VarRename(cst.CSTTransformer):
    """CST transformer that renames identifiers found in selected contexts.

    Every original name is mapped at most once. The mapping is stored in the
    module-level ``existing_vars`` dict, so each occurrence handled by this
    class receives the same replacement. A name is renamed with probability
    ``VAR_RENAME_PROBABILITY``; the replacement is a synonym requested from
    the module-level Gemini ``model`` or, when that fails, a placeholder
    taken from the module-level ``animals`` list.

    Only the contexts with a ``leave_*`` handler below are rewritten
    (parameters, for-loops, assignment targets, attributes, call arguments,
    binary operations, comparisons, return statements and f-strings).
    ImportAlias / AsName nodes are not handled here; import aliases are
    renamed by ``FuncRename.leave_ImportAlias``.
    """

    ## Allows access to parent node metadata ##
    METADATA_DEPENDENCIES = (ParentNodeProvider,)

    @staticmethod
    def _is_reserved(name):
        """Return True for names that must never be renamed or used as a new name.

        libcst represents ``None``/``True``/``False`` as ordinary ``Name``
        nodes, and builtins such as ``print`` or ``int`` as well as dunder
        names such as ``__name__`` look like variables too. Renaming any of
        them would change the meaning of the program.
        """
        import builtins
        import keyword

        is_dunder = name.startswith("__") and name.endswith("__")
        return is_dunder or keyword.iskeyword(name) or hasattr(builtins, name)

    @staticmethod
    def _placeholder_name(original_varname):
        """Pick an unused placeholder from ``animals``.

        The chosen word is replaced in ``animals`` by the same word with a
        ``1`` appended, as before. Words already used as a replacement are
        skipped so two variables cannot end up with the same name. When no
        placeholder is available, the original name is returned unchanged.
        """
        taken = set(existing_vars.values())
        candidates = [name for name in animals if name not in taken]
        if not candidates:
            return original_varname
        new_name = random.choice(candidates)
        animals.remove(new_name)
        animals.append(new_name + "1")
        return new_name

    def _renamed(self, name_node):
        """Return ``name_node`` with its value replaced by its synonym."""
        return name_node.with_changes(value=self.get_synonym(name_node.value))

    ############ Function to generate a new synonym for the existing variable using gemini API ############

    def get_synonym(self, original_varname):
        """Return the replacement for ``original_varname``, creating it if needed.

        Reserved names (keywords, builtins, dunders) are returned unchanged.
        Otherwise the result is cached in ``existing_vars``; a name that is
        not selected for renaming is cached as mapping to itself so the
        decision is made only once per name.
        """
        global call_fails

        if VarRename._is_reserved(original_varname):
            return original_varname

        ## Only rename the variable if no replacement exists yet. Otherwise return the current one ##
        if original_varname in existing_vars:
            return existing_vars[original_varname]

        if random.random() >= VAR_RENAME_PROBABILITY:
            # keep unchanged, add to dict so that it doesn't run this every time an unchanged var is hit
            existing_vars[original_varname] = original_varname
            return original_varname

        try:
            ## Creation of prompt for provided original variable name ##
            response = model.generate_content(f"Provide a one-word synonym for '{original_varname}' in a coding context. Also make it lower case or camel-case. Make it unique, different, and distinct. [{random.randint(0, 10000)}. Please answer with only the wrod and nothing more.]")

            ## Parse the API return value and strip everything that cannot be part of an identifier ##
            synonym = re.sub(r"[^a-zA-Z0-9_]", "", response.text)

            # Reject answers that would produce broken or ambiguous code:
            # empty / digit-leading strings, reserved names, and names that
            # are already the replacement of another variable.
            if (
                not synonym.isidentifier()
                or VarRename._is_reserved(synonym)
                or synonym in existing_vars.values()
            ):
                raise ValueError(f"unusable synonym {synonym!r}")

            ## Add key-value pair to the dictionary ##
            existing_vars[original_varname] = synonym

        except Exception:
            # Best-effort: any API or validation failure falls back to a placeholder.
            if call_fails < api_fail_notif_cap:
                print(f"Error fetching synonym for '{original_varname}', using animal name placeholder.")
                call_fails = call_fails + 1
                if call_fails == api_fail_notif_cap:
                    print(f"Cap of '{api_fail_notif_cap}' error(s) with API synonym fetching, will no longer print errors.")
            existing_vars[original_varname] = VarRename._placeholder_name(original_varname)

        return existing_vars[original_varname]
    #######################################################################################################

    ######################### Function for transforming the 'Param' type Name nodes #######################

    # Context for function: parameter name in a function definition. ( Ex: def function(variable1, variable2) ).

    def leave_Param(self, original_node: cst.Param, updated_node: cst.Param) -> cst.Param:
        """Rename the parameter name."""
        if isinstance(updated_node.name, cst.Name):
            updated_node = updated_node.with_changes(name=self._renamed(updated_node.name))
        return updated_node
    #######################################################################################################

    ######################### Function for transforming the 'For' type Name nodes #########################

    # Context for function: Variable is present within a For statement as a target or iter value. #
    # (Ex: for variable in variable2:)

    def leave_For(self, original_node: cst.For, updated_node: cst.For) -> cst.For:
        """Rename a plain or tuple loop target and a plain-name iterable."""
        target = updated_node.target

        if isinstance(target, cst.Name):
            updated_node = updated_node.with_changes(target=self._renamed(target))

        elif isinstance(target, cst.Tuple):
            # Rename only the tuple elements that are plain names.
            new_elements = [
                element.with_changes(value=self._renamed(element.value))
                if isinstance(element.value, cst.Name)
                else element
                for element in target.elements
            ]
            updated_node = updated_node.with_changes(target=target.with_changes(elements=new_elements))

        if isinstance(updated_node.iter, cst.Name):
            updated_node = updated_node.with_changes(iter=self._renamed(updated_node.iter))

        return updated_node
    #######################################################################################################

    ######################### Function for transforming the 'AssignTarget' type Name nodes ################

    # Context for function: Left target value of an assignment operator. (Ex: variable = 5).

    def leave_AssignTarget(self, original_node: cst.AssignTarget, updated_node: cst.AssignTarget) -> cst.AssignTarget:
        """Rename a plain-name assignment target."""
        if isinstance(updated_node.target, cst.Name):
            updated_node = updated_node.with_changes(target=self._renamed(updated_node.target))
        return updated_node
    #######################################################################################################

    ######################### Function for transforming the 'Attribute' type Name nodes ###################

    # Context for function: attribute access. (Ex: variable.attribute).

    def leave_Attribute(self, original_node: cst.Attribute, updated_node: cst.Attribute) -> cst.Attribute:
        """Rename the object or, failing that, the attribute of ``obj.attr``.

        Unlike the other handlers this one never creates a new mapping: a
        part is only renamed when its name is already in ``existing_vars``.
        If the object part is renamed, the attribute part is left untouched.
        """
        if isinstance(updated_node.value, cst.Name) and updated_node.value.value in existing_vars:
            return updated_node.with_changes(value=self._renamed(updated_node.value))

        if isinstance(updated_node.attr, cst.Name) and updated_node.attr.value in existing_vars:
            return updated_node.with_changes(attr=self._renamed(updated_node.attr))

        return updated_node
    #######################################################################################################

    ######################### Function for transforming the 'Arg' type Name nodes #########################

    # Context for function: plain-name argument in a call. (Ex: function(variable)).

    def leave_Arg(self, original_node: cst.Arg, updated_node: cst.Arg) -> cst.Arg:
        """Rename the argument value; the keyword of ``key=value`` is left as is."""
        if isinstance(updated_node.value, cst.Name):
            return updated_node.with_changes(value=self._renamed(updated_node.value))
        return updated_node
    #######################################################################################################

    ######################### Function for transforming the 'BinaryOperation' type Name nodes #############

    # Context for function: operands of a binary operator. (Ex: variable1 + variable2).

    def leave_BinaryOperation(self, original_node: cst.BinaryOperation, updated_node: cst.BinaryOperation) -> cst.BinaryOperation:
        """Rename plain-name operands on both sides."""
        if isinstance(updated_node.left, cst.Name):
            updated_node = updated_node.with_changes(left=self._renamed(updated_node.left))

        if isinstance(updated_node.right, cst.Name):
            updated_node = updated_node.with_changes(right=self._renamed(updated_node.right))

        return updated_node
    #######################################################################################################

    ######################### Function for transforming the 'Comparison' type Name nodes ##################

    # Context for function: operands of a comparison. (Ex: variable1 < variable2 <= variable3).

    def leave_Comparison(self, original_node: cst.Comparison, updated_node: cst.Comparison) -> cst.Comparison:
        """Rename the left operand and every plain-name comparator.

        The comparators are renamed as well so that both sides of a
        comparison stay consistent with the rest of the renamed code.
        """
        if isinstance(updated_node.left, cst.Name):
            updated_node = updated_node.with_changes(left=self._renamed(updated_node.left))

        new_targets = [
            target.with_changes(comparator=self._renamed(target.comparator))
            if isinstance(target.comparator, cst.Name)
            else target
            for target in updated_node.comparisons
        ]
        return updated_node.with_changes(comparisons=new_targets)
    #######################################################################################################

    ######################### Function for transforming the 'Return' type Name nodes #######################

    # Context for function: plain name being returned. (Ex: return variable).

    def leave_Return(self, original_node: cst.Return, updated_node: cst.Return) -> cst.Return:
        """Rename a plain-name return value."""
        if isinstance(updated_node.value, cst.Name):
            updated_node = updated_node.with_changes(value=self._renamed(updated_node.value))
        return updated_node
    #######################################################################################################

    ######################### Function for transforming the 'FormattedString' type Name nodes #############

    # Context for function: plain name inside an f-string placeholder. (Ex: f"{variable}").

    def leave_FormattedString(self, original_node: cst.FormattedString, updated_node: cst.FormattedString) -> cst.FormattedString:
        """Rename plain names used directly as f-string expressions."""
        new_parts = [
            part.with_changes(expression=self._renamed(part.expression))
            if isinstance(part, cst.FormattedStringExpression) and isinstance(part.expression, cst.Name)
            else part
            for part in updated_node.parts
        ]
        return updated_node.with_changes(parts=new_parts)
    #######################################################################################################


#### Function Renamer

In [None]:
# Placeholder names that replace function names; starts empty here and is
# assigned the actual word list by the clone-generation loop.
colors = []

# Maps every original function/alias name to its new name. It is used to
# rewrite all calls of the old function into calls of the new name, assuming
# that no two functions receive the same new name.
func_name_pairs = {}

# Renames user-defined function definitions and import aliases. Predefined
# functions such as print() or math.log are never touched, because only names
# recorded in func_name_pairs are rewritten (call sites are handled by CallRename).
class FuncRename(cst.CSTTransformer):
    """First pass of function renaming: definitions and import aliases.

    Each name is decided once and stored in the module-level
    ``func_name_pairs`` dict: with probability ``FUNC_RENAME_PROBABILITY`` it
    is mapped to a placeholder taken from the module-level ``colors`` list,
    otherwise it is mapped to itself.

    Functions defined inside a class body and dunder functions are left
    alone: ``CallRename`` only rewrites plain-name calls and the object part
    of attribute calls, so a renamed method would no longer match its
    ``obj.method()`` call sites, and dunder names are looked up by Python
    itself.
    """

    def __init__(self):
        super().__init__()
        # Number of ClassDef nodes currently being traversed.
        self._class_depth = 0

    def visit_ClassDef(self, node: cst.ClassDef) -> bool:
        """Track that traversal entered a class body."""
        self._class_depth += 1
        return True

    def leave_ClassDef(self, original_node: cst.ClassDef, updated_node: cst.ClassDef) -> cst.ClassDef:
        """Track that traversal left a class body; the node is not modified."""
        self._class_depth -= 1
        return updated_node

    @staticmethod
    def _replacement_for(old_name):
        """Return the new name for ``old_name``, deciding it on first use.

        A chosen placeholder is replaced in ``colors`` by the same word with
        a ``1`` appended. If ``colors`` is empty the name is kept unchanged
        instead of raising from ``random.choice``.
        """
        if old_name not in func_name_pairs:
            if colors and random.random() < FUNC_RENAME_PROBABILITY:
                new_name = random.choice(colors)
                colors.remove(new_name)
                colors.append(new_name + "1")
                func_name_pairs[old_name] = new_name
            else:
                func_name_pairs[old_name] = old_name
        return func_name_pairs[old_name]

    # rename function names in a "def funcname: " node
    # FunctionDef node docs: (https://libcst.readthedocs.io/_/downloads/en/latest/pdf/#page=73&zoom=auto,-205,215)
    def leave_FunctionDef(self, node: cst.FunctionDef, updated_node: cst.FunctionDef) -> cst.FunctionDef:
        """Rename a module-level (or nested, non-method) function definition."""
        old_name = updated_node.name.value
        is_dunder = old_name.startswith("__") and old_name.endswith("__")
        if self._class_depth > 0 or is_dunder:
            return updated_node

        # the name node in function def is a child node, thus to change function name via the FunctionDef parent node, use with_deep_changes via:
        # (https://libcst.readthedocs.io/en/latest/nodes.html#libcst.CSTNode.with_deep_changes)
        return updated_node.with_deep_changes(updated_node.name, value=self._replacement_for(old_name))

    # rename function names in a "import x as y" node
    # ImportAlias node docs: (https://libcst.readthedocs.io/_/downloads/en/latest/pdf/#page=78&zoom=auto,-205,314)
    def leave_ImportAlias(self, node: cst.ImportAlias, updated_node: cst.ImportAlias) -> cst.ImportAlias:
        """Rename the ``y`` of ``import x as y``; imports without alias are unchanged."""
        alias_node = updated_node.asname
        if not alias_node:
            return updated_node
        new_alias = self._replacement_for(alias_node.name.value)
        return updated_node.with_deep_changes(alias_node.name, value=new_alias)


# kept separate from FuncRename to do two-pass and prevent renaming predefined functions like print()
class CallRename(cst.CSTTransformer):
    """Second pass of function renaming: rewrite call sites.

    Relies on the module-level ``func_name_pairs`` dict filled by
    ``FuncRename``; names that are not in it are left untouched.
    """

    # rename function names in a function call node
    # Call node docs: (https://libcst.readthedocs.io/_/downloads/en/latest/pdf/#page=53&zoom=auto,-205,721)
    def leave_Call(self, node: cst.Call, updated_node: cst.Call) -> cst.Call:
        """Rename the called name, or the leftmost name of a dotted call.

        For plain calls (``blah()``) the function name itself is looked up.
        For attribute calls (``blah.log()``, ``blah.sub.fn()``) only the
        leftmost name can be custom (e.g. ``import math as blah``), because
        an imported-and-renamed member is called as a plain name
        (``from math import log as blah`` -> ``blah()``).
        Calls whose callee does not start with a plain name (``foo().bar()``,
        ``[1, 2].count(1)``, ...) are returned unchanged.
        """
        # Walk down the Attribute chain to the leftmost expression.
        root = updated_node.func
        while isinstance(root, cst.Attribute):
            root = root.value

        if not isinstance(root, cst.Name):
            return updated_node

        new_name = func_name_pairs.get(root.value)
        if new_name is None:
            return updated_node

        return updated_node.with_deep_changes(root, value=new_name)


#### Logic renaming

In [None]:

class LogicRenamer(cst.CSTTransformer):  # Logic renamer subclass definition #
    """Experimental transformer that rewrites ``while a < b`` / ``while a > b``
    loops into ``for <name> in range(<other side>)`` loops.

    NOTE(review): the rewrite is purely structural. The generated loop always
    iterates ``range(bound)`` from 0, the loop body (including any counter
    update it contains) is copied unchanged, and ``>`` comparisons are mapped
    to the same ``range(bound)`` form as ``<``. The result is therefore not
    guaranteed to be equivalent to the original loop - confirm the intended
    semantics before using this for clone generation.
    """

    METADATA_DEPENDENCIES = (ParentNodeProvider,)  # Allow access to parent node attributes #

    def _while_to_range_for(self, updated_node):
        """Build the ``for`` loop shared by both comparison handlers.

        The side of the comparison that is a plain name becomes the loop
        variable (the left side wins when both are names) and the other side
        becomes the ``range`` argument. When neither side is a plain name the
        node is returned unchanged, since anything else would produce an
        invalid loop target such as ``for 5 in range(...)``.
        """
        left = updated_node.test.left
        right = updated_node.test.comparisons[0].comparator

        if isinstance(left, cst.Name):
            loop_var, num_bound = left, right    # variable on LHS of comparison
        elif isinstance(right, cst.Name):
            loop_var, num_bound = right, left    # variable on RHS of comparison
        else:
            return updated_node

        # Return new CST structure that replaces the original While node #
        return cst.For(
            target=loop_var,
            iter=cst.Call(
                func=cst.Name("range"),
                args=[cst.Arg(num_bound)],
            ),
            body=updated_node.body,
            orelse=updated_node.orelse,
        )

    def LessThan_Handler(self, updated_node):
        """Convert a ``while ... < ...`` loop; see ``_while_to_range_for``."""
        return self._while_to_range_for(updated_node)

    def GreaterThan_Handler(self, updated_node):
        """Convert a ``while ... > ...`` loop; see ``_while_to_range_for``."""
        return self._while_to_range_for(updated_node)

    def leave_While(self, original_node: cst.While, updated_node: cst.While) -> cst.BaseCompoundStatement:  # Handles logic swapping for While -> For looping #
        """Dispatch single ``<`` / ``>`` comparisons to their handler.

        Returns the replacement ``For`` node, or the unchanged ``While`` node
        for every other kind of loop condition (chained comparisons, other
        operators, non-comparison tests).
        """
        test = updated_node.test

        if isinstance(test, cst.Comparison) and len(test.comparisons) == 1:
            operator = test.comparisons[0].operator

            if isinstance(operator, cst.LessThan):  # Handles < logical comparison #
                return self.LessThan_Handler(updated_node)

            if isinstance(operator, cst.GreaterThan):  # Handles > logical comparison #
                return self.GreaterThan_Handler(updated_node)

        return updated_node



# Manual check of LogicRenamer: read the local file "test.py", run the
# transformer over it and print the resulting source code.
with open("test.py", "r") as file:
    code = file.read()

# Parse the source text into a LibCST module.
module = cst.parse_module(code)

# LogicRenamer declares METADATA_DEPENDENCIES, so it is run through a MetadataWrapper.
wrapper = MetadataWrapper(module)

transformed_module = wrapper.visit(LogicRenamer())

# `.code` renders the transformed tree back to source text.
print(transformed_module.code)


## 3. Begin Clone Generation

In [None]:
# Generate `n_param` clones of the uploaded file. Every iteration starts from
# the untouched `original_cst` with fresh rename tables and fresh placeholder
# word lists, runs the three renaming passes and writes the result to disk.
for i in range(n_param):

    # reset values
    func_name_pairs=dict()
    existing_vars=dict()
    colors = ['aliceblue','antiquewhite','aqua','aquamarine','azure','beige','bisque','black','blanchedalmond',
          'blue','blueviolet','brown','burlywood','cadetblue','chartreuse','chocolate','coral','cornflowerblue',
          'cornsilk','crimson','cyan','darkblue','darkcyan','darkgoldenrod','darkgray','darkgreen','darkkhaki',
          'darkmagenta','darkolivegreen','darkorange','darkorchid','darkred','darksalmon','darkseagreen',
          'darkslateblue','darkslategray','darkturquoise','darkviolet','deeppink','deepskyblue','dimgray',
          'dodgerblue','firebrick','floralwhite','forestgreen','fuchsia','gainsboro','ghostwhite','gold','goldenrod',
          'gray','green','greenyellow','honeydew','hotpink','indianred','indigo','ivory','khaki','lavender',
          'lavenderblush','lawngreen','lemonchiffon','lightblue','lightcoral','lightcyan','lightgoldenrodyellow',
          'lightgreen','lightgray','lightpink','lightsalmon','lightseagreen','lightskyblue','lightslategray',
          'lightsteelblue','lightyellow','lime','limegreen','linen','magenta','maroon','mediumaquamarine','mediumblue',
          'mediumorchid','mediumpurple','mediumseagreen','mediumslateblue','mediumspringgreen','mediumturquoise',
          'mediumvioletred','midnightblue','mintcream','mistyrose','moccasin','navajowhite','navy','oldlace','olive',
          'olivedrab','orange','orangered','orchid','palegoldenrod','palegreen','paleturquoise','palevioletred',
          'papayawhip','peachpuff','peru','pink','plum','powderblue','purple','red','rosybrown','royalblue','saddlebrown',
          'salmon','sandybrown','seagreen','seashell','sienna','silver','skyblue','slateblue','slategray','snow',
          'springgreen','steelblue','tan','teal','thistle','tomato','turquoise','violet','wheat','white','whitesmoke'
          ,'yellow','yellowgreen']
    animals = ['Canidae', 'Felidae', 'Cat', 'Cattle', 'Dog', 'Donkey', 'Goat', 'Horse', 'Pig', 'Rabbit',
           'Aardvark', 'Aardwolf', 'Albatross', 'Alligator', 'Alpaca', 'Amphibian', 'Anaconda',
           'Angelfish', 'Anglerfish', 'Ant', 'Anteater', 'Antelope', 'Antlion', 'Ape', 'Aphid',
           'Armadillo', 'Asp', 'Baboon', 'Badger', 'Bandicoot', 'Barnacle', 'Barracuda', 'Basilisk',
           'Bass', 'Bat', 'Bear', 'Beaver', 'Bedbug', 'Bee', 'Beetle', 'Bird', 'Bison', 'Blackbird',
           'Boa', 'Boar', 'Bobcat', 'Bobolink', 'Bonobo', 'Bovid', 'Bug', 'Butterfly', 'Buzzard',
           'Camel', 'Canid', 'Capybara', 'Cardinal', 'Caribou', 'Carp', 'Cat', 'Catshark',
           'Caterpillar', 'Catfish', 'Cattle', 'Centipede', 'Cephalopod', 'Chameleon', 'Cheetah',
           'Chickadee', 'Chicken', 'Chimpanzee', 'Chinchilla', 'Chipmunk', 'Clam', 'Clownfish',
           'Cobra', 'Cockroach', 'Cod', 'Condor', 'Constrictor', 'Coral', 'Cougar', 'Cow', 'Coyote',
           'Crab', 'Crane', 'Crawdad', 'Crayfish', 'Cricket', 'Crocodile', 'Crow', 'Cuckoo', 'Cicada',
           'Damselfly', 'Deer', 'Dingo', 'Dinosaur', 'Dog', 'Dolphin', 'Donkey', 'Dormouse', 'Dove',
           'Dragonfly', 'Dragon', 'Duck', 'Eagle', 'Earthworm', 'Earwig', 'Echidna', 'Eel', 'Egret',
           'Elephant', 'Elk', 'Emu', 'Ermine', 'Falcon', 'Ferret', 'Finch', 'Firefly', 'Fish',
           'Flamingo', 'Flea', 'Fly', 'Flyingfish', 'Fowl', 'Fox', 'Frog', 'Gamefowl', 'Galliform',
           'Gazelle', 'Gecko', 'Gerbil', 'Gibbon', 'Giraffe', 'Goat', 'Goldfish', 'Goose', 'Gopher',
           'Gorilla', 'Grasshopper', 'Grouse', 'Guan', 'Guanaco', 'Guineafowl', 'Gull', 'Guppy',
           'Haddock', 'Halibut', 'Hamster', 'Hare', 'Harrier', 'Hawk', 'Hedgehog', 'Heron', 'Herring',
           'Hippopotamus', 'Hookworm', 'Hornet', 'Horse', 'Hoverfly', 'Hummingbird', 'Hyena', 'Iguana',
           'Impala', 'Jackal', 'Jaguar', 'Jay', 'Jellyfish', 'Junglefowl', 'Kangaroo', 'Kingfisher',
           'Kite', 'Kiwi', 'Koala', 'Koi', 'Krill', 'Ladybug', 'Lamprey', 'Landfowl', 'Lark', 'Leech',
           'Lemming', 'Lemur', 'Leopard', 'Leopon', 'Limpet', 'Lion', 'Lizard', 'Llama', 'Lobster',
           'Locust', 'Loon', 'Louse', 'Lungfish', 'Lynx', 'Macaw', 'Mackerel', 'Magpie', 'Mammal',
           'Manatee', 'Mandrill', 'Marlin', 'Marmoset', 'Marmot', 'Marsupial', 'Marten', 'Mastodon',
           'Meadowlark', 'Meerkat', 'Mink', 'Minnow', 'Mite', 'Mockingbird', 'Mole', 'Mollusk',
           'Mongoose', 'Monkey', 'Moose', 'Mosquito', 'Moth', 'Mouse', 'Mule', 'Muskox', 'Narwhal',
           'Newt', 'Nightingale', 'Ocelot', 'Octopus', 'Opossum', 'Orangutan', 'Orca', 'Ostrich',
           'Otter', 'Owl', 'Ox', 'Panda', 'Panther', 'Parakeet', 'Parrot', 'Parrotfish', 'Partridge',
           'Peacock', 'Peafowl', 'Pelican', 'Penguin', 'Perch', 'Pheasant', 'Pig', 'Pigeon', 'Pike',
           'Pinniped', 'Piranha', 'Planarian', 'Platypus', 'Pony', 'Porcupine', 'Porpoise', 'Possum',
           'Prawn', 'Primate', 'Ptarmigan', 'Puffin', 'Puma', 'Python', 'Quail', 'Quelea', 'Quokka',
           'Rabbit', 'Raccoon', 'Rat', 'Rattlesnake', 'Raven', 'Reindeer', 'Reptile', 'Rhinoceros',
           'Roadrunner', 'Rodent', 'Rook', 'Rooster', 'Roundworm', 'Sailfish', 'Salamander', 'Salmon',
           'Sawfish', 'Scallop', 'Scorpion', 'Seahorse', 'Shark', 'Sheep', 'Shrew', 'Shrimp',
           'Silkworm', 'Silverfish', 'Skink', 'Skunk', 'Sloth', 'Slug', 'Smelt', 'Snail', 'Snake',
           'Snipe', 'Sole', 'Sparrow', 'Spider', 'Spoonbill', 'Squid', 'Squirrel', 'Starfish',
           'Stingray', 'Stoat', 'Stork', 'Sturgeon', 'Swallow', 'Swan', 'Swift', 'Swordfish',
           'Swordtail', 'Tahr', 'Takin', 'Tapir', 'Tarantula', 'Tarsier', 'Termite', 'Tern', 'Thrush',
           'Tick', 'Tiger', 'Tiglon', 'Toad', 'Tortoise', 'Toucan', 'Trout', 'Tuna', 'Turkey',
           'Turtle', 'Tyrannosaurus', 'Urial', 'Vicuna', 'Viper', 'Vole', 'Vulture', 'Wallaby',
           'Walrus', 'Wasp', 'Warbler', 'Weasel', 'Whale', 'Whippet', 'Whitefish', 'Wildcat',
           'Wildebeest', 'Wildfowl', 'Wolf', 'Wolverine', 'Wombat', 'Woodpecker', 'Worm', 'Wren',
           'Xerinae', 'Yak', 'Zebra', 'Alpaca', 'Cat', 'Cattle', 'Chicken', 'Dog', 'Donkey', 'Ferret',
           'Gayal', 'Goldfish', 'Guppy', 'Horse', 'Koi', 'Llama', 'Sheep', 'Yak']

    # The word lists contain repeated entries (e.g. 'Cat' and 'Dog' occur
    # several times in `animals`). The renamers remove only one occurrence of
    # a word after using it, so a repeated word could be handed out to two
    # different identifiers. Drop the duplicates while keeping the order.
    colors = list(dict.fromkeys(colors))
    animals = list(dict.fromkeys(animals))

    ## Provides extra data for every node in the tree ##
    wrapped_module = MetadataWrapper(original_cst)
    ## Traverse tree with transformer subclass ##
    vars_renamed = wrapped_module.visit(VarRename())
    print("Variables renamed...")
    funcs_renamed=vars_renamed.visit(FuncRename())
    print("Functions renamed...")
    func_calls_renamed=funcs_renamed.visit(CallRename())
    print("Function calls renamed...")
    modified_code=func_calls_renamed.code

    ## Write the newly modified code to a new file ##
    # Strip the input's ".py" so the result is "<name>_RENAMED_<n>.py"
    # rather than "<name>.py_RENAMED_<n>.py".
    base_name = filename[:-3] if filename.endswith(".py") else filename
    output_filename = f"{base_name}_RENAMED_{i+1}.py"
    # The with-statement closes the file; no explicit close() is needed.
    with open(output_filename, "w") as output_file:
        output_file.write(modified_code)

    print(f"'{output_filename}' generated...")
    print("\n")
print(f"Finished! '{n_param}' semantic clones generated.")

## 4. View Output
In Google Colab, look to the left and select the 'Files' directory. Inside, you will see your input file as well as the renamed output files, named in the format '[original name]\_RENAMED\_[nth generated clone].py', e.g. input\_RENAMED\_2.py for the second output of the file 'input.py'. You can view an output in Google Colab by double-clicking it, or download it by right-clicking it. Note that when a Google Colab runtime is disconnected, these files are lost unless they have been downloaded.



# CFG Experimentation

For now, this section is reserved as a testing environment to see how the creation of CFG works with new libraries and how we can visualize the new graphs.

## Install & Import Necessary Python Libraries

In [None]:
!pip3 install py2cfg
!pip3 install networkx
!pip3 install pydot

from py2cfg import CFGBuilder
import networkx as nx
import pydot

## Build CFG

Convert a Python source code file into a Control Flow Graph (CFG). Specify the source file to target; the cell outputs a .plain file as a text representation of the CFG and a .pdf file as a pictorial representation.

In [None]:
# Build cfg using py2cfg library #
# Reads './sample.py'; the first argument ('example') is presumably the name
# given to the resulting CFG -- TODO confirm against the py2cfg documentation.
cfg = CFGBuilder().build_from_file('example', './sample.py')

# Convert cfg to code/text representation of cfg #
# The next cell reads this output back from 'sample_CFG (Text).plain'.
# NOTE(review): text_rep is not used anywhere else in the visible notebook.
text_rep = cfg.build_visual('sample_CFG (Text)', 'plain')

# Convert cfg to pictorial representation of cfg #
cfg.build_visual('sample_CFG (Visual)', 'pdf')



## Insert CFG into networkx

Translate the CFG to a networkx Directed Graph so it can be manipulated and traversed.

In [None]:
import shlex  # stdlib tokenizer: keeps double-quoted labels together as one field

cfg_graph = nx.DiGraph()

# Open newly created .plain file and read data as raw string #
# The with-statement closes the file on exit, so no explicit close() is needed.
with open('sample_CFG (Text).plain', 'r') as cfg_file:
    cfg_raw = cfg_file.read()

# Parse CFG data into separate lines #
lines = cfg_raw.strip().split("\n")

# Graphviz "plain" records have the following layouts:
#   node name x y width height label style shape color fillcolor
#   edge tail head n x1 y1 .. xn yn [label xl yl] style color
# Labels that contain spaces are double-quoted, so a naive str.split() would cut
# them apart and shift every following field.
for line in lines:

    try:
        parts = shlex.split(line)
    except ValueError:
        # Unbalanced quote on this line: fall back to plain whitespace splitting #
        parts = line.split()

    # Skip blank lines #
    if not parts:
        continue

    # For each node record, extract the Node ID and label for node creation in Directed Graph #
    if parts[0] == "node" and len(parts) > 6:
        node_id = parts[1]
        label = parts[6]
        cfg_graph.add_node(node_id, label=label)

    # Extract source and destination node connections, as well as label for edges in Directed Graph #
    elif parts[0] == "edge" and len(parts) > 3:
        source = parts[1]
        target = parts[2]
        # n control points occupy 2*n fields; whatever follows them is either
        # "label xl yl style color" (labelled edge) or just "style color".
        trailing = parts[4 + 2 * int(parts[3]):]
        label = trailing[0] if len(trailing) >= 5 else ""
        cfg_graph.add_edge(source, target, label=label)

# Print nodes and edges in Directed Graph #
print("Nodes:")
for node, data in cfg_graph.nodes(data=True):
    print(node, data)

print("\nEdges:")
for source, target, data in cfg_graph.edges(data=True):
    print(source, "->", target, data)


# Synonyms and Translation Tests

In [None]:
!python3 -m pip install --upgrade PyMultiDictionary
!pip install langdetect

In [None]:
from langdetect import detect
# Quick check of langdetect: print what detect() returns for a very short
# string and for a full German sentence.
print(detect("abc"))
print(detect("Dies ist ein Testsatz."))

In [None]:
from PyMultiDictionary import MultiDictionary
# Notebook-level MultiDictionary instance; translate_words() in the
# "Incomplete Tests" cell also reads this global.
dictionary = MultiDictionary()

# Request translations of the word 'processed', passing 'en' as its language,
# and print the raw result.
# NOTE(review): the result looks like a collection of (language, translation)
# pairs, judging by the disabled snippet later in the notebook -- confirm.
translated_list=(dictionary.translate('en', 'processed'))
print(translated_list)

In [None]:
!pip install argostranslate

In [None]:
import argostranslate.package
import argostranslate.translate

# Refresh the argostranslate package index, then fetch the list of available packages.
argostranslate.package.update_package_index()
available_packages = argostranslate.package.get_available_packages()

# Language codes collected from packages that have English on at least one side.
en_to=[]
to_en=[]
# pt <-> es
for package in available_packages:
  # Packages that do not involve English at all are only printed.
  if package.from_code!="en" and package.to_code!="en":
    print(str(package.from_code)+" to "+str(package.to_code))
  else:
    # Both codes are recorded regardless of direction, so "en" itself also ends
    # up in these lists (e.g. an en->X package appends "en" to to_en).
    to_en.append(package.from_code)
    en_to.append(package.to_code)

# Disabled snippet (bare string literal, never executed): strips the "en"
# entries and prints languages found in en_to but missing from to_en.
'''
new_en_to = [x for x in en_to if x != "en"]
new_to_en = [x for x in to_en if x != "en"]
print(new_en_to)
for lang in new_en_to:
  if lang not in new_to_en:
    print(lang)
'''


# Variable/Function Parser

Parses a variable or function name into its individual terms based on common naming conventions (see the comments at the top of the `split_word` function definition for the supported styles).

In [None]:
# breaks a variable/function name apart into its component words using naming conventions
def split_word(input: str):
  # Handled: camelCase, PascalCase, snake_case, SCREAMING_SNAKE_CASE, camel_Snake_Case,
  #          Pascal_Snake_Case, kebab-case, COBOL-CASE, Train-Case
  # Not handled (no separators or case changes to split on): flatcase, UPPERCASE
  # approach adapted from: https://www.geeksforgeeks.org/python-split-camelcase-string-to-individual-strings/ (method 5)

  # a single character is already a complete word
  if len(input) == 1:
    return [input]

  # step 1: treat '-' like '_' and cut on it, discarding empty pieces
  tokens = [piece for piece in input.replace('-', '_').split('_') if piece]

  # step 2: cut mixed-case tokens in front of every capital letter; tokens that are
  # all-uppercase, all-lowercase, or one character long are left untouched
  # (digits stay attached to their neighbours, e.g. 13vals)
  idx = 0
  # the list can grow while it is being walked, hence the explicit index
  while idx < len(tokens):
    token = tokens[idx]
    is_mixed_case = not token.isupper() and not token.islower()
    if len(token) != 1 and is_mixed_case:
      marked = ''.join('_' + ch if ch.isupper() else ch for ch in token)
      # swap the token for its pieces, keeping their position in the list
      tokens[idx:idx + 1] = [piece for piece in marked.split('_') if piece]
    idx += 1
  return tokens

Define common abbreviation pairs in variable names for later translation

In [None]:
# abbreviations list created based on common code abbreviations: https://github.com/abbrcode/abbreviations-in-code
# Maps a lowercase abbreviation to its expansion. Expansions may contain several
# space-separated words; swap_abbreviations() splits them into separate list entries.
# Expansions are later fed to language detection/translation, so they must be
# correctly spelled words ('frequency', 'polygon', 'subtraction' were misspelled).
abbreviations={'abbr': 'abbreviation', 'abs': 'absolute', 'acro': 'acronym', 'act': 'actual',
 'sum': 'addition', 'addr': 'address', 'algo': 'algorithm', 'alloc': 'allocation',
 'alt': 'alternative', 'anno': 'annotation', 'app': 'application',
 'acosec': 'arccosecant', 'acos': 'arccosine', 'acot': 'arccotangent',
 'actg': 'arccotangent', 'asec': 'arcsecant', 'asin': 'arcsine', 'atan': 'arctangent',
 'arg': 'argument', 'arr': 'array', 'async': 'asynchronous', 'attr': 'attribute',
 'auth': 'authentication', 'aux': 'auxiliary', 'avg': 'average', 'bg': 'background',
 'bat': 'battery', 'bin': 'binary', 'bool': 'boolean', 'brk': 'break', 'bl': 'breakline',
 'buf': 'buffer', 'buff': 'buffer', 'btn': 'button', 'calc': 'calculator',
 'cb': 'callback', 'cert': 'certificate', 'ch': 'channel', 'char': 'character',
 'chk': 'check', 'csum': 'checksum', 'circ': 'circle', 'cls': 'class', 'clr': 'clear',
 'coll': 'collection', 'col': 'column', 'cmd': 'command', 'com': 'communication',
 'comm': 'common', 'cmp': 'comparison', 'comp': 'component', 'concat': 'concatenation',
 'cond': 'condition', 'cfg': 'configuration', 'conf': 'configuration',
 'config': 'configuration', 'con': 'connection', 'conn': 'connection', 'const': 'constant',
 'cntr': 'container', 'ctx': 'context', 'cont': 'continue', 'ctrl': 'control',
 'conv': 'conversation', 'coord': 'coordinate', 'cpy': 'copy', 'cosec': 'cosecant',
 'cos': 'cosine', 'cot': 'cotangent', 'ctg': 'cotangent', 'cnt': 'count', 'cur': 'current',
 'curr': 'current', 'db': 'database', 'dbg': 'debug', 'dec': 'decrease',
 'decl': 'declaration', 'def': 'definition', 'deg': 'degrees', 'del': 'deletion',
 'dt': 'delta time', 'dep': 'dependency', 'desc': 'description', 'dest': 'destination',
 'dev': 'device', 'diff': 'difference', 'dim': 'dimension', 'dir': 'directory',
 'dis': 'disable', 'disp': 'display', 'div': 'division', 'doc': 'document',
 'docs': 'documentation', 'drv': 'driver', 'dyn': 'dynamic', 'elm': 'element',
 'en': 'enable', 'env': 'environment', 'eq': 'equal', 'err': 'error', 'evt': 'event',
 'exe': 'execution', 'exp': 'exponential', 'expr': 'expression', 'ext': 'extension',
 'fct': 'facet', 'fac': 'factory', 'fig': 'figure', 'fc': 'file chooser',
 'fd': 'file descriptor', 'fp': 'function pointer', 'fr': 'file reader',
 'fs': 'file system', 'fw': 'file writer', 'e.g.': 'for example', 'fmt': 'format',
 'frac': 'fraction', 'freq': 'frequency', 'fn': 'function', 'func': 'function',
 'gen': 'generation', 'geom': 'geometry', 'ge': 'greater or equal',
 'gt': 'greater than', 'hw': 'hardware', 'hdr': 'header', 'hex': 'hexadecimal',
 'hor': 'horizontal', 'id': 'identifier', 'img': 'image', 'impl': 'implementation',
 'imp': 'import', 'inc': 'increase', 'idx': 'index', 'info': 'information',
 'init': 'initialization', 'in': 'input', 'ins': 'insertion', 'inst': 'instance',
 'int': 'integer', 'iface': 'interface', 'intf': 'interface', 'inv': 'inverse',
 'iter': 'iterator', 'km': 'keymap', 'kwd': 'keyword', 'lang': 'language',
 'lat': 'latitude', 'len': 'length', 'le': 'less or equal', 'lt': 'less than',
 'lvl': 'level', 'lib': 'library', 'lnk': 'link', 'll': 'linked list', 'loc': 'location',
 'lon': 'longitude', 'mng': 'manager', 'mat': 'matrix', 'mtx': 'matrix', 'max': 'maximum',
 'mem': 'memory', 'msg': 'message', 'meta': 'metadata', 'mcu': 'microcontroller',
 'mid': 'middle', 'min': 'minimum', 'misc': 'miscellaneous', 'mod': 'modulo',
 'mul': 'multiplication', 'mut': 'mutable', 'nav': 'navigation', 'net': 'network',
 'nl': 'newline', '$...': 'node', 'ne': 'not equal', 'num': 'number of', 'obj': 'object',
 'key': 'object key', 'oct': 'octal', 'os': 'operating system', 'oss': 'open source software',
 'op': 'operation', 'opt': 'option', 'ord': 'order', 'org': 'organization', 'orig': 'origin',
 'out': 'output', 'pkg': 'package', 'param': 'parameter', 'perf': 'performance',
 'pic': 'picture', 'px': 'pixel', 'ptr': 'pointer', 'pol': 'polygon', 'pos': 'position',
 'pwr': 'power', 'pred': 'prediction', 'pref': 'preference', 'prev': 'previous',
 'priv': 'private', 'proc': 'process', 'prod': 'production', 'prof': 'profiler',
 'pub': 'public', 'qry': 'query', 'rad': 'radians', 'rand': 'random', 'rnd': 'random',
 'rng': 'range', 'recv': 'receive', 'rec': 'record', 'rect': 'rectangle', 'ref': 'reference',
 'regex': 'regular expression', 'rgx': 'regular expression', 'rel': 'relation',
 'rem': 'remote', 'rm': 'remove', 'rmv': 'remove', 'repo': 'repository', 'req': 'request',
 'res': 'result', 'ret': 'return', 'rev': 'revision', 'sc': 'script', 'sec': 'secant',
 'sel': 'selection', 'sem': 'semaphore', 'sep': 'separator', 'seq': 'sequence', 'svc': 'service',
 'sess': 'session', 'sin': 'sine', 'sw': 'software', 'sln': 'solution', 'sol': 'solver',
 'src': 'source', 'spec': 'specification', 'sqrt': 'square root', 'std': 'standard',
 'stdio': 'standard input output', 'stmt': 'statement', 'stat': 'statistic', 'str': 'string',
 'sub': 'subtraction', 'sync': 'synchronization', 'td': 'table data', 'th': 'table header',
 'tr': 'table row', 'tan': 'tangent', 'tgt': 'target', 'tmp': 'temporary', 'temp': 'temporary',
 'txt': 'text', 'tmr': 'timer', 'ts': 'timestamp', 'tgl': 'toggle', 'tx': 'transaction',
 'tpe': 'type', 'usr': 'user', 'util': 'utility', 'val': 'value', 'var': 'variable',
 'vec': 'vector', 'ver': 'vertical', 'ws': 'white space', 'win': 'window', 'wiz': 'wizard'}

Swap common abbreviations defined above with their full terms, so that a variable like curr_val parsed as ['curr', 'val'] becomes ['current', 'value'] for later translation.

In [None]:
# returns a new list of lowercased words in which every known abbreviation is expanded
def swap_abbreviations(input: list):
  output = []
  for raw_word in input:
    word = raw_word.lower()
    if word not in abbreviations:
      # not a known abbreviation: keep the lowercased word itself
      output.append(word)
    else:
      # an expansion may be several words (e.g. 'dt' -> 'delta time'); add each one
      output.extend(abbreviations[word].split())
  return output

Run the parsing and abbreviation replacement implemented so far on a list of test cases.

In [None]:
# Sample identifiers, one or more per naming style. "flatcase" and "UPPERCASE" are
# listed as unsupported in split_word's comments; the last three mix styles and digits.
test_var_names=["a", "flatcase", "UPPERCASE", "camelCase", "PascalCase", "snake_case", "SCREAMING_SNAKE_CASE", "camel_Snake_Case",
                "Pascal_Snake_Case", "kebab-case", "COBOL-CASE", "Train-Case", "COMPLEX-case", "ComplEx2-CaseEnv", "123world4Case"]

# For each sample, print the original name followed by its lowercased,
# abbreviation-expanded word list.
for var_name in test_var_names:
  print(var_name)
  words=(split_word(var_name))
  words_full=swap_abbreviations(words)
  print(words_full)

Incomplete Tests

In [None]:
from langdetect import detect
from PyMultiDictionary import MultiDictionary
import random # used for randomly choosing placeholder names

# Sample identifiers run through split_word -> swap_abbreviations -> translate_words
# by the loop at the end of this cell.
test_var_names=["a", "flatcase", "UPPERCASE", "camelCase", "PascalCase", "snake_case", "SCREAMING_SNAKE_CASE", "camel_Snake_Case",
                "Pascal_Snake_Case", "kebab-case", "COBOL-CASE", "Train-Case", "COMPLEX-case", "ComplEx2-CaseEnv", "123world4Case"]


# breaks a variable/function name apart into its component words using naming conventions
def split_word(input: str):
  # Handled: camelCase, PascalCase, snake_case, SCREAMING_SNAKE_CASE, camel_Snake_Case,
  #          Pascal_Snake_Case, kebab-case, COBOL-CASE, Train-Case
  # Not handled (no separators or case changes to split on): flatcase, UPPERCASE
  # approach adapted from: https://www.geeksforgeeks.org/python-split-camelcase-string-to-individual-strings/ (method 5)

  # a single character is already a complete word
  if len(input) == 1:
    return [input]

  # step 1: treat '-' like '_' and cut on it, discarding empty pieces
  tokens = [piece for piece in input.replace('-', '_').split('_') if piece]

  # step 2: cut mixed-case tokens in front of every capital letter; tokens that are
  # all-uppercase, all-lowercase, or one character long are left untouched
  # (digits stay attached to their neighbours, e.g. 13vals)
  idx = 0
  # the list can grow while it is being walked, hence the explicit index
  while idx < len(tokens):
    token = tokens[idx]
    is_mixed_case = not token.isupper() and not token.islower()
    if len(token) != 1 and is_mixed_case:
      marked = ''.join('_' + ch if ch.isupper() else ch for ch in token)
      # swap the token for its pieces, keeping their position in the list
      tokens[idx:idx + 1] = [piece for piece in marked.split('_') if piece]
    idx += 1
  return tokens

# abbreviations list created based on common code abbreviations: https://github.com/abbrcode/abbreviations-in-code
# Maps a lowercase abbreviation to its expansion. Expansions may contain several
# space-separated words; swap_abbreviations() splits them into separate list entries.
# Expansions are later fed to language detection/translation, so they must be
# correctly spelled words ('frequency', 'polygon', 'subtraction' were misspelled).
abbreviations={'abbr': 'abbreviation', 'abs': 'absolute', 'acro': 'acronym', 'act': 'actual',
 'sum': 'addition', 'addr': 'address', 'algo': 'algorithm', 'alloc': 'allocation',
 'alt': 'alternative', 'anno': 'annotation', 'app': 'application',
 'acosec': 'arccosecant', 'acos': 'arccosine', 'acot': 'arccotangent',
 'actg': 'arccotangent', 'asec': 'arcsecant', 'asin': 'arcsine', 'atan': 'arctangent',
 'arg': 'argument', 'arr': 'array', 'async': 'asynchronous', 'attr': 'attribute',
 'auth': 'authentication', 'aux': 'auxiliary', 'avg': 'average', 'bg': 'background',
 'bat': 'battery', 'bin': 'binary', 'bool': 'boolean', 'brk': 'break', 'bl': 'breakline',
 'buf': 'buffer', 'buff': 'buffer', 'btn': 'button', 'calc': 'calculator',
 'cb': 'callback', 'cert': 'certificate', 'ch': 'channel', 'char': 'character',
 'chk': 'check', 'csum': 'checksum', 'circ': 'circle', 'cls': 'class', 'clr': 'clear',
 'coll': 'collection', 'col': 'column', 'cmd': 'command', 'com': 'communication',
 'comm': 'common', 'cmp': 'comparison', 'comp': 'component', 'concat': 'concatenation',
 'cond': 'condition', 'cfg': 'configuration', 'conf': 'configuration',
 'config': 'configuration', 'con': 'connection', 'conn': 'connection', 'const': 'constant',
 'cntr': 'container', 'ctx': 'context', 'cont': 'continue', 'ctrl': 'control',
 'conv': 'conversation', 'coord': 'coordinate', 'cpy': 'copy', 'cosec': 'cosecant',
 'cos': 'cosine', 'cot': 'cotangent', 'ctg': 'cotangent', 'cnt': 'count', 'cur': 'current',
 'curr': 'current', 'db': 'database', 'dbg': 'debug', 'dec': 'decrease',
 'decl': 'declaration', 'def': 'definition', 'deg': 'degrees', 'del': 'deletion',
 'dt': 'delta time', 'dep': 'dependency', 'desc': 'description', 'dest': 'destination',
 'dev': 'device', 'diff': 'difference', 'dim': 'dimension', 'dir': 'directory',
 'dis': 'disable', 'disp': 'display', 'div': 'division', 'doc': 'document',
 'docs': 'documentation', 'drv': 'driver', 'dyn': 'dynamic', 'elm': 'element',
 'en': 'enable', 'env': 'environment', 'eq': 'equal', 'err': 'error', 'evt': 'event',
 'exe': 'execution', 'exp': 'exponential', 'expr': 'expression', 'ext': 'extension',
 'fct': 'facet', 'fac': 'factory', 'fig': 'figure', 'fc': 'file chooser',
 'fd': 'file descriptor', 'fp': 'function pointer', 'fr': 'file reader',
 'fs': 'file system', 'fw': 'file writer', 'e.g.': 'for example', 'fmt': 'format',
 'frac': 'fraction', 'freq': 'frequency', 'fn': 'function', 'func': 'function',
 'gen': 'generation', 'geom': 'geometry', 'ge': 'greater or equal',
 'gt': 'greater than', 'hw': 'hardware', 'hdr': 'header', 'hex': 'hexadecimal',
 'hor': 'horizontal', 'id': 'identifier', 'img': 'image', 'impl': 'implementation',
 'imp': 'import', 'inc': 'increase', 'idx': 'index', 'info': 'information',
 'init': 'initialization', 'in': 'input', 'ins': 'insertion', 'inst': 'instance',
 'int': 'integer', 'iface': 'interface', 'intf': 'interface', 'inv': 'inverse',
 'iter': 'iterator', 'km': 'keymap', 'kwd': 'keyword', 'lang': 'language',
 'lat': 'latitude', 'len': 'length', 'le': 'less or equal', 'lt': 'less than',
 'lvl': 'level', 'lib': 'library', 'lnk': 'link', 'll': 'linked list', 'loc': 'location',
 'lon': 'longitude', 'mng': 'manager', 'mat': 'matrix', 'mtx': 'matrix', 'max': 'maximum',
 'mem': 'memory', 'msg': 'message', 'meta': 'metadata', 'mcu': 'microcontroller',
 'mid': 'middle', 'min': 'minimum', 'misc': 'miscellaneous', 'mod': 'modulo',
 'mul': 'multiplication', 'mut': 'mutable', 'nav': 'navigation', 'net': 'network',
 'nl': 'newline', '$...': 'node', 'ne': 'not equal', 'num': 'number of', 'obj': 'object',
 'key': 'object key', 'oct': 'octal', 'os': 'operating system', 'oss': 'open source software',
 'op': 'operation', 'opt': 'option', 'ord': 'order', 'org': 'organization', 'orig': 'origin',
 'out': 'output', 'pkg': 'package', 'param': 'parameter', 'perf': 'performance',
 'pic': 'picture', 'px': 'pixel', 'ptr': 'pointer', 'pol': 'polygon', 'pos': 'position',
 'pwr': 'power', 'pred': 'prediction', 'pref': 'preference', 'prev': 'previous',
 'priv': 'private', 'proc': 'process', 'prod': 'production', 'prof': 'profiler',
 'pub': 'public', 'qry': 'query', 'rad': 'radians', 'rand': 'random', 'rnd': 'random',
 'rng': 'range', 'recv': 'receive', 'rec': 'record', 'rect': 'rectangle', 'ref': 'reference',
 'regex': 'regular expression', 'rgx': 'regular expression', 'rel': 'relation',
 'rem': 'remote', 'rm': 'remove', 'rmv': 'remove', 'repo': 'repository', 'req': 'request',
 'res': 'result', 'ret': 'return', 'rev': 'revision', 'sc': 'script', 'sec': 'secant',
 'sel': 'selection', 'sem': 'semaphore', 'sep': 'separator', 'seq': 'sequence', 'svc': 'service',
 'sess': 'session', 'sin': 'sine', 'sw': 'software', 'sln': 'solution', 'sol': 'solver',
 'src': 'source', 'spec': 'specification', 'sqrt': 'square root', 'std': 'standard',
 'stdio': 'standard input output', 'stmt': 'statement', 'stat': 'statistic', 'str': 'string',
 'sub': 'subtraction', 'sync': 'synchronization', 'td': 'table data', 'th': 'table header',
 'tr': 'table row', 'tan': 'tangent', 'tgt': 'target', 'tmp': 'temporary', 'temp': 'temporary',
 'txt': 'text', 'tmr': 'timer', 'ts': 'timestamp', 'tgl': 'toggle', 'tx': 'transaction',
 'tpe': 'type', 'usr': 'user', 'util': 'utility', 'val': 'value', 'var': 'variable',
 'vec': 'vector', 'ver': 'vertical', 'ws': 'white space', 'win': 'window', 'wiz': 'wizard'}

# returns a new list of lowercased words in which every known abbreviation is expanded
def swap_abbreviations(input: list):
  output = []
  for raw_word in input:
    word = raw_word.lower()
    if word not in abbreviations:
      # not a known abbreviation: keep the lowercased word itself
      output.append(word)
    else:
      # an expansion may be several words (e.g. 'dt' -> 'delta time'); add each one
      output.extend(abbreviations[word].split())
  return output

# language codes that translate_words() may detect and pick as a target,
# grouped as Latin-script vs. non-Latin-script
latin_languages = ["de", "en", "es", "fr", "it", "jv", "ms", "pl", "pt", "ro", "tr"]
nonlatin_languages = ["bn", "hi", "ja", "ko", "mr", "ru", "ta", "uk", "zh"]
# combined list: Latin codes first, then non-Latin codes
languages_inuse = [*latin_languages, *nonlatin_languages]

def translate_words(input: list):
  # Builds a new list from the given words:
  #   - single letters are swapped for a randomly chosen placeholder
  #   - other single characters (e.g. digits) are kept
  #   - longer words are passed to dictionary.translate when their detected
  #     language is in languages_inuse, and kept unchanged otherwise
  # Relies on the notebook-level names random, detect, languages_inuse and dictionary.
  # NOTE(review): `dictionary` is not created in this cell; it comes from the earlier
  # MultiDictionary cell -- confirm that cell has been run first.
  placeholders = list('abcdefghijklmnopqrstuvwxyz')
  output = []
  for word in input:
    if len(word) == 1:
      if not word.isalpha():
        # single non-letter character: leave as is
        output.append(word)
        continue
      # take a placeholder out of the pool and put back a longer variant of it
      # (the placeholder plus another random pool entry), so the same placeholder
      # is not handed out twice within one call
      picked = random.choice(placeholders)
      placeholders.remove(picked)
      placeholders.append(picked + random.choice(placeholders))
      output.append(picked)
      continue

    # longer word: try to detect its language
    try:
      in_lang = detect(word)
    except Exception:
      # detection failed (e.g. numbers, not a real word): leave the word as is
      output.append(word)
      continue

    if in_lang not in languages_inuse:
      # detected language is not one we translate from: leave the word as is
      output.append(word)
      continue

    # translate into a randomly chosen target language
    # NOTE(review): whatever dictionary.translate returns is appended unchanged --
    # confirm it is a single string and not a collection of translations.
    out_lang = random.choice(languages_inuse)
    output.append(dictionary.translate(in_lang, word, out_lang))
  return output


# Disabled scratch code (bare string literal, never executed): language-detection
# check and lookup of the 'es' entry in the dictionary.translate result.
'''
from langdetect import detect
print(detect("War doesn't show who's right, just who's left."))
print(detect("Dies ist ein Testsatz."))

from PyMultiDictionary import MultiDictionary
dictionary = MultiDictionary()

translated_list=(dictionary.translate('en', 'processed'))

key = 'es' # desired language
value = next((v for k, v in translated_list if k == key), None)
print(str(type(value))+str(value))
'''

# Disabled, unfinished earlier draft (bare string literal, never executed) of the
# word-replacement logic; it stops mid-function after the abbreviation check.
'''
def change_words(input: list):
    alphabet=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    output=str()
    for i in range(len(input)):
      curr=input[i]
      # if word is 1 char
      if len(curr)==1:
        # if word is a letter (so single numbers aren't changed)
        if curr.isalpha():
          # replace single letter with another
          curr_letter=random.choice(alphabet)
          alphabet.remove(curr_letter)
          alphabet.append(str(curr_letter+str(random.choice(alphabet))))
          output+=str(' '+curr_letter+' ')
      else:
        # if word is a common abbreviation
        if curr in abbreviations:
'''


# Run the full pipeline on every sample name: split into words, expand
# abbreviations, translate, then print the original name and the result.
for var_name in test_var_names:
  print(var_name)
  words=(split_word(var_name))
  words_full=swap_abbreviations(words)
  translated_words=translate_words(words_full)
  print(translated_words)




