In [1096]:
import ast
def print_ast(src):
    """Pretty-print the structure of an AST node using a 4-space indent."""
    rendered = ast.dump(src, indent=4)
    print(rendered)
def print_code(src):
    """Print the Python source regenerated from an AST.

    Missing line/column information is filled in first via
    ``ast.fix_missing_locations`` (which updates ``src`` itself).
    """
    located = ast.fix_missing_locations(src)
    source_text = ast.unparse(located)
    print(source_text)

In [1097]:
def simple_function(x):  # demo target: its source is parsed into an AST by a later cell
    y = 3 * x  # triple the argument
    print(y)  # prints the result; nothing is returned

In [1098]:
import inspect

In [1099]:
tree = ast.parse(inspect.getsource(simple_function))

In [1100]:
print_ast(tree)

Module(
    body=[
        FunctionDef(
            name='simple_function',
            args=arguments(
                posonlyargs=[],
                args=[
                    arg(arg='x')],
                kwonlyargs=[],
                kw_defaults=[],
                defaults=[]),
            body=[
                Assign(
                    targets=[
                        Name(id='y', ctx=Store())],
                    value=BinOp(
                        left=Constant(value=3),
                        op=Mult(),
                        right=Name(id='x', ctx=Load()))),
                Expr(
                    value=Call(
                        func=Name(id='print', ctx=Load()),
                        args=[
                            Name(id='y', ctx=Load())],
                        keywords=[]))],
            decorator_list=[])],
    type_ignores=[])


In [1101]:
print_ast(tree.body[0])

FunctionDef(
    name='simple_function',
    args=arguments(
        posonlyargs=[],
        args=[
            arg(arg='x')],
        kwonlyargs=[],
        kw_defaults=[],
        defaults=[]),
    body=[
        Assign(
            targets=[
                Name(id='y', ctx=Store())],
            value=BinOp(
                left=Constant(value=3),
                op=Mult(),
                right=Name(id='x', ctx=Load()))),
        Expr(
            value=Call(
                func=Name(id='print', ctx=Load()),
                args=[
                    Name(id='y', ctx=Load())],
                keywords=[]))],
    decorator_list=[])


In [1102]:
import random

We create a `PythonMutator` class that ties the other mutator classes together: it provides helper methods that call each mutator's methods. We can later use it to add probabilities etc. to each mutation.

In [1103]:
class PythonMutator(ast.NodeTransformer):
    """Front end that delegates to the specialised mutator classes.

    Each helper runs one specialised mutator on a tree and appends whatever
    mutation records that mutator reports to ``self.mutations``.
    """

    def __init__(self):
        self.reverse = None   # not read anywhere in this part of the notebook
        self.mutations = []   # records collected from every helper call

    def expand_constants(self, src, trials=3):
        """Run ``ExprMutator.modify_value`` on ``src`` with the given ``trials`` and return the tree."""
        mutated, records = ExprMutator().modify_value(src, trials)
        self.mutations.extend(records)
        return mutated

    def swap_numbers(self, src):
        """Run ``ExprMutator.commute_value`` on ``src`` and return the visited tree."""
        mutated, records = ExprMutator().commute_value(src)
        self.mutations.extend(records)
        return mutated

Now we need to define `modify_value`, which replaces a given numeric constant with an equivalent arithmetic expression, and `commute_value` (the method behind `swap_numbers`), which swaps the children of a `+` or `*` node.

In [1104]:
# (symbol, AST operator node) pairs used when a numeric constant is rewritten as `left <op> right`.
# The order is significant: visit_Constant pairs index `op` with index `3 - op`, so every entry's
# inverse operation sits at the mirrored position (+ <-> -, and * <-> // for ints or / for floats).
op_map_int = [("+", ast.Add()), ("*", ast.Mult()), ("//", ast.FloorDiv()), ("-", ast.Sub())]
op_map_float = [("+", ast.Add()), ("*", ast.Mult()), ("/", ast.Div()), ("-", ast.Sub())]

In [1105]:
class ExprMutator(ast.NodeTransformer):
    """Mutates arithmetic expressions inside an AST.

    Two modes exist: ``EXPAND`` (replace numeric constants with equivalent
    arithmetic) and ``COMMUTE`` (swap operands). The node visitors that do the
    actual work are added to this class in later cells.
    """

    EXPAND = 1
    COMMUTE = 2
    # Consecutive visits that expand nothing before modify_value gives up.
    # Without a cap, a tree with no expandable constant would never finish.
    MAX_MISSES = 1000

    def __init__(self):
        self.transform = False   # armed before each visit; a visitor clears it once it has expanded a constant
        self.trials = 0          # expansions still to perform
        self.mode = self.EXPAND
        self.mutations = []      # mutation records appended by the visitors

    def modify_value(self, src, trials):
        """Expand up to ``trials`` constants found in randomly chosen statements of ``src.body``.

        Returns ``(src, mutations)``; ``src`` is modified in place. Fewer than
        ``trials`` expansions happen when ``MAX_MISSES`` consecutive visits
        find nothing to expand.
        """
        self.mode = self.EXPAND
        self.trials = trials
        node = self._modify_value(src)
        return (node, self.mutations)

    def _modify_value(self, src):
        """Visit random top-level statements until the trial budget is used up."""
        misses = 0
        # Iterative on purpose: the recursive form raised RecursionError whenever
        # visits kept missing, and IndexError on an empty body.
        while self.trials > 0 and src.body and misses < self.MAX_MISSES:
            remaining = self.trials
            self.transform = True
            self.visit(random.choice(src.body))
            # A visitor decrements self.trials when it expands something.
            misses = 0 if self.trials < remaining else misses + 1
        self.transform = False
        return src

We need the mode so that the visitor knows whether it is expanding a constant along a random path or swapping the children of an operator. `trials` lets us control how many numbers we replace with expressions.

In [1106]:
class ExprMutator(ExprMutator):
    def commute_value(self, n):
        """Switch to COMMUTE mode, visit ``n`` and return ``(visited tree, mutations)``."""
        self.mode = self.COMMUTE
        visited = self.visit(n)
        return (visited, self.mutations)

Now come the real functions. The visits to Constant or BinOp nodes are what will truly handle the functionality.

In [1107]:
from copy import deepcopy

In [1108]:
class ExprMutator(ExprMutator):
    """Node visitors that carry out the EXPAND and COMMUTE mutations."""

    # Arithmetic behind each operator symbol listed in op_map_int / op_map_float.
    _ARITHMETIC = {
        "+": lambda a, b: a + b,
        "-": lambda a, b: a - b,
        "*": lambda a, b: a * b,
        "/": lambda a, b: a / b,
        "//": lambda a, b: a // b,
    }

    def visit_Constant(self, src):
        """In EXPAND mode, replace a numeric constant ``v`` with ``(v <inverse> k) <op> k``.

        Only acts while ``self.transform`` is set; on success it clears the flag,
        decrements ``self.trials`` and records ``(original, replacement)`` in
        ``self.mutations``. Every other constant is returned unchanged.
        """
        value = src.value
        # bool is a subclass of int, but rewriting True/False as arithmetic would
        # silently turn them into plain integers.
        is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
        if not (self.transform and self.mode == self.EXPAND and is_number):
            return src
        if value != value:
            return src  # NaN never compares equal, so no rewrite could be validated

        op_map = op_map_int if isinstance(value, int) else op_map_float
        while True:
            op = random.randint(0, 3)
            if isinstance(value, int):
                other = random.randint(-10000, 10000)
            else:
                other = 500 * random.randint(1, 10) * (random.random() + random.random()) * (1 - 2 * random.randint(0, 1))

            undo = self._ARITHMETIC[op_map[3 - op][0]]  # inverse operation, builds the left operand
            redo = self._ARITHMETIC[op_map[op][0]]      # operation stored in the new BinOp
            try:
                left_value = undo(value, other)
                # Keep the candidate only if it reproduces the constant exactly
                # (floor division and float rounding often do not).
                if redo(left_value, other) == value:
                    break
            except ZeroDivisionError:
                continue

        self.trials -= 1
        self.transform = False
        node = ast.fix_missing_locations(
            ast.BinOp(left=ast.Constant(value=left_value), op=op_map[op][1], right=ast.Constant(value=other))
        )
        self.mutations.append(deepcopy((src, node)))
        return node

    def _is_sequence_like(self, node):
        """True when ``node`` is visibly a str/bytes/list/tuple value, for which ``+`` is order-sensitive."""
        if isinstance(node, ast.Constant):
            return isinstance(node.value, (str, bytes))
        if isinstance(node, (ast.JoinedStr, ast.List, ast.Tuple, ast.ListComp)):
            return True
        if isinstance(node, ast.BinOp):
            return self._is_sequence_like(node.left) or self._is_sequence_like(node.right)
        return False

    def visit_BinOp(self, src):
        """EXPAND: descend into one randomly chosen operand. COMMUTE: swap operands of ``+`` / ``*``."""
        if self.mode == self.EXPAND:
            if random.randint(1, 2) == 1:
                src.left = self.visit(src.left)
            else:
                src.right = self.visit(src.right)
            return src

        if self.mode == self.COMMUTE:
            swappable = isinstance(src.op, ast.Mult) or (
                isinstance(src.op, ast.Add)
                # "a" + b is not b + "a": leave visible concatenations alone.
                # NOTE(review): operands whose type is only known at run time cannot be checked here.
                and not (self._is_sequence_like(src.left) or self._is_sequence_like(src.right))
            )
            if swappable:
                mut = deepcopy(src)
                self.mutations.append((mut, ast.BinOp(left=mut.right, op=mut.op, right=mut.left)))
                src.left, src.right = src.right, src.left

            return self.generic_visit(src)

        # Unknown mode: keep the node (falling through would return None and delete it).
        return src

In [1109]:
print_ast(ExprMutator().modify_value(ast.Module(body=[ast.Expr(value=ast.Constant(value=1))]), trials=2)[0])

Module(
    body=[
        Expr(
            value=BinOp(
                left=BinOp(
                    left=Constant(value=-3688),
                    op=Add(),
                    right=Constant(value=-1839)),
                op=FloorDiv(),
                right=Constant(value=-5527)))])


In [1110]:
print_code(PythonMutator().expand_constants(ast.parse("x+1")))

x + (-963 - 3589 + 5570 + -1017)


In [1111]:
print_code(PythonMutator().swap_numbers(ast.parse("x + 0.00042426813746287653 * (-5.193317422434368 * 1257 + 8885)")))

(8885 + 1257 * -5.193317422434368) * 0.00042426813746287653 + x


In [1112]:
tree_two = deepcopy(tree)
for i in range(5):
    if random.randint(1, 5) == 1: PythonMutator().swap_numbers(tree_two)
    else: PythonMutator().expand_constants(tree_two)
new_code = ast.unparse(tree_two)
print(new_code)

def simple_function(x):
    y = x * (81366648 // (1867 + -5783) // (45777886 // 4774 + (-9217 + -7298)))
    print(y)


In [1113]:
simple_function(456)

1368


In [1114]:
exec(new_code)

In [1115]:
simple_function(456)

1368


Clearly the output remains the same in spite of our changes. Next, we look into transforming range-based for loops into while loops.

In [1116]:
for_tree = ast.parse('''for i in range(10, 1, -2):
                        print(i)''')
print_ast(for_tree)

Module(
    body=[
        For(
            target=Name(id='i', ctx=Store()),
            iter=Call(
                func=Name(id='range', ctx=Load()),
                args=[
                    Constant(value=10),
                    Constant(value=1),
                    UnaryOp(
                        op=USub(),
                        operand=Constant(value=2))],
                keywords=[]),
            body=[
                Expr(
                    value=Call(
                        func=Name(id='print', ctx=Load()),
                        args=[
                            Name(id='i', ctx=Load())],
                        keywords=[]))],
            orelse=[])],
    type_ignores=[])


In [1117]:
while_tree = ast.parse('''
i = 10
while i > 1:
    print(i)
    i += -2''')
print_ast(while_tree)

Module(
    body=[
        Assign(
            targets=[
                Name(id='i', ctx=Store())],
            value=Constant(value=10)),
        While(
            test=Compare(
                left=Name(id='i', ctx=Load()),
                ops=[
                    Gt()],
                comparators=[
                    Constant(value=1)]),
            body=[
                Expr(
                    value=Call(
                        func=Name(id='print', ctx=Load()),
                        args=[
                            Name(id='i', ctx=Load())],
                        keywords=[])),
                AugAssign(
                    target=Name(id='i', ctx=Store()),
                    op=Add(),
                    value=UnaryOp(
                        op=USub(),
                        operand=Constant(value=2)))],
            orelse=[])],
    type_ignores=[])


In [1118]:
src = for_tree.body[0]
def analyze_for(node):
    """Split ``for <target> in range(...)`` into the parts of an equivalent while loop.

    Args:
        node: an ``ast.For`` node.

    Returns:
        ``[start, comparison, stop, step]`` where ``start``, ``stop`` and ``step``
        are AST expressions and ``comparison`` is ``ast.Lt()`` for a positive step
        or ``ast.Gt()`` for a negative one.

    Raises:
        ValueError: if the loop does not iterate over a plain ``range(...)`` call
            with one to three positional arguments, or if the step is not a
            non-zero integer literal (its sign must be known to pick the comparison).
    """
    call = node.iter
    is_plain_range = (
        isinstance(call, ast.Call)
        and isinstance(call.func, ast.Name)
        and call.func.id == "range"
        and not call.keywords
    )
    if not is_plain_range:
        raise ValueError("loop does not iterate over a plain range(...) call")

    args = call.args
    if not 1 <= len(args) <= 3 or any(isinstance(arg, ast.Starred) for arg in args):
        raise ValueError("range(...) must take one to three positional arguments")

    if len(args) == 1:
        return [ast.Constant(value=0), ast.Lt(), args[0], ast.Constant(value=1)]
    if len(args) == 2:
        return [args[0], ast.Lt(), args[1], ast.Constant(value=1)]

    # literal_eval only accepts literals (including a signed number such as -2),
    # so nothing from the analysed program is ever executed here.
    try:
        step = ast.literal_eval(args[2])
    except Exception as exc:
        raise ValueError("range step must be an integer literal") from exc
    if isinstance(step, bool) or not isinstance(step, int) or step == 0:
        raise ValueError("range step must be a non-zero integer literal")

    if step < 0:
        return [args[0], ast.Gt(), args[1], args[2]]
    return [args[0], ast.Lt(), args[1], args[2]]
        
while_args = analyze_for(src)
print_ast(
    ast.Assign(targets=[src.target], value=while_args[0])
    ) 
print_ast(
    ast.While(test=ast.Compare(left=ast.Name(id=src.target.id, ctx=ast.Load()), ops=[while_args[1]], comparators=[while_args[2]]), \
              body=src.body + [ast.AugAssign(target=src.target, op=ast.Add(), value=while_args[3])], orelse=src.orelse)
)

Assign(
    targets=[
        Name(id='i', ctx=Store())],
    value=Constant(value=10))
While(
    test=Compare(
        left=Name(id='i', ctx=Load()),
        ops=[
            Gt()],
        comparators=[
            Constant(value=1)]),
    body=[
        Expr(
            value=Call(
                func=Name(id='print', ctx=Load()),
                args=[
                    Name(id='i', ctx=Load())],
                keywords=[])),
        AugAssign(
            target=Name(id='i', ctx=Store()),
            op=Add(),
            value=UnaryOp(
                op=USub(),
                operand=Constant(value=2)))],
    orelse=[])


In [1119]:
class PythonMutator(PythonMutator):
    def transform_for(self, src):
        """Run ``ForMutator.transform_for`` on ``src``, collect its mutation records and return the result."""
        rewritten, records = ForMutator().transform_for(src)
        self.mutations.extend(records)
        return rewritten

In [1120]:
print_ast(ast.parse("for i in delays: print(i)"))
print_ast(ast.parse("for i in range(2): print(i)"))

Module(
    body=[
        For(
            target=Name(id='i', ctx=Store()),
            iter=Name(id='delays', ctx=Load()),
            body=[
                Expr(
                    value=Call(
                        func=Name(id='print', ctx=Load()),
                        args=[
                            Name(id='i', ctx=Load())],
                        keywords=[]))],
            orelse=[])],
    type_ignores=[])
Module(
    body=[
        For(
            target=Name(id='i', ctx=Store()),
            iter=Call(
                func=Name(id='range', ctx=Load()),
                args=[
                    Constant(value=2)],
                keywords=[]),
            body=[
                Expr(
                    value=Call(
                        func=Name(id='print', ctx=Load()),
                        args=[
                            Name(id='i', ctx=Load())],
                        keywords=[]))],
            orelse=[])],
    type_ignores=[])


In [1121]:
class ForMutator(ast.NodeTransformer):
    """Rewrites ``for <name> in range(...)`` loops as an assignment followed by a ``while`` loop.

    Known limitations of the rewrite (not checked here): the stop expression is
    re-evaluated on every iteration, a body that reassigns the loop variable
    changes the iteration, and the loop variable ends one step past the last
    value instead of on it.
    """

    def __init__(self):
        self.mutations = []  # (original For node, [Assign, While]) pairs, as deep copies

    def transform_for(self, src):
        """Visit ``src`` and return ``(visited tree, mutations)``."""
        return (self.visit(src), self.mutations)

    @staticmethod
    def _has_own_continue(stmts):
        """True if ``stmts`` holds a ``continue`` that belongs to the loop owning them."""
        pending = list(stmts)
        while pending:
            node = pending.pop()
            if isinstance(node, ast.Continue):
                return True
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef, ast.Lambda)):
                continue  # a nested scope cannot continue the outer loop
            if isinstance(node, (ast.For, ast.AsyncFor, ast.While)):
                # The inner loop owns a continue in its body, but one in its
                # else-clause still targets the enclosing loop.
                pending.extend(node.orelse)
                continue
            pending.extend(ast.iter_child_nodes(node))
        return False

    def visit_For(self, src):
        """Return ``[Assign, While]`` for a convertible loop, otherwise the loop itself."""
        # Convert nested loops first; they would otherwise never be visited.
        self.generic_visit(src)

        if not isinstance(src.target, ast.Name):
            return src  # tuple/attribute targets cannot serve as a simple counter
        if self._has_own_continue(src.body):
            return src  # `continue` would skip the appended increment and never terminate
        try:
            while_args = analyze_for(src)
        except Exception:
            return src  # not a range loop that can be analysed

        start, comparison, stop, step = while_args
        name = src.target.id
        increment = ast.AugAssign(target=ast.Name(id=name, ctx=ast.Store()), op=ast.Add(), value=step)
        node = [
            ast.Assign(targets=[src.target], value=start),
            ast.While(
                test=ast.Compare(left=ast.Name(id=name, ctx=ast.Load()), ops=[comparison], comparators=[stop]),
                body=src.body + [increment],
                orelse=src.orelse,
            ),
        ]
        self.mutations.append((deepcopy(src), deepcopy(node)))
        return node

In [1122]:
for_tree_two = deepcopy(for_tree)
print(ast.unparse(for_tree))
print("====")
print_code(PythonMutator().transform_for(for_tree_two))

for i in range(10, 1, -2):
    print(i)
====
i = 10
while i > 1:
    print(i)
    i += -2


That takes care of for-loops based on ranges. <b>What about iterators?</b>

In [1123]:
print_ast(ast.parse(
'''
L = [1, 4, "hello"]
for i in [len(str(x)) for x in L]:
    print(i)
'''
))

Module(
    body=[
        Assign(
            targets=[
                Name(id='L', ctx=Store())],
            value=List(
                elts=[
                    Constant(value=1),
                    Constant(value=4),
                    Constant(value='hello')],
                ctx=Load())),
        For(
            target=Name(id='i', ctx=Store()),
            iter=ListComp(
                elt=Call(
                    func=Name(id='len', ctx=Load()),
                    args=[
                        Call(
                            func=Name(id='str', ctx=Load()),
                            args=[
                                Name(id='x', ctx=Load())],
                            keywords=[])],
                    keywords=[]),
                generators=[
                    comprehension(
                        target=Name(id='x', ctx=Store()),
                        iter=Name(id='L', ctx=Load()),
                        ifs=[],
                        is_async=0)

In [1124]:
print_ast(ast.parse('x,y = 3 + 5, 3 + 5'))

Module(
    body=[
        Assign(
            targets=[
                Tuple(
                    elts=[
                        Name(id='x', ctx=Store()),
                        Name(id='y', ctx=Store())],
                    ctx=Store())],
            value=Tuple(
                elts=[
                    BinOp(
                        left=Constant(value=3),
                        op=Add(),
                        right=Constant(value=5)),
                    BinOp(
                        left=Constant(value=3),
                        op=Add(),
                        right=Constant(value=5))],
                ctx=Load()))],
    type_ignores=[])


In [1125]:
assgn_tree = ast.parse('''
tmp1, tmp2 = 3 + 5, 3 + 5
x, y = tmp1, tmp2''')

print_ast(assgn_tree)

Module(
    body=[
        Assign(
            targets=[
                Tuple(
                    elts=[
                        Name(id='tmp1', ctx=Store()),
                        Name(id='tmp2', ctx=Store())],
                    ctx=Store())],
            value=Tuple(
                elts=[
                    BinOp(
                        left=Constant(value=3),
                        op=Add(),
                        right=Constant(value=5)),
                    BinOp(
                        left=Constant(value=3),
                        op=Add(),
                        right=Constant(value=5))],
                ctx=Load())),
        Assign(
            targets=[
                Tuple(
                    elts=[
                        Name(id='x', ctx=Store()),
                        Name(id='y', ctx=Store())],
                    ctx=Store())],
            value=Tuple(
                elts=[
                    Name(id='tmp1', ctx=Load()),
                    Name(id='

In [1126]:
a1 = assgn_tree.body[0].value.elts[0]
a2 = assgn_tree.body[0].value.elts[1]
print_ast(a1)
print_ast(a2)
a1 == a2

BinOp(
    left=Constant(value=3),
    op=Add(),
    right=Constant(value=5))
BinOp(
    left=Constant(value=3),
    op=Add(),
    right=Constant(value=5))


False

In [1127]:
test_tree = ast.parse(
'''
def hello(x):
    print("Hello", x)
L = [1, 3 + 4 * 5, "hello"]
for i in [len(str(x)) for x in L]:
    print(i)
'''
)

def func(src):
    """Prototype of the sub-expression picker.

    Walks down randomly chosen ``body`` entries to a leaf statement, narrows to
    its ``value`` (and to one side when that has a ``left``), then draws random
    sub-nodes until one is acceptable. Prints the narrowed node and the leaf
    statement, and returns the source text of the chosen sub-node.
    """
    while hasattr(src, "body"):
        parent = src = random.choice(src.body)
        src = getattr(src, "value", src)
        if hasattr(src, "left"):
            src = src.left if random.randint(0, 1) else src.right

    print_code(src)
    while True:
        child = random.choice(list(ast.walk(src)))
        # Reject context markers and calls, nodes that unparse to nothing
        # (e.g. operators), and the statement itself.
        if isinstance(child, (ast.Load, ast.Store, ast.Call)):
            continue
        if not ast.unparse(child):
            continue
        if child == parent:
            continue
        break

    print_code(parent)
    return ast.unparse(child)

    
func(test_tree)

[1, 3 + 4 * 5, 'hello']
L = [1, 3 + 4 * 5, 'hello']


"[1, 3 + 4 * 5, 'hello']"

In [1128]:
import string

In [1129]:
class AssignMutator(ast.NodeTransformer):
    """Hoists a randomly chosen sub-expression into a fresh temporary variable.

    ``stmt(... expr ...)`` becomes ``tmp = expr`` followed by ``stmt(... tmp ...)``.

    Limitations that are not checked: hoisting evaluates the expression earlier
    than before (e.g. out of a lambda body or past a short-circuit), which can
    matter when evaluation has side effects or depends on later state.
    """

    # Random descents tried before concluding that nothing can be hoisted.
    MAX_ATTEMPTS = 100

    # Never hoisted: calls (excluded by the original selection rule as well) and
    # node kinds that are not valid as the right-hand side of an assignment.
    _NEVER_HOISTED = (ast.Call, ast.Starred, ast.Slice, ast.FormattedValue)

    def __init__(self):
        self.mutations = []  # (original statement, [new Assign, rewritten statement]) records
        self.mode = 0        # 0: plain traversal, 1: replace the child, 2: insert the assignment
        self.par = None      # deep copy of the statement before it was changed
        self.child = None    # deep copy of the hoisted expression
        self.pair = None     # (replacement, node to look for) for the current mode

    def transform_assign(self, src):
        """Hoist one sub-expression of ``src`` in place.

        Returns ``(src, mutations)``. ``src`` is returned unchanged when no
        hoistable expression is found within ``MAX_ATTEMPTS`` random descents.
        """
        parent = child = None
        for _ in range(self.MAX_ATTEMPTS):
            parent, scope = self._random_statement(src)
            if parent is None:
                break  # src has no statements at all
            candidates = self._hoistable(scope)
            if candidates:
                child = random.choice(candidates)
                break
        if child is None:
            return (src, self.mutations)

        self.par = deepcopy(parent)
        self.child = deepcopy(child)

        var = ''.join(random.choices(string.ascii_letters + string.digits, k=random.randint(10, 20)))
        var = var if not var[0].isdigit() else '_' + var  # identifiers cannot start with a digit

        new_target = ast.Name(id=var, ctx=ast.Store())
        # Pass 1: swap the chosen expression for a load of the temporary.
        self.pair = (ast.Name(id=new_target.id, ctx=ast.Load()), child)
        self.mode = 1
        self.visit(src)
        # Pass 2: put `tmp = <expr>` directly in front of the owning statement.
        self.pair = (ast.Assign(targets=[new_target], value=self.child), parent)
        self.mode = 2
        self.visit(src)
        self.mode = 0

        return (src, self.mutations)

    @staticmethod
    def _random_statement(src):
        """Descend through random ``body`` entries; return ``(statement, search scope)``.

        The scope is the statement's ``value`` when it has one, narrowed to one
        random operand when that value is a ``BinOp``. Returns ``(None, src)``
        when ``src`` has no statement list.
        """
        node, parent = src, None
        # Only statement lists count: Lambda and IfExp also have a `body`, but it is an expression.
        while isinstance(getattr(node, "body", None), list) and node.body:
            node = parent = random.choice(node.body)
            value = getattr(node, "value", None)
            if isinstance(value, ast.AST):  # bare `return` / annotation-only statements have value None
                node = value
            if isinstance(node, ast.BinOp):  # Compare has `left` too, but no `right`
                node = node.left if random.randint(0, 1) else node.right
        return parent, node

    @staticmethod
    def _bound_names(root):
        """Names bound inside ``root`` by comprehensions, lambdas or ``:=``."""
        names = set()
        for node in ast.walk(root):
            if isinstance(node, ast.comprehension):
                names.update(t.id for t in ast.walk(node.target) if isinstance(t, ast.Name))
            elif isinstance(node, ast.Lambda):
                names.update(a.arg for a in ast.walk(node.args) if isinstance(a, ast.arg))
            elif isinstance(node, ast.NamedExpr):
                names.add(node.target.id)
        return names

    def _hoistable(self, scope):
        """Sub-expressions of ``scope`` that can safely become ``tmp = <expr>``."""
        nodes = list(ast.walk(scope))
        if any(isinstance(node, ast.stmt) for node in nodes[1:]):
            return []  # compound statement: hoisting past nested statements is not safe

        # Pieces of an f-string must stay where they are.
        pinned = set()
        for node in nodes:
            if isinstance(node, ast.JoinedStr):
                pinned.update(id(part) for part in node.values)
            elif isinstance(node, ast.FormattedValue) and node.format_spec is not None:
                pinned.add(id(node.format_spec))

        scoped = self._bound_names(scope)
        picks = []
        for node in nodes:
            if not isinstance(node, ast.expr) or isinstance(node, self._NEVER_HOISTED):
                continue
            if id(node) in pinned:
                continue
            if not isinstance(getattr(node, "ctx", None), (ast.Load, type(None))):
                continue  # assignment / deletion targets
            if any(isinstance(elt, ast.Slice) for elt in getattr(node, "elts", ())):
                continue  # a tuple such as `1:2, 3` is only valid inside a subscript
            if scoped:
                # Names bound by an enclosing comprehension/lambda do not exist
                # at the point where the assignment would be inserted.
                outer = scoped - self._bound_names(node)
                if any(isinstance(sub, ast.Name) and sub.id in outer for sub in ast.walk(node)):
                    continue
            picks.append(node)
        return picks

    def visit(self, src):
        """Apply the replacement for the current mode; otherwise traverse normally."""
        if self.mode == 1 and src is self.pair[1]:
            self.mode = 0  # replace exactly once
            return self.pair[0]
        if self.mode == 2 and src is self.pair[1]:
            node = list(self.pair)  # [Assign, original statement], spliced into the body
            self.mutations.append((self.par, deepcopy(node)))
            return node
        return self.generic_visit(src)

In [1130]:
test_tree = ast.parse(
'''
def hello(x):
    print("Hello", x)
L = [1, 3 + 4 * 5, "hello"]
for i in [len(str(x)) for x in L]:
    print(i)
'''
)

AssignMutator().transform_assign(test_tree)[0]
print_code(test_tree)

def hello(x):
    print('Hello', x)
Ody2m0cxVURiJhbz0T = [1, 3 + 4 * 5, 'hello']
L = Ody2m0cxVURiJhbz0T
for i in [len(str(x)) for x in L]:
    print(i)


In [1131]:
print_ast(ast.parse('x,y = 3 + 5, 5 + 3'))

Module(
    body=[
        Assign(
            targets=[
                Tuple(
                    elts=[
                        Name(id='x', ctx=Store()),
                        Name(id='y', ctx=Store())],
                    ctx=Store())],
            value=Tuple(
                elts=[
                    BinOp(
                        left=Constant(value=3),
                        op=Add(),
                        right=Constant(value=5)),
                    BinOp(
                        left=Constant(value=5),
                        op=Add(),
                        right=Constant(value=3))],
                ctx=Load()))],
    type_ignores=[])


In [1132]:
print_ast(AssignMutator().transform_assign(ast.parse('x,y = 3 + 5, 5 + 3'))[0])

Module(
    body=[
        Assign(
            targets=[
                Name(id='GfZaLgpE2zzlZRlkWVZ', ctx=Store())],
            value=Constant(value=5)),
        Assign(
            targets=[
                Tuple(
                    elts=[
                        Name(id='x', ctx=Store()),
                        Name(id='y', ctx=Store())],
                    ctx=Store())],
            value=Tuple(
                elts=[
                    BinOp(
                        left=Constant(value=3),
                        op=Add(),
                        right=Name(id='GfZaLgpE2zzlZRlkWVZ', ctx=Load())),
                    BinOp(
                        left=Constant(value=5),
                        op=Add(),
                        right=Constant(value=3))],
                ctx=Load()))],
    type_ignores=[])


In [1133]:
print_ast(ast.parse('x=y=5'))

Module(
    body=[
        Assign(
            targets=[
                Name(id='x', ctx=Store()),
                Name(id='y', ctx=Store())],
            value=Constant(value=5))],
    type_ignores=[])


In [1134]:
print_ast(AssignMutator().transform_assign(ast.parse('x=y=5'))[0])

Module(
    body=[
        Assign(
            targets=[
                Name(id='_7tMo7vthvG', ctx=Store())],
            value=Constant(value=5)),
        Assign(
            targets=[
                Name(id='x', ctx=Store()),
                Name(id='y', ctx=Store())],
            value=Name(id='_7tMo7vthvG', ctx=Load()))],
    type_ignores=[])


In [1135]:
print_code(AssignMutator().transform_assign(ast.parse('x,y = 3 + 5, 5 + 3'))[0])

_6YBxyouGE5ICiesX6Jv = 5 + 3
x, y = (3 + 5, _6YBxyouGE5ICiesX6Jv)


In [1136]:
print_code(AssignMutator().transform_assign(ast.parse('x=y=5'))[0])

c6UVIiyPxx1D = 5
x = y = c6UVIiyPxx1D


In [1137]:
class PythonMutator(PythonMutator):
    def transform_assign(self, src, trials=3):
        """Run ``trials`` separate ``AssignMutator`` passes over ``src`` and return ``src``."""
        for _ in range(trials):
            _, records = AssignMutator().transform_assign(src)
            self.mutations.extend(records)
        return src

In [1138]:
test_tree = ast.parse(
'''
def hello(x):
    print("Hello", x)
L = [1, 3 + 4 * 5, "hello"]
for i in [len(str(x)) for x in L]:
    print(i)
'''
)

PythonMutator().transform_assign(test_tree)
print_code(test_tree)

def hello(x):
    print('Hello', x)
dJQWZpui9lnQANORzLJ = 4
luVcqqEygHcV = dJQWZpui9lnQANORzLJ
AY4aLpFuwmOZnx5tKEZd = luVcqqEygHcV * 5
L = [1, 3 + AY4aLpFuwmOZnx5tKEZd, 'hello']
for i in [len(str(x)) for x in L]:
    print(i)


In [1139]:
big_tree = ast.parse(r'''
with open("circuit.txt", "r") as F:
    circuit = F.readlines() # read circuit file into a list
with open("gate_delays.txt", "r") as F:
    delays = F.readlines() # read gate delays into a list

gates = {-1: 0} # prepare dictionary to allow simpler access of gate delays
nodes = {} # prepare dictionary to store node data
out_nodes = [] # prepare list to store names of output nodes
flag1 = flag2 = flag3 = False # prep for processing circuit later

# loop to assign delay value to each kind of gate
for i in delays:
    x = i.strip() # ignore trailing whitespace
    if x[:2] == "//": continue # ignoring whitespace followed by //
    if len(x) == 0: continue # ignoring blank lines or whitespace-only lines
    inps = x.split() # separate line into words
    gates[inps[0]] = float(inps[1]) # assign corresponding delay values with key as gate name

for i in circuit:
    x = i.strip() # ignore trailing whitespace
    if x[:2] == "//": continue # ignoring whitespace followed by //
    if len(x) == 0: continue # ignoring blank lines or whitespace-only lines
    inps = x.split() # separate line into words
    if inps[0] == "PRIMARY_INPUTS": # handling input signal data
        for j in inps[1:]:
            nodes[j] = [0, [], -1] # initializing data with 0 value of delay, no nodes feeding in, associated with no gate  
        flag1 = True # flag to say input signals have been read
        continue
    if inps[0] == "INTERNAL_SIGNALS": # handling internal signal data
        for j in inps[1:]:
            nodes[j] = [0, [], -1] # initializing data with 0 value of delay, no nodes feeding in, associated with no gate
        flag2 = True # flag to say internal signals have been read
        continue
    if inps[0] == "PRIMARY_OUTPUTS": # handling output signal data
        for j in inps[1:]:
            nodes[j] = [0, [], -1] # initializing data with 0 value of delay, no nodes feeding in, associated with no gate
        out_nodes.extend(inps[1:]) # list of output nodes
        flag3 = True # flag to say output signals have been read
        continue
    if flag1 and flag2 and flag3: break # break the loop if all 3 conditions are met before loop termination

for i in circuit: # processing the input and setting up input nodes and gates for each node
    x = i.strip() # ignore trailing whitespace
    if x[:2] == "//": continue # ignoring whitespace followed by //
    if len(x) == 0: continue # ignoring blank lines or whitespace-only lines
    inps = x.split() # separate line into words
    if ((inps[0]=="PRIMARY_INPUTS") or (inps[0]=="INTERNAL_SIGNALS") or (inps[0]=="PRIMARY_OUTPUTS")): 
        continue # ignore signal lines
    out = inps[-1]
    nodes[out][1].extend(inps[1:-1]) # set up input nodes for each node
    nodes[out][2] = inps[0] # set gate delay for relevant nodes

def calcVal_A(x): # recursive function to calculate the delay at each node
    # print(x, nodes) # debug line
    if nodes[x][1] == []: return nodes[x][0] # skip recursive step if node already processed
    s = 0
    for i in nodes[x][1]: # find max delay time of each input node
        nodes[i][0] = calcVal_A(i) # recursive call to function
        s = max(nodes[i][0], s) # node delay that controls delay time of output
    nodes[x][1] = [] # clear input nodes to indicate node delay is already calculated
    return s + gates[nodes[x][2]] # gate delay compensation

to_write = [] # initialize array of lines to be written to output

for i in out_nodes:
    nodes[i][0] = calcVal_A(i) # calculate delay for each output node using the recursive function
    if nodes[i][0] == round(nodes[i][0]): nodes[i][0] = round(nodes[i][0])
    to_write.append(i + " " + str(nodes[i][0]) + "\n") # write delay at each output node to array

with open("output_delays.txt", "w") as F:
    F.writelines(to_write) # write output array to file
''')

In [1140]:
print_code(PythonMutator().expand_constants(PythonMutator().transform_assign(big_tree, 5), trials=5))

with open('circuit.txt', 'r') as F:
    dx3BG9CyNiKJDO6Pwn3J = F.readlines
    circuit = dx3BG9CyNiKJDO6Pwn3J()
with open('gate_delays.txt', 'r') as F:
    delays = F.readlines()
gates = {-(7627 - 7626): 0}
nodes = {}
out_nodes = []
flag1 = flag2 = flag3 = False
for i in delays:
    x = i.strip()
    if x[:7664 + -7662] == '//':
        continue
    if len(x) == 0:
        continue
    inps = x.split()
    gates[inps[0]] = float(inps[1])
for i in circuit:
    x = i.strip()
    if x[:6390 // 3195] == '//':
        continue
    if len(x) == 0:
        continue
    inps = x.split()
    if inps[0] == 'PRIMARY_INPUTS':
        for j in inps[1:]:
            nodes[j] = [0, [], -1]
        flag1 = True
        continue
    if inps[0] == 'INTERNAL_SIGNALS':
        for j in inps[1:]:
            nodes[j] = [0, [], -1]
        flag2 = True
        continue
    if inps[0] == 'PRIMARY_OUTPUTS':
        for j in inps[1:]:
            nodes[j] = [0, [], -1]
        out_nodes.extend(inps[1:])
      

In [1141]:
import trace
import sys

In [1142]:
def traceit(frame, event, arg):
    """Trace program execution. To be passed to sys.settrace()."""
    global coverage
    if event != 'line':
        return traceit
    # One entry per executed line: [function name, line number, snapshot of the locals].
    snapshot = dict(frame.f_locals)
    coverage.append([frame.f_code.co_name, frame.f_lineno, snapshot])
    return traceit

def tracer(f):
    """Call ``f()`` with line tracing enabled, collecting entries in the global ``coverage`` list.

    ``coverage`` is reset first. The trace function that was active before the
    call is restored afterwards, even when ``f`` raises; the exception itself
    still propagates to the caller.
    """
    global coverage
    coverage = []
    previous = sys.gettrace()
    sys.settrace(traceit)  # Turn on
    try:
        f()
    finally:
        sys.settrace(previous)  # Turn off (back to whatever was installed before)

In [1143]:
def g():  # demo function: traced with tracer() and parsed via inspect.getsource() in later cells
    def simple_function(x):
        z = 2  # assigned but never read
        y = 3 * x
        return y
    
    a = simple_function(2)
    b = 0
    for _ in range(6):
        b += 2 * a  # adds 2*a on each of the six iterations

    print("The answer is", b)

In [1144]:
tracer(g)

The answer is 72


In [1145]:
for i in coverage:
    print(f"{i[0]} {i[1]} {i[2]}")

g 2 {}
g 7 {'simple_function': <function g.<locals>.simple_function at 0x000002A802302020>}
simple_function 3 {'x': 2}
simple_function 4 {'x': 2, 'z': 2}
simple_function 5 {'x': 2, 'z': 2, 'y': 6}
g 8 {'simple_function': <function g.<locals>.simple_function at 0x000002A802302020>, 'a': 6}
g 9 {'simple_function': <function g.<locals>.simple_function at 0x000002A802302020>, 'a': 6, 'b': 0}
g 10 {'simple_function': <function g.<locals>.simple_function at 0x000002A802302020>, 'a': 6, 'b': 0, '_': 0}
g 9 {'simple_function': <function g.<locals>.simple_function at 0x000002A802302020>, 'a': 6, 'b': 12, '_': 0}
g 10 {'simple_function': <function g.<locals>.simple_function at 0x000002A802302020>, 'a': 6, 'b': 12, '_': 1}
g 9 {'simple_function': <function g.<locals>.simple_function at 0x000002A802302020>, 'a': 6, 'b': 24, '_': 1}
g 10 {'simple_function': <function g.<locals>.simple_function at 0x000002A802302020>, 'a': 6, 'b': 24, '_': 2}
g 9 {'simple_function': <function g.<locals>.simple_funct

In [1146]:
g_tree = ast.parse(inspect.getsource(g)).body[0]
print_ast(g_tree)

FunctionDef(
    name='g',
    args=arguments(
        posonlyargs=[],
        args=[],
        kwonlyargs=[],
        kw_defaults=[],
        defaults=[]),
    body=[
        FunctionDef(
            name='simple_function',
            args=arguments(
                posonlyargs=[],
                args=[
                    arg(arg='x')],
                kwonlyargs=[],
                kw_defaults=[],
                defaults=[]),
            body=[
                Assign(
                    targets=[
                        Name(id='z', ctx=Store())],
                    value=Constant(value=2)),
                Assign(
                    targets=[
                        Name(id='y', ctx=Store())],
                    value=BinOp(
                        left=Constant(value=3),
                        op=Mult(),
                        right=Name(id='x', ctx=Load()))),
                Return(
                    value=Name(id='y', ctx=Load()))],
            decorator_list=[]),
   

In [1147]:
print_ast(ast.parse('def f(x, y, *, z=3): print(x)'))

Module(
    body=[
        FunctionDef(
            name='f',
            args=arguments(
                posonlyargs=[],
                args=[
                    arg(arg='x'),
                    arg(arg='y')],
                kwonlyargs=[
                    arg(arg='z')],
                kw_defaults=[
                    Constant(value=3)],
                defaults=[]),
            body=[
                Expr(
                    value=Call(
                        func=Name(id='print', ctx=Load()),
                        args=[
                            Name(id='x', ctx=Load())],
                        keywords=[]))],
            decorator_list=[])],
    type_ignores=[])


In [1148]:
for node in g_tree.body:
    print(node.lineno)

2
7
8
9
12


In [1149]:
def get_trace(src):
    """Print ``(enclosing_function_name, lineno)`` for every statement in ``src``.

    ``src`` is a function-definition node. Each statement in its body is
    reported against ``src.name``; nested ``ast.FunctionDef`` statements are
    then descended into, so their own statements are reported against the
    nested function's name.
    """
    for statement in src.body:
        print((src.name, statement.lineno))
        if not isinstance(statement, ast.FunctionDef):
            continue
        # A nested def: report its body lines under its own name.
        get_trace(statement)

In [1150]:
get_trace(g_tree)

('g', 2)
('simple_function', 3)
('simple_function', 4)
('simple_function', 5)
('g', 7)
('g', 8)
('g', 9)
('g', 12)


Now that we can get the line data for each node in the AST, we can get the data of the local variables at a particular AST node and use it for substitutions.

In [1151]:
class VariableInjector(ast.NodeTransformer):
    """Rewrite constants in an AST as expressions over values observed at run time.

    The class runs in one of two modes, selected by ``self.fn``:

    * ``profile_function`` (``fn`` False) traces a function, records the local
      variables seen on every executed line, and then visits the tree twice.
    * ``inject_functions`` (``fn`` True) visits the tree twice using a function
      database ``db`` instead of traced locals.

    ``self.browsing`` tells the visitor methods which of the two passes is
    currently running. ``self.mutations`` collects ``(original, replacement)``
    pairs recorded during the substitution pass.
    """

    def __init__(self):
        self.mutations = []
        self.fn = False
        self.local_vars = {}
        # State that used to be created lazily; initialised here so every
        # attribute exists regardless of which entry point is called first.
        self.coverage = []
        self.seen = set()
        self.unstable = set()
        self.browsing = False

    def traceit(self, frame, event, arg):
        """``sys.settrace`` callback: record ``[function, lineno, locals]`` per line event."""
        if event == 'line':
            # Copy the locals so later mutation of the frame does not alter the record.
            snapshot = dict(frame.f_locals)
            self.coverage.append([frame.f_code.co_name, frame.f_lineno, snapshot])
        return self.traceit

    def tracer(self, f):
        """Call ``f()`` with line tracing enabled, storing the result in ``self.coverage``.

        The tracer that was active before the call is restored afterwards,
        even when ``f`` raises, so a failing function cannot leave tracing
        switched on for the rest of the session.
        """
        self.coverage = []
        previous = sys.gettrace()
        sys.settrace(self.traceit)  # Turn on
        try:
            f()
        finally:
            sys.settrace(previous)  # Turn off (restore whatever was there before)

    def profile_function(self, f, fn_tree = None):
        """Trace ``f`` and substitute constants in its AST.

        Parameters:
            f: zero-argument callable to execute under the tracer.
            fn_tree: AST of ``f``; parsed from ``inspect.getsource(f)`` when omitted.

        Returns:
            ``(fn_tree, self.mutations)`` - the tree that was visited and the
            list of recorded mutations.
        """
        self.fn = False

        self.mutations = []
        if fn_tree is None:
            fn_tree = ast.parse(inspect.getsource(f)).body[0]
        self.tracer(f)

        self.seen = set()
        self.unstable = set()
        # Kept as a dict (not a set) so it has the same type as in __init__.
        self.local_vars = {}
        self.browsing = True
        self.visit(fn_tree)

        self.browsing = False
        self.visit(fn_tree)

        return (fn_tree, self.mutations)

    def inject_functions(self, src, db):
        """Visit ``src`` in function-injection mode using the function database ``db``.

        The first pass runs with ``browsing`` False, the second with
        ``browsing`` True. Between the passes every entry of ``self.seen`` is
        prepended to ``self.mutations`` as ``(None, entry)``.

        Returns:
            ``(src, self.mutations)``.
        """
        self.fn = True
        self.db = db
        self.mutations = []

        self.seen = set()
        self.browsing = False
        self.visit(src)
        self.mutations = [(None, x) for x in self.seen] + self.mutations
        self.browsing = True
        self.visit(src)

        return (src, self.mutations)
    
                 

Currently, our class simply combines our existing methods and then visits the AST. What remains is, while visiting the AST, to find the in-scope variables and their values at every line of execution. Then we look for constants and check whether each one can be replaced by a variable or by a simple arithmetic expression involving a variable.

In [1152]:
class VariableInjector(VariableInjector):
    """Adds the assignment-tracking visitors and the per-statement function walk.

    During the browsing pass of profiling mode (``browsing`` True, ``fn``
    False) every assigned name is recorded in ``self.seen``; a name assigned
    more than once, or used as a ``for`` target, is additionally recorded in
    ``self.unstable``.
    """

    def visit_Assign(self, src):
        """Record every target of a plain assignment, then visit the children."""
        if self.browsing and not self.fn:
            for target in src.targets:
                self.check_seens(target)

        return self.generic_visit(src)

    def visit_AugAssign(self, src):
        """Record the target of an augmented assignment, then visit the children."""
        if self.browsing and not self.fn:
            self.check_seens(src.target)

        return self.generic_visit(src)

    def visit_For(self, src):
        """Record the loop target as seen *and* unstable, then visit the children."""
        if self.browsing and not self.fn:
            self.check_seens(src.target, True)

        return self.generic_visit(src)

    def check_seens(self, v, seen=False):
        """Register the name(s) bound by assignment target ``v``.

        Tuple and list targets are unpacked element by element. For subscript,
        attribute and starred targets the underlying ``value`` expression is
        examined instead. Targets that do not resolve to a plain name (for
        example ``f()[0] = 1``) are ignored rather than raising.

        With ``seen`` True the name is treated as already seen, which marks
        it unstable immediately.
        """
        if isinstance(v, (ast.Tuple, ast.List)):
            for element in v.elts:
                self.check_seens(element, seen)
        elif isinstance(v, (ast.Subscript, ast.Attribute, ast.Starred)):
            self.check_seens(v.value, seen)
        elif isinstance(v, ast.Name):
            if seen:
                self.seen.add(v.id)
            if v.id in self.seen:
                self.unstable.add(v.id)
            else:
                self.seen.add(v.id)

    def visit_FunctionDef(self, src):
        """Walk a function body statement by statement.

        In function-injection mode's browsing pass, the wrapper function
        ``pymutator_profile_function`` is returned with the contents of
        ``self.seen`` prepended to its body.

        Otherwise ``self.args`` is set to the function's parameter names and,
        when not browsing, ``get_locals`` is called for each statement's line
        before that statement is visited. ``self.args`` is restored on exit
        so that a nested ``def`` does not leave its own parameter list active
        for the remaining statements of the enclosing function.
        """
        if self.browsing and self.fn and src.name == "pymutator_profile_function":
            return ast.FunctionDef(name=src.name, body=[*list(self.seen)] + src.body, decorator_list=src.decorator_list, args=src.args)

        params = src.args
        names = [x.arg for x in params.posonlyargs + params.args + params.kwonlyargs]
        # *args / **kwargs are parameters too and must not be used for substitution.
        for extra in (params.vararg, params.kwarg):
            if extra is not None:
                names.append(extra.arg)

        enclosing_args = getattr(self, "args", [])
        self.args = names
        try:
            for node in src.body:
                if not self.browsing:
                    self.get_locals(src.name, node.lineno)
                self.visit(node)
        finally:
            self.args = enclosing_args
        return src
        

Moreover, we want variables that hold lists to be usable through indexing. For this, we need to flatten the lists/dicts assigned in our code. This accounts for much of the visit_Constant function's complexity: if we have a single value we try substituting it directly; otherwise we flatten the container and enqueue tuples of (node, node_value) that are then used for the substitution.

In [1153]:
print_ast(ast.parse("L[1]"))
print_ast(ast.parse("[1,2,3]"))
print_ast(ast.parse("D['x']"))

Module(
    body=[
        Expr(
            value=Subscript(
                value=Name(id='L', ctx=Load()),
                slice=Constant(value=1),
                ctx=Load()))],
    type_ignores=[])
Module(
    body=[
        Expr(
            value=List(
                elts=[
                    Constant(value=1),
                    Constant(value=2),
                    Constant(value=3)],
                ctx=Load()))],
    type_ignores=[])
Module(
    body=[
        Expr(
            value=Subscript(
                value=Name(id='D', ctx=Load()),
                slice=Constant(value='x'),
                ctx=Load()))],
    type_ignores=[])


In [1154]:
class VariableInjector(VariableInjector):
    """Adds the constant-substitution visitor."""

    def visit_Constant(self, src):
        """Try to replace constant ``src`` with an expression over ``self.local_vars``.

        Candidates are taken from ``self.local_vars`` in random order. A list
        or tuple value is expanded into one ``node[index]`` candidate per
        element and a dict value into one ``node[key]`` candidate per key
        (also shuffled); those candidates are pushed back on the work list so
        nested containers are flattened too. Any other value is handed to
        ``unify_value``; the first non-``None`` result is returned.

        Returns ``src`` unchanged during the browsing pass, when there are no
        local variables, or when no candidate unifies.
        """
        if len(self.local_vars) == 0 or self.browsing:
            return src

        queue = list(self.local_vars.keys())
        random.shuffle(queue)
        while queue:
            entry = queue.pop()
            if isinstance(entry, tuple):
                # Flattened container element pushed by an earlier iteration.
                node, val = entry
            else:
                node, val = entry, self.local_vars[entry]

            new_node = None

            if isinstance(val, (list, tuple)):
                indexed = list(enumerate(val))
                random.shuffle(indexed)
                for index, item in indexed:
                    # ctx is set explicitly: compile() rejects a Subscript
                    # without it on Python versions before 3.13.
                    element = ast.Subscript(value=node, slice=ast.Constant(value=index), ctx=ast.Load())
                    queue.append((element, item))

            elif isinstance(val, dict):
                keys = list(val.keys())
                random.shuffle(keys)
                for key in keys:
                    element = ast.Subscript(value=node, slice=ast.Constant(value=key), ctx=ast.Load())
                    queue.append((element, val[key]))

            else:
                new_node = self.unify_value(src, node, val)

            if new_node is not None:
                return new_node

        return src

In [1155]:
tree = ast.parse("print(2)")
PythonMutator().expand_constants(tree)
print_code(tree)

print((522 - 8630) // (-9039 - -4985))


Finally, we write the functions to set local_vars and replace basic types with appropriate variable calls.

Note that we slightly modify local_vars to support function substitution by changing the local vars set returned by get_locals.

In [1156]:
class VariableInjector(VariableInjector):
    """Adds ``get_locals`` (fills ``self.local_vars``) and ``unify_value``."""

    def get_locals(self, fn, ln):
        """Populate ``self.local_vars`` for the statement at line ``ln`` of function ``fn``.

        In function-injection mode (``self.fn`` True) ``fn`` and ``ln`` are
        ignored: every function in ``self.db.function_list`` is called through
        ``self.db.simulate`` with randomly generated keyword arguments, and
        the call node is mapped to the value it returned. Functions whose
        simulation raises are reported and skipped.

        In profiling mode the first coverage record matching ``(fn, ln)``
        supplies the locals; names in ``self.args`` and ``self.unstable`` are
        left out.
        """
        self.local_vars = {}
        if self.fn:
            for i in range(len(self.db.function_list)):
                print_code(self.db.function_list[i]['source'])
                fun = self.db.function_list[i]
                kw = []
                for j in fun['params']:
                    # j is a (parameter_name, annotation_text_or_None) pair.
                    gen_val = None
                    if j[1] == 'int':
                        gen_val = random.randint(-10000, 10000)

                    if j[1] == 'str':
                        gen_val = "".join(random.choices("abcdefghijklmnopqrstuvwxyz123456789", k=random.randint(1, 15)))

                    if j[1] == 'float':
                        gen_val = 500 * random.randint(1, 10) * (random.random() + random.random()) * (1 - 2 * random.randint(0, 1))

                    # Was `[1] == 'bool'`, which compares a list with a string
                    # and is never true, so bool parameters always got None.
                    if j[1] == 'bool':
                        gen_val = True if random.randint(0, 1) else False
                    kw.append(ast.keyword(arg=j[0], value=ast.Constant(value=gen_val)))
                print(kw)
                try:
                    self.local_vars[ast.Call(func=ast.Name(id=fun['source'].name, ctx=ast.Load()), args=[], keywords=kw)] = self.db.simulate(i, [], kw)
                    self.seen.add(fun['source'])
                except Exception as e:
                    print(repr(e))
                    continue
            return

        for i in self.coverage:
            if i[0] == fn and i[1] == ln:
                self.local_vars = {ast.parse(k).body[0].value: v for k, v in i[2].items() if k not in self.args and k not in self.unstable}
                return

    def unify_value(self, src, var, val):
        """Build an expression over ``var`` (whose run-time value is ``val``) equal to ``src.value``.

        * Same type and equal value: ``var`` itself is returned.
        * Numbers (bools excluded): a random operator pair from ``op_map_int``
          or ``op_map_float`` is tried; the result is ``<constant> <op> var``.
        * Strings: a slice of ``var`` when the constant is contained in
          ``val``, or ``prefix + var + suffix`` when ``val`` is contained in
          the constant.

        Returns the replacement node, or ``None`` when nothing applies.
        Arithmetic and string replacements are appended to ``self.mutations``.
        """
        # Require identical types: 1 == True == 1.0 in Python, so a bare
        # equality test would let an int, bool or float variable stand in for
        # a constant of a different type and change the program's output.
        if type(src.value) is type(val) and src.value == val:
            return var

        def is_number(x):
            return isinstance(x, (int, float)) and not isinstance(x, bool)

        src_is_float = isinstance(src.value, float)
        # An int constant is only rewritten with an int variable; a float
        # constant accepts int or float variables.
        if is_number(src.value) and is_number(val) and (src_is_float or isinstance(val, int)):
            try:
                op = random.randint(0, 3)
                op_map = op_map_float if src_is_float else op_map_int
                # op_map[3 - op] is used as the inverse of op_map[op]:
                # (src <inverse> val) <op> val must give back src.
                # NOTE(review): eval() runs on text built from str() of the
                # two numbers; values whose str() is not a literal (inf, nan)
                # are not handled here.
                partial_text = "(" + str(src.value) + op_map[3-op][0] + str(val) + ")"
                assert eval(partial_text + op_map[op][0] + str(val)) == src.value
                node = ast.BinOp(left = ast.Constant(value=eval(partial_text)), op = op_map[op][1], right = var)
                #self.mutations update
                self.mutations.append((deepcopy(src), deepcopy(node)))
                return node
            except ZeroDivisionError: return None
            except AssertionError: return None
        elif isinstance(src.value, str) and isinstance(val, str):
            if src.value in val:
                ind = val.find(src.value)
                node = ast.Subscript(value = var, slice = ast.Slice(lower=ast.Constant(value=ind), upper=ast.Constant(value=ind+len(src.value))), ctx = ast.Load())
                #self.mutations update
                self.mutations.append((deepcopy(src), deepcopy(node)))
                return node
            elif val in src.value:
                ind = src.value.find(val)
                node = ast.BinOp(left = ast.BinOp(left = ast.Constant(value = src.value[:ind]), op = ast.Add(), right = var), op = ast.Add(), right = ast.Constant(value = src.value[ind + len(val):]))
                #self.mutations update
                self.mutations.append((deepcopy(src), deepcopy(node)))
                return node
        return None
        

We have written the functions that traverse the tree and make the appropriate calls to obtain our local variables. Since we run this entire process on a function, the outermost scope is always handled first, and inner scopes are then handled in the same way. Note that, when using get_locals, we should avoid substituting constants with the function's arguments, because their values are not consistent across function calls.

In [1157]:
print_code(ast.parse(inspect.getsource(g)))
g()

def g():

    def simple_function(x):
        z = 2
        y = 3 * x
        return y
    a = simple_function(2)
    b = 0
    for _ in range(6):
        b += 2 * a
    print('The answer is', b)
The answer is 72


In [1158]:
new_g_code = ast.unparse(VariableInjector().profile_function(g)[0])
print(new_g_code)

The answer is 72
def g():

    def simple_function(x):
        z = 2
        y = 6 // z * x
        return y
    a = simple_function(2)
    b = 6 - a
    for _ in range(a):
        b += (8 - a) * a
    print('The answer is', b)


In [1159]:
exec(new_g_code)
g()

The answer is 72


Note that using exec to set the value of g currently breaks the VariableInjector because it is unable to find the source code of the function through inspect. Instead, we create a temporary function inside the PythonMutator class and modify that so any code can have variables injected.

In [1160]:
class PythonMutator(PythonMutator):
    """Adds variable injection for arbitrary module-level code."""

    # Template wrapper function; its body is swapped for the code under mutation.
    sample_tree = ast.parse('''
def pymutator_profile_function():
    pass
''')

    def inject_variables(self, src):
        """Run ``VariableInjector.profile_function`` over the statements of ``src``.

        The statements of ``src.body`` are placed inside a copy of the
        ``pymutator_profile_function`` template, which is compiled and defined
        in this module's namespace so that it can be called and traced. The
        wrapper shares ``src.body`` (the list is not copied). Mutations
        recorded by the injector are appended to ``self.mutations``.

        Returns ``src``.
        """
        wrapper = deepcopy(self.sample_tree)
        wrapper.body[0].body = src.body
        wrapper = ast.fix_missing_locations(wrapper)

        module_namespace = sys.modules[__name__].__dict__
        exec(compile(wrapper, filename="<ast>", mode="exec"), module_namespace)

        _, recorded = VariableInjector().profile_function(pymutator_profile_function, wrapper.body[0])
        self.mutations.extend(recorded)

        return src
    
class PythonMutator(PythonMutator):
    """Adds function injection driven by a function database."""

    def inject_functions(self, src, db):
        """Run ``VariableInjector.inject_functions`` over the statements of ``src``.

        The statements of ``src.body`` are placed inside a copy of the
        ``pymutator_profile_function`` template, which is compiled and defined
        in this module's namespace. The whole wrapper module is then handed to
        the injector together with ``db``, and the mutations it records are
        appended to ``self.mutations``.

        Returns ``src``.
        """
        wrapper = deepcopy(self.sample_tree)
        wrapper.body[0].body = src.body
        wrapper = ast.fix_missing_locations(wrapper)

        module_namespace = sys.modules[__name__].__dict__
        exec(compile(wrapper, filename="<ast>", mode="exec"), module_namespace)

        _, recorded = VariableInjector().inject_functions(wrapper, db)
        self.mutations.extend(recorded)

        return src

In [1161]:
test_str = ast.parse('''
a = "hello world"
print("hello")                     
''')
print_code(PythonMutator().inject_variables(test_str))

hello
a = 'hello world'
print(a[0:5])


In [1162]:
test_list = ast.parse('''
L = [1,2,3]
a=[int(2),int(4),int(6)]
print(a)
print(L)                    
''')

PythonMutator().inject_variables(test_list)
print_code(test_list)

[2, 4, 6]
[1, 2, 3]
L = [1, 2, 3]
a = [int(L[1]), int(12 // L[2]), int(5 + L[0])]
print(a)
print(L)


In [1163]:
g_code = ast.parse('''
def simple_function(x):
    z = 2
    y = 3 * x
    return y
s = "Hello world!"
a = simple_function(2)
b = 0
for _ in range(6):
    b += 2 * a
print("Hello!")
''')

print_code(g_code)
print("=============")
PythonMutator().expand_constants(g_code)
PythonMutator().expand_constants(g_code)
PythonMutator().inject_variables(g_code)
PythonMutator().transform_assign(g_code)
PythonMutator().expand_constants(g_code)
PythonMutator().inject_variables(g_code)
PythonMutator().transform_assign(g_code)
PythonMutator().swap_numbers(g_code)
PythonMutator().transform_for(g_code)
print("=============")
print_code(g_code)

def simple_function(x):
    z = 2
    y = 3 * x
    return y
s = 'Hello world!'
a = simple_function(2)
b = 0
for _ in range(6):
    b += 2 * a
print('Hello!')
Hello!
Hello!
def simple_function(x):
    z = -45761036 // 2854 // (7553 + -15570)
    QrejjR4d6JivawSkP3g = 3 - z
    y = x * (z + QrejjR4d6JivawSkP3g)
    return y
s = 'Hello world!'
a = simple_function(-6876204 // 2969 + (4578 + (6703 - 76015203 // 8481)))
TLGN7Z8CBSkIet = 6
b = (TLGN7Z8CBSkIet - a) // a
_ = 0
while _ < -54384 // a - a + 54456 // a:
    xLuvXdVR8FuUKnJOipUb = a
    b += xLuvXdVR8FuUKnJOipUb * (a + (a + -10))
    _ += 1
MtXw5gErinFa = print
_6voSE3xP9Rh4af2x = MtXw5gErinFa
_48vvrjrDEcB = _6voSE3xP9Rh4af2x
_48vvrjrDEcB('Hello!')


Because the equality of python AST nodes is checked by reference address, we need to define a custom function to test equality of two AST nodes. Here it is:

In [1164]:
from itertools import zip_longest
from typing import Union


def compare_ast(node1: Union[ast.expr, list[ast.expr]], node2: Union[ast.expr, list[ast.expr]]) -> bool:
    """Return True when two AST fragments are structurally equal.

    AST nodes are compared attribute by attribute, driven by the attributes
    present on ``node1``; the position attributes (``lineno``, ``end_lineno``,
    ``col_offset``, ``end_col_offset``) and ``parent`` are ignored. Lists are
    equal when they have the same length and all elements compare equal
    pairwise. Anything else is compared with ``==``.

    An attribute that exists on ``node1`` but not on ``node2`` makes the nodes
    unequal instead of raising ``AttributeError``.
    """
    if type(node1) is not type(node2):
        return False

    if isinstance(node1, ast.AST):
        missing = object()
        for k, v in vars(node1).items():
            if k in {"lineno", "end_lineno", "col_offset", "end_col_offset", "parent"}:
                continue
            other = getattr(node2, k, missing)
            if other is missing or not compare_ast(v, other):
                return False
        return True

    elif isinstance(node1, list):
        # Compare lengths explicitly: padding the shorter list with None would
        # make [None] and [] look equal, and None is a legitimate list element
        # in ASTs (e.g. kw_defaults, or Dict keys for ``**`` unpacking).
        return len(node1) == len(node2) and all(compare_ast(n1, n2) for n1, n2 in zip(node1, node2))
    else:
        return node1 == node2

In [1165]:
class PythonMutator(PythonMutator):
    """Adds the ability to undo the most recently recorded mutation."""

    def reverse_mutation(self, src, log=False):
        """Undo the last entry of ``self.mutations`` inside ``src``.

        The last ``(original, replacement)`` pair is armed in ``self.reverse``
        and ``src`` is visited; ``generic_visit`` swaps the first node that is
        structurally equal to ``replacement`` back to ``original``. The entry
        is then popped from ``self.mutations``.

        ``self.reverse`` is always cleared afterwards, so that when no node
        matched, a later unrelated traversal cannot apply the stale reversal.

        Parameters:
            src: tree to modify in place.
            log: when True, print the original and the replacement first.

        Raises:
            IndexError: when ``self.mutations`` is empty.
        """
        self.reverse = self.mutations[-1]
        if log:
            print_code(self.reverse[0])
            print("<==")
            print_code(self.reverse[1])
            print("==")
        self.compare = False
        try:
            self.visit(src)
        finally:
            self.reverse = None
        self.mutations.pop()

    def generic_visit(self, src):
        """Swap ``src`` for the armed original when it matches the armed replacement.

        Falls through to the parent class's ``generic_visit`` when nothing is
        armed or ``src`` does not match.
        """
        pending = getattr(self, "reverse", None)
        if pending is not None and not getattr(self, "compare", False):
            self.compare = True
            matched = compare_ast(src, pending[1])
            self.compare = False
            if matched:
                # Disarm first so only one occurrence is reverted.
                self.reverse = None
                return pending[0]
        return super().generic_visit(src)

In [1166]:
pm = PythonMutator()
alg_tree = ast.parse("x = 3 * 4")
const_one = alg_tree.body[0].value.left
print_code(pm.expand_constants(alg_tree))
const_two = alg_tree.body[0].value.left

x = (-8974 - -1070 - -7907) * (16552 // 4138)


In [1167]:
print_ast(const_one)
print_ast(const_two)

Constant(value=3)
BinOp(
    left=BinOp(
        left=Constant(value=-8974),
        op=Sub(),
        right=Constant(value=-1070)),
    op=Sub(),
    right=Constant(value=-7907))


In [1168]:
pm.mutations.append((const_one, const_two))

In [1169]:
pm.reverse_mutation(alg_tree)
print_code(alg_tree)

x = 3 * (16552 // 4138)


Now that we have found a way to reverse the mutations, all that remains is automatically updating self.mutations, which we have done in the classes.

In [1170]:
# Expand the constants of a tiny expression, then list every recorded
# (original, replacement) pair in the order the mutator stored them.
pm = PythonMutator()
alg_tree = ast.parse("x = 3 * 4")
pm.expand_constants(alg_tree)
print_code(alg_tree)
for original, replacement in pm.mutations:
    print_ast(original)
    print("==>")
    print_ast(replacement)
    print("==")


x = (6730 + -1610 + -5117) * (8902 + -8898)
Constant(value=3)
==>
BinOp(
    left=Constant(value=5120),
    op=Add(),
    right=Constant(value=-5117))
==
Constant(value=4)
==>
BinOp(
    left=Constant(value=8902),
    op=Add(),
    right=Constant(value=-8898))
==
Constant(value=5120)
==>
BinOp(
    left=Constant(value=6730),
    op=Add(),
    right=Constant(value=-1610))
==


In [1171]:
pm.reverse_mutation(alg_tree)
print_code(alg_tree)
pm.reverse_mutation(alg_tree)
print_code(alg_tree)
pm.reverse_mutation(alg_tree)
print_code(alg_tree)

x = (5120 + -5117) * (8902 + -8898)
x = (5120 + -5117) * 4
x = 3 * 4


In [1172]:
pm = PythonMutator()
alg_tree = ast.parse('''
x = 3 * 4
y = 6            
print(y)                
''')
pm.expand_constants(alg_tree)
pm.swap_numbers(alg_tree)
pm.inject_variables(alg_tree)
pm.expand_constants(alg_tree)
pm.swap_numbers(alg_tree)
pm.inject_variables(alg_tree)
print("===")
print_code(alg_tree)
print("===")
for _ in range(3):
    pm.reverse_mutation(alg_tree, True)
    pm.reverse_mutation(alg_tree, True)
    print_code(alg_tree)
    print("===")

6
6
===
x = 5619 // 1873 * (-1383 - (-8015 + 6628))
y = -997430616 // x // -8554 - x - (x + (235980 // x - 9978))
print(y)
===
19665
<==
235980 // x
==
-83119218
<==
-997430616 // x
==
x = 5619 // 1873 * (-1383 - (-8015 + 6628))
y = -83119218 // -8554 - x - (x + (19665 - 9978))
print(y)
===
19665 - 9978 + x
<==
x + (19665 - 9978)
==
6628 + -8015
<==
-8015 + 6628
==
x = 5619 // 1873 * (-1383 - (6628 + -8015))
y = -83119218 // -8554 - x - (19665 - 9978 + x)
print(y)
===
(-1383 - (6628 + -8015)) * (5619 // 1873)
<==
5619 // 1873 * (-1383 - (6628 + -8015))
==
9717
<==
-83119218 // -8554
==
x = (-1383 - (6628 + -8015)) * (5619 // 1873)
y = 9717 - x - (19665 - 9978 + x)
print(y)
===


In [1173]:
# Undo the remaining mutations one at a time, showing the tree after each step.
while pm.mutations:
    pm.reverse_mutation(alg_tree, True)
    print_code(alg_tree)

9687
<==
19665 - 9978
==
x = (-1383 - (6628 + -8015)) * (5619 // 1873)
y = 9717 - x - (9687 + x)
print(y)
-1387
<==
6628 + -8015
==
x = (-1383 - -1387) * (5619 // 1873)
y = 9717 - x - (9687 + x)
print(y)
9699
<==
9687 + x
==
x = (-1383 - -1387) * (5619 // 1873)
y = 9717 - x - 9699
print(y)
9705
<==
9717 - x
==
x = (-1383 - -1387) * (5619 // 1873)
y = 9705 - 9699
print(y)
5619 // 1873 * (-1383 - -1387)
<==
(-1383 - -1387) * (5619 // 1873)
==
x = 5619 // 1873 * (-1383 - -1387)
y = 9705 - 9699
print(y)
4
<==
-1383 - -1387
==
x = 5619 // 1873 * 4
y = 9705 - 9699
print(y)
6
<==
9705 - 9699
==
x = 5619 // 1873 * 4
y = 6
print(y)
3
<==
5619 // 1873
==
x = 3 * 4
y = 6
print(y)


In [1174]:
import os
import ast
import random
import builtins
import datetime
import collections
import re
import pathlib
import decimal
import fractions
import functools

# Maps a type name (as written in a return annotation) to one representative
# value of that type. CallReplacer.visit_Call and FunctionDB.__init__ look up
# a callee's annotated return type here to stand in for the call's result.
#
# Every value is created once, when this statement runs: the date/time entries
# are fixed at that moment, and the same object is handed out on every lookup.
sample_values = {
    'int': 42,
    'float': 3.14,
    'str': 'example string',
    'bool': True,
    'NoneType': None,
    'list': [1, 2, 3, 'a', 'b', 'c'],
    'dict': {'key1': 'value1', 'key2': 42},
    'set': {1, 2, 3, 'a', 'b', 'c'},
    'tuple': (1, 2, 3, 'a', 'b', 'c'),
    'bytes': b'example bytes',
    'bytearray': bytearray(b'example bytearray'),
    'range': range(5),
    'complex': 1+2j,
    'frozenset': frozenset([1, 2, 3, 'a', 'b', 'c']),
    'datetime': datetime.datetime.now(),
    'date': datetime.date.today(),
    'time': datetime.datetime.now().time(),
    'timedelta': datetime.timedelta(days=1),
    'memoryview': memoryview(b'example memoryview'),
    'deque': collections.deque([1, 2, 3, 'a', 'b', 'c']),
    'namedtuple': collections.namedtuple('Point', ['x', 'y'])(1, 2),
    'defaultdict': collections.defaultdict(int, {'key1': 1, 'key2': 2}),
    'Counter': collections.Counter(['a', 'b', 'c', 'a', 'b', 'b']),
    'OrderedDict': collections.OrderedDict([('key1', 'value1'), ('key2', 'value2')]),
    # NOTE(review): extract_function_return_types only records annotations
    # that are a bare name (ast.Name), so the dotted keys below look
    # unreachable through that path - confirm whether another caller uses them.
    'types.FunctionType': (lambda x: x + 1),
    'types.LambdaType': (lambda x: x + 1),
    'types.BuiltinFunctionType': abs,
    'pattern': re.compile(r'\d+'),
    'match': re.match(r'\d+', '123abc'),
    'decimal.Decimal': decimal.Decimal('3.14'),
    'fractions.Fraction': fractions.Fraction(3, 4),
    'functools.partial': functools.partial(int, base=2),
    # The entries below are one-shot iterators: once consumed they stay empty.
    'map': map(str, [1, 2, 3]),
    'filter': filter(lambda x: x > 1, [0, 1, 2, 3]),
    'zip': zip([1, 2, 3], ['a', 'b', 'c']),
    'reversed': reversed([1, 2, 3]),
    'enumerate': enumerate(['a', 'b', 'c']),
    'generator': (x * x for x in range(10)),
}


class FunctionCallChecker(ast.NodeVisitor):
    """Collect every call expression found in a visited AST.

    After ``visit(tree)``:

    * ``has_function_call`` is True when at least one call was found;
    * ``calls`` holds one ``(callee_name, return_type)`` pair per call, where
      ``return_type`` comes from ``function_return_types`` and defaults to
      ``"Unknown"``.
    """

    def __init__(self, function_return_types):
        self.function_return_types = function_return_types
        self.imports = set()
        self.calls = []
        self.has_function_call = False

    def visit_Call(self, node):
        """Record the callee name and looked-up return type, then visit nested nodes.

        The name is ``f`` for ``f(...)``, ``obj.attr`` for ``obj.attr(...)``
        where ``obj`` is a plain name, just ``attr`` for deeper attribute
        chains, and ``None`` for any other callee expression.
        """
        self.has_function_call = True

        callee = node.func
        if isinstance(callee, ast.Name):
            func_name = callee.id
        elif isinstance(callee, ast.Attribute):
            owner = callee.value
            func_name = f"{owner.id}.{callee.attr}" if isinstance(owner, ast.Name) else callee.attr
        else:
            func_name = None

        self.calls.append((func_name, self.function_return_types.get(func_name, "Unknown")))
        self.generic_visit(node)

class FunctionExtractor:
    """Locate Python files and pull function definitions and call info out of them."""

    def __init__(self, exclude_integer_parameters=False):
        # When True, is_integer_function ignores parameter annotations.
        self.exclude_integer_parameters = exclude_integer_parameters

    def extract_python_files(self, folder_path):
        """Return the paths of all ``.py`` files found under ``folder_path`` (recursively)."""
        return [
            os.path.join(root, file)
            for root, _dirs, files in os.walk(folder_path)
            for file in files
            if file.endswith(".py")
        ]

    def extract_function_declarations(self, file_path):
        """Return every ``ast.FunctionDef`` in ``file_path`` (nested ones included).

        The result is also stored in ``self.functions``. A file that cannot be
        read or parsed is reported on stdout and yields ``[]``; read failures
        and parse failures are reported with distinct messages.
        """
        try:
            with open(file_path, 'r', encoding="utf8") as f:
                code = f.read()
        except Exception as e:
            # Best effort: missing files, over-long paths, bad encodings, ...
            print(f"Could not read file {file_path}: {e}")
            return []
        try:
            tree = ast.parse(code)
        except Exception as e:
            print(f"Syntax error in file {file_path}: {e}")
            return []
        self.functions = [node for node in ast.walk(tree) if isinstance(node, ast.FunctionDef)]
        return self.functions

    def has_function_call(self, function_node, function_return_types):
        """Return True when ``function_node`` contains at least one call expression."""
        checker = FunctionCallChecker(function_return_types)
        checker.visit(function_node)
        return checker.has_function_call

    def return_function_calls(self, function_node, function_return_types):
        """Return the ``(callee_name, return_type)`` pairs for all calls in ``function_node``."""
        checker = FunctionCallChecker(function_return_types)
        checker.visit(function_node)
        return checker.calls

    def is_integer_function(self, function_node):
        """Annotation-based check for an integer-typed function.

        Returns False when the return annotation is missing or, unless
        ``exclude_integer_parameters`` is set, when any positional parameter
        is unannotated. Otherwise returns True when the return annotation is
        ``int`` or, unless ``exclude_integer_parameters`` is set, when at
        least one positional parameter is annotated ``int``.
        """
        check_params = not self.exclude_integer_parameters
        positional = function_node.args.args

        if check_params and any(not arg.annotation for arg in positional):
            return False
        if not function_node.returns:
            return False

        def is_int(annotation):
            return isinstance(annotation, ast.Name) and annotation.id == 'int'

        if check_params and any(is_int(arg.annotation) for arg in positional):
            return True
        return is_int(function_node.returns)

class CallReplacer(ast.NodeTransformer):
    """Replace calls whose return type is known with a sample value of that type."""

    def __init__(self, function_return_types):
        # Mapping of callee name -> return-type name ("Unknown" when unannotated).
        self.function_return_types = function_return_types

    def visit_Call(self, node):
        """Swap the call for ``ast.Constant(sample_values[return_type])`` when possible.

        The callee name is resolved as ``f`` for ``f(...)``, ``obj.attr`` for
        ``obj.attr(...)`` where ``obj`` is a plain name, ``attr`` for deeper
        attribute chains, and ``None`` otherwise. Calls whose return type is
        ``"Unknown"`` or has no entry in ``sample_values`` are kept and their
        children visited.
        """
        callee = node.func
        if isinstance(callee, ast.Name):
            func_name = callee.id
        elif isinstance(callee, ast.Attribute):
            owner = callee.value
            func_name = f"{owner.id}.{callee.attr}" if isinstance(owner, ast.Name) else callee.attr
        else:
            func_name = None

        return_type = self.function_return_types.get(func_name, "Unknown")
        if return_type == "Unknown" or return_type not in sample_values:
            return self.generic_visit(node)

        stand_in = ast.Constant(sample_values[return_type])
        return ast.copy_location(stand_in, node)

def extract_function_return_types(functions):
    """Map each function's name to the name of its annotated return type.

    ``functions`` is an iterable of function-definition nodes. The value is
    the annotation's identifier when the return annotation is a bare name
    (``ast.Name``) and ``"Unknown"`` otherwise (no annotation, or a more
    complex annotation expression). When several functions share a name the
    last one wins.
    """
    return {
        fn.name: fn.returns.id if fn.returns and isinstance(fn.returns, ast.Name) else "Unknown"
        for fn in functions
    }

def extract_function_parameters(function_node):
    """Return ``(name, annotation_text)`` for every parameter of ``function_node``.

    ``annotation_text`` is the unparsed annotation, or ``None`` when the
    parameter is unannotated. Order: positional-only parameters, regular
    positional parameters, keyword-only parameters, then ``*args`` and
    ``**kwargs`` when present. Positional-only parameters were previously
    dropped from the result.
    """
    def describe(arg):
        annotation = ast.unparse(arg.annotation) if arg.annotation else None
        return (arg.arg, annotation)

    spec = function_node.args
    parameters = [describe(arg) for arg in spec.posonlyargs + spec.args + spec.kwonlyargs]

    for variadic in (spec.vararg, spec.kwarg):
        if variadic:
            parameters.append(describe(variadic))
    return parameters

class FunctionDB:
    """Database of functions harvested from the Python files under a directory.

    ``function_list`` holds one dict per accepted function with the keys
    ``"source"`` (its ``ast.FunctionDef``) and ``"params"`` (the result of
    ``extract_function_parameters``).
    """

    def __init__(self, path):
        """Scan ``path`` and build ``function_database`` / ``function_list``.

        A function is rejected when it calls a non-builtin whose return type
        is ``"Unknown"`` or has no entry in ``sample_values``. Calls with a
        known return type are replaced through ``CallReplacer``.
        """
        self.extractor = FunctionExtractor(exclude_integer_parameters=False)

        files = self.extractor.extract_python_files(path)
        self.function_database = []

        all_functions = []
        for file in files:
            all_functions.extend(self.extractor.extract_function_declarations(file))

        function_return_types = extract_function_return_types(all_functions)
        builtins_set = set(dir(builtins))

        for function in all_functions:
            flag = True
            if self.extractor.has_function_call(function, function_return_types):    # if function has function call within it
                calls = self.extractor.return_function_calls(function, function_return_types)
                for call, return_type in calls:
                    if call in builtins_set:                                    # builtin calls are left alone
                        continue
                    if return_type == "Unknown":                                # cannot stand in for this call: reject the function
                        flag = False
                        continue
                    if return_type not in sample_values:                        # known type, but no sample value available
                        flag = False
                    # Replace the function call with the sample value
                    function = CallReplacer(function_return_types).visit(function)
            if flag:
                self.function_database.append(function)

        self.function_list = []

        for function_node in self.function_database:
            self.function_list.append({
                "source": function_node,
                "params": extract_function_parameters(function_node),
            })

    def simulate(self, ind, args, keywords):
        """Define function number ``ind`` and return the result of calling it.

        Parameters:
            ind: index into ``self.function_list``.
            args: list of AST expression nodes used as positional arguments.
            keywords: list of ``ast.keyword`` nodes used as keyword arguments.

        The function source and the call are executed in one explicit
        namespace (a copy of this module's globals). That makes the freshly
        defined function visible to the call and to itself (recursion), does
        not depend on how ``locals()`` behaves inside a function, and keeps
        the definition out of the module's own globals.

        NOTE(review): this executes code read from the scanned files; only
        point the database at sources you trust.
        """
        src = self.function_list[ind]["source"]
        call = ast.Expr(value=ast.Call(func=ast.Name(id=src.name, ctx=ast.Load()), args=args, keywords=keywords))

        namespace = dict(globals())
        exec(ast.unparse(src), namespace)
        print_code(call)
        return eval(ast.unparse(call), namespace)


# To access the source of a function --> function_list[<index>]["source"]
# To access the parameters of a function --> function_list[<index>]["params"]


"""
Drawbacks:
    1. The code currently works correctly only for those functions where the return type of the functions are annotated.
    2. If the -e flag is not provided, the parameters of the function also need to be annotated for the program to work correctly.
"""

'\nDrawbacks:\n    1. The code currently works correctly only for those functions where the return type of the functions are annotated.\n    2. If the -e flag is not provided, the parameters of the function also need to be annotated for the program to work correctly.\n'

In [1175]:
db = FunctionDB(".")

Syntax error in file .\potentialFunctionDatabaseProjects\potentialFunctionDatabaseProjects\ansible\test\integration\targets\ansible-doc\broken-docs\collections\ansible_collections\testns\testcol\plugins\cache\notjsonfile.py: [Errno 2] No such file or directory: '.\\potentialFunctionDatabaseProjects\\potentialFunctionDatabaseProjects\\ansible\\test\\integration\\targets\\ansible-doc\\broken-docs\\collections\\ansible_collections\\testns\\testcol\\plugins\\cache\\notjsonfile.py'
Syntax error in file .\potentialFunctionDatabaseProjects\potentialFunctionDatabaseProjects\ansible\test\integration\targets\ansible-doc\broken-docs\collections\ansible_collections\testns\testcol\plugins\inventory\statichost.py: [Errno 2] No such file or directory: '.\\potentialFunctionDatabaseProjects\\potentialFunctionDatabaseProjects\\ansible\\test\\integration\\targets\\ansible-doc\\broken-docs\\collections\\ansible_collections\\testns\\testcol\\plugins\\inventory\\statichost.py'
Syntax error in file .\potenti

KeyboardInterrupt: 

In [None]:
for i in db.function_list:
    print(i["params"])

In [None]:
print_ast(ast.parse('''
def f(d,a,b):   
    return a + b + d             
f(6,a=1,b=2)'''))

In [None]:
ast.Expr(value=ast.Call(func=ast.Name(id='name', ctx=ast.Load()), args=[], keywords=[ast.keyword(arg='a', value=ast.Constant(value=1))]))

In [None]:
print_code(alg_tree)
PythonMutator().inject_functions(alg_tree, db)
print("==POST MUTATION==")
print_code(alg_tree)
