In [2]:
import ast
import astunparse

In [3]:

from pyrsistent import v


class nodeMutate(ast.NodeTransformer):

    def visit_Num(self, node):
        return ast.copy_location(ast.Constant(n=node.n+1), node)
    
    def visit_Constant(self, node):
        return ast.copy_location(ast.Constant(n=node.n+1), node)

    def visit_Str(self, node):
        return ast.copy_location(ast.Constant(s=node.s+"1"), node)

    def visit_Name(self, node):
        return ast.copy_location(ast.Name(id=node.id+"1", ctx=node.ctx), node)

    def visit_List(self, node):
        return ast.copy_location(ast.List(elts=[self.visit(x) for x in node.elts], ctx=node.ctx), node)

    def visit_Tuple(self, node):
        return ast.copy_location(ast.Tuple(elts=[self.visit(x) for x in node.elts], ctx=node.ctx), node)

    def visit_Set(self, node):
        return ast.copy_location(ast.Set(elts=[self.visit(x) for x in node.elts], ctx=node.ctx), node)

    def visit_Dict(self, node):
        return ast.copy_location(ast.Dict(keys=[self.visit(x) for x in node.keys], values=[self.visit(x) for x in node.values]), node)

    def visit_Attribute(self, node):
        return ast.copy_location(ast.Attribute(value=self.visit(node.value), attr=node.attr+"1", ctx=node.ctx), node)

    def visit_Subscript(self, node):
        return ast.copy_location(ast.Subscript(value=self.visit(node.value), slice=self.visit(node.slice), ctx=node.ctx), node)

    def visit_Index(self, node):
        return ast.copy_location(ast.Index(value=self.visit(node.value)), node)

    def visit_Slice(self, node):
        return ast.copy_location(ast.Slice(lower=self.visit(node.lower), upper=self.visit(node.upper), step=self.visit(node.step)), node)

    def visit_ExtSlice(self, node):
        return ast.copy_location(ast.ExtSlice(dims=[self.visit(x) for x in node.dims]), node)

    def visit_IfExp(self, node):
        return ast.copy_location(ast.IfExp(test=self.visit(node.test), body=self.visit(node.body), orelse=self.visit(node.orelse)), node)

    def visit_Compare(self, node):
        return ast.copy_location(ast.Compare(left=self.visit(node.left), ops=node.ops, comparators=[self.visit(x) for x in node.comparators]), node)

    def visit_Call(self, node):
        return ast.copy_location(ast.Call(func=self.visit(node.func), args=[self.visit(x) for x in node.args], keywords=[self.visit(x) for x in node.keywords]), node)
    def visit_keyword(self, node):
        return ast.copy_location(ast.keyword(arg=node.arg, value=self.visit(node.value)), node)
    def visit_Starred(self, node):
        return ast.copy_location(ast.Starred(value=self.visit(node.value), ctx=node.ctx), node)
    
    def visit_NameConstant(self, node):
        return ast.copy_location(ast.NameConstant(value=node.value), node)
    
    def visit_UnaryOp(self, node):
        return ast.copy_location(ast.UnaryOp(op=node.op, operand=self.visit(node.operand)), node)
    
    def visit_BinOp(self, node):
        return ast.copy_location(ast.BinOp(left=self.visit(node.left), op=node.op, right=self.visit(node.right)), node)
    
    def visit_BoolOp(self, node):
        return ast.copy_location(ast.BoolOp(op=node.op, values=[self.visit(x) for x in node.values]), node)
    
    def visit_If(self, node):
        return ast.copy_location(ast.If(test=self.visit(node.test), body=[self.visit(x) for x in node.body], orelse=[self.visit(x) for x in node.orelse]), node)
    
    def visit_For(self, node):
        return ast.copy_location(ast.For(target=self.visit(node.target), iter=self.visit(node.iter), body=[self.visit(x) for x in node.body], orelse=[self.visit(x) for x in node.orelse]), node)
    
    def visit_While(self, node):
        return ast.copy_location(ast.While(test=self.visit(node.test), body=[self.visit(x) for x in node.body], orelse=[self.visit(x) for x in node.orelse]), node)
    
    def visit_With(self, node):
        return ast.copy_location(ast.With(items=[self.visit(x) for x in node.items], body=[self.visit(x) for x in node.body]), node)
    
    def visit_withitem(self, node):
        return ast.copy_location(ast.withitem(context_expr=self.visit(node.context_expr), optional_vars=self.visit(node.optional_vars)), node)
    
    def visit_FunctionDef(self, node):
        return ast.copy_location(ast.FunctionDef(name=node.name+"1", args=self.visit(node.args), body=[self.visit(x) for x in node.body], decorator_list=[self.visit(x) for x in node.decorator_list], returns=self.visit(node.returns)), node)
    
    def visit_Lambda(self, node):
        return ast.copy_location(ast.Lambda(args=self.visit(node.args), body=self.visit(node.body)), node)
    def visit_arguments(self, node):
        return ast.copy_location(ast.arguments(posonlyargs=[self.visit(x) for x in node.posonlyargs], args=[self.visit(x) for x in node.args], vararg=self.visit(node.vararg), kwonlyargs=[self.visit(x) for x in node.kwonlyargs], kw_defaults=[self.visit(x) for x in node.kw_defaults], kwarg=self.visit(node.kwarg), defaults=[self.visit(x) for x in node.defaults]), node)
    def visit_arg(self, node):
        return ast.copy_location(ast.arg(arg=node.arg+"1", annotation=self.visit(node.annotation)), node)
    def visit_Return(self, node):
        return ast.copy_location(ast.Return(value=self.visit(node.value)), node)
    def visit_Delete(self, node):
        return ast.copy_location(ast.Delete(targets=[self.visit(x) for x in node.targets]), node)
    def visit_Assign(self, node):
        return ast.copy_location(ast.Assign(targets=[self.visit(x) for x in node.targets], value=self.visit(node.value)), node)
    def visit_AnnAssign(self, node):
        return ast.copy_location(ast.AnnAssign(target=self.visit(node.target), annotation=self.visit(node.annotation), value=self.visit(node.value), simple=node.simple), node)
    def visit_AugAssign(self, node):
        return ast.copy_location(ast.AugAssign(target=self.visit(node.target), op=node.op, value=self.visit(node.value)), node)
    def visit_Print(self, node):
        return ast.copy_location(ast.Print(dest=self.visit(node.dest), values=[self.visit(x) for x in node.values], nl=node.nl), node)
    def visit_Raise(self, node):
        return ast.copy_location(ast.Raise(exc=self.visit(node.exc), cause=self.visit(node.cause)), node)
    def visit_Assert(self, node):
        return ast.copy_location(ast.Assert(test=self.visit(node.test), msg=self.visit(node.msg)), node)
    def visit_Import(self, node):
        return ast.copy_location(ast.Import(names=[self.visit(x) for x in node.names]), node)
    def visit_ImportFrom(self, node):
        return ast.copy_location(ast.ImportFrom(module=node.module, names=[self.visit(x) for x in node.names], level=node.level), node)
    def visit_alias(self, node):
        return ast.copy_location(ast.alias(name=node.name, asname=node.asname), node)
    def visit_Exec(self, node):
        return ast.copy_location(ast.Exec(body=self.visit(node.body), globals=self.visit(node.globals), locals=self.visit(node.locals)), node)
    def visit_Global(self, node):
        return ast.copy_location(ast.Global(names=[self.visit(x) for x in node.names]), node)
    def visit_Nonlocal(self, node):
        return ast.copy_location(ast.Nonlocal(names=[self.visit(x) for x in node.names]), node)
    def visit_Pass(self, node):
        return ast.copy_location(ast.Pass(), node)
    def visit_Break(self, node):
        return ast.copy_location(ast.Break(), node)
    def visit_Continue(self, node):
        return ast.copy_location(ast.Continue(), node)
    def visit_Try(self, node):
        return ast.copy_location(ast.Try(body=[self.visit(x) for x in node.body], handlers=[self.visit(x) for x in node.handlers], orelse=[self.visit(x) for x in node.orelse], finalbody=[self.visit(x) for x in node.finalbody]), node)
    def visit_ExceptHandler(self, node):
        return ast.copy_location(ast.ExceptHandler(type=self.visit(node.type), name=node.name, body=[self.visit(x) for x in node.body]), node)
    def visit_ClassDef(self, node):
        return ast.copy_location(ast.ClassDef(name=node.name+"1", bases=[self.visit(x) for x in node.bases], keywords=[self.visit(x) for x in node.keywords], body=[self.visit(x) for x in node.body], decorator_list=[self.visit(x) for x in node.decorator_list]), node)
    def visit_keyword(self, node):
        return ast.copy_location(ast.keyword(arg=node.arg, value=self.visit(node.value)), node)
    

    def visit_Module(self, node):
        return ast.copy_location(ast.Module(body=[self.visit(x) for x in node.body]), node)

try:
    code = ast.parse("a = 1\nb = 2\nc = a + 3") # this code has the following attributes: Num (1, 2, 3), Name (a, b), Assign, BinOp ( a+ 3)
    # mutator = nodeMutate()
    c = nodeMutate().generic_visit(code)
    print(astunparse.to_source(c))
    # print(astunparse.to_source(c))
    # mutator.visit(code)
    # code = mutator.visit(code)
    # print(astunparse.to_source(code))
    
    # for node in ast.walk(code):
    #     print(node)
except Exception as e:
    print(e)
    # print("Error in parsing")

jyjyrryh
data['a'] = 2
b = 3
c = a + 4


In [4]:
# from typing import List
# import itertools

# def findTypeLine(line):
#     tokensArrSplit = [',', '(', ')',":"] # may result in empty element in list
#     def flatten(lst):
#         return list(itertools.chain.from_iterable(flatten(item) if isinstance(item, list) else [item] for item in lst))

#     def splitMe(code: List | str, tokensArrSplit: List, index:int):
#         if (index == len(tokensArrSplit)): return code
#         listCodeTokens = []
#         if (type(code) is list):
#             for (i, c) in enumerate(code):
#                 if (c == ""): continue
#                 splitted = c.split(tokensArrSplit[index])
#                 # print(splitted)
#                 if (not splitted): continue
#                 listCodeTokens.append(splitted)
#             listCodeTokens  = flatten(listCodeTokens)
#             return splitMe(listCodeTokens, tokensArrSplit, index + 1)
#         else:
#             listCodeTokens = code.split(tokensArrSplit[index])
#             return splitMe(listCodeTokens, tokensArrSplit, index + 1)
    
#     testFunc = "def func(y , x , z): return 2 + 3 * 5"
#     print(splitMe(testFunc, tokensArrSplit, 0))

# findTypeLine("p")



['def func', 'y ', ' x ', ' z', '', ' return u']


In [17]:

# import random
# import string

# # Define a list of words
# words = ['apple', 'banana', 'orange', 'grape', 'kiwi', 'pineapple', 'strawberry', 'melon', 'peach', 'pear']

# # Generate 100 random words separated by spaces
# random_words = ' '.join(random.choices(words, k=1000))
# print(random_words)

grape banana orange peach pineapple orange kiwi strawberry pear apple kiwi apple pineapple strawberry pear grape pear pear strawberry grape banana melon grape melon pear peach pineapple grape kiwi banana grape melon melon kiwi melon peach pear strawberry apple melon strawberry strawberry melon grape apple grape banana pineapple peach strawberry grape pear pineapple pineapple pear apple melon apple banana pineapple melon pineapple grape banana pineapple pineapple banana pineapple pear grape banana pineapple apple peach melon orange orange pineapple pear pear strawberry grape melon orange apple grape grape grape grape strawberry kiwi apple pear melon apple apple peach strawberry banana melon melon peach strawberry grape strawberry pear pear pear melon kiwi pear pineapple strawberry orange pineapple kiwi grape pineapple orange melon peach orange pineapple apple peach banana pear strawberry banana banana peach kiwi melon orange banana banana pear pineapple apple grape strawberry pineapple 

In [54]:
# # test split timing
# def omarSplit(string: str, splitToken: str):
#     lst = []
#     ln = len(string)
#     i = 0
#     temp = ""
#     while (i < ln):
#         if (string[i] == splitToken):
#             lst.append(temp)
#             temp = ""
#         else:
#             temp += string[i]
#         i += 1
#     else:
#         lst.append(temp)

#     return lst
# import time

# exp = "a b c d e f g h i j k l m n o p q r s t u v w x y z"
# exp = random_words
# start = time.time()
# m = omarSplit(exp, " ")
# print(time.time() - start)

# start = time.time()
# m2 = exp.split(" ")
# print(time.time() - start)

# print(m == m2)

0.000514984130859375
0.0
True


In [56]:
cd = """
import random

# Generate a list of 10 random numbers between 1 and 100
random_numbers = [random.randint(1, 100) for _ in range(10 + 1)]

# Print the unsorted list
print("Unsorted numbers:", random_numbers)

# Sort the list
random_numbers.sort()

# Print the sorted list
print("Sorted numbers:", random_numbers)
"""

In [68]:
def segmentLine(line):
    segmentors = {' ', '(', ')', '[', ']', '{', '}', ':', ',', '='}
    i = 0
    ln = len(line)
    lst = []
    st = set()
    temp = ""
    while(i < ln):
        if (line[i] == " "): # I do not need spaces
            if (i - 1 != 0 and line[i - 1] != " "):
                lst.append(temp)
                st.add(temp)
                temp = ""
            i += 1
            continue
        elif (line[i] == "\n"): # break loop has to be added in the scope as we are considering only one line and remove the outer else, however, Ignore for now
            if (temp != ""):
                lst.append(temp)
                st.add(temp)
                temp = ""
        elif (line[i] == "\t"): # Ignore tabs
            i += 1
            continue
        elif (line[i] == "#"): # Ignore comments
            while (line[i] != "\n"):
                i += 1
        elif (line[i] in segmentors):
            if (temp != ""):
                lst.append(temp)
                st.add(temp)
                temp = ""
            lst.append(line[i])
            st.add(temp)
        elif (line[i] == "-"): # Check if it is a unary sub
            if (i + 1 < ln and line[i + 1].isdigit()):
                temp += line[i]
            else:
                if (temp != ""):
                    lst.append(temp) #add the previous accumulated
                    st.add(temp)
                lst.append(line[i]) # add the current
                st.add(line[i])
                temp = ""

        else:
            temp += line[i]
        i += 1
    else:
        if (temp != ""):
            lst.append(temp)
            st.add(temp)
    return lst, st


segmentLst, segmentSet = segmentLine(cd)

In [69]:
print(segmentSet)

{'', 'numbers', 'print', 'random', '1', 'random_numbers', 'range', 'for', '+', 'in', '"', '"Unsorted', '100', '"Sorted', '_', 'random_numbers.sort', 'random.randint', 'import', '10'}


In [70]:
joinedSegmentedLst = "".join(segmentLst)
print(joinedSegmentedLst)

importrandomrandom_numbers=[random.randint(1,100)for_inrange(10+1)]print("Unsortednumbers:",random_numbers)random_numbers.sort()print("Sortednumbers:",random_numbers)


In [77]:
def mutationsCanBeApplied(setTokens: set):
    """
    Iterate over the set of segmented parts in the faulty location and insert the mutations that can be applied
    Args: 
        setTokens: set of tokens in the faulty location
    Returns:
        list of mutations that can be applied
    """
    lstMutations = [] # list of mutations that can be applied
    if '+' in setTokens: lstMutations.append(('AR', 'ADD')) # the only mutations coupled with other binary operators that are encompassed in a list to accomodate the operation name
    # if '-' in setTokens: lstMutations.append('AR')
    # if '*' in setTokens: lstMutations.append('AR')
    # if '/' in setTokens: lstMutations.append('AR')
    # if '%' in setTokens: lstMutations.append('AR')
    # if '**' in setTokens: lstMutations.append('AR')
    # if '//' in setTokens: lstMutations.append('AR')
    # if '==' in setTokens: lstMutations.append('CR')
    # if '!=' in setTokens: lstMutations.append('CR')
    # if '<' in setTokens: lstMutations.append('CR')
    # if '>' in setTokens: lstMutations.append('CR')
    # if '<=' in setTokens: lstMutations.append('CR')
    # if '>=' in setTokens: lstMutations.append('CR')
    # if 'and' in setTokens: lstMutations.append('CR')
    # if 'or' in setTokens: lstMutations.append('CR')
    # if 'not' in setTokens: lstMutations.append('CR')
    # if 'is' in setTokens: lstMutations.append('CR')
    # if 'in' in setTokens: lstMutations.append('CR')
    # if 'not in' in setTokens: lstMutations.append('CR')
    # if 'is not' in setTokens: lstMutations.append('CR')
    # if '()' in setTokens: lstMutations.append('MR')
    # if '[]' in setTokens: lstMutations.append('MR')
    # if '{}' in setTokens: lstMutations.append('MR')
    weights = [1] * len(lstMutations) # the weights are all equal for now

    return lstMutations, weights

mutationsCanBeApplied(segmentSet)

([('AR', 'ADD')], [1])

In [5]:
s = astunparse.to_source(code)
print(s)
a  = (ast.parse(s))
print(ast.dump(a, indent=4))

data['a'] = 2
b = 3
c = a + 4
Module(
    body=[
        Assign(
            targets=[
                Subscript(
                    value=Name(id='data', ctx=Load()),
                    slice=Constant(value='a'),
                    ctx=Store())],
            value=Constant(value=2)),
        Assign(
            targets=[
                Name(id='b', ctx=Store())],
            value=Constant(value=3)),
        Assign(
            targets=[
                Name(id='c', ctx=Store())],
            value=BinOp(
                left=Name(id='a', ctx=Load()),
                op=Add(),
                right=Constant(value=4)))],
    type_ignores=[])


In [6]:
# every visitor class has its implementation of visitor and its visit_name;
# this way each class has its own implementation of the visitor and the visit_name

class BinOpMutate(ast.NodeTransformer):
    def __init__(self, target_node_lineno):
        self.target_node_lineno = target_node_lineno
        self.changedAnOperator = False

    def visit_BinOp(self, node):
        if node.lineno == self.target_node_lineno and not self.changedAnOperator:  # However if two operations are on the same line, it will change both
            self.changedAnOperator = True
            return ast.BinOp(left=self.visit(node.left), op=ast.Sub(), right=self.visit(node.right))
        else:
            # If it's not the target node, continue visiting other nodes without modifications
            return self.generic_visit(node)

code = ast.parse("a = 1\nb = 2\nc = a + 3;m = 5 + 4") # this code has the following attributes: Num (1, 2, 3), Name (a, b), Assign, BinOp ( a+ 3)

res = BinOpMutate(target_node_lineno=3).visit(code)
print(astunparse.to_source(res))

a = 1
b = 2
c = a - 3
m = 5 + 4


In [78]:
%load_ext autoreload
%autoreload 2

import mutationClasses
import ast
import astunparse

In [13]:
codeString = """
if len(S)<26:
    for i in m[::-1]:
        a = 1 + 2
        if i not in S:
            S.append(i)
            break
    print(''.join(S))
else:
    print(''.join(S))
"""
code = ast.parse(codeString)
codeStringSplitted = codeString.split("\n")
testParse = codeStringSplitted[3]
testAst = ast.parse(testParse.lstrip())
print(ast.dump(testAst, indent=4))

# print(code)
# binOpMutate = mutationClasses.BinOpMutate(target_node_lineno=3, code=code)
code =  mutationClasses.BinOpMutate(target_node_lineno=4, code=code).visitC()

# res =  mutationClasses.BinOpMutate(target_node_lineno=4).visitC(code)
print(astunparse.to_source(code))

Module(
    body=[
        Assign(
            targets=[
                Name(id='a', ctx=Store())],
            value=BinOp(
                left=Constant(value=1),
                op=Add(),
                right=Constant(value=2)))],
    type_ignores=[])

if len(S) < 26:
    for i in m[::-1]:
        a = 1 - 2
        if i not in S:
            S.append(i)
            break
    print(''.join(S))
else:
    print(''.join(S))
