In [1]:
from utils.refactor import *
from utils.preprocess import *
from copy import copy
import ast
import time
from tqdm import tqdm

# Demo

In [2]:
# Registry of refactoring operators. The short tag on the left is the label
# printed by the demo loop and the suffix of the per-operator output file;
# the value wraps one or more transformer objects in a GroupRefactor.
refactorers = dict(
    IFF=GroupRefactor(IfBranchFliper()),
    Loop=GroupRefactor(While2For(), For2While()),
    Iter=GroupRefactor(List2Range()),
    Comm=GroupRefactor(CommLaw()),
    Deco=GroupRefactor(FnDecorator('@timeing', '@measure_memory_usage')),
    Param=GroupRefactor(FnVarargAppender(), FnKwargAppender()),
    Renm=GroupRefactor(VarRenamer()),
    Styl=GroupRefactor(CamelSnakeExchange()),
)

In [3]:
# Sample input for the demo: a single generator function kept as source text,
# so that it can be parsed afresh for every refactoring operator.
code = """\
def encode_data(self, data, attributes):
    current_row = 0
    num_attributes = len(attributes)
    for row in data:
        new_data = []
        if len(row) > 0 and max(row) >= num_attributes:
            raise BadObject('Instance %d has %d attributes, expected %d' % (current_row, max(row) + 1, num_attributes))
        for col in sorted(row):
            v = row[col]
            if v is None or v == '' or v != v:
                s = '?'
            else:
                s = encode_string(str(v))
            new_data.append('%d %s' % (col, s))
        current_row += 1
        yield ' '.join(['{', ','.join(new_data), '}'])
"""
print(code)

def encode_data(self, data, attributes):
    current_row = 0
    num_attributes = len(attributes)
    for row in data:
        new_data = []
        if len(row) > 0 and max(row) >= num_attributes:
            raise BadObject('Instance %d has %d attributes, expected %d' % (current_row, max(row) + 1, num_attributes))
        for col in sorted(row):
            v = row[col]
            if v is None or v == '' or v != v:
                s = '?'
            else:
                s = encode_string(str(v))
            new_data.append('%d %s' % (col, s))
        current_row += 1
        yield ' '.join(['{', ','.join(new_data), '}'])



In [4]:
# Run every registered operator once against the sample and show the result.
for refactor_name, refactorer in refactorers.items():
    # Parse `code` anew on each iteration so every operator starts from a
    # fresh tree built from the original text.
    root, count = refactorer.refactor(ast.parse(code), rand=True, max_count=1)
    refactor_succ = count > 0  # a positive count is reported as success
    refactored_code = ast.unparse(root)

    print(" =" * 20, f"Opr: {refactor_name}\t Succ: {refactor_succ}")
    print(refactored_code)
    print()

 = = = = = = = = = = = = = = = = = = = = Opr: IFF	 Succ: True
def encode_data(self, data, attributes):
    current_row = 0
    num_attributes = len(attributes)
    for row in data:
        new_data = []
        if not (len(row) > 0 and max(row) >= num_attributes):
            pass
        else:
            raise BadObject('Instance %d has %d attributes, expected %d' % (current_row, max(row) + 1, num_attributes))
        for col in sorted(row):
            v = row[col]
            if v is None or v == '' or v != v:
                s = '?'
            else:
                s = encode_string(str(v))
            new_data.append('%d %s' % (col, s))
        current_row += 1
        yield ' '.join(['{', ','.join(new_data), '}'])

 = = = = = = = = = = = = = = = = = = = = Opr: Loop	 Succ: True
def encode_data(self, data, attributes):
    current_row = 0
    num_attributes = len(attributes)
    _iter0 = data
    while True:
        try:
            row = next(_iter0)
        except StopIteration

# Reproduction Script

In [None]:
# Refactor the-stack-dedup-python-preprocessed-2021-sample384
#
# For each operator in `refactorers`, run it over every sample in the input
# JSONL file and write one output JSONL file per operator. Each output record
# is the input record plus:
#   'original_content' - the untouched source text
#   'content'          - the source text after the refactoring pass
#   'refactored'       - True when the operator reported count > 0
# NOTE(review): rand=True / max_count=1 presumably mean "apply at most one
# randomly chosen rewrite per sample" - confirm against utils.refactor.

import json

with open("data/the-stack-dedup-python-preprocessed-2021-sample384.jsonl", 'r', encoding='utf-8') as f:
    # Stream the file line by line; skip blank lines (e.g. a trailing newline)
    # that would otherwise make json.loads raise.
    data = [json.loads(line) for line in f if line.strip()]

for refactor_name, refactorer in refactorers.items():
    refactored_data = []
    print(refactor_name)
    for d in tqdm(data):
        dd = copy(d)  # shallow copy suffices: only top-level keys are (re)assigned
        dd['original_content'] = dd['content']
        root = ast.parse(dd['content'])
        root, count = refactorer.refactor(root, rand=True, max_count=1)
        dd['content'] = ast.unparse(root)
        # Round-trip check: raises SyntaxError if the rewritten source does not
        # parse. A plain call (not `assert`) so it still runs under `python -O`.
        ast.parse(dd['content'])
        dd['refactored'] = count > 0
        refactored_data.append(dd)

    with open(f'data/the-stack-dedup-python-preprocessed-2021-sample384-refactor-{refactor_name}.jsonl', 'w', encoding='utf-8') as f:
        for d in refactored_data:
            f.write(json.dumps(d))
            f.write('\n')