### Normalize code using AST

In [1]:
import ast

### Extract details from function-call and for-loop nodes

In [103]:
def _exprToText(expr):
    """Render an expression node as short, source-like text.

    Names give their identifier, constants their ``repr`` (so strings keep
    their quotes and ``None``/``True``/``0`` read as written), dotted
    attribute chains are joined with ``.`` and starred values are prefixed
    with ``*``. Any other expression is rendered with ``ast.unparse`` when
    the running Python provides it, otherwise with ``ast.dump``.
    """
    if isinstance(expr, ast.Name):
        return expr.id
    if isinstance(expr, ast.Constant):
        return repr(expr.value)
    if isinstance(expr, ast.Attribute):
        return _exprToText(expr.value) + "." + expr.attr
    if isinstance(expr, ast.Starred):
        return "*" + _exprToText(expr.value)
    # ast.unparse is looked up dynamically so older interpreters that lack
    # it still get a (less readable) textual form instead of a crash.
    unparse = getattr(ast, "unparse", None)
    if unparse is not None:
        return unparse(expr)
    return ast.dump(expr)


def getCallDetails(node):
    """Summarize an ``ast.Call`` node as a plain dictionary.

    Args:
        node: The ``ast.Call`` node to describe.

    Returns:
        A dict with the keys:
          * ``'fname'``: the called attribute name (``readcsv`` in
            ``pd.readcsv(...)``), or the text of the callee for calls that
            are not attribute accesses (``print`` in ``print(x)``).
          * ``'caller'``: text of the object the attribute is looked up on
            (``pd``, ``os.path``), or ``None`` when the callee is not an
            attribute access.
          * ``'fixParams'``: list with the text of each positional argument.
          * ``'optParams'``: list of ``"name = value"`` strings, one per
            keyword argument; a ``**mapping`` argument is rendered as
            ``"**mapping"``.
          * ``'nodeType'``: always ``"function_call"``.
    """
    func = node.func
    if isinstance(func, ast.Attribute):
        fname = func.attr
        caller = _exprToText(func.value)
    else:
        # Plain calls such as ``print(x)`` have no receiver object.
        fname = _exprToText(func)
        caller = None

    fixParams = [_exprToText(arg) for arg in node.args]

    optParams = list()
    for keyword in node.keywords:
        valueText = _exprToText(keyword.value)
        if keyword.arg is None:
            # ``keyword.arg`` is None for ``**mapping`` style arguments.
            optParams.append("**" + valueText)
        else:
            optParams.append(keyword.arg + " = " + valueText)

    elementDict = dict()
    elementDict['fname'] = fname
    elementDict['caller'] = caller
    elementDict['fixParams'] = fixParams
    elementDict['optParams'] = optParams
    elementDict['nodeType'] = "function_call"
    return elementDict
    
def getForDetails(node):
    """Summarize an ``ast.For`` node as a plain dictionary.

    Args:
        node: The ``ast.For`` node to describe.

    Returns:
        A dict with the keys:
          * ``'collection_name'``: text of the expression being iterated
            (``allFiles`` in ``for file in allFiles``).
          * ``'collection_id'``: text of the loop target (``file`` in the
            same example).
          * ``'nodeType'``: always ``"for_loop"``.

        Bare names are reported by identifier. Any other expression (a call
        such as ``range(n)``, a tuple target such as ``k, v``) is rendered
        with ``ast.unparse`` when available, otherwise with ``ast.dump``.
    """
    def describe(expr):
        # Only ast.Name carries ``.id``; other node kinds need a generic
        # rendering so loops over calls or tuple targets do not crash.
        if isinstance(expr, ast.Name):
            return expr.id
        unparse = getattr(ast, "unparse", None)
        if unparse is not None:
            return unparse(expr)
        return ast.dump(expr)

    elementDict = dict()
    elementDict['collection_name'] = describe(node.iter)
    elementDict['collection_id'] = describe(node.target)
    elementDict['nodeType'] = "for_loop"
    return elementDict

In [67]:
def getNormalizedStatments(tree):
    """Collect call and for-loop summaries for each top-level statement.

    Args:
        tree: A parsed ``ast.Module``. Any other node type yields an empty
            result.

    Returns:
        A dict mapping the line number of each top-level statement to a
        list of the dictionaries produced by ``getCallDetails`` and
        ``getForDetails`` for every ``ast.Call`` / ``ast.For`` node found
        anywhere inside that statement (in ``ast.walk`` order). Nested
        statements are therefore reported under the line of their enclosing
        top-level statement. Statements with no such nodes map to an empty
        list.
    """
    statements = dict()
    # A Module can only be the root of a tree, so a direct check replaces
    # walking the whole tree just to find it.
    if not isinstance(tree, ast.Module):
        return statements

    for statement in tree.body:
        # setdefault + append keeps details from several statements that
        # share one line (``a = f(); b = g()``) instead of overwriting them.
        lineNodes = statements.setdefault(statement.lineno, list())
        for vnode in ast.walk(statement):
            if isinstance(vnode, ast.Call):
                lineNodes.append(getCallDetails(vnode))
            if isinstance(vnode, ast.For):
                lineNodes.append(getForDetails(vnode))
    return statements

In [104]:
# Sample snippet to normalize, written one source line per literal.
# It is only parsed below, never executed.
code = (
    "frame = pd.DataFrame()\n"
    "list = []\n"
    "for file in allFiles:\n"
    "\tdf = pd.readcsv(file,indexcol=abc, header=0)\n"
    "list.append(df)\n"
    "frame = pd.concat(list_)"
)
tree = ast.parse(code)

# Bare expression so the notebook displays the resulting dictionary.
getNormalizedStatments(tree)

{1: [{'caller': 'pd',
   'fixParams': [],
   'fname': 'DataFrame',
   'nodeType': 'function_call',
   'optParams': []}],
 2: [],
 3: [{'collection_id': 'file',
   'collection_name': 'allFiles',
   'nodeType': 'for_loop'},
  {'caller': 'pd',
   'fixParams': ['file'],
   'fname': 'readcsv',
   'nodeType': 'function_call',
   'optParams': ['indexcol = abc', 'header = 0']}],
 5: [{'caller': 'list',
   'fixParams': ['df'],
   'fname': 'append',
   'nodeType': 'function_call',
   'optParams': []}],
 6: [{'caller': 'pd',
   'fixParams': ['list_'],
   'fname': 'concat',
   'nodeType': 'function_call',
   'optParams': []}]}