In [14]:
#### Sample code using String Preprocessing

method = "df.to_csv('a.txt', sep='\t', use=file)"
s = method.find('.')
t = method.find('(')
u = method.find(')')
api = method[:s]
method_name = method[s+1:t]
paramsStr = method[t+1:u]
params = paramsStr.split(',')

for par in params:
    print(par)

'a.txt'
 sep='	'
 use=file


#### Using AST to normalize the code

In [1]:
import ast

In [None]:
# Create a list of nodes which can have blocks
# Refer https://docs.python.org/2.4/lib/module-compiler.ast.html
# While,TryFinally, TryExcept, For

In [35]:

from collections import deque

# Class to extract method as well as attribute calls. Each token after '.' is called attribute 
# be it function call or anything else

class AttributeVisitor(ast.NodeVisitor):
    def __init__(self):
        self._name = deque()
        self._pos = -1 
    
    @property
    def name(self):
        return '.'.join(self._name)
    
    @property
    def lineno(self):
        return self._pos
    
    @name.deleter
    def name(self):
        self._name.clear()
    
    def visit_Name(self, node):
        self._pos = node.lineno # line number
        self._name.appendleft(node.id)
    
    def visit_Attribute(self, node):
        try:
            self._pos = node.lineno # line number
            self._name.appendleft(node.attr)
            self._name.appendleft(node.value.id)
        except AttributeError:
            self.generic_visit(node)
            
def get_all_calls(tree):
    all_calls = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Attribute):
            callvisitor = AttributeVisitor()
            callvisitor.visit(node)
            all_calls.append((callvisitor.name, callvisitor.lineno))
    return all_calls

# Visitin method calls only
class FunctionCallVisitor(ast.NodeVisitor):
    def __init__(self):
        self._name = deque()
        self._pos = -1 
    
    @property
    def name(self):
        return '.'.join(self._name)
    
    @property
    def lineno(self):
        return self._pos
    
    @name.deleter
    def name(self):
        self._name.clear()
    
    def visit_Name(self, node):
        self._pos = node.lineno # line number
        self._name.appendleft(node.id)
    
    def visit_Attribute(self, node):
        try:
            self._pos = node.lineno # line number
            self._name.appendleft(node.attr)
            self._name.appendleft(node.value.id)
        except AttributeError:
            self.generic_visit(node)
            
def get_func_calls(tree):
    func_calls = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Module):
            body = node.body
            for items in body:
                callvisitor = FunctionCallVisitor()
                callvisitor.visit(node)
            func_calls.append((callvisitor.name, callvisitor.lineno))
    return func_calls

In [36]:
code = 'frame = pd.DataFrame()\nlist = []\ndf = pd.readcsv(file,indexcol=None, header=0)'
tree = ast.parse(code)
all_calls = get_func_calls(tree)
print(all_calls)

#for node in ast.walk(tree):
 #   if isinstance(node, ast.Module):
  #      print('body node')
   #     ast.Module.node
    #else:
     #   print('No module node')


[('file.pd.readcsv.df.list.pd.DataFrame.frame', 3)]
