In [1]:
import json
import os
import sys
from itertools import islice
# Raise the interpreter's recursion limit
# (presumably needed for deeply nested ASTs -- TODO confirm).
sys.setrecursionlimit(10000)

# Working directory at the time this cell runs; absolute roots and output
# file paths in later cells are built from it.
cwd = os.getcwd()

# Language label -> name of that language's sub-folder under database/.
language_name = {
    'C': 'c',
    'C++': 'cpp',
    'Java': 'java',
    'Python': 'python',
    'Go': 'golang',
    'C#': 'csharp',
    'PHP': 'php',
}

def gen_files(root_folder):
    """Lazily yield the normalized path of every file below ``root_folder``.

    The tree is traversed with ``os.walk``; each file name is joined to the
    directory it was found in and passed through ``os.path.normpath``.
    """
    for dirpath, _subdirs, filenames in os.walk(root_folder):
        for filename in filenames:
            yield os.path.normpath(os.path.join(dirpath, filename))

class fun_extracter():
    """Walk a project folder and yield the functions a processor extracts.

    ``processor`` must provide ``file_check(path)``, ``file_to_code(path)``
    and ``extract_functions(code)``; the latter returns a ``(nodes, pos)``
    pair where ``pos`` is either ``None`` or indexable in step with ``nodes``.
    """

    def __init__(self, processor):
        self.processor = processor

    def proj_to_funs(self, proj_folder):
        """Yield ``(node, position)`` for every function under ``proj_folder``.

        Files rejected by ``processor.file_check`` are skipped, as are files
        for which ``extract_functions`` reports ``None`` positions.
        """
        self.file_gun = gen_files(proj_folder)
        for path in self.file_gun:
            if not self.processor.file_check(path):
                continue
            source = self.processor.file_to_code(path)
            nodes, pos = self.processor.extract_functions(source)
            if pos is None:
                continue
            for idx, node in enumerate(nodes):
                yield node, pos[idx]
                
                
class node_statistic():
    """Frequency counter for the tokens that ``walker`` produces from an AST.

    ``walker`` is called with an AST and must return either a single string
    or an iterable of strings (for example a preprocessor's
    ``trans_to_sequences``). ``info`` maps each token to the number of times
    it has been seen; ``threshold`` is the minimum count that ``sort_info``
    keeps.
    """

    def __init__(self, walker, threshold=5):
        self.walker = walker
        self.threshold = threshold
        self.info = {}

    def add(self, token):
        """Count one token.

        Empty tokens and tokens starting with a single or double quote are
        all counted under one placeholder key made of two double-quote
        characters.
        """
        if len(token) == 0 or token[0] in ("'", "\""):
            token = "\"\""
        self.info[token] = self.info.get(token, 0) + 1

    def add_info(self, node_token):
        """Count ``node_token`` itself if it is a string, else each of its items."""
        tokens = [node_token] if isinstance(node_token, str) else node_token
        for tok in tokens:
            self.add(tok)

    def walk(self, ast):
        """Run the walker on ``ast`` and count every token it returns."""
        self.add_info(self.walker(ast))

    def sort_info(self):
        """Drop tokens counted fewer than ``threshold`` times, then order
        ``info`` by descending count (ties keep their insertion order)."""
        rare = [tok for tok, count in self.info.items() if count < self.threshold]
        for tok in rare:
            self.info.pop(tok)
        ranked = sorted(self.info.items(), key=lambda pair: pair[1], reverse=True)
        self.info = dict(ranked)
            
            
    
        

In [None]:
from Preprocessor import Preprocessor_c

# Token-frequency statistics over every C function found under database/c/.
lang = "C"
root = "database/" + language_name[lang] + "/"
abs_root = os.path.normpath(os.path.join(cwd, root))

# The processor supplies file filtering, parsing and the AST-to-token walker.
# NOTE(review): the meaning of the [None] constructor argument is not visible here.
f = fun_extracter(Preprocessor_c([None]))
node_stat = node_statistic(f.processor.trans_to_sequences)

for n, p in f.proj_to_funs(root):
    node_stat.walk(n)
# Drop tokens below the threshold and order the rest by descending count.
node_stat.sort_info()

# The handle gets its own name so the extractor bound to `f` is not replaced
# by a closed file object once this cell has run.
stat_path = os.path.normpath(os.path.join(cwd, 'node_stat_c.json'))
with open(stat_path, 'w') as stat_file:
    json.dump(node_stat.info, stat_file, indent=4, separators=(',', ': '))

node_stat.info

In [3]:
from Preprocessor import Preprocessor_python

# Token-frequency statistics over every Python function found under database/python/.
lang = "Python"
root = "database/" + language_name[lang] + "/"
abs_root = os.path.normpath(os.path.join(cwd, root))

# The processor supplies file filtering, parsing and the AST-to-token walker.
# NOTE(review): the meaning of the [None] constructor argument is not visible here.
f = fun_extracter(Preprocessor_python([None]))
node_stat = node_statistic(f.processor.trans_to_sequences)

for n, p in f.proj_to_funs(root):
    node_stat.walk(n)
# Drop tokens below the threshold and order the rest by descending count.
node_stat.sort_info()

# The handle gets its own name so the extractor bound to `f` is not replaced
# by a closed file object once this cell has run.
stat_path = os.path.normpath(os.path.join(cwd, 'node_stat_python.json'))
with open(stat_path, 'w') as stat_file:
    json.dump(node_stat.info, stat_file, indent=4, separators=(',', ': '))

node_stat.info

{'Load': 30855948,
 'Name': 23334088,
 'Constant': 10242408,
 'Attribute': 9591567,
 'Call': 7643941,
 'Store': 4835287,
 'self': 4578809,
 'Assign': 3917486,
 'arg': 2849686,
 'Expr': 2364127,
 'keyword': 1848255,
 'BinOp': 1782205,
 'arguments': 1398426,
 'Subscript': 1387729,
 'FunctionDef': 1339023,
 'Index': 1308377,
 'If': 1104437,
 'Compare': 1043479,
 'Return': 861107,
 '1': 722142,
 'Tuple': 709843,
 'List': 667566,
 'Add': 578102,
 'Mult': 576444,
 'kwargs': 566716,
 'Dict': 504578,
 'Eq': 438823,
 'x': 437042,
 '2': 406668,
 'UnaryOp': 406374,
 'str': 325384,
 'url': 313297,
 'Assert': 309707,
 'S': 307218,
 'name': 303568,
 'response': 301974,
 'True': 298506,
 'get': 275325,
 '__init__': 244279,
 'For': 225629,
 'Pow': 222558,
 'data': 221741,
 'Not': 207706,
 'BoolOp': 206039,
 'cls': 203066,
 'assertEqual': 200002,
 'USub': 194271,
 'a': 193863,
 '_serialize': 189047,
 'np': 188275,
 '3': 183822,
 'value': 182170,
 'i': 176591,
 'b': 174900,
 'Raise': 172018,
 'Div': 170

In [3]:
from Preprocessor import Preprocessor_java

# Token-frequency statistics over every Java function found under database/java/.
lang = "Java"
root = "database/" + language_name[lang] + "/"
# abs_root is computed but not used in this cell; the walk below uses the
# cwd-relative `root`.
abs_root = os.path.normpath(os.path.join(cwd, root))

f = fun_extracter(Preprocessor_java([None]))
# Each extracted function is turned into tokens by the processor's trans_to_sequences.
node_stat = node_statistic(f.processor.trans_to_sequences)

for n, p in f.proj_to_funs(root):
    node_stat.walk(n)
# Discard tokens seen fewer than the default threshold (5) times, then sort
# the remainder by descending count.
node_stat.sort_info()

In [8]:
import json

# Write the current token statistics to node_stat_java.json under `cwd`.
# The handle is named `stat_file` rather than `f` so the extractor bound to
# `f` is not replaced by a closed file object.
with open(os.path.normpath(os.path.join(cwd, 'node_stat_java.json')), 'w') as stat_file:
    json.dump(node_stat.info, stat_file, indent=4, separators=(',', ': '))

In [4]:
# Display the token -> count table currently held by node_stat.
node_stat.info

{'MethodInvocation': 17288460,
 'MemberReference': 16563433,
 'ReferenceType': 14197134,
 'Literal': 10327876,
 'StatementExpression': 8283707,
 '""': 5026509,
 'FormalParameter': 4968128,
 'Modifier': 4651638,
 'MethodDeclaration': 3936800,
 'BinaryOperation': 3653271,
 'VariableDeclarator': 3544995,
 'public': 3370188,
 'LocalVariableDeclaration': 3185658,
 'BasicType': 3092864,
 'Annotation': 2758406,
 'ReturnStatement': 2452349,
 'BlockStatement': 2240054,
 'End': 2153114,
 'ClassCreator': 2082952,
 'Assignment': 2010972,
 'String': 1952514,
 '=': 1919528,
 'TypeArgument': 1912770,
 'IfStatement': 1710226,
 'int': 1487655,
 'null': 1213890,
 '+': 1212371,
 'This': 1202719,
 'Override': 1148200,
 '0': 916030,
 'Cast': 812231,
 'final': 675436,
 '1': 655322,
 'i': 620048,
 '==': 597321,
 'long': 595200,
 'ClassReference': 557708,
 'static': 553647,
 'boolean': 520545,
 'get': 514938,
 '!=': 498120,
 'assertEquals': 476884,
 'true': 429857,
 'Test': 426240,
 'add': 411853,
 'false': 4

In [30]:
from Preprocessor import *

# Preview: print the position and name of the first functions extracted from
# the 9GAG Java project.
# NOTE(review): the wildcard import is kept because other cells may rely on
# names it provides; prefer an explicit `from Preprocessor import Preprocessor_java`
# once that is confirmed.
MAX_PREVIEW = 100

# Build the Java root here instead of reusing the global `root`, which
# whichever statistics cell ran last may have pointed at another language.
java_root = "database/" + language_name["Java"] + "/"

f = fun_extracter(Preprocessor_java([None]))
get_fun_name_function = f.processor.get_function_name

# islice stops after MAX_PREVIEW items without pulling one more function
# from the generator than is printed.
for n, p in islice(f.proj_to_funs(java_root + '9GAG/'), MAX_PREVIEW):
    print(p, get_fun_name_function(n))

database\java\9GAG\app\src\main\java\me\storm\ninegag\App.java
(19, 23) onCreate
(25, 27) getContext
(30, 38) initImageLoader
database\java\9GAG\app\src\main\java\me\storm\ninegag\api\GagApi.java
database\java\9GAG\app\src\main\java\me\storm\ninegag\dao\BaseDataHelper.java
(15, 17) BaseDataHelper
(19, 21) getContext
(23, 23) getContentUri
(25, 27) notifyChange
(29, 29) query
(35, 35) query
(41, 43) insert
(45, 47) bulkInsert
(49, 51) update
(53, 55) delete
(57, 57) getList
(63, 65) getCursorLoader
(67, 67) getCursorLoader
database\java\9GAG\app\src\main\java\me\storm\ninegag\dao\DataProvider.java
(49, 54) getDBHelper
(57, 59) onCreate
(62, 80) query
(85, 92) getType
(95, 116) insert
(119, 134) delete
(138, 153) update
(156, 166) matchTable
database\java\9GAG\app\src\main\java\me\storm\ninegag\dao\DBHelper.java
(17, 19) DBHelper
(22, 24) onCreate
(27, 28) onUpgrade
database\java\9GAG\app\src\main\java\me\storm\ninegag\dao\FeedsDataHelper.java
(25, 28) FeedsDataHelper
(85, 86) FeedsDBInf