In [1]:
import os
import logging
import datetime

import maven as mvn
import spotbugs as sb


logging.basicConfig(level=logging.INFO)

currentDT = datetime.datetime.now()
print ("Started at :: {}".format(str(currentDT)))

def get_reused_tops(spotbugs_xml, module_lst):
    class_dict = {}
    for m in module_lst:
        class_dict[m.get_m2_path()] = m.get_class_list()
    
    all_vs = sb.collect_vulnerabilities(spotbugs_xml, class_dict)
    count_vs = {}
    
    for k, vs in all_vs.items():
        flat_vs = [b for c in vs for r in c for b in r]
        for v in flat_vs:
            count_vs[k] = count_vs.get(k,{})
            count_vs[k][v['@type']] = count_vs[k].get(v['@type'], 0) + 1
        
    return count_vs


def update_reused_tops(d1, d2):
    for d,bc in d2.items():
        for b,c in bc.items():
            d1[d] = d1.get(d,{}) 
            d1[d][b] = d1[d].get(b,0) + c
    
    return d1
    
    

def get_native_tops(spotbugs_xml, module_lst):
    project_classes = [c for m in module_lst for c in m.get_class_list()]
    
    vs = sb.collect_vulnerabilities(spotbugs_xml, {'n': project_classes})
    count_vs = {}
    
    flat_vs = [b for c in vs['n'] for r in c for b in r]
    for v in flat_vs:
        count_vs[v['@type']] = count_vs.get(v['@type'], 0) + 1
        
    return sorted(count_vs.items(), key=lambda x: x[1], reverse=True)


def get_artifacts(file_project_trees):
    trees = mvn.get_compiled_modules(file_project_trees)
    proj_name = os.path.basename(os.path.splitext(file_project_trees)[0])
    
    if not trees:
        logging.warning(f'No modules to analyze: {file_project_trees}.')
        return None
    
    modules = [m.artifact for m in trees]
    dep_modules = [m.artifact for t in trees for m in t.deps if m.artifact not in modules]
    dep_modules = list(set(dep_modules)) # remove duplicates
    
    return (proj_name, modules, dep_modules)


            
path_to_data = os.path.abspath('../data')
projects_tress = [f for f in os.listdir(path_to_data) if f.endswith('.trees')]

ntops_per_project = []
rtops_per_project = {}

for f in projects_tress:
    trees_filepath = path_to_data + os.path.sep + f
    spotbugs_xml = f'{os.path.splitext(trees_filepath)[0]}.xml'
    
    proj_arts = get_artifacts(trees_filepath)
    
    ntops = get_native_tops(spotbugs_xml, proj_arts[1])
    ntops_per_project.append(ntops)
    
    rtops = get_reused_tops(spotbugs_xml, proj_arts[2])
    rtops_per_project = update_reused_tops(rtops_per_project, rtops)
    

ntop_dict = {}
for tp in ntops_per_project:
    for idx_t in range(5):
        if idx_t < len(tp):
            ntop_dict[tp[idx_t][0]] = ntop_dict.get(tp[idx_t][0], 0) + 1

rtop_dict = {}
for d,tp in rtops_per_project.items():
    tp = list(tp.items())
    for idx_t in range(5):
        if idx_t < len(tp):
            rtop_dict[tp[idx_t][0]] = rtop_dict.get(tp[idx_t][0], 0) + 1



print('Top 5 vulnerabilities in native code')
overall_top = sorted(ntop_dict.items(), key=lambda x: x[1], reverse=True)
print(overall_top[:6])

print('Top 5 vulnerabilities in reused code')
overall_top = sorted(rtop_dict.items(), key=lambda x: x[1], reverse=True)
print(overall_top[:6])
    

currentDT = datetime.datetime.now()
print ("Finished at :: {}".format(str(currentDT)))

Started at :: 2019-02-15 22:55:11.034625
Top 5 vulnerabilities in native code
[('EI_EXPOSE_REP', 159), ('EI_EXPOSE_REP2', 154), ('MS_SHOULD_BE_FINAL', 120), ('MS_PKGPROTECT', 109), ('DP_DO_INSIDE_DO_PRIVILEGED', 58), ('DP_CREATE_CLASSLOADER_INSIDE_DO_PRIVILEGED', 27)]
Top 5 vulnerabilities in reused code
[('EI_EXPOSE_REP2', 955), ('EI_EXPOSE_REP', 932), ('MS_SHOULD_BE_FINAL', 542), ('MS_PKGPROTECT', 455), ('DP_DO_INSIDE_DO_PRIVILEGED', 256), ('MS_MUTABLE_COLLECTION_PKGPROTECT', 190)]
Finished at :: 2019-02-15 22:56:58.444000
