## Grepping the functions that enclose each vulnerability reported in a file.

In [8]:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import lizard
import subprocess as sub
from pylibsrcml import srcml
import os
import xml.etree.ElementTree as et 
import sys
import csv
# from lxml import etree
import pandas as pd
import subprocess as sub
import sys

# Add the parent directory to the module search path so that modules living
# one level above this notebook can be imported from here.
sys.path.append('../')

# Fetching the functions that contain a given line context/statement.

### Parsing CppCheck output:

In [15]:
# (Optional) can be removed for cleaning the code
def xml2df_using_pandas(xml):
    """Convert an XML report generated by the CppCheck tool to a dataframe.

    The report is read twice with ``pd.read_xml``: once selecting the
    ``<error>`` elements and once selecting their nested ``<location>``
    elements. The location columns are then copied onto the error table.

    :param xml: the report, passed unchanged to ``pd.read_xml``.
    :return: the error dataframe with the 'file', 'line', 'column' and 'info'
        columns taken from the locations, the 'file0' column dropped,
        duplicate rows removed and the index reset.
    :raises AssertionError: if the 'file0' column of the errors does not
        equal the 'file' column of the locations.
    """
    # One frame per xpath: nested <location> rows and top-level <error> rows.
    df_loc = pd.read_xml(xml, encoding='utf-8', xpath='./errors/error/location')
    df_err = pd.read_xml(xml, encoding='utf-8', xpath='./errors/error')

    # Sanity check before merging the two frames.
    # NOTE(review): the frames are combined row by row, which presumably
    # assumes exactly one <location> per <error> -- confirm.
    assert df_err['file0'].equals(df_loc['file']), 'Error! dataframe and nested location dataframe are not equals.'
    # add location metrics to the table
    # NOTE(review): assumes df_loc has exactly these four columns -- confirm.
    df_err[['file', 'line', 'column', 'info']] = df_loc
    df_err = df_err.drop('file0', axis=1).drop_duplicates().reset_index(drop=True)
    return df_err

def cppcheck_flaws(file_or_dir, xmlfile=None):
    r"""Run the CppCheck tool on a file or directory and save its XML report.

    The ``--xml`` report is taken from the tool's stderr and written to
    ``xmlfile``; whatever the tool prints on stdout is captured and returned.
    The command is executed without a shell, so ``file_or_dir`` is passed to
    cppcheck as one literal argument.

    Example commands:
        !cppcheck --template=gcc ../data/projects/contiki-2.4/apps/ 2> err.txt
        !cppcheck --template="{file}; {line}; {severity}; {message}; {code}"
        --template-location=" {file};{line}; {info};{code}\n" ../data/projects/contiki-2.4/apps/ 2> err.txt

    Args:
        file_or_dir: path of the source file or directory to scan.
        xmlfile: path of the XML report to write. Defaults to 'output.xml'.
            The file is overwritten on every call so that it always holds
            exactly one XML document (appending a second report would make
            the file unparseable).

    Returns:
        bytes: the captured stdout of the cppcheck process.

    Raises:
        FileNotFoundError: if the ``cppcheck`` executable cannot be found.
    """
    report_path = 'output.xml' if xmlfile is None else xmlfile
    with open(report_path, 'wb') as report:
        # sub.run waits for the process to finish, so the report is complete
        # (and the file closed) by the time this function returns.
        process = sub.run(
            ['cppcheck', file_or_dir, '--xml'],
            stdout=sub.PIPE,
            stderr=report,
        )
    return process.stdout


# Directory that is scanned with cppcheck.
chk_dir = '../data/projects/contiki-2.4/apps/'
# NOTE: cppcheck_flaws returns the tool's captured stdout; the XML report
# itself is written to 'output.xml', which is what gets parsed later on.
xml = cppcheck_flaws(chk_dir)
# xtree = et.parse("students.xml")
# xroot = xtree.getroot()

In [18]:
def fetch_location(err):
    """Collect the location attributes of one CppCheck ``<error>`` element.

    Args:
        err: an ElementTree element whose direct ``<location>`` children are
            inspected.

    Returns:
        dict: the keys 'file', 'line', 'column' and 'info', each mapped to the
        list of attribute values (strings) found on the ``<location>``
        children, in document order. A location that lacks one of the
        attributes contributes nothing to that list, so the lists can differ
        in length.
    """
    dt_loc = {'file': [], 'line': [], 'column': [], 'info': []}
    for loc in err.findall('location'):
        for key, val in loc.attrib.items():
            # Ignore attributes outside the four tracked keys (e.g. `file0`)
            # instead of failing with a KeyError.
            if key in dt_loc:
                dt_loc[key].append(val)
    return dt_loc
                        
def xml2df(file):
    """Convert a CppCheck XML report into a dataframe with one row per error.

    Every ``<error>`` found under an ``<errors>`` element becomes one row
    holding the error's attributes plus the 'line', 'column' and 'info' lists
    returned by ``fetch_location``. The per-location 'file' list is dropped
    and the error's 'file0' attribute is exposed as the 'file' column instead.

    Args:
        file: path of the XML report, or the XML document itself as a string
            (recognised by its leading '<' when no such file exists).

    Returns:
        pandas.DataFrame: one row per error; an empty dataframe when the
        report contains no errors.

    Raises:
        OSError: if ``file`` is a path that cannot be opened.
        xml.etree.ElementTree.ParseError: if the XML is not well formed.
    """
    is_inline_xml = (
        isinstance(file, str)
        and not os.path.isfile(file)
        and file.lstrip().startswith('<')
    )
    if is_inline_xml:
        root = et.fromstring(file)
    else:
        # et.parse opens and closes the file itself and honours the encoding
        # declared in the XML prolog.
        root = et.parse(file).getroot()

    rows = []
    for errors in root.findall('.//errors'):
        for err in errors.findall('error'):
            # Copy the attributes so the parsed tree is left untouched.
            record = dict(err.attrib)
            record.update(fetch_location(err))
            rows.append(record)

    # Build the frame once instead of concatenating inside the loop;
    # errors='ignore' keeps the no-error case (no columns at all) working.
    df = pd.DataFrame(rows).drop(columns=['file'], errors='ignore')
    return df.rename(columns={'file0': 'file'})

# Parse the cppcheck report stored in 'output.xml' into a dataframe and
# persist it as CSV.
df_flaw = xml2df('output.xml')
df_flaw.to_csv('contiki24_cppcheck.csv')

In [24]:
from src.zip2flaw import file2metrics

def project_flaws(df):
    """Collect the flaw entries of a whole project, one scanned file at a time.

    For every distinct value of the 'file' column, the 'line' and 'cwe'
    values of that file are gathered and handed to ``file2metrics``; the
    per-file results are concatenated into a single dataframe.

    Args:
        df: dataframe of flaws with 'file', 'line' and 'cwe' columns. Each
            'line' cell is expected to be a non-empty sequence of line
            values (as produced by ``xml2df``) -- an empty one raises
            IndexError.

    Returns:
        pandas.DataFrame: the concatenated per-file results with the index
        reset and duplicate rows dropped; an empty dataframe when ``df`` has
        no rows or no 'file' column.
    """
    # Nothing can be attributed to a file: return an empty result instead of
    # failing on the missing 'file' column.
    if df.empty or 'file' not in df.columns:
        return pd.DataFrame()

    frames = []
    # groupby(sort=False) visits the files in order of first appearance, so
    # the output order is reproducible; rows whose file is NaN are skipped.
    for fname, flaws in df.groupby('file', sort=False):
        cwes = list(flaws['cwe'])
        # vul_statements = list(df_flaw[df_flaw.file==x]['cwe'])
        # A single location is passed as a scalar, several locations as the
        # pair made of the first two.
        lines = [x[0] if len(x) == 1 else [x[0], x[1]] for x in flaws['line']]
        frames.append(file2metrics(fname, lines, cwes, tool='cppcheck'))

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames).reset_index(drop=True).drop_duplicates()


# Build the per-function table for the scanned project; as the last
# expression of the cell, its value is what the notebook displays below.
project_flaws(df_flaw)


Unnamed: 0,cyclomatic_complexity,nloc,token_count,name,long_name,start_line,end_line,filename,code,fun_name,content,is_vul,cwe
0,1,8,34,quit,quit( void),105,112,../data/projects/contiki-2.4/apps/irc/irc.c,quit( void){\n ctk_window_close(&window);\n ...,quit,,False,benign
1,6,32,223,ircc_text_output,"ircc_text_output( struct ircc_state * s , char...",115,154,../data/projects/contiki-2.4/apps/irc/irc.c,"ircc_text_output( struct ircc_state * s , char...",ircc_text_output,"['\n', ' strncpy(&log[LOG_WIDTH * (LOG_HEIG...",True,unknown
2,8,32,315,parse_line,parse_line( void),157,190,../data/projects/contiki-2.4/apps/irc/irc.c,parse_line( void){\n int i;\n for(i = 0; i <...,parse_line,,False,benign
3,1,5,21,ircc_sent,ircc_sent( struct ircc_state * s),193,198,../data/projects/contiki-2.4/apps/irc/irc.c,ircc_sent( struct ircc_state * s){\n /* ctk_...,ircc_sent,,False,benign
4,20,77,525,PROCESS_THREAD,"PROCESS_THREAD( irc_process , ev , data)",200,295,../data/projects/contiki-2.4/apps/irc/irc.c,"PROCESS_THREAD( irc_process , ev , data){\n c...",PROCESS_THREAD,,False,benign
5,1,4,18,ircc_closed,ircc_closed( struct ircc_state * s),298,301,../data/projects/contiki-2.4/apps/irc/irc.c,ircc_closed( struct ircc_state * s){\n ircc_t...,ircc_closed,,False,benign
6,1,4,18,ircc_connected,ircc_connected( struct ircc_state * s),304,307,../data/projects/contiki-2.4/apps/irc/irc.c,ircc_connected( struct ircc_state * s){\n irc...,ircc_connected,,False,benign
7,5,21,112,PROCESS_THREAD,"PROCESS_THREAD( webserver_process , ev , data)",63,90,../data/projects/contiki-2.4/apps/webserver/we...,"PROCESS_THREAD( webserver_process , ev , data)...",PROCESS_THREAD,,False,benign
8,1,13,110,webserver_log_file,"webserver_log_file( uip_ipaddr_t * requester ,...",93,113,../data/projects/contiki-2.4/apps/webserver/we...,"webserver_log_file( uip_ipaddr_t * requester ,...",webserver_log_file,['\n'],True,unknown
9,1,6,52,webserver_log,webserver_log( char * msg),116,126,../data/projects/contiki-2.4/apps/webserver/we...,webserver_log( char * msg){\n /* Scroll previ...,webserver_log,['\n'],True,unknown
