# Text-Based Matching

## Functions

In [1]:
import pandas as pd
import numpy as np
import os
import sys
import csv
import re
import xml.etree.ElementTree as ET
import filecmp
from lxml import etree
from pathlib import Path
import shutil
import collections
from itertools import count, groupby
import ast
import warnings
from collections import Counter
from tqdm import tqdm
import math

In [2]:
def readXMLFile(filePath):
    """Parse the XML file at ``filePath`` with lxml and return its root element.

    The parser is created with ``recover=True`` and ``strip_cdata=False``;
    the file is opened in binary mode and closed before returning.
    """
    xml_parser = etree.XMLParser(recover=True, strip_cdata=False)
    with open(filePath, "rb") as handle:
        document = etree.parse(handle, parser=xml_parser)
    return document.getroot()

def getSummaryFiles(filesPath):
    """Walk ``filesPath`` recursively and list every file named ``summary-of-*``.

    Returns the matching paths (directory joined with file name) in the order
    ``os.walk`` yields them.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(filesPath)
        for name in names
        if name.startswith('summary-of-')
    ]

def find_index_starts_with(stacktraces, stoppedLines):
    """Return the index of the first entry that starts with any prefix in ``stoppedLines``.

    Returns -1 when no entry matches.
    """
    for position, entry in enumerate(stacktraces):
        for prefix in stoppedLines:
            if entry.startswith(prefix):
                return position
    return -1

def remove_elements_starts_with(stacktraces, removedLines):
    """Return a new list without the entries that start with any prefix in ``removedLines``."""
    kept = []
    for entry in stacktraces:
        for prefix in removedLines:
            if entry.startswith(prefix):
                break
        else:
            # No prefix matched, so the entry survives.
            kept.append(entry)
    return kept


def getmutantsFailures(xmlFile,testName,className):
    execludedLines = ['java.lang.invoke.LambdaForm','sun.reflect.GeneratedMethodAccessor','sun.reflect.GeneratedConstructorAccessor','com.sun.proxy.']
    stoppedLines = ['junit.framework.TestCase.runBare(','sun.reflect.NativeMethodAccessorImpl.invoke0(','org.junit.rules.ExternalResource$1.evaluate(']
    result = []
    for mutant in xmlFile.iter('mutant'):
        if (mutant.find('mutant_name').attrib['status'] == 'KILLED'):
            mutantException = mutant.find('mutant_exception').text

            lines = []
            for line in mutant.iter('line'):
                stackTrace = line.text.replace(" ", "").replace("\t", "").replace("\n", "")
                lines.append(stackTrace.strip())

            
            if(all(not any(item.startswith(prefix) for prefix in [testName,className]) for item in lines)):
                ind = find_index_starts_with(lines, stoppedLines)
                if (ind > 0):
                    lines = lines[:ind]
            
            # remove non-determinist lines
            lines = remove_elements_starts_with(lines, execludedLines)
            result.append(mutantException + '|#|'+'|'.join(lines) )
    return result

def getTestFailure(failure,testName,className):
    execludedLines = ['java.lang.invoke.LambdaForm','sun.reflect.GeneratedMethodAccessor','sun.reflect.GeneratedConstructorAccessor','com.sun.proxy.']
    stoppedLines = ['junit.framework.TestCase.runBare(','sun.reflect.NativeMethodAccessorImpl.invoke0(','org.junit.rules.ExternalResource$1.evaluate(']

    failureExc =  failure.find('test_exception').text
    lines = []
    for line in failure.iter('line'):
        stackTrace = line.text.replace(" ", "").replace("\t", "").replace("\n", "")
        lines.append(stackTrace.strip())
    
    if(all(not any(item.startswith(prefix) for prefix in [testName,className]) for item in lines)):
        ind = find_index_starts_with(lines, stoppedLines)
        if (ind > 0):
            lines = lines[:ind]
    
    # remove non-deterministic lines
    lines = remove_elements_starts_with(lines, execludedLines)
        
    return failureExc + '|#|'+'|'.join(lines) 

def most_frequent(List):
    """Return ``(item, count)`` for the most common item of ``List``.

    An empty input yields ``('NA', 0)``.
    """
    if len(List) == 0:
        return 'NA', 0
    (top_item, top_count), = Counter(List).most_common(1)
    return top_item, top_count

def getTotalMutants(data_df):
    """Return, per distinct ``Test`` value, the first ``TotalMutants`` value as an int.

    The result follows the order in which tests first appear in ``data_df``.
    """
    return [
        int(data_df[data_df['Test'] == test_name]['TotalMutants'].unique()[0])
        for test_name in data_df['Test'].unique()
    ]

def getStat(data_df):
    """Return ``(max, min, median)`` of the values in ``data_df``.

    The median is rounded to zero decimals. An empty input yields ``(0, 0, 0)``.

    The previous version passed a single pre-selected element to
    ``np.median``, which returned the upper-middle value for even-length
    inputs; the median is now computed over all values.
    """
    if len(data_df) == 0:
        return 0, 0, 0
    values = list(data_df)
    return max(values), min(values), round(np.median(values), 0)


def getChangedFile(filesDF,project,bugID):
    """Return the distinct ``fileName`` values for one project and bug id.

    ``bugID`` is converted with ``int`` before being compared to the
    ``bugId`` column.
    """
    same_project = filesDF['project'] == project
    same_bug = filesDF['bugId'] == int(bugID)
    return filesDF.loc[same_project & same_bug, 'fileName'].unique()


def latexFormat(data):
    """Print each row of ``data`` as one LaTeX table row.

    Cell handling, based on ``str(value)``:
      * ends with ``-%`` or ``-``: printed as an empty cell;
      * ends with ``%``: the text before the first ``%`` followed by an
        escaped percent sign;
      * ends with ``nan``: printed as an empty cell;
      * anything ``int()`` accepts and the ``,.0f`` format can render:
        printed with thousands separators and no decimals;
      * everything else: printed via ``str``.

    Rows whose index label is odd are wrapped cell by cell in a
    ``cellcolor{gray!6}`` command, so the index labels must support ``% 2``.
    """
    for i, r in data.iterrows():
        perProject = []
        for value in r:
            text = str(value)
            if text.endswith(('-%', '-')):
                perProject.append('')
            elif text.endswith('%'):
                perProject.append(text.split('%')[0] + '\\%')
            elif text.endswith('nan'):
                perProject.append('')
            else:
                perProject.append(value)

        for position, cell in enumerate(perProject):
            try:
                int(cell)
                perProject[position] = '{:,.0f}'.format(cell)
            except (ValueError, TypeError, OverflowError):
                # Not a finite number (text, None, inf, ...): keep the cell as is.
                pass

        if i % 2 == 1:
            print('&'.join(['\\cellcolor{gray!6}{' + str(cell) + '}' for cell in perProject]) + '\\\\')
        else:
            print('&'.join([str(cell) for cell in perProject]) + '\\\\')
            
def getSetOfFailures(testFailures):
    """Merge entries of ``testFailures`` that share the same failure text.

    Keys are expected to end in ``|<frequency>``. Entries with an identical
    value are collapsed into one whose key keeps the prefix of the first such
    key and carries the summed frequency; unique entries are copied unchanged.

    Results are emitted in order of first appearance, so the output order is
    reproducible across runs (iterating a ``set`` of strings is not), and the
    input is scanned once instead of once per distinct value.
    """
    keys_by_failure = {}
    for key, failure in testFailures.items():
        keys_by_failure.setdefault(failure, []).append(key)

    updatedFailures = {}
    for failure, keys in keys_by_failure.items():
        if len(keys) > 1:
            total = sum(int(key.rsplit('|', 1)[1]) for key in keys)
            merged_key = keys[0].rsplit('|', 1)[0] + '|' + str(total)
            updatedFailures[merged_key] = failure
        else:
            updatedFailures[keys[0]] = failure
    return updatedFailures


def appendSumRow(data,ignoreColumnsNames):
    """Return a copy of ``data`` with a totals row appended at the bottom.

    Args:
        data: the DataFrame to summarise.
        ignoreColumnsNames: columns left out of the totals; they are missing
            (NaN) in the appended row.

    Returns:
        A new DataFrame with a fresh 0..n index. Float columns that hold only
        whole numbers and no missing values are converted to ``int``.

    Uses ``pd.concat`` because ``DataFrame.append`` was removed in pandas 2.0.
    """
    sumRows = {c: data[c].sum() for c in data.columns if c not in ignoreColumnsNames}

    updated_data = pd.concat([data, pd.DataFrame([sumRows])], ignore_index=True)

    for col in updated_data.columns:
        column = updated_data[col]
        # Cast only when every value is present and integral; casting a column
        # that still contains NaN to int would raise.
        if (column.dtype == "float64"
                and column.notna().all()
                and column.apply(lambda x: x.is_integer()).all()):
            updated_data[col] = column.astype(int)

    return updated_data


## Inputs

In [3]:
# Notebook inputs.
# NOTE: `data` must be defined before this cell runs; the assignment below is
# left commented out as a template, so running the cell unchanged raises
# NameError on the getSummaryFiles(data) call.
# data = 'PATH/TO/22projects' # change this to summary-of files (22 projects)
# Every 'summary-of-*' file found below `data`.
logs = getSummaryFiles(data)
# Directory that receives the generated CSV files; created if missing.
output = 'Result'
Path(output).mkdir(parents=True, exist_ok=True)

# Maps a short project name to a longer identifier.
# NOTE(review): the values look like '<owner>-<repository>' names — confirm
# what they are matched against; this mapping is not read in the cells shown here.
projectNames = {"spring-boot":"spring-projects-spring-boot",
                "alluxio" : "Alluxio-alluxio",
                "http-request": "kevinsawicki-http-request",
                "hbase" : "apache-hbase",
                "ambari": "apache-ambari",
                "java-webSocket" : "tootallnate-java-websocket",
                "wildfly": "wildfly-wildfly",
                "okhttp":"square-okhttp",
                "hector" : "hector-client-hector",
                "wro4j": "wro4j-wro4j",
                "incubator-dubbo" : "apache-incubator-dubbo",
                "logback" : "qos-ch-logback",
                "activiti" : "activiti-activiti",
                "httpcore" : "apache-httpcore",
                "commons-exec" : "apache-commons-exec",
                "io-undertow" : "undertow-io-undertow",
                "orbit" : "orbit-orbit",
                "assertj-core" : "assertj-core",
                "achilles" : "doanduyhai-Achilles",
                "handlebars.java" : "handlebars.java",
                "elastic-job-lite" : "elasticjob-elastic-job-lite",
                "zxing" : "zxing-zxing",
                "ninja" : "ninja-ninja"}

## Part 1) Get the result per failure per test ... 

In [4]:
# Build one row per distinct flaky failure of every test and compare it with
# the failures of the killed mutants of the same test.
fullResult = []
repetitive = {}
for log in tqdm(logs):
    # Skip empty summary files.
    if os.path.getsize(log) == 0:
        continue

    logXML = readXMLFile(log)
    # basename() instead of splitting on '/', so paths built by os.path.join
    # are handled whatever separator the platform uses.
    test_name = os.path.basename(log).split('summary-of-')[1].rsplit('.xml')[0]
    test_class = test_name.rsplit('.', 1)[0]
    mutantsFailures = getmutantsFailures(logXML, test_name, test_class)

    # key: 'project|id|frequency' -> failure signature
    testFailures = {}
    for failure in logXML.iter('test'):
        Failure = getTestFailure(failure, test_name, test_class)
        attributes = failure.find('test_name').attrib
        k = attributes['project'] + '|' + attributes['id'] + '|' + attributes['frequency']
        testFailures[k] = Failure

    setTestFailures = getSetOfFailures(testFailures) # identical failures merged, frequencies summed

    # These depend only on the mutants of this test, so compute them once.
    # most_frequent() returns ('NA', 0) when there are no mutant failures.
    mostMutantExc, mostMutantFreq = most_frequent([x.split('|#|')[0] for x in mutantsFailures])
    totalMutants = len(mutantsFailures)
    distinctMutants = len(set(mutantsFailures))

    for k, Failure in setTestFailures.items():
        project, test_id, failure_freq = k.split('|')[:3]
        failure_exception = Failure.split('|#|')[0]
        failure_trace = Failure.split('|#|')[1]

        perProject = [
            project,                                          # project name
            test_name,
            test_id,                                          # test id
            failure_freq,                                     # failure frequency
            failure_exception,
            1 if test_name in Failure else 0,                 # test name appears in the signature
            1 if test_class in Failure else 0,                # test class name appears in the signature
            len(failure_trace.split('|')),                    # number of stack-trace lines
            totalMutants,                                     # total killed mutants
            distinctMutants,                                  # distinct mutant failures
            len([i for i in mutantsFailures if i.startswith(failure_exception + '|#|')]),  # same exception
            len([i for i in mutantsFailures if i.endswith('|#|' + failure_trace)]),        # same stack trace
            mutantsFailures.count(Failure),                   # same exception and stack trace
            mostMutantExc,                                    # most common mutant exception
            mostMutantFreq,                                   # ... and how often it occurs
        ]
        fullResult.append(perProject)

        # Failures of tests that have at least one killed mutant feed Part 2.
        if totalMutants > 0:
            r_key = project + '|' + test_name + '|' + test_id + '|' + failure_freq
            repetitive[r_key] = Failure


# dataframe columns ..
columnList = ['Project','Test','TestID','FailureFreq','FailureException','TestNameInStacktrace','TestClassNameInStacktrace','TotalStacktraceLines','TotalMutants','set(TotalMutants)','TotalMatchException','TotalMatchStacktrace','TotalMatch',
              'MostExceptionInMutants','MostExceptionInMutantsFreq']
finalResult = pd.DataFrame(fullResult,columns=columnList)
finalResult.to_csv(output+'/PerFailureResult.csv',index=False)

100%|██████████| 583/583 [00:05<00:00, 113.23it/s]


## Part 2) Find Repetitive Flaky Failures ... 

In [5]:
# Per project: count flaky failures and how many of them repeat, with and
# without the stack-trace lines that start with a test method name.
projectsList = [key.split('|')[0] for key in repetitive]
resultRQ1 = []
for p in set(projectsList):
    project_prefix = p + '|'
    perProjectData = {}
    for k, v in repetitive.items():
        if k.startswith(project_prefix):
            perProjectData[k] = v

    tests = [k.split('|')[1] for k in perProjectData]

    # 'project|test|id' -> frequency (as text)
    perFailures = dict(k.rsplit('|', 1) for k in perProjectData)
    totalFlakyFailures = [int(f) for f in perFailures.values()]

    FlakesOnce = sum(1 for k in perProjectData if k.endswith('|1'))

    # failure signature -> summed frequency over all tests of the project
    reverseDict = {}
    for k, v in perProjectData.items():
        reverseDict[v] = reverseDict.get(v, 0) + int(k.rsplit('|', 1)[1])

    allFlakesOnce = sum(1 for total in reverseDict.values() if total == 1)
    allFlakesMore = sum(total for total in reverseDict.values() if total > 1)

    # Same signatures with every line starting with '<test name>(' removed.
    newPerProject = {}
    avoidLines = [t + '(' for t in tests]
    avoid_prefixes = tuple(avoidLines)
    for k, v in perProjectData.items():
        parts = v.split('|#|')
        e = parts[0]
        traces = parts[1].split('|')

        filtered_traces = []
        for item in traces:
            if not item.startswith(avoid_prefixes):
                filtered_traces.append(item)

        newPerProject[k] = e + '|#|' + '|'.join(filtered_traces)

    reverseDictWithoutTests = {}
    for k, v in newPerProject.items():
        reverseDictWithoutTests[v] = reverseDictWithoutTests.get(v, 0) + int(k.rsplit('|', 1)[1])

    allFlakesOnceWithoutTests = sum(1 for total in reverseDictWithoutTests.values() if total == 1)
    allFlakesMorWithoutTests = sum(total for total in reverseDictWithoutTests.values() if total > 1)

    flaky_total = sum(totalFlakyFailures)
    resultRQ1.append([
        p,
        len(set(tests)),
        flaky_total,
        len(perProjectData),
        FlakesOnce,
        flaky_total - FlakesOnce,
        allFlakesOnceWithoutTests,
        allFlakesMorWithoutTests,
    ])

rq1_columns = ['Project','Tests','Flaky Failures','Set(FlakyFailures)','withTest[1]','withTest(1:n)','acrossTestWithoutTestNames[1]','acrossTestWithoutTestNames(1:n)']
resultRQ1_df = pd.DataFrame(resultRQ1, columns=rq1_columns).sort_values(by=['Tests'], ascending=False)

# Add the sum row
repetitiveData = appendSumRow(resultRQ1_df,['Project'])
repetitiveData.to_csv(output+'/RepetitiveFlakyFailure.csv',index=False)

## Part 3) Text-Based Approach Result

In [6]:
# Read the per-failure table written by Part 1. The path is derived from
# `output` (as in Part 1) so both cells stay in sync if `output` is changed.
result = pd.read_csv(output + '/PerFailureResult.csv', index_col=False)
summary = []
exception_summary = []

for p in result['Project'].unique():
    # Only tests that have at least one killed mutant take part.
    perProject = result[(result['Project']== p)&(result['TotalMutants'].astype(int)>0)]
    if len(perProject) == 0:
        continue

    totalMatch = perProject['TotalMatch'].astype(int)
    failureFreq = perProject['FailureFreq'].astype(int)

    # TP: failure seen more than once and matching no mutant failure.
    TP_Data = perProject[(totalMatch == 0) & (failureFreq > 1)]
    # FN: failure matching a mutant failure, or seen only once.
    FN_Data = perProject[(totalMatch > 0) | (failureFreq == 1)]
    # FP: rows with at least one matching mutant failure (counted in mutants).
    FP_Data = perProject[perProject['TotalMatch']>0]

    # TN: mutant failures of each test that match none of its flaky failures.
    tn_tests = 0
    tn_failures = 0
    for t in perProject['Test'].unique():
        perProjectPerTest = perProject[perProject['Test']==t]

        TotalMutants = perProjectPerTest['TotalMutants'].tolist()[0]
        matchedMutants = perProjectPerTest['TotalMatch'].sum()
        tn_failures = tn_failures + (TotalMutants - int(matchedMutants))
        if (TotalMutants > matchedMutants):
            tn_tests = tn_tests + 1

    # Per-test values (TotalMutants, set(TotalMutants)) repeat on every row of
    # a test, so take them from the first row of each test only.
    onePerTest = perProject.drop_duplicates(subset='Test', keep='first')

    tp_total = TP_Data['FailureFreq'].sum()
    fn_total = FN_Data['FailureFreq'].sum()
    fp_total = FP_Data['TotalMatch'].sum()
    has_tp = len(TP_Data) > 0

    precision = math.floor(tp_total/(tp_total+fp_total)*100) if has_tp else 0
    recall = math.floor(tp_total/(tp_total+fn_total)*100) if has_tp else 0
    specificity = math.floor(tn_failures/(tn_failures+fp_total)*100)

    # Get the result
    summary.append([p,
                    len(perProject['Test'].unique()),
                    onePerTest['TotalMutants'].sum(),
                    perProject['FailureFreq'].sum(),
                    onePerTest['set(TotalMutants)'].sum(),
                    len(perProject),

                    tp_total,      # TP
                    fn_total,      # FN
                    fp_total,      # FP
                    tn_failures,   # TN

                    len(TP_Data['Test'].unique()),
                    len(FN_Data['Test'].unique()),
                    len(FP_Data['Test'].unique()),
                    tn_tests,

                    str(precision)+'%',    # P
                    str(recall)+'%',       # R
                    str(specificity)+'%',  # SP
                    ])


columnsList = ['Project','Test','True','Flaky','set(True)','set(Flaky)','TP','FN','FP','TN','t(tp)','t(fn)','t(fp)','t(tn)','P','R','SP']
textMatching = pd.DataFrame(summary,columns=columnsList).sort_values(by=['Test'], ascending=False)
for col in textMatching.columns:
    if textMatching[col].dtype == "float64":
        textMatching[col] = textMatching[col].astype(int)

# Append total row
textMatchingWithSumRow = appendSumRow(textMatching,['Project','SP','R','P'])
textMatchingWithSumRow.to_csv(output + '/TextBasedMatchingResult.csv',index=False)


## Part 4) Top Most Exceptions in Flaky and True Failures. 

In [7]:
# Tag every flaky failure (TP/FN) and every mutant failure (FP/TN), once by
# comparing full signatures and once by comparing exceptions only.
fullResult = []
for log in tqdm(logs):
    # Skip empty summary files.
    if os.path.getsize(log) == 0:
        continue

    logXML = readXMLFile(log)
    # basename() instead of splitting on '/', so paths built by os.path.join
    # are handled whatever separator the platform uses.
    test_name = os.path.basename(log).split('summary-of-')[1].rsplit('.xml')[0]
    test_class = test_name.rsplit('.', 1)[0]
    mutantsFailures = getmutantsFailures(logXML, test_name, test_class)

    # key: 'project|id|frequency' -> failure signature
    testFailures = {}
    for failure in logXML.iter('test'):
        Failure = getTestFailure(failure, test_name, test_class)
        attributes = failure.find('test_name').attrib
        k = attributes['project'] + '|' + attributes['id'] + '|' + attributes['frequency']
        testFailures[k] = Failure

    setTestFailures = getSetOfFailures(testFailures) # identical failures merged, frequencies summed

    if not (len(mutantsFailures) > 0 and len(setTestFailures) > 0):
        continue

    project = next(iter(setTestFailures)).split('|')[0]

    # Loop-invariant lookups, built once per test; sets give O(1) membership.
    mutantsExceptions = [m.split('|#|')[0] for m in mutantsFailures]
    failureException = [s.split('|#|')[0] for s in setTestFailures.values()]
    mutantSignatures = set(mutantsFailures)
    mutantExceptionSet = set(mutantsExceptions)
    flakySignatures = set(setTestFailures.values())
    flakyExceptionSet = set(failureException)

    # This is for Flaky failures
    for failureKey, flakyFailure in setTestFailures.items():
        exception = flakyFailure.split('|#|')[0]
        exceptionFreq = int(failureKey.rsplit('|',1)[1])

        # A failure seen only once is always counted as FN.
        tag = 'FN' if (flakyFailure in mutantSignatures or exceptionFreq == 1) else 'TP'
        fullResult.append([project,test_name,exception,'WithStacktraces',tag,exceptionFreq])

        # Without stacktraces
        tag = 'FN' if (exception in mutantExceptionSet or exceptionFreq == 1) else 'TP'
        fullResult.append([project,test_name,exception,'WithOutStacktraces',tag,exceptionFreq])

    # This is for non-flaky failures ...
    for mutantFail in mutantsFailures:
        mutantException = mutantFail.split('|#|')[0]

        tag = 'FP' if mutantFail in flakySignatures else 'TN'
        fullResult.append([project,test_name,mutantException,'WithStacktraces',tag,1])

        # Without stacktraces
        tag = 'FP' if mutantException in flakyExceptionSet else 'TN'
        fullResult.append([project,test_name,mutantException,'WithOutStacktraces',tag,1])

fullResult_df = pd.DataFrame(fullResult,columns=['Project','Test','Exception','CompareBy','Tag','Freq'])

100%|██████████| 583/583 [00:05<00:00, 108.97it/s]


In [8]:
# Rank exceptions by total frequency over flaky and non-flaky failures and
# summarise the ten most frequent ones.

oneTypeData = fullResult_df[fullResult_df['CompareBy']=="WithOutStacktraces"]

# exception -> summed frequency (each failure is listed once per CompareBy
# value, so a single CompareBy slice is enough for counting)
uniqueExceptions = {
    exception: oneTypeData[oneTypeData['Exception']==exception]['Freq'].sum()
    for exception in oneTypeData['Exception'].unique()
}

# Get top most exception
rankedExceptions = sorted(uniqueExceptions.items(), key=lambda pair: pair[1], reverse=True)
topTenException = dict(rankedExceptions[:10])

TAG_ORDER = ('TP', 'FN', 'FP', 'TN')


def _tagStats(frame, tag):
    """Return [summed Freq, distinct tests, distinct projects] of the rows carrying ``tag``."""
    tagged = frame[frame['Tag']==tag]
    return [tagged['Freq'].sum(),
            len(tagged['Test'].unique()),
            len(tagged['Project'].unique())]


summaryExceptions = []
for k in topTenException:
    sameException = fullResult_df['Exception'] == k
    perExceptionwithStacktrace = fullResult_df[sameException & (fullResult_df['CompareBy']=='WithStacktraces')]
    perExceptionwithoutStacktraces = fullResult_df[sameException & (fullResult_df['CompareBy']=='WithOutStacktraces')]

    project_num = len(perExceptionwithStacktrace['Project'].unique())
    test_num = len(perExceptionwithStacktrace['Test'].unique())

    withTraces = {tag: _tagStats(perExceptionwithStacktrace, tag) for tag in TAG_ORDER}
    withoutTraces = {tag: _tagStats(perExceptionwithoutStacktraces, tag) for tag in TAG_ORDER}

    tp, fn, fp, tn = (withTraces[tag][0] for tag in TAG_ORDER)

    row = [k, project_num, test_num, tp+fp+fn+tn, tn+fp, fn+tp]
    for tag in TAG_ORDER:
        row.extend(withTraces[tag])
    for tag in TAG_ORDER:
        row.extend(withoutTraces[tag])
    summaryExceptions.append(row)


# t refers to the total number of tests (e.g. tTP: the number of tests that have at least one TP failure)
# p refers to the total number of projects (e.g. pTP: the number of projects that have at least one TP failure)
# _noTraces refers to matching without considering the stacktraces

summaryExceptions_df = pd.DataFrame(summaryExceptions,columns=['Exception','#Projects','#Tests','TotalFailures','True','Flaky','TP','tTP','pTP','FN','tFN','pFN','FP','tFP','pFP','TN','tTN','pTN','TP_noTraces','tTP_noTraces','pTP_noTraces','FN_noTraces','tFN_noTraces','pFN_noTraces','FP_noTraces','tFP_noTraces','pFP_noTraces','TN_noTraces','tTN_noTraces','pTN_noTraces'])


# Final Result ..

finalShapeResultColumns =  ['Exception', '#Projects', '#Tests', 'TotalFailures','True','Flaky', 'TP','FN', 'FP','TN','tTP','tFN', 'tFP', 'tTN', 'TP_noTraces','FN_noTraces',  'FP_noTraces', 'TN_noTraces', 'tTP_noTraces', 'tFN_noTraces',  'tFP_noTraces',     'tTN_noTraces']
updatedSummaryExceptions_df = summaryExceptions_df[finalShapeResultColumns]

updatedSummaryExceptions_df.to_csv(output+'/TopMostExceptions.csv',index=False)