### Extract functions from LLVM IR (`.ll`) files

In [1]:
import CodePreprocessing as preprocessing
import json

In [2]:

# ----------------- user functions  --------------------
# Run the preprocessing step on the user's LLVM IR file, then load the JSON
# file it is expected to leave behind (presumably `<json_file>.json` --
# confirm against CodePreprocessing).
preprocessing.functions_preprocessing(llvm_file='pairs/UserCode.ll', json_file='UserCode')
with open('UserCode.json', 'r') as user_json:
    user_code = json.load(user_json)

# ----------------- Vulnerable function ------------------
# Same two steps for the reference file with the known-vulnerable functions.
preprocessing.functions_preprocessing(llvm_file='pairs/VulnerableCode2.ll', json_file='VulnerableCode')
with open('VulnerableCode.json', 'r') as vulnerable_json:
    vulnerable_code = json.load(vulnerable_json)

# Flat list of the loaded values, in the same order as the keys of `vulnerable_code`.
vulnerable_function = list(vulnerable_code.values())


### Matching

In [3]:
import matchers as matcher
import LLNormalizer as normalizer
import Winnowing

In [4]:
def check_function_vulnerable(threshold,score1,score2,score3):
    """Return True only when all three scores are strictly greater than `threshold`.

    A score equal to the threshold does not count as passing.
    """
    return all(score > threshold for score in (score1, score2, score3))

In [5]:
# KNN-style pre-filter built on string-similarity measures: every user function
# is scored against every known-vulnerable function with several measures and
# the scores are stored best-first. The MOSS stage later checks the three best
# scores against `threshold` before running the (ultimate) MOSS test.

threshold = 0.7
k = 3  # number of top scores considered per function pair

# key:   key of a vulnerable function from our database
# value: {user function key: [all scores for that pair, sorted descending]}
Vulnerable_Matches = dict()

# The loop variables are deliberately not named `k`: doing so would overwrite
# the setting above with a dictionary key.
for vuln_head, vuln_func in vulnerable_code.items():
    code_scores = dict()

    for key, fn in user_code.items():
        # The user function is compared as-is here; normalization is only
        # applied in the MOSS stage.
        # NOTE(review): `jaccard_distance` is named as a distance while the
        # other measures are similarities. If it really returns a distance,
        # sorting it together with them skews the top scores -- confirm
        # against matchers.py.
        scores = [
            matcher.jaro_winkler_similarity(fn, vuln_func),
            matcher.levenshtein_similarity(fn, vuln_func),
            matcher.ratcliff_obershelp_similarity(fn, vuln_func),
            matcher.trigram_similarity(fn, vuln_func),
            matcher.sorensen_dice_similarity(fn, vuln_func),
            matcher.jaccard_distance(fn, vuln_func),
        ]
        scores.sort(reverse=True)  # best score first
        code_scores[key] = scores

    Vulnerable_Matches[vuln_head] = code_scores

# Persist the full score table so it can be inspected outside the notebook.
with open('code_scores.json', 'w') as f:
    json.dump(Vulnerable_Matches, f, indent=6)

# MOSS

In [6]:
# Thresholds for the two MOSS accuracy metrics. Suitable values are highly
# dependent on the vulnerability type.
MOSS_METRIC1_THRESHOLD = 0.7
MOSS_METRIC2_THRESHOLD = 0.7

# Loop variables are not named `k`, so a `k` defined by an earlier cell is not
# overwritten here.
for vuln_head, vuln_func in vulnerable_code.items():
    # Normalize the vulnerable function on our side (not the user code yet).
    normalizedvuln = normalizer.NormalizeLLVM(vuln_func)

    # Scores of every user function against this vulnerable function;
    # looked up once per vulnerable function instead of once per candidate.
    code_scores = Vulnerable_Matches[vuln_head]

    # For each user function whose three best scores pass the threshold, do MOSS.
    for key, scores in code_scores.items():
        if not check_function_vulnerable(threshold, scores[0], scores[1], scores[2]):
            continue

        # Normalizing sometimes helps the comparison and sometimes hurts it.
        normalizedfn = normalizer.NormalizeLLVM(user_code[key])

        # MOSS metrics (defined in Winnowing.py). Parameters passed: K=20, WindowSize=10, P=10
        MOSS_Acc_metric1, MOSS_Acc_metric2, hits, misses1, misses2 = Winnowing.diff(normalizedfn, normalizedvuln, K= 20, WindowSize= 10, P= 10)

        # Printed for every candidate that passed the similarity pre-filter,
        # whether or not MOSS confirms it below.
        print("\n\nVulnerable function found:",key)

        if MOSS_Acc_metric1 > MOSS_METRIC1_THRESHOLD or MOSS_Acc_metric2 > MOSS_METRIC2_THRESHOLD:
            print("MOSS Caught this <3 !!!")
            print(f"Accuracy_Metric 1 = {MOSS_Acc_metric1}  ||  Accuracy_Metric 2 = {MOSS_Acc_metric2}\nhits: {hits} , misses: {misses1}, misses2: {misses2}")
        print('-----------------------')



Vulnerable function found: define void @"CWE23_Relative_Path_Traversal__char_environment_fopen_41::bad"() local_unnamed_addr {

MOSS Caught this <3 !!!
Accuracy_Metric 1 = 1.0  ||  Accuracy_Metric 2 = 0.5
hits: 4471 , misses: 0, misses2: 4471
-----------------------


Vulnerable function found: define void @"CWE23_Relative_Path_Traversal__char_environment_fopen_41::goodG2B"() local_unnamed_addr {

MOSS Caught this <3 !!!
Accuracy_Metric 1 = 0.889532293986637  ||  Accuracy_Metric 2 = 0.5317534283051525
hits: 3994 , misses: 496, misses2: 3517
-----------------------


Vulnerable function found: define void @"CWE23_Relative_Path_Traversal__char_environment_fopen_41::badSink"(i8* %data) local_unnamed_addr {

MOSS Caught this <3 !!!
Accuracy_Metric 1 = 1.0  ||  Accuracy_Metric 2 = 0.5
hits: 408 , misses: 0, misses2: 408
-----------------------


Vulnerable function found: define void @"CWE23_Relative_Path_Traversal__char_environment_fopen_41::goodG2BSink"(i8* %data) local_unnamed_addr {



In [1]:
#Cleanup
# Remove only the intermediate JSON files this notebook opens or writes.
# Deleting every *.json entry in the working directory would also destroy
# unrelated files that happen to live next to the notebook.
# NOTE(review): if the preprocessing step leaves further intermediate files
# behind, add them to the tuple below.

import os

GENERATED_JSON_FILES = ('UserCode.json', 'VulnerableCode.json', 'code_scores.json')

for generated_file in GENERATED_JSON_FILES:
    try:
        os.remove(generated_file)
    except FileNotFoundError:
        # Already removed (e.g. the cell was run twice) or never created.
        pass