In [1]:
##############################################################################
#                                                                            #
#  Code for the USENIX Security '22 paper:                                   #
#  How Machine Learning Is Solving the Binary Function Similarity Problem.   #
#                                                                            #
#  MIT License                                                               #
#                                                                            #
#  Copyright (c) 2019-2022 Cisco Talos                                       #
#                                                                            #
#  Permission is hereby granted, free of charge, to any person obtaining     #
#  a copy of this software and associated documentation files (the           #
#  "Software"), to deal in the Software without restriction, including       #
#  without limitation the rights to use, copy, modify, merge, publish,       #
#  distribute, sublicense, and/or sell copies of the Software, and to        #
#  permit persons to whom the Software is furnished to do so, subject to     #
#  the following conditions:                                                 #
#                                                                            #
#  The above copyright notice and this permission notice shall be            #
#  included in all copies or substantial portions of the Software.           #
#                                                                            #
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,           #
#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF        #
#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND                     #
#  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE    #
#  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION    #
#  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION     #
#  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.           #
#                                                                            #
#  Vulnerability task eval                                                   #
#                                                                            #
##############################################################################

In [2]:
import pandas as pd
import os
import json

from collections import defaultdict

In [3]:
# Names of the libcrypto functions with a known CVE that are searched for in
# the NETGEAR R7000 firmware (see VULN_DICT). The order is preserved in the
# per-function rankings written by the evaluation.
vulnerable_functions_netgear = [
    # CVE-2016-2182
    "BN_bn2dec",
    # CVE-2019-1563
    "CMS_decrypt",
    # CVE-2016-6303
    "MDC2_Update",
    # CVE-2019-1563
    "PKCS7_dataDecode",
]

In [4]:
# Names of the libcrypto functions with a known CVE that are searched for in
# the TP-Link Deco-M4 firmware (see VULN_DICT). The order is preserved in the
# per-function rankings written by the evaluation.
vulnerable_functions_tplink = [
    # CVE-2016-2182
    "BN_bn2dec",
    # CVE-2016-0797
    "BN_dec2bn",
    # CVE-2016-0797
    "BN_hex2bn",
    # CVE-2019-1563
    "CMS_decrypt",
    # CVE-2016-2105
    "EVP_EncodeUpdate",
    # CVE-2016-2106
    "EVP_EncryptUpdate",
    # CVE-2019-1563
    "PKCS7_dataDecode",
    # CVE-2016-0798
    "SRP_VBASE_get_by_user",
    # CVE-2016-2176
    "X509_NAME_oneline",
]

In [5]:
# Firmware images under evaluation. For each firmware name:
#   'functions' -> names of the vulnerable functions to look for
#   'idb_path'  -> IDB of the firmware's libcrypto, i.e. the search target
VULN_DICT = {
    "NETGEAR_R7000": dict(
        functions=vulnerable_functions_netgear,
        idb_path="IDBs/Dataset-Vulnerability/libcrypto.so.1.0.0_NETGEAR_R7000_1.0.2h_arm32.i64",
    ),
    "TP-Link_Deco-M4": dict(
        functions=vulnerable_functions_tplink,
        idb_path="IDBs/Dataset-Vulnerability/libcrypto.so.1.0.0_TP-Link_Deco-M4_1.0.2d_mips32.i64",
    ),
}

In [6]:
# Reference libcrypto builds used as the query side of the search, one per
# architecture. The architecture tag is the last "_"-separated token of the
# file name, before the extension.
VULNERABLE_LIBS = [
    "IDBs/Dataset-Vulnerability/libcrypto.so.1.0.0_openssl_1.0.2d_arm32.i64",
    "IDBs/Dataset-Vulnerability/libcrypto.so.1.0.0_openssl_1.0.2d_mips32.i64",
    "IDBs/Dataset-Vulnerability/libcrypto.so.1.0.0_openssl_1.0.2d_x64.i64",
    "IDBs/Dataset-Vulnerability/libcrypto.so.1.0.0_openssl_1.0.2d_x86.i64",
]

### Utility functions

In [7]:
def compute_rankings(df_merged, fw_name):
    """Rank every vulnerable function of a firmware against the reference builds.

    For each function name listed for ``fw_name`` in ``VULN_DICT`` and each
    reference library in ``VULNERABLE_LIBS``, the rows of ``df_merged`` that
    compare that (library, function) query against the firmware library are
    sorted by decreasing ``sim``. The 0-based position of the first row whose
    ``func_name_2`` equals the queried name is the rank.

    Args:
        df_merged: DataFrame with at least the columns ``idb_path_1``,
            ``func_name_1``, ``idb_path_2``, ``func_name_2`` and ``sim``.
        fw_name: Key of ``VULN_DICT`` selecting the firmware to evaluate.

    Returns:
        A ``defaultdict(list)`` with the key ``"funcs"`` (queried function
        names, in order) and one key per architecture tag parsed from the
        reference library file names. Every architecture list holds exactly
        one entry per queried function: the 0-based rank, or ``float('nan')``
        when the function does not appear among the candidates. All lists
        therefore have the same length and can be turned into a DataFrame.

    Raises:
        KeyError: if ``fw_name`` is not a key of ``VULN_DICT`` or a required
            column is missing from ``df_merged``.
    """
    result_dict = defaultdict(list)

    # Search vulnerable functions in this library
    target_lib = VULN_DICT[fw_name]['idb_path']
    # Loop-invariant mask: candidate rows belonging to the firmware library
    in_target_lib = (df_merged['idb_path_2'] == target_lib)

    # Iterate over all the vulnerable function names
    for source_func in VULN_DICT[fw_name]['functions']:
        result_dict["funcs"].append(source_func)
        is_query_func = in_target_lib & (df_merged['func_name_1'] == source_func)

        # Iterate over the compiled libraries
        for source_lib in VULNERABLE_LIBS:
            # e.g. "..._1.0.2d_arm32.i64" -> "arm32"
            arch = source_lib.split("_")[-1].split(".")[0]

            df_temp = df_merged[is_query_func & (df_merged['idb_path_1'] == source_lib)]
            target_func_list = df_temp.sort_values('sim', ascending=False)['func_name_2']

            # Always record exactly one value per (function, architecture):
            # the best rank of the matching name, or NaN when it is absent.
            # Appending only on a hit would leave the per-architecture lists
            # shorter than "funcs" (or missing altogether), and appending on
            # every hit would make them longer when candidates are duplicated.
            rank = next(
                (cnt for cnt, target_func in enumerate(target_func_list)
                 if target_func == source_func),
                float('nan'))
            result_dict[arch].append(rank)

    return result_dict

In [8]:
def process_results(df_testing, results_dir):
    """Compute MRR@10 and per-function ranks for every result CSV in a folder.

    Each ``*.csv`` file of ``results_dir`` (visited in sorted name order) is
    left-joined onto ``df_testing`` using the pair identifiers
    ``idb_path_1``, ``fva_1``, ``idb_path_2`` and ``fva_2``. For every
    firmware in ``VULN_DICT`` the rankings returned by ``compute_rankings``
    are converted from 0-based to 1-based and summarised per architecture.

    Args:
        df_testing: DataFrame of the testing pairs; must contain the four
            join columns.
        results_dir: Folder with one similarity CSV per evaluated model.

    Returns:
        A tuple ``(mrr_dict, ranking_dict)`` of ``defaultdict(list)``. Both
        have a ``'model'`` list (file name without the
        ``testing_Dataset-Vulnerability_`` prefix and ``.csv``) and one list
        per ``"<arch>:<firmware>"`` key. ``mrr_dict`` stores the MRR@10 value
        and ``ranking_dict`` the 1-based ranks joined by ``';'``.
    """
    architectures = ('x86', 'x64', 'arm32', 'mips32')
    pair_keys = ['idb_path_1', 'fva_1', 'idb_path_2', 'fva_2']

    mrr_dict = defaultdict(list)
    ranking_dict = defaultdict(list)

    csv_names = [
        name for name in sorted(os.listdir(results_dir))
        if name.endswith(".csv")
    ]

    for csv_name in csv_names:
        print("[D] Processing {}".format(csv_name))

        # Attach the similarity scores of this model to the testing pairs
        similarities = pd.read_csv(os.path.join(results_dir, csv_name))
        df_merged = df_testing.merge(
            similarities, how='left', left_on=pair_keys, right_on=pair_keys)

        # Model label: file name stripped of the dataset prefix and extension
        test_name = (csv_name
                     .replace("testing_Dataset-Vulnerability_", "")
                     .replace(".csv", ""))
        mrr_dict['model'].append(test_name)
        ranking_dict['model'].append(test_name)

        for fw_name in VULN_DICT:
            rank_table = pd.DataFrame.from_dict(
                compute_rankings(df_merged, fw_name))

            for arch in architectures:
                # compute_rankings counts from 0; ranks are reported from 1
                rank_list = (rank_table[arch] + 1).values

                # MRR@10 metric: ranks beyond the top 10 contribute nothing
                reciprocal_ranks = []
                for rank in rank_list:
                    reciprocal_ranks.append(1 / rank if rank <= 10 else 0)
                MRR = sum(reciprocal_ranks) / len(reciprocal_ranks)

                key = "{}:{}".format(arch, fw_name)
                mrr_dict[key].append(MRR)
                ranking_dict[key].append(';'.join(map(str, rank_list)))

    return mrr_dict, ranking_dict

In [9]:
# Create output folders. Done in pure Python rather than by shelling out to
# `mkdir -p`, so the cell also runs where that command is unavailable;
# exist_ok=True keeps the cell safe to re-run.
os.makedirs("metrics_and_plots/Dataset-Vulnerability", exist_ok=True)

In [10]:
# Input folder with one similarity CSV per model, and output folder for the
# generated metric tables
RESULTS_DIR = "../data/Dataset-Vulnerability/"
OUTPUT_DIR = "metrics_and_plots/Dataset-Vulnerability/"

# Folder containing the testing pairs of the vulnerability dataset
base_path = "../../DBs/Dataset-Vulnerability/pairs/"

# Load the testing pairs; the first CSV column is used as the index
df_testing = pd.read_csv(
    os.path.join(base_path, "pairs_testing_Dataset-Vulnerability.csv"),
    index_col=0,
)

# Evaluate every result file found in RESULTS_DIR
mrr_dict, ranking_dict = process_results(df_testing, RESULTS_DIR)

# One row per model, one column per "<arch>:<firmware>" combination
df_mrr = pd.DataFrame(mrr_dict)
df_ranking = pd.DataFrame(ranking_dict)

# Persist both tables next to the other metrics
df_mrr.to_csv(os.path.join(OUTPUT_DIR, "df_MRR@10.csv"))
df_ranking.to_csv(os.path.join(OUTPUT_DIR, "df_ranking.csv"))

[D] Processing testing_Dataset-Vulnerability_GGSNN_NoFeatures_e10.csv
[D] Processing testing_Dataset-Vulnerability_GGSNN_OPC-200_e10.csv
[D] Processing testing_Dataset-Vulnerability_GMN_NoFeatures_e16.csv
[D] Processing testing_Dataset-Vulnerability_GMN_OPC-200_e16.csv
[D] Processing testing_Dataset-Vulnerability_GNN-s2v_ArithMean_e5.csv
[D] Processing testing_Dataset-Vulnerability_GNN-s2v_AttentionMean_e5.csv
[D] Processing testing_Dataset-Vulnerability_GNN-s2v_GeminiNN_GeminiFeatures_e5.csv
[D] Processing testing_Dataset-Vulnerability_GNN-s2v_GeminiNN_NoFeatures_e5.csv
[D] Processing testing_Dataset-Vulnerability_GNN-s2v_GeminiNN_OPC-200_e5.csv
[D] Processing testing_Dataset-Vulnerability_GNN-s2v_RNN_ASM_e7.csv
[D] Processing testing_Dataset-Vulnerability_IMM:0.00_MNEM:0.00_GRAPH:1.00.csv
[D] Processing testing_Dataset-Vulnerability_IMM:0.00_MNEM:1.00_GRAPH:1.00.csv
[D] Processing testing_Dataset-Vulnerability_IMM:1.00_MNEM:1.00_GRAPH:1.00.csv
[D] Processing testing_Dataset-Vulnerabi

In [11]:
# Display the MRR@10 table: one row per result file ("model") and one column
# per "<arch>:<firmware>" combination.
df_mrr

Unnamed: 0,model,x86:NETGEAR_R7000,x64:NETGEAR_R7000,arm32:NETGEAR_R7000,mips32:NETGEAR_R7000,x86:TP-Link_Deco-M4,x64:TP-Link_Deco-M4,arm32:TP-Link_Deco-M4,mips32:TP-Link_Deco-M4
0,GGSNN_NoFeatures_e10,0.25,0.027778,0.53125,0.25,0.219577,0.203704,0.122222,0.265873
1,GGSNN_OPC-200_e10,0.333333,0.319444,0.5625,0.3,0.492593,0.555556,0.355996,0.611111
2,GMN_NoFeatures_e16,0.645833,0.425,0.6875,0.535714,0.444444,0.472222,0.318519,0.316667
3,GMN_OPC-200_e16,0.875,0.535714,1.0,0.785714,0.666667,0.734568,0.703704,0.777778
4,GNN-s2v_ArithMean_e5,0.1,0.083333,0.5,0.0,0.05,0.055556,0.027778,0.175926
5,GNN-s2v_AttentionMean_e5,0.05,0.03125,0.035714,0.0,0.0,0.029762,0.040123,0.268519
6,GNN-s2v_GeminiNN_GeminiFeatures_e5,0.333333,0.041667,0.375,0.25,0.105556,0.259259,0.277778,0.108025
7,GNN-s2v_GeminiNN_NoFeatures_e5,0.0,0.0,0.027778,0.0,0.0,0.0,0.111111,0.0
8,GNN-s2v_GeminiNN_OPC-200_e5,0.33125,0.309028,0.666667,0.335714,0.388889,0.277778,0.361111,0.592593
9,GNN-s2v_RNN_ASM_e7,0.0,0.0,0.025,0.0,0.027778,0.111111,0.077778,0.138889


In [12]:
# Display the ranking table: for each model and "<arch>:<firmware>" column,
# the 1-based ranks of the vulnerable functions joined by ';'.
df_ranking

Unnamed: 0,model,x86:NETGEAR_R7000,x64:NETGEAR_R7000,arm32:NETGEAR_R7000,mips32:NETGEAR_R7000,x86:TP-Link_Deco-M4,x64:TP-Link_Deco-M4,arm32:TP-Link_Deco-M4,mips32:TP-Link_Deco-M4
0,GGSNN_NoFeatures_e10,59;23;1;12,82;28;523;9,24;1;1;8,537;62;1;12,7;2;3;58;51;142;17;1;18,40;3;2;60;56;142;15;1;18,23;16;12;10;18;144;13;1;14,265;7;101;77;2;2;21;1;4
1,GGSNN_OPC-200_e10,18;3;1;32,40;9;1;6,44;4;1;1,138;97;1;5,26;1;2;42;1;2;3;10;1,45;1;2;48;1;1;2;14;1,50;4;5;35;7;1;2;9;1,135;1;1;111;1;1;2;38;1
2,GMN_NoFeatures_e16,3;4;1;1,2;5;52;1,4;2;1;1,12;7;1;1,1;1;29;84;109;3;3;1;3,1;1;3;104;104;3;4;1;3,6;1;40;67;116;4;5;1;4,33;4;118;69;32;1;2;1;10
3,GMN_OPC-200_e16,2;1;1;1,7;1;30;1,1;1;1;1,7;1;1;1,22;1;1;24;2;1;2;1;1,9;1;1;79;2;1;1;1;1,3;1;1;60;33;1;1;1;1,1;1;3;3;1;1;3;1;1
4,GNN-s2v_ArithMean_e5,5;37;5;76,39;3;45;35,70;30;1;1,100;76;54;74,91;4;157;131;52;15;29;393;5,129;52;136;420;277;42;34;404;2,56;94;101;416;129;157;78;102;4,14;43;136;17;1;34;4;355;3
5,GNN-s2v_AttentionMean_e5,5;1111;275;50,14;195;158;8,7;1123;54;13,14;438;661;25,169;295;71;1561;92;78;74;31;13,148;53;32;530;98;54;28;8;7,119;219;139;1066;130;200;67;4;9,22;1;49;273;2;73;2;6;4
6,GNN-s2v_GeminiNN_GeminiFeatures_e5,155;432;1;3,60;372;66;6,77;356;1;2,308;339;1;15,64;59;125;863;142;166;5;2;4,13;20;1;588;193;53;3;1;16,23;76;117;1429;62;208;2;1;1,203;9;146;325;11;21;4;2;9
7,GNN-s2v_GeminiNN_NoFeatures_e5,160;1295;12;13,475;1159;169;535,690;1149;9;232,198;1169;852;67,152;60;407;767;303;446;335;388;94,562;106;260;108;603;185;110;450;336,1045;709;330;1084;1;339;115;170;92,365;371;70;619;347;215;276;614;139
8,GNN-s2v_GeminiNN_OPC-200_e5,36;8;5;1,8;9;14;1,6;2;1;1,7;35;1;5,26;1;14;32;1;51;1;28;2,6;2;16;21;1;19;2;16;3,1;1;13;36;2;15;4;65;2,1;1;19;37;1;19;1;3;1
9,GNN-s2v_RNN_ASM_e7,102;113;312;43,102;297;423;83,10;153;145;15,19;56;389;12,111;24;17;408;168;115;23;4;90,107;29;29;667;134;197;54;1;196,12;29;28;556;145;162;5;2;73,42;45;27;449;42;64;4;1;116
