In [3]:
##############################################################################
#                                                                            #
#  Code for the USENIX Security '22 paper:                                   #
#  How Machine Learning Is Solving the Binary Function Similarity Problem.   #
#                                                                            #
#  MIT License                                                               #
#                                                                            #
#  Copyright (c) 2019-2022 Cisco Talos                                       #
#                                                                            #
#  Permission is hereby granted, free of charge, to any person obtaining     #
#  a copy of this software and associated documentation files (the           #
#  "Software"), to deal in the Software without restriction, including       #
#  without limitation the rights to use, copy, modify, merge, publish,       #
#  distribute, sublicense, and/or sell copies of the Software, and to        #
#  permit persons to whom the Software is furnished to do so, subject to     #
#  the following conditions:                                                 #
#                                                                            #
#  The above copyright notice and this permission notice shall be            #
#  included in all copies or substantial portions of the Software.           #
#                                                                            #
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,           #
#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF        #
#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND                     #
#  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE    #
#  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION    #
#  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION     #
#  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.           #
#                                                                            #
#  Evaluation notebook                                                       #
#                                                                            #
##############################################################################

In [4]:
import json
import numpy as np
import os
import pandas as pd

from IPython.display import display
from collections import defaultdict
from sklearn import metrics

import matplotlib as mpl
import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'retina'

### Utils functions

In [5]:
def process_df(df_pairs, df_similarity, is_pos):
    df_pairs = df_pairs.merge(
        df_similarity,
        how='left',
        left_on=['idb_path_1', 'fva_1',
                 'idb_path_2', 'fva_2'],
        right_on=['idb_path_1', 'fva_1', 'idb_path_2', 'fva_2'])

    if is_pos:
        # If positive pairs, the perfect similarity is 1
        df_pairs['gt'] = [1] * df_pairs.shape[0]
    else:
        # if negative pairs, the perfect similarity is 0
        df_pairs['gt'] = [-1] * df_pairs.shape[0]

    return df_pairs

### Utility functions for plotting

In [6]:
def plot_roc_values(df_pos_input, df_neg_input, plot=None, plt_cosine_sim=False):
    task_list = sorted(list(set(df_pos_input['db_type'])))
    result_list = list()

    for task in task_list:
        # Filter test data by test case
        df_pos = df_pos_input[df_pos_input['db_type'] == task]
        df_neg = df_neg_input[df_neg_input['db_type'] == task]

        if plt_cosine_sim:
            print("Processing task: {}".format(task))
            display(df_pos['cs'].hist(bins=200))
            display(df_neg['cs'].hist(bins=200, alpha=0.8))
            plt.show()
            continue

        # Get predicted and ground truth value
        pred_list = list(df_pos['cs'].values) + \
            list(df_neg['cs'].values)
        gt_list = list(df_pos['gt'].values) + list(df_neg['gt'].values)

        # Compute the ROC AUC
        fpr, tpr, thresholds = metrics.roc_curve(gt_list, pred_list)
        roc_auc = metrics.roc_auc_score(gt_list, pred_list)
        print('%20s - AUC = %0.2f' % (task, roc_auc))
        result_list.append([
            "%20s" % (task),
            "%0.2f" % (roc_auc)])

        if plot:
            plt.plot(fpr,
                     tpr,
                     linewidth=1.0,
                     label='AUC = %0.2f' % (roc_auc))

        if plot:
            plt.title("Task: {}".format(task))
            plt.legend(loc='lower right')
            plt.plot([0, 1], [0, 1], 'k--')
            plt.xlim([0, 1])
            plt.ylim([0, 1])
            plt.ylabel('True Positive Rate')
            plt.xlabel('False Positive Rate')
            # plt.savefig("output/{}split_{}.png".format(split, task), dpi=300)
            plt.show()

    return result_list

## Compute ranking metrics

In [7]:
def get_ranking(pos, neg_results):

    # Sort the negative results in descending mode
    neg_results = sorted(neg_results, reverse=True)

    # BUG FIX:
    if pos == 0:
        return len(neg_results) + 1

    # Find the first position where the pos element is >=
    for c, i in enumerate(neg_results):
        if pos >= i:
            return c +1

    # Otherwise return the last position +1 
    return len(neg_results) + 1

In [8]:
def compute_ranking_metrics(df_pos_ranking_input, df_neg_ranking_input):
    ranking_result_dict = defaultdict(list)
    TOP_LIST = [1] + list(range(5, 55, 5))
    task_list = list(set(df_pos_ranking_input['db_type']))

    for task in sorted(task_list):
        # Filter test data by task
        df_pos = df_pos_ranking_input[
            df_pos_ranking_input['db_type'] == task]
        df_neg = df_neg_ranking_input[
            df_neg_ranking_input['db_type'] == task]

        # display(df_pos['cs'].hist(bins=200))
        # display(df_neg['cs'].hist(bins=200, alpha=0.8))
        # plt.show()

        num_test_cases = df_pos.shape[0]

        # Compute the ranking for all the positive test cases
        ranking_list = list()
        for idx, group in df_neg.groupby(['idb_path_1', 'fva_1']):
            c1 = (df_pos['idb_path_1'] == idx[0])
            c2 = (df_pos['fva_1'] == idx[1])
            pos_pred = df_pos[c1 & c2]['cs'].values[0]
            neg_pred = sorted(group['cs'].values, reverse=True)
            rank = get_ranking(pos_pred, neg_pred)
            ranking_list.append(rank)

        # Compute the ranking list
        top_results = list()
        for tt in TOP_LIST:
            r = len([x for x in ranking_list if x <= tt])
            top_results.append(r)

        # MRR metric
        MRR_list = [1 / x if x <= 10 else 0 for x in ranking_list]
        MRR = sum(MRR_list) / len(MRR_list)

        # Save data in a temporary dictionary
        ranking_result_dict["task"].append(task)
        ranking_result_dict["num_test_cases"].append(num_test_cases)
        for tt, tr in zip(TOP_LIST, top_results):
            ranking_result_dict["cc_top_{}".format(tt)].append(tr)
        ranking_result_dict["MRR"].append(MRR)

    df_ranking = pd.DataFrame.from_dict(ranking_result_dict)
    return df_ranking

## NN results (similarity is already available)

In [14]:
!mkdir -p _output/Dataset-2

In [15]:
RESULTS_DIR = "../data/Dataset-2/"
OUTPUT_DIR = "_output/Dataset-2/"

global_results_tc0 = dict()
global_results_tc1 = dict()
global_results_tc2 = dict()

mrr_recall_tc0 = dict()
mrr_recall_tc1 = dict()
mrr_recall_tc2 = dict()

for csv_file in os.listdir(RESULTS_DIR):
    if (not csv_file.endswith(".csv")) or \
        (not csv_file.startswith("pos_")) or \
            (not "_rank_" in csv_file):
        continue

    print("Processing {}".format(csv_file))

    # Load data
    df_pos_ranking_input = pd.read_csv(
        os.path.join(RESULTS_DIR, csv_file))

    df_neg_ranking_input = pd.read_csv(
        os.path.join(RESULTS_DIR, csv_file.replace("pos_", "neg_")))

    if 'cs' not in set(df_pos_ranking_input.columns):
        df_pos_ranking_input.rename(columns={'sim': 'cs'}, inplace=True)
        df_neg_ranking_input.rename(columns={'sim': 'cs'}, inplace=True)

    # Plot the similarity values
    print(
        df_pos_ranking_input['cs'].mean(),
        df_neg_ranking_input['cs'].mean())

    # display(df_pos_ranking_input['cs'].hist(bins=200, range=[-10, 0]))
    # display(df_neg_ranking_input['cs'].hist(bins=200, alpha=0.8, range=[-10, 0]))
    # plt.show()

    # Check that similarity values are not 0
    assert(df_pos_ranking_input.isna().sum()['cs'] == 0)
    assert(df_neg_ranking_input.isna().sum()['cs'] == 0)

    rank_df = compute_ranking_metrics(df_pos_ranking_input, df_neg_ranking_input)
    key = csv_file.replace("pos_", "").replace("neg_", "").replace(".csv", "")

    global_results_tc0[key] = list(rank_df.loc[0][3:-1].values)
    global_results_tc1[key] = list(rank_df.loc[1][3:-1].values)
    global_results_tc2[key] = list(rank_df.loc[2][3:-1].values)

    rank_df["recall@1"] = rank_df['cc_top_1']/200
    display(rank_df)

    mrr_recall_tc0[key] = list(rank_df.loc[0][['MRR', "recall@1"]])
    mrr_recall_tc1[key] = list(rank_df.loc[1][['MRR', "recall@1"]])
    mrr_recall_tc2[key] = list(rank_df.loc[2][['MRR', "recall@1"]])

    print(key)
    display(rank_df.loc[0][['MRR', "recall@1"]])
    display(rank_df.loc[1][['MRR', "recall@1"]])
    display(rank_df.loc[2][['MRR', "recall@1"]])

    print("\n")

Processing pos_rank_testing_Dataset-2_SAFE_ASM-list_Trainable_e10.csv
0.6142685669880001 -0.5273025614075466


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,1,200,51,119,150,162,173,179,183,183,187,191,194,0.397887,0.255
1,2,200,49,134,169,184,190,192,192,195,197,197,198,0.426571,0.245
2,3,200,46,109,143,157,168,181,183,185,189,191,191,0.372282,0.23


rank_testing_Dataset-2_SAFE_ASM-list_Trainable_e10


MRR         0.397887
recall@1    0.255000
Name: 0, dtype: float64

MRR         0.426571
recall@1    0.245000
Name: 1, dtype: float64

MRR         0.372282
recall@1    0.230000
Name: 2, dtype: float64



Processing pos_rank_testing_Dataset-2_pvdm_e10.csv
0.39526670283399257 0.2670229608998305


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,XA,200,9,26,42,60,74,91,103,118,130,141,151,0.081915,0.045
1,XA+XO,200,37,56,74,92,104,117,124,132,138,142,153,0.23129,0.185
2,XO,200,102,162,181,183,188,191,193,193,195,195,196,0.638127,0.51


rank_testing_Dataset-2_pvdm_e10


MRR         0.081915
recall@1       0.045
Name: 0, dtype: object

MRR         0.23129
recall@1      0.185
Name: 1, dtype: object

MRR         0.638127
recall@1        0.51
Name: 2, dtype: object



Processing pos_rank_testing_Dataset-2_SAFE_ASM-list_e5.csv
0.46631403645633335 -0.4201421523804313


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,1,200,28,95,129,155,164,170,180,187,189,191,192,0.273724,0.14
1,2,200,35,96,144,163,172,181,188,191,196,199,199,0.304744,0.175
2,3,200,39,94,124,151,163,171,179,181,183,186,190,0.307157,0.195


rank_testing_Dataset-2_SAFE_ASM-list_e5


MRR         0.273724
recall@1    0.140000
Name: 0, dtype: float64

MRR         0.304744
recall@1    0.175000
Name: 1, dtype: float64

MRR         0.307157
recall@1    0.195000
Name: 2, dtype: float64



Processing pos_rank_testing_Dataset-2_catalog1_16.csv
0.023557072855637313 0.003120488413768352


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,XA,200,11,13,13,13,13,13,13,13,13,13,13,0.06,0.055
1,XA+XO,200,27,29,29,29,29,29,29,29,29,29,29,0.13875,0.135
2,XO,200,75,103,109,109,109,109,109,109,109,109,109,0.433437,0.375


rank_testing_Dataset-2_catalog1_16


MRR          0.06
recall@1    0.055
Name: 0, dtype: object

MRR         0.13875
recall@1      0.135
Name: 1, dtype: object

MRR         0.433437
recall@1       0.375
Name: 2, dtype: object



Processing pos_rank_testing_Dataset-2_SAFE_ASM-list_250_e5.csv
0.5369679162516667 -0.2834012997230734


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,1,200,31,83,129,146,157,165,175,181,186,188,191,0.27728,0.155
1,2,200,37,101,130,157,174,183,187,192,195,197,198,0.317887,0.185
2,3,200,38,79,110,134,158,167,171,172,179,184,187,0.277381,0.19


rank_testing_Dataset-2_SAFE_ASM-list_250_e5


MRR         0.27728
recall@1    0.15500
Name: 0, dtype: float64

MRR         0.317887
recall@1    0.185000
Name: 1, dtype: float64

MRR         0.277381
recall@1    0.190000
Name: 2, dtype: float64



Processing pos_rank_testing_Dataset-2_GNN-s2v_ArithMean_e5.csv
0.6592411937849999 0.09153301796981893


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,1,200,75,115,138,153,158,168,176,180,184,187,190,0.45678,0.375
1,2,200,35,96,138,156,166,172,182,185,188,189,190,0.307538,0.175
2,3,200,65,112,131,141,152,165,172,176,180,180,182,0.418254,0.325


rank_testing_Dataset-2_GNN-s2v_ArithMean_e5


MRR         0.45678
recall@1    0.37500
Name: 0, dtype: float64

MRR         0.307538
recall@1    0.175000
Name: 1, dtype: float64

MRR         0.418254
recall@1    0.325000
Name: 2, dtype: float64



Processing pos_rank_testing_Dataset-2_GMN_NoFeatures_e16.csv
-0.28580187775194565 -8.453742438169021


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,1,200,101,151,158,172,178,182,188,190,193,193,196,0.611117,0.505
1,2,200,136,172,185,187,188,191,192,192,193,193,193,0.759115,0.68
2,3,200,117,157,169,172,178,181,186,187,188,191,197,0.668885,0.585


rank_testing_Dataset-2_GMN_NoFeatures_e16


MRR         0.611117
recall@1    0.505000
Name: 0, dtype: float64

MRR         0.759115
recall@1    0.680000
Name: 1, dtype: float64

MRR         0.668885
recall@1    0.585000
Name: 2, dtype: float64



Processing pos_rank_testing_Dataset-2_catalog1_64.csv
0.024141974429289236 0.003503835585841498


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,XA,200,12,22,27,31,32,33,33,33,33,33,33,0.078153,0.06
1,XA+XO,200,31,43,56,58,58,58,58,58,58,58,58,0.183554,0.155
2,XO,200,79,120,143,153,164,166,170,175,175,175,175,0.481558,0.395


rank_testing_Dataset-2_catalog1_64


MRR         0.078153
recall@1        0.06
Name: 0, dtype: object

MRR         0.183554
recall@1       0.155
Name: 1, dtype: object

MRR         0.481558
recall@1       0.395
Name: 2, dtype: object



Processing pos_rank_testing_Dataset-2_GMN_OPC-200_e16.csv
-1.1329834932313168 -40.521035114894666


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,1,200,132,170,182,189,191,193,194,194,196,196,197,0.746484,0.66
1,2,200,154,186,192,196,196,197,197,197,197,198,198,0.837728,0.77
2,3,200,122,169,179,181,184,188,189,190,190,191,192,0.705401,0.61


rank_testing_Dataset-2_GMN_OPC-200_e16


MRR         0.746484
recall@1    0.660000
Name: 0, dtype: float64

MRR         0.837728
recall@1    0.770000
Name: 1, dtype: float64

MRR         0.705401
recall@1    0.610000
Name: 2, dtype: float64



Processing pos_rank_testing_Dataset-2_GGSNN_OPC-200_e10.csv
-0.3878217642658504 -4.10225586837797


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,1,200,114,166,181,187,190,192,197,197,198,199,199,0.674242,0.57
1,2,200,145,177,189,192,194,196,198,199,199,199,200,0.794752,0.725
2,3,200,114,159,175,179,185,187,188,189,190,191,191,0.671808,0.57


rank_testing_Dataset-2_GGSNN_OPC-200_e10


MRR         0.674242
recall@1    0.570000
Name: 0, dtype: float64

MRR         0.794752
recall@1    0.725000
Name: 1, dtype: float64

MRR         0.671808
recall@1    0.570000
Name: 2, dtype: float64



Processing pos_rank_testing_Dataset-2_GNN-s2v_GeminiNN_OPC-200_e5.csv
0.7502357609443334 0.0737592036181557


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,1,200,95,149,166,177,183,186,189,191,192,195,199,0.584246,0.475
1,2,200,84,156,179,186,189,194,196,197,197,198,199,0.566784,0.42
2,3,200,94,147,164,174,179,179,184,185,186,187,189,0.584488,0.47


rank_testing_Dataset-2_GNN-s2v_GeminiNN_OPC-200_e5


MRR         0.584246
recall@1    0.475000
Name: 0, dtype: float64

MRR         0.566784
recall@1    0.420000
Name: 1, dtype: float64

MRR         0.584488
recall@1    0.470000
Name: 2, dtype: float64



Processing pos_rank_testing_Dataset-2_GGSNN_NoFeatures_e10.csv
-0.5086053852772437 -4.098323877258298


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,1,200,87,136,154,164,175,185,186,189,189,190,193,0.539976,0.435
1,2,200,123,166,176,180,183,187,190,192,194,194,195,0.707573,0.615
2,3,200,98,145,159,165,171,175,180,187,189,189,189,0.588673,0.49


rank_testing_Dataset-2_GGSNN_NoFeatures_e10


MRR         0.539976
recall@1    0.435000
Name: 0, dtype: float64

MRR         0.707573
recall@1    0.615000
Name: 1, dtype: float64

MRR         0.588673
recall@1    0.490000
Name: 2, dtype: float64



Processing pos_rank_testing_Dataset-2_GNN-s2v_AttentionMean_e5.csv
0.6317907053650667 0.09324779275972171


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,1,200,57,96,125,140,151,160,167,172,176,181,183,0.374486,0.285
1,2,200,34,95,123,145,162,173,179,180,185,187,188,0.291028,0.17
2,3,200,54,97,120,135,145,155,163,169,172,177,180,0.35872,0.27


rank_testing_Dataset-2_GNN-s2v_AttentionMean_e5


MRR         0.374486
recall@1    0.285000
Name: 0, dtype: float64

MRR         0.291028
recall@1    0.170000
Name: 1, dtype: float64

MRR         0.35872
recall@1    0.27000
Name: 2, dtype: float64



Processing pos_rank_testing_Dataset-2_catalog1_128.csv
0.023952992871835676 0.0037233321955235216


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,XA,200,11,18,27,31,32,36,38,38,38,38,38,0.070879,0.055
1,XA+XO,200,28,41,53,65,68,69,69,69,69,69,69,0.173113,0.14
2,XO,200,83,120,145,160,166,172,175,181,186,186,188,0.496663,0.415


rank_testing_Dataset-2_catalog1_128


MRR         0.070879
recall@1       0.055
Name: 0, dtype: object

MRR         0.173113
recall@1        0.14
Name: 1, dtype: object

MRR         0.496663
recall@1       0.415
Name: 2, dtype: object



Processing pos_rank_testing_Dataset-2_Trex.csv
0.6712788228411227 0.14755692012165436


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,1,200,100,152,171,181,187,188,188,190,191,192,194,0.607512,0.5
1,2,200,75,135,166,179,186,187,191,193,194,195,198,0.500726,0.375
2,3,200,92,126,143,158,172,178,179,181,186,188,190,0.527923,0.46


rank_testing_Dataset-2_Trex


MRR         0.607512
recall@1    0.500000
Name: 0, dtype: float64

MRR         0.500726
recall@1    0.375000
Name: 1, dtype: float64

MRR         0.527923
recall@1    0.460000
Name: 2, dtype: float64



Processing pos_rank_testing_Dataset-2_IMM:1.00_MNEM:1.00_GRAPH:1.00.csv
0.593125 0.54632578125


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,XA,200,19,42,73,87,96,109,119,126,138,146,153,0.158385,0.095
1,XA+XO,200,27,53,74,87,98,104,116,123,134,139,146,0.199474,0.135
2,XO,200,46,82,98,108,122,131,135,151,158,165,173,0.301831,0.23


rank_testing_Dataset-2_IMM:1.00_MNEM:1.00_GRAPH:1.00


MRR         0.158385
recall@1       0.095
Name: 0, dtype: object

MRR         0.199474
recall@1       0.135
Name: 1, dtype: object

MRR         0.301831
recall@1        0.23
Name: 2, dtype: object



Processing pos_rank_testing_Dataset-2_Zeek.csv
0.8433154203150001 0.250410992916975


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,1,200,56,129,152,169,176,185,187,188,190,192,193,0.415024,0.28
1,2,200,61,129,163,175,185,191,194,195,195,196,196,0.454284,0.305
2,3,200,42,117,148,162,170,176,178,185,188,192,192,0.356452,0.21


rank_testing_Dataset-2_Zeek


MRR         0.415024
recall@1    0.280000
Name: 0, dtype: float64

MRR         0.454284
recall@1    0.305000
Name: 1, dtype: float64

MRR         0.356452
recall@1    0.210000
Name: 2, dtype: float64



Processing pos_rank_testing_Dataset-2_GNN-s2v_GeminiNN_NoFeatures_e5.csv
0.5818228831783333 0.07064103525037034


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,1,200,23,49,75,84,94,106,125,140,153,159,166,0.177333,0.115
1,2,200,25,62,81,89,101,113,121,132,144,154,161,0.199875,0.125
2,3,200,31,65,91,106,112,123,130,138,146,155,162,0.231339,0.155


rank_testing_Dataset-2_GNN-s2v_GeminiNN_NoFeatures_e5


MRR         0.177333
recall@1    0.115000
Name: 0, dtype: float64

MRR         0.199875
recall@1    0.125000
Name: 1, dtype: float64

MRR         0.231339
recall@1    0.155000
Name: 2, dtype: float64



Processing pos_rank_testing_Dataset-2_GNN-s2v_RNN_ASM_e7.csv
0.3684139767936 -0.12886565391437324


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,1,200,37,102,134,158,168,176,179,180,183,187,190,0.319742,0.185
1,2,200,36,111,147,160,176,182,186,190,193,197,197,0.348812,0.18
2,3,200,45,104,130,148,159,167,174,183,185,187,188,0.347129,0.225


rank_testing_Dataset-2_GNN-s2v_RNN_ASM_e7


MRR         0.319742
recall@1    0.185000
Name: 0, dtype: float64

MRR         0.348812
recall@1    0.180000
Name: 1, dtype: float64

MRR         0.347129
recall@1    0.225000
Name: 2, dtype: float64



Processing pos_rank_testing_Dataset-2_GNN-s2v_GeminiNN_GeminiFeatures_e5.csv
0.7897264380288334 0.05200536373876809


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,1,200,94,138,159,166,171,178,186,188,190,191,192,0.56995,0.47
1,2,200,127,180,191,194,195,197,198,199,200,200,200,0.74099,0.635
2,3,200,98,134,152,164,171,176,184,186,188,191,193,0.573058,0.49


rank_testing_Dataset-2_GNN-s2v_GeminiNN_GeminiFeatures_e5


MRR         0.56995
recall@1    0.47000
Name: 0, dtype: float64

MRR         0.74099
recall@1    0.63500
Name: 1, dtype: float64

MRR         0.573058
recall@1    0.490000
Name: 2, dtype: float64



Processing pos_rank_testing_Dataset-2_SAFE_ASM-list_Rand_Trainable_e10.csv
0.43983728911 -0.5606094369772907


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,1,200,27,97,128,152,163,171,175,180,181,182,185,0.282762,0.135
1,2,200,33,115,152,170,177,181,187,189,193,195,198,0.332353,0.165
2,3,200,41,89,131,146,160,167,174,179,180,182,186,0.309502,0.205


rank_testing_Dataset-2_SAFE_ASM-list_Rand_Trainable_e10


MRR         0.282762
recall@1    0.135000
Name: 0, dtype: float64

MRR         0.332353
recall@1    0.165000
Name: 1, dtype: float64

MRR         0.309502
recall@1    0.205000
Name: 2, dtype: float64



Processing pos_rank_testing_Dataset-2_catalog1_32.csv
0.02391061084872201 0.003371267345890956


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,XA,200,14,22,23,24,25,25,25,25,25,25,25,0.085792,0.07
1,XA+XO,200,34,44,48,48,48,48,48,48,48,48,48,0.193214,0.17
2,XO,200,77,118,138,141,143,144,144,144,144,144,144,0.4715,0.385


rank_testing_Dataset-2_catalog1_32


MRR         0.085792
recall@1        0.07
Name: 0, dtype: object

MRR         0.193214
recall@1        0.17
Name: 1, dtype: object

MRR         0.4715
recall@1     0.385
Name: 2, dtype: object



Processing pos_rank_testing_Dataset-2_IMM:4.00_MNEM:0.05_GRAPH:1.00.csv
0.6152994791666667 0.5333334635416667


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,XA,200,45,78,106,123,130,140,144,147,157,159,168,0.304218,0.225
1,XA+XO,200,57,88,107,119,130,144,155,163,167,175,178,0.357482,0.285
2,XO,200,71,104,118,129,134,141,149,156,164,166,170,0.425532,0.355


rank_testing_Dataset-2_IMM:4.00_MNEM:0.05_GRAPH:1.00


MRR         0.304218
recall@1       0.225
Name: 0, dtype: object

MRR         0.357482
recall@1       0.285
Name: 1, dtype: object

MRR         0.425532
recall@1       0.355
Name: 2, dtype: object



Processing pos_rank_testing_Dataset-2_IMM:0.00_MNEM:1.00_GRAPH:1.00.csv
0.5947005208333334 0.54725


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,XA,200,17,48,67,81,99,114,123,129,140,151,156,0.149855,0.085
1,XA+XO,200,29,59,73,86,95,102,112,125,134,140,142,0.20579,0.145
2,XO,200,46,75,90,109,120,129,139,151,158,164,170,0.289071,0.23


rank_testing_Dataset-2_IMM:0.00_MNEM:1.00_GRAPH:1.00


MRR         0.149855
recall@1       0.085
Name: 0, dtype: object

MRR         0.20579
recall@1      0.145
Name: 1, dtype: object

MRR         0.289071
recall@1        0.23
Name: 2, dtype: object



Processing pos_rank_testing_Dataset-2_asm2vec_e10.csv
0.29192020466353796 0.13485785988781598


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,XA,200,3,29,46,65,82,93,108,116,125,131,146,0.066135,0.015
1,XA+XO,200,35,52,82,94,103,111,120,124,132,139,150,0.224536,0.175
2,XO,200,98,151,171,178,182,186,188,190,191,194,196,0.601079,0.49


rank_testing_Dataset-2_asm2vec_e10


MRR         0.066135
recall@1       0.015
Name: 0, dtype: object

MRR         0.224536
recall@1       0.175
Name: 1, dtype: object

MRR         0.601079
recall@1        0.49
Name: 2, dtype: object



Processing pos_rank_testing_Dataset-2_IMM:0.00_MNEM:0.00_GRAPH:1.00.csv
0.6206770833333334 0.5493744791666667


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,XA,200,51,90,106,128,142,148,159,165,169,172,175,0.346323,0.255
1,XA+XO,200,51,79,103,116,129,136,144,152,157,163,168,0.318266,0.255
2,XO,200,35,75,98,108,123,133,141,151,156,160,165,0.262012,0.175


rank_testing_Dataset-2_IMM:0.00_MNEM:0.00_GRAPH:1.00


MRR         0.346323
recall@1       0.255
Name: 0, dtype: object

MRR         0.318266
recall@1       0.255
Name: 1, dtype: object

MRR         0.262012
recall@1       0.175
Name: 2, dtype: object



Processing pos_rank_testing_Dataset-2_pvdbow_e10.csv
0.3947160890936025 0.26665465302400787


Unnamed: 0,task,num_test_cases,cc_top_1,cc_top_5,cc_top_10,cc_top_15,cc_top_20,cc_top_25,cc_top_30,cc_top_35,cc_top_40,cc_top_45,cc_top_50,MRR,recall@1
0,XA,200,6,27,38,56,66,86,103,118,126,145,153,0.072512,0.03
1,XA+XO,200,39,53,73,85,98,111,123,132,138,146,154,0.229296,0.195
2,XO,200,100,159,176,182,188,189,190,193,195,196,197,0.625468,0.5


rank_testing_Dataset-2_pvdbow_e10


MRR         0.072512
recall@1        0.03
Name: 0, dtype: object

MRR         0.229296
recall@1       0.195
Name: 1, dtype: object

MRR         0.625468
recall@1         0.5
Name: 2, dtype: object





In [16]:
df_mrr_recall = pd.DataFrame(mrr_recall_tc0)
df_mrr_recall = df_mrr_recall.transpose()
df_mrr_recall = df_mrr_recall.rename(columns={0:"MRR", 1:"recall@1"})
df_mrr_recall.to_csv(os.path.join(OUTPUT_DIR, "df_mrr_recall_tc1.csv"))

df_mrr_recall = pd.DataFrame(mrr_recall_tc1)
df_mrr_recall = df_mrr_recall.transpose()
df_mrr_recall = df_mrr_recall.rename(columns={0:"MRR", 1:"recall@1"})
df_mrr_recall.to_csv(os.path.join(OUTPUT_DIR, "df_mrr_recall_tc2.csv"))

df_mrr_recall = pd.DataFrame(mrr_recall_tc2)
df_mrr_recall = df_mrr_recall.transpose()
df_mrr_recall = df_mrr_recall.rename(columns={0:"MRR", 1:"recall@1"})
df_mrr_recall.to_csv(os.path.join(OUTPUT_DIR, "df_mrr_recall_tc3.csv"))

df_rank = pd.DataFrame(global_results_tc0)/200
df_rank.index = [(x+1) * 5 for x in df_rank.index]
df_rank.to_csv(os.path.join(OUTPUT_DIR, "df_rank_tc1.csv"))

df_rank = pd.DataFrame(global_results_tc1)/200
df_rank.index = [(x+1) * 5 for x in df_rank.index]
df_rank.to_csv(os.path.join(OUTPUT_DIR, "df_rank_tc2.csv"))

df_rank = pd.DataFrame(global_results_tc2)/200
df_rank.index = [(x+1) * 5 for x in df_rank.index]
df_rank.to_csv(os.path.join(OUTPUT_DIR, "df_rank_tc3.csv"))

## AUC

In [17]:
base_path = "../../DBs/Dataset-2/pairs/"

df_pos_testing = pd.read_csv(
    os.path.join(base_path, "pos_testing_Dataset-2.csv"),
    index_col=0)

df_neg_testing = pd.read_csv(
    os.path.join(base_path, "neg_testing_Dataset-2.csv"),
    index_col=0)

global_results = list()

for csv_file in os.listdir(RESULTS_DIR):
    if (not csv_file.endswith(".csv")) or \
        (not "df_pos" in csv_file) or \
        ("rank" in csv_file):
        continue

    print("Processing {}".format(csv_file))

    df_pos_test = pd.read_csv(
        os.path.join(RESULTS_DIR, csv_file),
        index_col=0)

    df_neg_test = pd.read_csv(
        os.path.join(RESULTS_DIR, csv_file.replace("df_pos", "df_neg")),
        index_col=0)
    
    if 'cs' not in set(df_pos_test.columns):
        df_pos_test.rename(columns={'sim':'cs'}, inplace=True)
        df_neg_test.rename(columns={'sim':'cs'}, inplace=True)

    if 'db_type' in df_pos_test.columns:
        del df_pos_test['db_type']

    if 'db_type' in df_neg_test.columns:
        del df_neg_test['db_type']

    # Merge
    df_pos_testing_m = process_df(
        df_pos_testing, df_pos_test, is_pos=True)
    df_neg_testing_m = process_df(
        df_neg_testing, df_neg_test, is_pos=False)
    
#     display(df_pos_test['cs'].hist(bins=200, range=[-2, +2]))
#     display(df_neg_test['cs'].hist(bins=200, alpha=0.8, range=[-2, +2]))
#     plt.show()

    display(df_pos_test['cs'].hist(bins=200))
    display(df_neg_test['cs'].hist(bins=200, alpha=0.8))
    plt.show()

    # Debug
    assert(df_pos_testing_m.isna().sum()['cs'] == 0)
    assert(df_neg_testing_m.isna().sum()['cs'] == 0)

    result_list = list()
    
    result_list.append(['title', csv_file.replace("pos_", "").replace("neg_", "").replace(".csv", "")])

    ll = plot_roc_values(df_pos_testing_m, df_neg_testing_m, plot=False)
    result_list.extend(ll)
    print("\n")

    global_results.append(result_list)


In [18]:
pd_temp_dict = defaultdict(list)
for xr in global_results:
    columns_set = set()
    columns = [x[0].replace(' - all', '') for x in xr]
    values = [x[1].replace(' - all', '') for x in xr]
    for c, v in zip(columns, values):
        if c in columns_set:
            continue
        columns_set.add(c)
        pd_temp_dict[c].append(v)
df_auc = pd.DataFrame.from_dict(pd_temp_dict)
df_auc = df_auc.rename(columns={"title":"model_name",
                                df_auc.columns[1]:"AUC X-OPT",
                                df_auc.columns[2]:"X-ARCH",
                                df_auc.columns[3]:"X-MIXED"})
display(df_auc)
df_auc.to_csv(os.path.join(OUTPUT_DIR, "df_auc.csv"))

IndexError: index 1 is out of bounds for axis 0 with size 0