In [1]:
import os
import subprocess
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tabulate import tabulate
from collections import defaultdict
#import seaborn as sns

In [2]:
# Choose the benchmark from spec, spec-stripped, spec-opt and spec-opt-stripped.

class BenchmarkType():
    """Container for the data loaded for one benchmark configuration.

    Attributes:
        path: directory the configuration was loaded from.
        clean_dict: benchmark name -> cleaned type-comparison DataFrame.
        callsite_dict: benchmark name -> cleaned callsite DataFrame.
        fun_dict: benchmark name -> function DataFrame.
        model_stat: benchmark name -> per-policy metric lists.
    """

    # Class-level defaults are kept so attribute access on the class itself
    # keeps working; instances never share them (see __init__).
    path = None
    clean_dict = {}
    callsite_dict = {}
    fun_dict = {}
    model_stat = {}

    def __init__(self):
        # Each instance gets its own dictionaries. Without this, mutating
        # e.g. `a.clean_dict[...]` would silently write into the single
        # class-level dict that every other instance also sees.
        self.path = None
        self.clean_dict = {}
        self.callsite_dict = {}
        self.fun_dict = {}
        self.model_stat = {}

In [3]:
def ParaseBenchmarks(bench, base_dir="/volatile/zephyr/Ruturaj/cfi_project/test_suite"):
    """Load and clean the per-benchmark CSV files of one configuration.

    Args:
        bench: name of the configuration sub-directory (e.g. "spec",
            "spec-stripped", "spec-opt", "spec-opt-stripped").
        base_dir: directory that contains the configuration directories.
            Defaults to the location the notebook has always used.

    Returns:
        A BenchmarkType whose `path`, `clean_dict`, `callsite_dict` and
        `fun_dict` attributes are filled in, keyed by benchmark name.
    """
    benchmark_type = BenchmarkType()
    # The empty last component keeps the trailing separator on the stored path.
    path = os.path.join(base_dir, bench, "")
    benchmarks = ["400.perlbench", "401.bzip2", "403.gcc.hs", "445.gobmk", "456.hmmer",
                  "458.sjeng.hs", "464.h264ref.hs"]
    binaries = ["perlbench-debug.exe", "bzip2.exe", "gcc.exe", "gobmk.exe", "hmmer.exe",
                "sjeng.exe", "h264ref.exe"]

    # Columns that are converted to int after rows with NaN have been dropped.
    clean_int_columns = ["Params_ida", "SrcType (IFCC)_ida", "SafeSrcType (IFCC-safe)_ida",
                         "BinType (TypeArmor)_ida", "Baseline_ida"]
    callsite_int_columns = ["Arg1_ida", "Arg2_ida", "Arg3_ida", "Arg4_ida", "Arg5_ida",
                            "Arg6_ida", "Arg7_ida", "Return_ida"]

    clean_dict = {}
    callsite_dict = {}
    fun_dict = {}

    for benchmark, binary in zip(benchmarks, binaries):
        bench_dir = os.path.join(path, benchmark)

        clean_df = pd.read_csv(os.path.join(bench_dir, binary + "-clean.csv"),
                               index_col=0, header=[0])
        fun_df = pd.read_csv(os.path.join(bench_dir, binary + "-fun-clean.csv"),
                             header=[0])
        callsite_df = pd.read_csv(os.path.join(bench_dir, binary + "-callsite-clean.csv"),
                                  index_col=0, header=[0])

        ######## Clean clean_df and store. ########

        # Drop rows with NaN values. Note: this could remove legitimate callsites.
        clean_df = clean_df.dropna(how="any")

        # Sort the dataframe on "Params".
        clean_df = clean_df.sort_values("Params")

        # Add the IDA source-type columns (filled with 0) if they are absent.
        if "SrcType (IFCC)_ida" not in clean_df:
            clean_df["SrcType (IFCC)_ida"] = 0
        if "SafeSrcType (IFCC-safe)_ida" not in clean_df:
            clean_df["SafeSrcType (IFCC-safe)_ida"] = 0

        # Convert the IDA columns to int.
        clean_df[clean_int_columns] = clean_df[clean_int_columns].astype(int)

        # Flag, per row, whether each source value equals its IDA counterpart.
        clean_df["Param_match"] = np.where(clean_df["Params"] == clean_df["Params_ida"], 1, 0)
        clean_df["SrcType_match"] = np.where(
            clean_df["SrcType (IFCC)"] == clean_df["SrcType (IFCC)_ida"], 1, 0)
        clean_df["SafeSrcType_match"] = np.where(
            clean_df["SafeSrcType (IFCC-safe)"] == clean_df["SafeSrcType (IFCC-safe)_ida"], 1, 0)
        clean_df["BinType_match"] = np.where(
            clean_df["BinType (TypeArmor)"] == clean_df["BinType (TypeArmor)_ida"], 1, 0)

        clean_dict[benchmark] = clean_df

        ######## Clean callsite_df and store. ########

        # Drop rows with NaN values. Note: this could remove legitimate callsites.
        callsite_df = callsite_df.dropna(how="any")

        # Convert the IDA columns to int.
        callsite_df[callsite_int_columns] = callsite_df[callsite_int_columns].astype(int)

        callsite_dict[benchmark] = callsite_df

        ######## Store fun_df as loaded. ########

        fun_dict[benchmark] = fun_df

    benchmark_type.path = path
    benchmark_type.clean_dict = clean_dict
    benchmark_type.callsite_dict = callsite_dict
    benchmark_type.fun_dict = fun_dict

    return benchmark_type

# Load all four benchmark configurations; each call reads the per-benchmark
# CSV files found under the configuration's directory.
spec = ParaseBenchmarks("spec")
spec_str = ParaseBenchmarks("spec-stripped")
spec_opt = ParaseBenchmarks("spec-opt")
spec_opt_str = ParaseBenchmarks("spec-opt-stripped")

In [4]:
def decode(encoding, recurse=False, arm=False):
    """Translate a numeric type encoding into its textual name.

    Args:
        encoding: numeric type code.
        recurse: for codes without a direct name, also decode
            `encoding - 16` (one level only) and put it before the "*".
        arm: when true, only distinguish void from non-void: returns "0"
            for code 1 and "1" for everything else.

    Returns:
        The name as a string; codes without a direct name yield "*"
        (optionally prefixed by the decoded base type).
    """
    if arm:
        return "0" if encoding == 1 else "1"

    # Checked in order with ==, so int-valued floats match as well.
    named_codes = (
        (0, "0"),
        (1, "Void"),
        (2, "Int1"),
        (3, "Int8"),
        (4, "Int16"),
        (5, "Int32"),
        (6, "Int64"),
        (7, "Half"),
        (8, "Float"),
        (9, "Double"),
        (10, "Float80128"),
        (12, "Struct"),
        (13, "Array"),
        (14, "Undef."),
        (32, "unc."),
    )
    for code, label in named_codes:
        if encoding == code:
            return label

    # Everything else is rendered as a pointer.
    prefix = decode(encoding - 16, False) if recurse else ""
    return prefix + "*"

# Module-level result store, keyed by configuration name. matchTargets fills
# in final[bench][benchmark] with the per-policy callsite and function tables.
final = {"spec":{}, "spec-opt-stripped":{}}

def _signature_keys(args, ret):
    """Build the (typearmor, ifcc, mcfi) lookup keys for one signature.

    `args` holds the seven argument-type encodings of a callsite or function
    and `ret` its return-type encoding.
    """
    # TypeArmor key: number of used argument slots plus void/non-void return.
    arg_count = sum(1 for x in args if x > 0)
    # Signatures that use all seven slots are folded into the 6-argument bucket.
    if arg_count >= 7:
        arg_count = 6
    typearmor_key = str(arg_count) + decode(ret, arm=True)

    # IFCC / MCFI keys: concatenated type names of all seven slots (the
    # return type is not part of the key). MCFI also decodes pointee types.
    ifcc_key = "".join([decode(x, False) for x in args])
    mcfi_key = "".join([decode(x, True) for x in args])
    return typearmor_key, ifcc_key, mcfi_key


def matchTargets(bench, obj):
    """Hash every callsite and function of `obj` under each CFI policy.

    For each benchmark in `obj.callsite_dict` this builds, per policy
    ("typearmor", "ifcc", "mcfi") and per side (source and "_ida"):
      * "call": key -> number of callsites with that key
      * "fun":  key -> list of function names with that key
    and stores them in the module-level `final[bench][benchmark]`.

    Args:
        bench: configuration name used as the top-level key in `final`;
            the entry is created if it does not exist yet.
        obj: object exposing `callsite_dict` and `fun_dict` (benchmark name
            -> DataFrame).
    """
    policies = ("typearmor", "ifcc", "mcfi")

    for benchmark, df in obj.callsite_dict.items():
        # Insertion order (policy, policy_ida, ...) matches the stored layout.
        calls = {}
        funs = {}
        for policy in policies:
            calls[policy] = {}
            calls[f"{policy}_ida"] = {}
            funs[policy] = defaultdict(list)
            funs[f"{policy}_ida"] = defaultdict(list)

        print(benchmark)

        # Callsites: positions 1-7 are the source argument types and 8 the
        # return type; positions 10-16 / 17 are the IDA counterparts.
        for _, row in df.iterrows():
            keys = _signature_keys(row.iloc[1:8].to_numpy(), row.iloc[8])
            keys_ida = _signature_keys(row.iloc[10:17].to_numpy(), row.iloc[17])
            for policy, key, key_ida in zip(policies, keys, keys_ida):
                src_counts = calls[policy]
                src_counts[key] = src_counts.get(key, 0) + 1
                ida_counts = calls[f"{policy}_ida"]
                ida_counts[key_ida] = ida_counts.get(key_ida, 0) + 1

        # Functions: position 0 is the name, 1-7 / 8 the source signature and
        # 9-15 / 16 the IDA signature.
        for _, row in obj.fun_dict[benchmark].iterrows():
            name = row.iloc[0]
            keys = _signature_keys(row.iloc[1:8].to_numpy(), row.iloc[8])
            keys_ida = _signature_keys(row.iloc[9:16].to_numpy(), row.iloc[16])
            for policy, key, key_ida in zip(policies, keys, keys_ida):
                funs[policy][key].append(name)
                funs[f"{policy}_ida"][key_ida].append(name)

        # setdefault lets configurations that were not pre-registered in
        # `final` be processed instead of failing with a KeyError.
        final.setdefault(bench, {})[benchmark] = {"call": calls, "fun": funs}

# Fill `final` for the two configurations that are compared below.
matchTargets("spec", spec)
matchTargets("spec-opt-stripped", spec_opt_str)

400.perlbench
401.bzip2
403.gcc.hs
445.gobmk
456.hmmer
458.sjeng.hs
464.h264ref.hs
400.perlbench
401.bzip2
403.gcc.hs
445.gobmk
456.hmmer
458.sjeng.hs
464.h264ref.hs


In [5]:
def get_typearmor(dic, match_type):
    """Count, per callsite, the functions that the TypeArmor keys permit.

    A callsite key is "<arg count><return flag>". A function is counted when
    its key has the same return flag and an argument count from 0 up to the
    callsite's count.

    Args:
        dic: mapping with "call" (key -> callsite count) and "fun"
            (key -> list of functions) tables for `match_type` and
            `match_type + "_ida"`.
        match_type: policy name used to select the tables.

    Returns:
        (llvm_arr, ida_arr): one target count per callsite for the source
        tables and for the "_ida" tables respectively.
    """
    def targets_per_callsite(policy):
        permitted = []
        for encoding, callsites in dic["call"][policy].items():
            for _ in range(callsites):
                permitted.append(sum(
                    len(dic["fun"][policy][str(n) + encoding[1]])
                    for n in range(int(encoding[0]) + 1)
                    if str(n) + encoding[1] in dic["fun"][policy]
                ))
        return permitted

    llvm_arr = targets_per_callsite(match_type)
    ida_arr = targets_per_callsite(f"{match_type}_ida")
    return llvm_arr, ida_arr

        
def get_ifcc(dic, match_type):
    """Count, per callsite, the functions whose key equals the callsite key.

    Args:
        dic: mapping with "call" (key -> callsite count) and "fun"
            (key -> list of functions) tables for `match_type` and
            `match_type + "_ida"`.
        match_type: policy name used to select the tables.

    Returns:
        (llvm_arr, ida_arr): one target count per callsite (0 when no
        function shares the key) for the source and "_ida" tables.
    """
    def exact_targets(policy):
        matched = []
        for encoding, callsites in dic["call"][policy].items():
            for _ in range(callsites):
                fun_table = dic["fun"][policy]
                matched.append(len(fun_table[encoding]) if encoding in fun_table else 0)
        return matched

    llvm_arr = exact_targets(match_type)
    ida_arr = exact_targets(f"{match_type}_ida")
    return llvm_arr, ida_arr


def drawTable(bench, obj):
    """Print a LaTeX `table*` with one CTR subtable per policy.

    For each benchmark stored in `final[bench]` the per-callsite target
    counts are summarised (count, mean, std, min, median, 90th percentile,
    max) for the source side and for the IDA side.

    Args:
        bench: configuration name; selects `final[bench]` and is used in
            the caption and labels.
        obj: object whose `fun_dict` supplies the number of functions
            ("Targets" column) per benchmark.
    """
    print("\\begin{table*}[h!]")
    print("\\centering")

    def matchTable(match_type, label):
        """Print the subtable for one policy."""
        match_count = {}
        for benchmark, dic in final[bench].items():
            if match_type == "typearmor":
                llvm_arr, ida_arr = get_typearmor(dic, match_type)
            else:
                llvm_arr, ida_arr = get_ifcc(dic, match_type)
            match_count[benchmark] = pd.Series(llvm_arr).describe(percentiles=[.9]).to_list()
            # The IDA half skips its leading count entry; the table has a
            # single "Count" column.
            match_count[benchmark].extend(pd.Series(ida_arr).describe(percentiles=[.9]).to_list()[1:])
        headers = ["Benchmark", "Targets", "Count", "Mean", "Std", "Min", "Med", "90thp", "Max",
                   "Mean", "Std", "Min", "Med", "90thp", "Max"]
        rows = [[k] + [len(obj.fun_dict[k].index)]
                + [f"{x:.2f}" if not np.isnan(x) else x for x in v]
                for k, v in match_count.items()]
        table = tabulate(rows, headers=headers, tablefmt="latex")
        print("\\resizebox{12cm}{!}{")
        print("\\begin{subtable}[]{1.2\\textwidth}")
        print("\\centering")
        table = table.split("\n")
        # Replace the generated column specification with a custom one.
        table[0] = "\\begin{tabular}[]{|l||r||r||r|r|r|r|r|r||r|r|r|r|r|r|}"
        # Print the first two lines, then the grouping row, then the rest.
        for line in table[:2]:
            print(line)
        print("\\multicolumn{3}{|c||}{} & \\multicolumn{6}{|c||}{Source-CFI} "
              "& \\multicolumn{6}{|c|}{Binary-CFI}\\\\")
        print("\\hline")
        for line in table[2:]:
            print(line)
        print(f"\\caption{{{match_type}}}\\label{{table:{bench}:{label}}}")
        print("\\end{subtable}")
        print("}")

    matchTable("typearmor", 1)
    matchTable("ifcc", 2)
    matchTable("mcfi", 3)

    debug = "out" if "stripped" in bench else ""
    # Backslashes are written as "\\": "\c" and "\i" are not valid escape
    # sequences and only worked by accident.
    print("\\caption{Comparison of 3 different policies ({\\it TypeArmor}, {\\it IFCC}, {\\it MCFI}) "
          f"applied for binaries compiled with{debug} debug symbols using CTR metric. "
          "The `count` depicts number of callsites, and subsequent columns display Mean, Standard "
          "Deviation, Minimum, Median, 90\\textsuperscript{th} percentile and Maximum aggregated "
          f"CTR results for each {bench} benchmark.}}")
    print(f"\\label{{table:{bench}:ctr}}")
    print("\\end{table*}")
    
# Emit the LaTeX tables for both configurations, separated by a marker line.
drawTable("spec", spec)
print("-------")
drawTable("spec-opt-stripped", spec_opt_str)

\begin{table*}[h!]
\centering
\resizebox{12cm}{!}{
\begin{subtable}[]{1.2\textwidth}
\centering
\begin{tabular}[]{|l||r||r||r|r|r|r|r|r||r|r|r|r|r|r|}
\hline
\multicolumn{3}{|c||}{} & \multicolumn{6}{|c||}{Source-CFI} & \multicolumn{6}{|c|}{Binary-CFI}\\
\hline
 Benchmark      &   Targets &   Count &    Mean &    Std &   Min &   Med &   90thp &   Max &    Mean &    Std &   Min &   Med &   90thp &   Max \\
\hline
 400.perlbench  &      1293 &     240 &  646.07 & 196.38 &   213 &   590 &     893 &   975 &  568.46 & 283.46 &    11 &   599 &     906 &   995 \\
 401.bzip2      &        88 &      20 &   36.4  &   4.52 &    31 &    40 &      40 &    40 &   29.5  &   6.61 &    23 &    29 &      37 &    37 \\
 403.gcc.hs     &      2920 &     287 &  926.44 & 447.56 &    38 &  1281 &    1281 &  1906 & 1053.48 & 445.9  &    65 &  1296 &    1296 &  2008 \\
 445.gobmk      &      2198 &      40 & 1102.3  & 895.24 &    68 &  1942 &    1942 &  1942 &  970.7  & 901.23 &    64 &   231 &    1959 &  1959

In [6]:
def exactMatch_typearmor(bench, benchmark, key, key_ida, total_functions):
    """Compare the TypeArmor target sets of one callsite (source vs. IDA).

    A function is a permitted target when its key has the same return flag
    as the callsite key and an argument count that is not larger.

    Args:
        bench: configuration name (top-level key of `final`).
        benchmark: benchmark name inside that configuration.
        key: source-side callsite key, "<arg count><return flag>".
        key_ida: IDA-side callsite key in the same format.
        total_functions: unused; kept for interface compatibility.

    Returns:
        (tp, tn): `tp` is the fraction of source targets also permitted on
        the IDA side; `tn` is the fraction of IDA targets that are not
        source targets. An empty target set yields 1 for its ratio, the
        same convention exactMatch_ifcc uses, instead of dividing by zero.
    """
    match_type = "typearmor"
    fun_tables = final[bench][benchmark]["fun"]

    def permitted_targets(table, site_key):
        targets = set()
        for k, funs in table.items():
            if int(k[0]) <= int(site_key[0]) and k[1] == site_key[1]:
                targets.update(funs)
        return targets

    llvm = permitted_targets(fun_tables[match_type], key)
    ida = permitted_targets(fun_tables[f"{match_type}_ida"], key_ida)

    tp = len(llvm & ida) / len(llvm) if len(llvm) != 0 else 1
    tn = len(ida - llvm) / len(ida) if len(ida) != 0 else 1

    return tp, tn

def exactMatch_ifcc(bench, benchmark, match_type, key, key_ida, total_functions):
    """Compare the exact-key target sets of one callsite (source vs. IDA).

    Args:
        bench: configuration name (top-level key of `final`).
        benchmark: benchmark name inside that configuration.
        match_type: policy whose function tables are consulted.
        key: source-side callsite key.
        key_ida: IDA-side callsite key.
        total_functions: unused.

    Returns:
        (tp, tn): the fraction of source targets that are also IDA targets,
        and the fraction of IDA targets that are not source targets. Either
        ratio is 1 when its denominator set is empty.
    """
    fun_tables = final[bench][benchmark]["fun"]
    source_targets = set(fun_tables[match_type].get(key, ()))
    binary_targets = set(fun_tables[f"{match_type}_ida"].get(key_ida, ()))

    if source_targets:
        tp = len(source_targets & binary_targets) / len(source_targets)
    else:
        tp = 1

    if binary_targets:
        tn = len(binary_targets - source_targets) / len(binary_targets)
    else:
        tn = 1

    return tp, tn


def collectMetrics(bench, obj):
    """Compute per-callsite (tp, fp) values for every policy.

    For each callsite row the source and IDA keys are built for the
    typearmor, ifcc and mcfi policies, handed to the matching exactMatch_*
    function, and the two returned values are appended to the "tp" and
    "fp" lists of that policy. The result is stored in `obj.model_stat`.

    Args:
        bench: configuration name forwarded to the exactMatch_* functions.
        obj: object exposing `callsite_dict` and `fun_dict`.
    """
    stats = {}
    for benchmark, callsites in obj.callsite_dict.items():
        stats[benchmark] = {policy: {"tp": [], "fp": []}
                            for policy in ("typearmor", "ifcc", "mcfi")}
        total_functions = len(obj.fun_dict[benchmark].index)

        for _, row in callsites.iterrows():
            call = row.iloc[1:8].to_numpy()
            ret = row.iloc[8]
            call_ida = row.iloc[10:17].to_numpy()
            ret_ida = row.iloc[17]

            # Typearmor: used-argument count (seven or more collapses to 6)
            # followed by the void / non-void return flag.
            n_args = min(len([x for x in call if x > 0]), 6)
            n_ida_args = min(len([x for x in call_ida if x > 0]), 6)
            key = str(n_args) + decode(ret, arm=True)
            key_ida = str(n_ida_args) + decode(ret_ida, arm=True)
            tp, fp = exactMatch_typearmor(bench, benchmark, key, key_ida, total_functions)
            stats[benchmark]["typearmor"]["tp"].append(tp)
            stats[benchmark]["typearmor"]["fp"].append(fp)

            # ifcc and mcfi differ only in whether pointee types are decoded.
            for policy, recurse in (("ifcc", False), ("mcfi", True)):
                key = "".join([decode(x, recurse) for x in call])
                key_ida = "".join([decode(x, recurse) for x in call_ida])
                tp, fp = exactMatch_ifcc(bench, benchmark, policy, key, key_ida, total_functions)
                stats[benchmark][policy]["tp"].append(tp)
                stats[benchmark][policy]["fp"].append(fp)

    obj.model_stat = stats

# Compute the per-callsite metrics; each call stores them on the object as model_stat.
collectMetrics("spec", spec)
collectMetrics("spec-opt-stripped", spec_opt_str)

In [7]:
def displayMetrics(bench, obj):
    """Print a LaTeX `table*` with one RelativeCTR subtable per policy.

    For each benchmark in `obj.model_stat` the "tp" and "fp" lists of a
    policy are summarised (count, mean, std, min, median, 90th percentile,
    max) and rendered through tabulate.

    Args:
        bench: configuration name used in the caption and labels.
        obj: object whose `model_stat` holds the per-policy metric lists.
    """
    print("\\begin{table*}[h!]")
    print("\\centering")

    def exactMatchTable(match_type, label):
        """Print the subtable for one policy."""
        model_stat = obj.model_stat

        match_count = {}
        for benchmark, model in model_stat.items():
            match_count[benchmark] = pd.Series(model[match_type]["tp"]).describe(percentiles=[.9]).to_list()
            # The "fp" half skips its leading count entry; the table has a
            # single "Count" column.
            match_count[benchmark].extend(
                pd.Series(model[match_type]["fp"]).describe(percentiles=[.9]).to_list()[1:])
        headers = ["Benchmark", "Count", "Mean", "Std", "Min", "Med", "90thp", "Max",
                   "Mean", "Std", "Min", "Med", "90thp", "Max"]
        rows = [[k] + [f"{x:.2f}" if not np.isnan(x) else x for x in v]
                for k, v in match_count.items()]
        table = tabulate(rows, headers=headers, tablefmt="latex")
        print("\\resizebox{12cm}{!}{")
        print("\\begin{subtable}[]{1.1\\textwidth}")
        print("\\centering")
        table = table.split("\n")
        # Replace the generated column specification with a custom one.
        table[0] = "\\begin{tabular}[]{|l|||r||r|r|r|r|r|r||r|r|r|r|r|r|r|}"
        # Print the first two lines, then the grouping row, then the rest.
        for line in table[:2]:
            print(line)
        print("\\multicolumn{2}{|c||}{} & \\multicolumn{6}{|c||}{$RelativeCTR_T$} "
              "& \\multicolumn{6}{|c|}{$RelativeCTR_F$}\\\\")
        print("\\hline")
        for line in table[2:]:
            print(line)
        print(f"\\caption{{{match_type}}}\\label{{table:{bench}:rtcr:{label}}}")
        print("\\end{subtable}")
        print("}")

    exactMatchTable("typearmor", 1)
    exactMatchTable("ifcc", 2)
    exactMatchTable("mcfi", 3)

    debug = "out" if "stripped" in bench else ""
    # Backslashes are written as "\\": "\c" and "\i" are not valid escape
    # sequences and only worked by accident. A space was also missing
    # between "metrics." and "The".
    print("\\caption{Comparison of 3 different policies ({\\it TypeArmor}, {\\it IFCC}, {\\it MCFI}) "
          f"applied for binaries compiled with{debug} debug symbols using {{\\tt RelativeCTR}} metrics. "
          "The `count` depicts number of callsites, and subsequent columns display Mean, Standard "
          "Deviation, Minimum, Median, 90\\textsuperscript{th} percentile and Maximum aggregate "
          f"results for each {bench} benchmark.}}")
    print(f"\\label{{table:{bench}:rctr}}")
    print("\\end{table*}")

# Emit the RelativeCTR tables for both configurations, separated by a marker line.
displayMetrics("spec", spec)

print("-------")
displayMetrics("spec-opt-stripped", spec_opt_str)

\begin{table*}[h!]
\centering
\resizebox{12cm}{!}{
\begin{subtable}[]{1.1\textwidth}
\centering
\begin{tabular}[]{|l|||r||r|r|r|r|r|r||r|r|r|r|r|r|r|}
\hline
\multicolumn{2}{|c||}{} & \multicolumn{6}{|c||}{$RelativeCTR_T$} & \multicolumn{6}{|c|}{$RelativeCTR_F$}\\
\hline
 Benchmark      &   Count &   Mean &    Std &   Min &   Med &   90thp &   Max &   Mean &    Std &   Min &   Med &   90thp &   Max \\
\hline
 400.perlbench  &     240 &   0.79 &   0.37 &  0    &  1    &    1    &  1    &   0.18 &   0.36 &  0    &  0.02 &    1    &  1    \\
 401.bzip2      &      20 &   0.68 &   0.22 &  0.2  &  0.57 &    0.9  &  0.9  &   0.17 &   0.22 &  0    &  0.12 &    0.29 &  0.72 \\
 403.gcc.hs     &     287 &   0.96 &   0.16 &  0    &  1    &    1    &  1    &   0.13 &   0.24 &  0    &  0.02 &    0.64 &  1    \\
 445.gobmk      &      40 &   0.89 &   0.26 &  0    &  0.99 &    1    &  1    &   0.09 &   0.26 &  0    &  0.01 &    0.04 &  1    \\
 456.hmmer      &       9 &   0.94 &   0.03 &  0.92 &  0

In [8]:
def typearmorplot(bench, obj):
    """Tally void / non-void return agreement and build the bar chart.

    For callsites and for functions, every row is classified by its source
    return type (void or non-void) and by whether the IDA return type falls
    in the same class. Both tallies are printed as
    [void correct, void incorrect, non-void correct, non-void incorrect].
    The figure is built and closed; showing and saving are disabled.

    Args:
        bench: configuration name; "opt" in the name selects the
            "Setting II" title, otherwise "Setting I".
        obj: object exposing `callsite_dict` and `fun_dict`.
    """
    def tally_returns(frames, ret_pos, ret_ida_pos):
        """Count return-class agreement over all rows of all frames."""
        correct_void = 0
        incorrect_void = 0
        correct_nonvoid = 0
        incorrect_nonvoid = 0
        for df in frames.values():
            for _, row in df.iterrows():
                # decode(..., arm=True) gives "0" for void, "1" otherwise.
                key = decode(row.iloc[ret_pos], arm=True)
                key_ida = decode(row.iloc[ret_ida_pos], arm=True)
                if key == "0":
                    if key == key_ida:
                        correct_void += 1
                    else:
                        incorrect_void += 1
                else:
                    if key == key_ida:
                        correct_nonvoid += 1
                    else:
                        incorrect_nonvoid += 1
        return [correct_void, incorrect_void, correct_nonvoid, incorrect_nonvoid]

    # Callsite rows: return types at positions 8 (source) and 17 (IDA).
    y1 = tally_returns(obj.callsite_dict, 8, 17)
    print(y1)

    # Function rows: return types at positions 8 (source) and 16 (IDA).
    y2 = tally_returns(obj.fun_dict, 8, 16)
    print(y2)

    labels = ["Void T", "Void F", "Non-Void T", "Non-Void F"]

    fig, ax = plt.subplots()

    fig.set_dpi(100)
    fig.set_size_inches(10, 4)

    # Set the bar width.
    w = 0.3

    bar1 = np.arange(len(labels))
    bar2 = [i+w for i in bar1]

    ax_bar1 = ax.bar(bar1, y1, w, color="violet", label="Callsite Returns")
    ax_bar2 = ax.bar(bar2, y2, w, color="purple", label="Function Returns")

    ax.set_xticks(bar1+w/2, labels)
    ax.set_ylabel("#Returns")
    ax.set_xlabel("Types")

    # The titles previously ended in a stray "}" ("(Setting II})").
    if "opt" in bench:
        ax.set_title("Void/Non-Void Return Types per Callsite (Setting II)")
    else:
        ax.set_title("Void/Non-Void Return Types per Callsite (Setting I)")

    def autolabel(rects):
        """
        Attach a text label above each bar displaying its height
        """
        for rect in rects:
            height = rect.get_height()
            ax.text(rect.get_x() + rect.get_width()/2., height+1,
                    '%d' % int(height),
                    ha='center', va='bottom')
    autolabel(ax_bar1)
    autolabel(ax_bar2)

    ax.legend()
    plt.grid(axis='y')
    #plt.show()

    #plt.savefig(f"/nfs/volatile/zephyr/Ruturaj/cfi_project/figures/{bench}_type_comp_isafe.png")
    plt.close(fig)

# typearmorplot("spec", spec)
# typearmorplot("spec-opt-stripped", spec_opt_str)

In [23]:
def plotCounts(bench, graph_type, dic):
    """Save a grouped bar chart of matching / mismatching argument counts.

    Args:
        bench: configuration name; "opt" in the name selects the
            "Setting II" title, and the name is part of the output file name.
        graph_type: signature kind, used in the title and output file name.
        dic: mapping of argument count -> [matching, mismatching] counts;
            the last key is displayed as ">=7".
    """
    tp_heights = [pair[0] for pair in dic.values()]
    fp_heights = [pair[1] for pair in dic.values()]
    tick_labels = list(dic.keys())
    tick_labels[-1] = ">=7"

    fig, ax = plt.subplots()
    fig.set_dpi(100)
    fig.set_size_inches(10, 4)

    # Two bars per group, each `width` wide and placed side by side.
    width = 0.3
    left_positions = np.arange(len(tick_labels))
    right_positions = [pos + width for pos in left_positions]

    tp_bars = ax.bar(left_positions, tp_heights, width, color="violet", label="#TP")
    fp_bars = ax.bar(right_positions, fp_heights, width, color="purple", label="#FP")

    ax.set_xticks(left_positions + width / 2, tick_labels)
    ax.set_ylabel("#Signatures")
    ax.set_xlabel("Argument Count")

    title = (f"Accurate/ Inaccurate Argument Count At {graph_type} Signature (Setting II)"
             if "opt" in bench else
             f"Accurate/ Inaccurate Argument Count At {graph_type} Signature (Setting I)")
    ax.set_title(title)

    # Write each bar's height just above it.
    for bars in (tp_bars, fp_bars):
        for bar in bars:
            bar_height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width()/2., bar_height+1,
                    '%d' % int(bar_height),
                    ha='center', va='bottom')

    ax.legend()
    plt.grid(axis='y')

    plt.savefig(f"/nfs/volatile/zephyr/Ruturaj/cfi_project/figures/{bench}_{graph_type}_args.png")
    plt.close(fig)

def typearmorCount(bench, obj):
    """Print how often the source and IDA argument counts agree.

    Two dictionaries are printed, first for callsites and then for
    functions. Each maps an argument count 0..7 to [agree, disagree],
    where bucket 7 collects signatures that use all seven argument slots.
    Counts of seven or more are compared as 6 on both sides.

    Args:
        bench: configuration name; only needed by the disabled plotting calls.
        obj: object exposing `callsite_dict` and `fun_dict`.
    """
    def tally(frames, ida_start):
        """Count agreement over all rows; IDA args start at `ida_start`."""
        counts = {i: [0, 0] for i in range(0, 8)}
        for df in frames.values():
            for _, row in df.iterrows():
                n_args = sum(1 for x in row.iloc[1:8].to_numpy() if x > 0)
                n_ida_args = sum(
                    1 for x in row.iloc[ida_start:ida_start + 7].to_numpy() if x > 0)

                # An explicit flag marks the ">=7" bucket. Testing for a
                # sentinel first argument of 32 would also catch shorter
                # signatures whose real first argument is encoded as 32.
                folded = n_args >= 7
                agree = min(n_args, 6) == min(n_ida_args, 6)

                bucket = 7 if folded else n_args
                counts[bucket][0 if agree else 1] += 1
        return counts

    # Callsite rows keep their IDA arguments at positions 10-16, function
    # rows at positions 9-15.
    counts = tally(obj.callsite_dict, 10)
    counts_fun = tally(obj.fun_dict, 9)

    print(counts)
    #plotCounts(bench, "Callsite", counts)
    print(counts_fun)
    #plotCounts(bench, "Function", counts_fun)

# Print the argument-count agreement tallies for both configurations.
typearmorCount("spec", spec)
typearmorCount("spec-opt-stripped", spec_opt_str)

{0: [19, 0], 1: [146, 90], 2: [217, 37], 3: [73, 2], 4: [33, 5], 5: [375, 1], 6: [5, 0], 7: [9, 0]}
{0: [588, 9], 1: [1545, 31], 2: [1610, 63], 3: [871, 31], 4: [2010, 12], 5: [193, 3], 6: [87, 1], 7: [121, 3]}
{0: [7, 11], 1: [121, 84], 2: [262, 42], 3: [211, 9], 4: [33, 0], 5: [372, 2], 6: [7, 0], 7: [7, 0]}
{0: [332, 253], 1: [909, 501], 2: [1107, 390], 3: [632, 201], 4: [774, 1197], 5: [103, 70], 6: [79, 1], 7: [113, 0]}


In [93]:
def plottypes(bench, graph_type, dic):
    """Print the keys and the two per-key values of `dic`, then build a LaTeX table.

    Args:
        bench: not used by the active code.
        graph_type: not used by the active code.
        dic: mapping whose values are indexable; element 0 and element 1 of
            each value are collected into two lists.

    Returns:
        None. The table string is assigned to a local and neither printed
        nor returned.
    """
    
    y1 = [v[0] for k,v in dic.items()]
    y2 = [v[1] for k,v in dic.items()]
    # NOTE(review): this local `final` shadows the module-level `final` dict
    # inside this function, and the first assignment is immediately overwritten.
    final = [y1, y2]
    final = zip(y1, y2)
    labels = [k for k in dic.keys()]
    print(labels)
    print(y1)
    print(y2)
    # NOTE(review): `final` is a one-shot zip iterator, so the row for
    # setting "I" consumes every pair and the row for "II" only contains its
    # label. Presumably each setting was meant to get its own data -- confirm
    # the intent before relying on this table.
    table = tabulate([[setting]+[x for x in final]\
                        for i,setting in enumerate(["I", "II"])], \
                        headers=labels, tablefmt="latex")
    
#     fig, ax = plt.subplots()
    
#     fig.set_dpi(100)
#     fig.set_size_inches(10, 4)
    
#     # Set the bar width.
#     w = 0.3
    
#     bar1 = np.arange(len(labels))
#     bar2 = [i+w for i in bar1]
    
#     ax_bar1 = ax.bar(bar1, y1, w, color="lightskyblue", label="#TP")
#     ax_bar2 = ax.bar(bar2, y2, w, color="royalblue", label="#FP")
    
#     ax.set_xticks(bar1+w/2, labels)
#     ax.set_ylabel(f"#Signatures")
#     ax.set_xlabel("Preliminary Types")
    
#     if "opt" in bench:
#         ax.set_title(f"Accurate/ Inaccurate Preliminary Types At {graph_type} Signature (Setting II)")
#     else:
#         ax.set_title(f"Accurate/ Inaccurate Preliminary Types At {graph_type} Signature (Setting I)")
        
#     def autolabel(rects):
#         """
#         Attach a text label above each bar displaying its height
#         """
#         for rect in rects:
#             height = rect.get_height()
#             ax.text(rect.get_x() + rect.get_width()/2., height+1, \
#                     '%d' % int(height), \
#                     ha='center', va='bottom')
#     autolabel(ax_bar1)
#     autolabel(ax_bar2)
    
#     ax.legend()
#     plt.grid(axis='y')
#     #plt.show()
    
#     #plt.savefig(f"/nfs/volatile/zephyr/Ruturaj/cfi_project/figures/{bench}_{graph_type}_types.png")
#     plt.close(fig)

def ifccTypes(bench, obj):
    """Tally source-vs-IDA argument-type agreement and pass it to ``plottypes``.

    For every row of every DataFrame in ``obj.callsite_dict`` and
    ``obj.fun_dict``, the positive entries of the source argument columns are
    paired, in order, with the positive entries of the IDA argument columns.
    Each pair is decoded with ``decode(code, False)`` and counted under the
    source label: slot 0 when both labels are equal, slot 1 otherwise. The
    ``"unc."`` label is discarded before the tallies are reported.

    Parameters
    ----------
    bench : str
        Benchmark-set name, forwarded unchanged to ``plottypes``.
    obj : object
        Must expose ``callsite_dict`` and ``fun_dict``, each mapping a
        benchmark name to a DataFrame.
        NOTE(review): the column positions used below (source arguments at
        1..7; IDA arguments at 10..16 for callsites and 9..15 for functions)
        are positional assumptions about those frames - confirm against the
        CSV layout.

    Returns
    -------
    None
    """

    def _tally(frames, ida_start, counts):
        """Add agree/disagree counts for every row of ``frames`` into ``counts``."""
        for df in frames.values():
            for _, row in df.iterrows():
                # Non-positive codes are dropped before pairing
                # (presumably "no argument" slots - TODO confirm).
                src_args = [code for code in row.iloc[1:8].to_numpy() if code > 0]
                ida_args = [code for code in row.iloc[ida_start:ida_start + 7].to_numpy()
                            if code > 0]

                # A full set of seven arguments is replaced by six entries of
                # the shared code 32.
                if len(src_args) >= 7:
                    src_args = [32] * 6
                if len(ida_args) >= 7:
                    ida_args = [32] * 6

                # zip() stops at the shorter list, so surplus arguments on
                # either side are never counted. Zeros were filtered above, so
                # no explicit "skip zero" check is needed here.
                for src_code, ida_code in zip(src_args, ida_args):
                    label = decode(src_code, False)
                    agrees = label == decode(ida_code, False)
                    counts[label][0 if agrees else 1] += 1

    types = defaultdict(lambda: [0, 0])
    _tally(obj.callsite_dict, 10, types)

    types_fun = defaultdict(lambda: [0, 0])
    _tally(obj.fun_dict, 9, types_fun)

    types.pop("unc.", None)
    plottypes(bench, "Callsite", types)
    types_fun.pop("unc.", None)
    plottypes(bench, "Function", types_fun)
                    
# Report preliminary-type agreement for the two benchmark sets.
# NOTE(review): `spec` and `spec_opt_str` come from earlier cells - presumably
# ParaseBenchmarks results; confirm they exist on a fresh kernel.
ifccTypes("spec", spec)
ifccTypes("spec-opt-stripped", spec_opt_str)

['*', 'Int64', 'Int32']
[1078, 84, 1645]
[33, 12, 127]
['Double', '*', 'Int64', 'Int32', 'Int8', 'Float', 'Int16']
[49, 7622, 282, 8665, 25, 34, 5]
[0, 46, 0, 395, 3, 8, 1]
['*', 'Int64', 'Int32']
[586, 216, 1537]
[862, 26, 237]
['*', 'Int64', 'Int32', 'Int8', 'Double', 'Int16', 'Float']
[2175, 175, 5185, 6, 37, 3, 18]
[4680, 76, 2792, 19, 10, 6, 20]


In [77]:
def plotpointers(bench, graph_type, dic,
                 out_dir="/nfs/volatile/zephyr/Ruturaj/cfi_project/figures"):
    """Draw a grouped bar chart of per-type counts and save it as a PNG.

    Parameters
    ----------
    bench : str
        Benchmark-set name. Used in the file name; a name containing "opt"
        selects the "Setting II" title, anything else "Setting I".
    graph_type : str
        Signature kind shown in the title and used in the file name.
    dic : dict
        Maps a type label to a two-element sequence. Element 0 is drawn as the
        "#TP" bar and element 1 as the "#FP" bar.
    out_dir : str, optional
        Directory the figure is written to. Defaults to the location that was
        previously hard-coded.

    Returns
    -------
    None
        The figure is written to ``<out_dir>/<bench>_<graph_type>_pointers.png``.
    """
    labels = list(dic.keys())
    tp_counts = [counts[0] for counts in dic.values()]
    fp_counts = [counts[1] for counts in dic.values()]

    fig, ax = plt.subplots()
    try:
        fig.set_dpi(100)
        fig.set_size_inches(10, 4)

        # Two bars per label, side by side.
        width = 0.3
        left_edges = np.arange(len(labels))
        right_edges = left_edges + width

        tp_bars = ax.bar(left_edges, tp_counts, width, color="lightskyblue", label="#TP")
        fp_bars = ax.bar(right_edges, fp_counts, width, color="royalblue", label="#FP")

        # Centre each tick between its pair of bars.
        ax.set_xticks(left_edges + width / 2, labels)
        ax.set_ylabel("#Signatures")
        # NOTE(review): this label matches the preliminary-type plot; confirm
        # it is also the intended wording for the pointer-type plot.
        ax.set_xlabel("Preliminary Types")

        setting = "II" if "opt" in bench else "I"
        ax.set_title(f"Accurate/ Inaccurate Pointer Types At {graph_type} Signature (Setting {setting})")

        # Print each bar's height just above it.
        for rect in (*tp_bars, *fp_bars):
            height = rect.get_height()
            ax.text(rect.get_x() + rect.get_width() / 2., height + 1,
                    '%d' % int(height),
                    ha='center', va='bottom')

        ax.legend()
        ax.grid(axis='y')

        fig.savefig(os.path.join(out_dir, f"{bench}_{graph_type}_pointers.png"))
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)

def ifccTypes(bench, obj):
    """Tally source-vs-IDA pointer-type agreement and pass it to ``plotpointers``.

    For every row of every DataFrame in ``obj.callsite_dict`` and
    ``obj.fun_dict``, the positive entries of the source argument columns are
    paired, in order, with the positive entries of the IDA argument columns.
    Each pair is decoded with ``decode(code, True)`` and counted under the
    source label: slot 0 when both labels are equal, slot 1 otherwise. Only
    labels containing ``"*"`` are kept; the surviving tallies are printed and
    plotted.

    Parameters
    ----------
    bench : str
        Benchmark-set name, forwarded unchanged to ``plotpointers``.
    obj : object
        Must expose ``callsite_dict`` and ``fun_dict``, each mapping a
        benchmark name to a DataFrame.
        NOTE(review): the column positions used below (source arguments at
        1..7; IDA arguments at 10..16 for callsites and 9..15 for functions)
        are positional assumptions about those frames - confirm against the
        CSV layout.

    Returns
    -------
    None
    """

    def _tally(frames, ida_start, counts):
        """Add agree/disagree counts for every row of ``frames`` into ``counts``."""
        for df in frames.values():
            for _, row in df.iterrows():
                # Non-positive codes are dropped before pairing
                # (presumably "no argument" slots - TODO confirm).
                src_args = [code for code in row.iloc[1:8].to_numpy() if code > 0]
                ida_args = [code for code in row.iloc[ida_start:ida_start + 7].to_numpy()
                            if code > 0]

                # A full set of seven arguments is replaced by six entries of
                # the shared code 32.
                if len(src_args) >= 7:
                    src_args = [32] * 6
                if len(ida_args) >= 7:
                    ida_args = [32] * 6

                # zip() stops at the shorter list, so surplus arguments on
                # either side are never counted. Zeros were filtered above, so
                # no explicit "skip zero" check is needed here.
                for src_code, ida_code in zip(src_args, ida_args):
                    label = decode(src_code, True)
                    agrees = label == decode(ida_code, True)
                    counts[label][0 if agrees else 1] += 1

    def _keep_pointer_labels(counts):
        """Remove, in place, ``"unc."`` and every label without a ``"*"``."""
        counts.pop("unc.", None)
        for label in [name for name in counts if "*" not in name]:
            del counts[label]

    # The defaultdicts are created here (not in the helper) so their printed
    # repr keeps the same function name as before.
    types = defaultdict(lambda: [0, 0])
    _tally(obj.callsite_dict, 10, types)

    types_fun = defaultdict(lambda: [0, 0])
    _tally(obj.fun_dict, 9, types_fun)

    _keep_pointer_labels(types)
    print(types)
    plotpointers(bench, "Callsite", types)

    _keep_pointer_labels(types_fun)
    print(types_fun)
    plotpointers(bench, "Function", types_fun)
                    
# Report pointer-type agreement for the two benchmark sets.
# NOTE(review): `spec` and `spec_opt_str` come from earlier cells - presumably
# ParaseBenchmarks results; confirm they exist on a fresh kernel.
ifccTypes("spec", spec)
ifccTypes("spec-opt-stripped", spec_opt_str)

defaultdict(<function ifccTypes.<locals>.<lambda> at 0x7f14d13781f0>, {'Struct*': [323, 65], 'Int8*': [71, 101], '**': [525, 4], 'Undef.*': [4, 0], 'Int32*': [12, 0], 'Int16*': [6, 0]})
defaultdict(<function ifccTypes.<locals>.<lambda> at 0x7f14d1378310>, {'Int8*': [960, 358], 'Int64*': [37, 0], 'Struct*': [4607, 631], '**': [644, 2], 'Int32*': [268, 18], 'Undef.*': [40, 0], 'Double*': [4, 0], 'Array*': [6, 1], 'Int16*': [69, 0], 'Float*': [22, 1]})
defaultdict(<function ifccTypes.<locals>.<lambda> at 0x7f14d1378310>, {'Struct*': [2, 387], 'Int8*': [111, 277], '**': [9, 645], 'Undef.*': [2, 2], 'Int32*': [2, 2], 'Int16*': [0, 9]})
defaultdict(<function ifccTypes.<locals>.<lambda> at 0x7f14d13781f0>, {'Struct*': [132, 4567], 'Int8*': [412, 739], '**': [58, 540], 'Undef.*': [8, 28], 'Int64*': [19, 12], 'Int32*': [64, 184], 'Double*': [1, 3], 'Array*': [0, 7], 'Int16*': [40, 19], 'Float*': [3, 19]})
