### Information

#### Experimental environment

In [None]:
a = !cat /etc/redhat-release
b = !cat /proc/cpuinfo | grep "model name"
c = !nproc
d = !free -h --total | grep Total
e = !gcc --version
# cloc_v = !docker run --rm aldanial/cloc --version
# angr_v = !docker-compose exec angr pip show angr | grep "Version"
# tigress_v = !docker-compose exec -t tigress obfuscate --version | grep "TIGRESS VERSION"
# ollvm_v = !docker-compose exec ollvm obfuscate --version

environment = dict(
    machine=dict(
        OS=a[0],
        CPU=b[0].split(":")[1],
        Processor_num=c[0],
        Memory=d[0].split(":")[1].split()[0]),
    tool=dict(
        gcc=dict(Version=e[0]),
        cloc=dict(Version="1.89"),
        angr=dict(Version="9.1.12332"),
        tigress=dict(Version="3.1"),
        ollvm=dict(Version="Obfuscator-LLVM clang version 4.0.1  (based on Obfuscator-LLVM 4.0.1)")
    )
)

environment

___
### Analysis

In [None]:
import io
import json
import pathlib
import statistics

import pandas as pd
# Format floats in pandas output with 4 significant digits.
pd.options.display.float_format = '{:.4g}'.format

# Useful function
def load_test_result_data(target: str) -> dict:
    """Load every test-result JSON file found in ``<target>/result_test``.

    Each file is parsed as a JSON array whose entries are indexable pairs:
    element 0 is a path and element 1 is a JSON object with the result fields.

    Args:
        target: Directory that contains the ``result_test`` sub-directory.

    Returns:
        A dict keyed by file stem, in sorted file order. Each value is a list
        with one dict per entry: ``code_name`` (the stem of the entry's path)
        followed by the entry's result fields.
    """
    result_dir = pathlib.Path(target).joinpath("result_test")
    loaded = {}
    for json_file in sorted(result_dir.glob("*.json")):
        # JSON text is UTF-8; decoding explicitly avoids depending on the
        # locale's default encoding (which breaks non-ASCII names elsewhere).
        entries = json.loads(json_file.read_text(encoding="utf-8"))
        loaded[json_file.stem] = [
            {"code_name": pathlib.Path(entry[0]).stem, **entry[1]}
            for entry in entries]
    return loaded


def organize_test_data(test_result_data: dict) -> list:
    """Summarise the test results of each transformation.

    A result counts as failed when any element of its ``fail_cause`` value is
    truthy; otherwise it counts as passed.

    Args:
        test_result_data: Mapping of transformation name to a list of result
            dicts, each providing ``code_name`` and ``fail_cause``.

    Returns:
        One dict per transformation, in the mapping's iteration order, with
        the keys ``transformation_name``, ``test_pass_rate``,
        ``passed_test_num``, ``failed_test_num`` and ``failed_test`` (the
        ``code_name`` of every failed result). ``test_pass_rate`` is ``None``
        when a transformation has no results at all.
    """
    summary = []
    for name, results in test_result_data.items():
        # Single pass over the results; the pass count follows from the total.
        failed = [res["code_name"] for res in results if any(res["fail_cause"])]
        total = len(results)
        passed_num = total - len(failed)
        summary.append({
            "transformation_name": name,
            # An empty result list has no defined rate; avoid dividing by zero.
            "test_pass_rate": passed_num / total if total else None,
            "passed_test_num": passed_num,
            "failed_test_num": len(failed),
            "failed_test": failed,
        })
    return summary


def load_similarity_data(target: str) -> dict:
    """Load every similarity JSON file found in ``<target>/result_similarity``.

    Each file is parsed as a JSON array whose entries are indexable pairs:
    element 0 is a path and element 1 is a JSON object with the similarity
    fields.

    Args:
        target: Directory that contains the ``result_similarity`` sub-directory.

    Returns:
        A dict keyed by file stem, in sorted file order. Each value is a list
        with one dict per entry: ``code_name`` (the stem of the entry's path)
        followed by the entry's similarity fields.
    """
    similarity_dir = pathlib.Path(target).joinpath("result_similarity")
    loaded = {}
    for json_file in sorted(similarity_dir.glob("*.json")):
        # JSON text is UTF-8; decoding explicitly avoids depending on the
        # locale's default encoding (which breaks non-ASCII names elsewhere).
        entries = json.loads(json_file.read_text(encoding="utf-8"))
        loaded[json_file.stem] = [
            {"code_name": pathlib.Path(entry[0]).stem, **entry[1]}
            for entry in entries]
    return loaded


def calc_distance_mean(similarity_data) -> list:
    """Average the code distances of each transformation.

    A distance is ``1.0 - similarity``; it is computed for the ``simpson`` and
    ``longest_common_substring`` fields of every record and then averaged per
    transformation.

    Args:
        similarity_data: Mapping of transformation name to a list of records
            that provide ``simpson`` and ``longest_common_substring``.

    Returns:
        One dict per transformation, in the mapping's iteration order, with
        the keys ``transformation_name``, ``3gram_Simpson`` and ``LCSubstr``.
    """
    rows = []
    for name, records in similarity_data.items():
        simpson_mean = statistics.mean(
            [1.0 - record["simpson"] for record in records])
        lcs_mean = statistics.mean(
            [1.0 - record["longest_common_substring"] for record in records])
        rows.append({
            "transformation_name": name,
            "3gram_Simpson": simpson_mean,
            "LCSubstr": lcs_mean,
        })
    return rows


def convert_to_dataframe(data: list) -> pd.DataFrame:
    """Build a DataFrame indexed by ``transformation_name`` from record dicts.

    The records are round-tripped through JSON and parsed with
    ``pd.read_json``.

    Args:
        data: JSON-serialisable records (a list of dicts in this notebook),
            each providing a ``transformation_name`` key.

    Returns:
        A DataFrame with one row per record, indexed by ``transformation_name``.
    """
    # read_json expects a path or file-like object; passing the JSON text
    # itself is deprecated, so wrap it in an in-memory text buffer.
    json_buffer = io.StringIO(json.dumps(data))
    return pd.read_json(json_buffer).set_index("transformation_name")


# Name of the property directory to analyze. The loader functions read the
# JSON files in its "result_test" and "result_similarity" sub-directories.
target_property_name = "prop1"

#### Test Pass Rate

In [None]:
# Load the test results of the selected property, summarise them per
# transformation, and display the summary indexed by transformation name.
test_result_data = load_test_result_data(target_property_name)
test_pass_data = organize_test_data(test_result_data)
df_test_pass_rate = convert_to_dataframe(test_pass_data)
# Optional Markdown export of the table (disabled):
# df_test_pass_rate.to_markdown()
df_test_pass_rate

##### Additional analysis

In [None]:
# Codes that failed under every transformation
failed_codes = [set(i) for i in df_test_pass_rate["failed_test"]]
# set.intersection needs at least one operand, so an empty table yields an
# empty result instead of raising IndexError.
common_failed_set = set.intersection(*failed_codes) if failed_codes else set()
common_failed_code = sorted(common_failed_set)
print("Commonly failed codes in each transformation")
print(common_failed_code)
print()

# Codes that failed under a transformation but are not common to all of them
print("Uniquely failed codes in each transformation")
for idx, v in zip(df_test_pass_rate.index, df_test_pass_rate["failed_test"]):
    # Membership is tested against the set, not the sorted list.
    unique_failed_code = sorted(i for i in v if i not in common_failed_set)
    if not unique_failed_code:
        continue
    # print(json.dumps(dict(name=idx, unique_failed_code=unique_failed_code)))
    # Plain ASCII separator; the previous separator was a mis-encoded character.
    print(f"{idx}: {unique_failed_code}")

#### Code Distance Mean

In [None]:
# Load the similarity results of the selected property, average the distances
# per transformation, and display the table indexed by transformation name.
similarity_data = load_similarity_data(target_property_name)
distance_data = calc_distance_mean(similarity_data)
df_code_distance_mean = convert_to_dataframe(distance_data)
df_code_distance_mean

___
### Plot Analyzed Data

In [None]:
import matplotlib
import matplotlib.pyplot as plt

# Set the PDF backend's "fonttype" option to 42 (TrueType, per matplotlib's rcParams documentation).
matplotlib.rc('pdf', fonttype=42)

def plot_code_distance_mean(df: pd.DataFrame, fname: str):
    """Scatter-plot each column of ``df`` against its index and save a PNG.

    Every column is drawn as its own series with a dedicated colour and
    marker; only three colour/marker pairs are defined, so at most three
    columns can be plotted.

    Args:
        df: Table whose index supplies the x values and whose columns supply
            the y values.
        fname: Output path without extension; the figure is written to
            ``<fname>.png``.
    """
    fig, ax = plt.subplots(1, 1, figsize=(12, 6), constrained_layout=True)
    # Two decimal places on the y tick labels.
    ax.yaxis.set_major_formatter(plt.FormatStrFormatter('%.2f'))

    # Turn the index into the first column so it can serve as the x column.
    table = df.reset_index()
    x_column = table.columns[0]

    palette = ("#3498db", "#1abc9c", "#e74c3c")
    shapes = ("o", "s", "x")
    for series_no, y_column in enumerate(table.columns[1:]):
        table.plot.scatter(
            ax=ax, x=x_column, y=y_column, label=y_column,
            grid=True, rot=90,
            color=palette[series_no], marker=shapes[series_no], s=75)

    ax.set_axisbelow(True)
    for axis_name in ("x", "y"):
        ax.tick_params(axis=axis_name, pad=12, labelsize=15)
    ax.set_xlabel("Obfuscation method", labelpad=12, fontsize=18)
    ax.set_ylabel("Code Distance Mean", labelpad=12, fontsize=18)
    ax.legend(prop=dict(size=15))

    fig.savefig(f"{fname}.png")
    plt.show()
    plt.close("all")


# Plot the per-transformation distance means; the figure is saved as code_distance_mean.png.
plot_code_distance_mean(df=df_code_distance_mean, fname="code_distance_mean")


In [None]:
target = ["prop1", "prop2"]

# Collectively retrieve data from the target properties. Properties are read
# in list order; when a key repeats, the later property's value replaces the
# earlier one.
test_result_data = {
    name: results
    for prop in target
    for name, results in load_test_result_data(prop).items()}

similarity_data = {
    name: records
    for prop in target
    for name, records in load_similarity_data(prop).items()}

# Keep only the entries whose name contains fewer than two underscores.
test_pass_data = organize_test_data(
    dict(filter(lambda item: item[0].count("_") < 2, test_result_data.items())))
distance_data = calc_distance_mean(
    dict(filter(lambda item: item[0].count("_") < 2, similarity_data.items())))

df_test_pass_rate = convert_to_dataframe(test_pass_data)
df_code_distance_mean = convert_to_dataframe(distance_data)

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

# Set the PDF backend's "fonttype" option to 42 (TrueType, per matplotlib's rcParams documentation).
matplotlib.rc('pdf', fonttype=42)


def plot_code_distance_mean(
        df: pd.DataFrame,
        fname: str,
        separator_index: "int | None" = 28,
        xlim: tuple = (-1, 38)):
    """Scatter-plot each column of ``df`` against its index and save a PDF.

    This redefines the PNG-producing function of the same name from an
    earlier cell. All series are drawn in black and distinguished by marker;
    only three markers are defined, so at most three columns can be plotted.
    An empty row is inserted at ``separator_index`` and a dotted vertical
    line is drawn at that x position.

    Args:
        df: Table whose index supplies the x values and whose columns supply
            the y values.
        fname: Output path without extension; the figure is written to
            ``<fname>.pdf``.
        separator_index: Row position of the empty separator row and of the
            dotted line. It must not exceed the number of rows in ``df``
            (``DataFrame.insert`` rejects larger positions). Pass ``None``
            to draw no separator.
        xlim: ``(left, right)`` limits of the x axis.
    """
    fig, ax = plt.subplots(1, 1, figsize=(12, 6), constrained_layout=True)
    ax.yaxis.set_major_formatter(plt.FormatStrFormatter('%.2f'))

    if separator_index is not None:
        # DataFrame.insert adds columns, so transpose, insert an empty column
        # named "", and transpose back to get an empty separator row.
        transposed = df.T
        transposed.insert(separator_index, "", None)
        df = transposed.T
    df = df.reset_index()

    markers = ["o", "s", "x"]
    x_column = df.columns[0]
    for idx, y_column in enumerate(df.columns[1:]):
        df.plot.scatter(
            ax=ax, x=x_column, y=y_column, label=y_column,
            grid=True, rot=90, color="k", marker=markers[idx], s=75)

    ax.set_axisbelow(True)
    ax.tick_params(axis="x", pad=12, labelsize=15)
    ax.tick_params(axis="y", pad=12, labelsize=15)
    ax.set_xlabel("Obfuscation Transformation", labelpad=12, fontsize=18)
    ax.set_ylabel("Code Distance Mean", labelpad=12, fontsize=18)
    ax.legend(prop=dict(size=15))

    ax.set_xlim(*xlim)
    if separator_index is not None:
        ax.axvline(x=separator_index, linestyle="dotted", color="k")

    fig.savefig(f"{fname}.pdf")
    plt.show()
    plt.close("all")


def plot_reliability(
        df_test_pass_rate: pd.DataFrame,
        df_code_distance_mean: pd.DataFrame,
        fname: str,
        separator_index: "int | None" = 28,
        xlim: tuple = (-1, 38)):
    """Plot test pass rate and code distance means together and save a PDF.

    The two tables are merged on ``transformation_name``, and every column of
    the merged table is drawn as a black scatter series with its own marker.
    Only three markers are defined, so the merged table may have at most
    three columns. An empty row is inserted at ``separator_index`` and a
    dotted vertical line is drawn at that x position.

    Args:
        df_test_pass_rate: Table indexed by ``transformation_name``; its
            ``test_pass_rate`` column is relabelled "Test Pass Rate".
        df_code_distance_mean: Table indexed by ``transformation_name``; its
            ``3gram_Simpson`` and ``LCSubstr`` columns are relabelled for the
            legend.
        fname: Output path without extension; the figure is written to
            ``<fname>.pdf``.
        separator_index: Row position of the empty separator row and of the
            dotted line. It must not exceed the number of merged rows
            (``DataFrame.insert`` rejects larger positions). Pass ``None``
            to draw no separator.
        xlim: ``(left, right)`` limits of the x axis.
    """
    fig, ax = plt.subplots(1, 1, figsize=(12, 6), constrained_layout=True)
    ax.yaxis.set_major_formatter(plt.FormatStrFormatter('%.2f'))

    df = pd.merge(
        df_test_pass_rate,
        df_code_distance_mean,
        on="transformation_name")
    df = df.rename(columns={
        "test_pass_rate": "Test Pass Rate",
        "3gram_Simpson": "Code Distance Mean(3-gram Simpson)",
        "LCSubstr": "Code Distance Mean(LCSubstr)"})
    if separator_index is not None:
        # DataFrame.insert adds columns, so transpose, insert an empty column
        # named "", and transpose back to get an empty separator row.
        transposed = df.T
        transposed.insert(separator_index, "", None)
        df = transposed.T
    df = df.reset_index()

    markers = ["o", "+", "x"]
    x_column = df.columns[0]
    for idx, y_column in enumerate(df.columns[1:]):
        df.plot.scatter(
            ax=ax, x=x_column, y=y_column, label=y_column,
            grid=True, rot=90, color="k", marker=markers[idx], s=75)

    ax.tick_params(axis="x", pad=12, labelsize=15)
    ax.set_xlabel("Obfuscating Transformation", labelpad=12, fontsize=18)
    ax.set_axisbelow(True)
    ax.set_xlim(*xlim)
    ax.tick_params(axis="y", pad=12, labelsize=15)
    ax.set_ylabel("Rate", labelpad=12, fontsize=18)
    # All plotted quantities are shown on a fixed 0.0-1.0 y axis.
    ax.set_ylim(0.0, 1.0)
    ax.set_yticks(np.arange(0.0, 1.1, 0.2))
    ax.legend(prop=dict(size=15))

    if separator_index is not None:
        ax.axvline(x=separator_index, linestyle="dotted", color="k")

    fig.savefig(f"{fname}.pdf")
    plt.show()
    plt.close("all")

# Plot only the pass-rate column together with the distance means; the
# figure is saved as exp_result.pdf.
plot_reliability(df_test_pass_rate[["test_pass_rate"]],
                 df_code_distance_mean, fname="exp_result")

# Alternative distance-only plot (disabled):
# plot_code_distance_mean(df_code_distance_mean, fname="exp_code_distance_mean")
