In [None]:
# Imports
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Default geometry and background applied to every figure created below.
plt.rcParams.update({
    'figure.figsize': (15.0, 8.0),  # default size of plots
    'figure.facecolor': 'white',
})

# Show every column of a frame, and up to 500 rows before pandas truncates the display.
# (A previous `display.max_rows = None` was immediately overwritten by the 500 limit,
# so only the effective value is kept.)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 500)

# Base font size for titles, labels and legends.
matplotlib.rcParams['font.size'] = 15

In [None]:
def _timed_and_counted(prefix, operations):
    """Expand operation names into interleaved timing / call-count column names.

    Every operation yields ``<prefix>_<operation>`` immediately followed by
    ``<prefix>_count_<operation>``, in the order the operations are listed.
    """
    names = []
    for operation in operations:
        names.extend((f"{prefix}_{operation}", f"{prefix}_count_{operation}"))
    return names


def _timings_only(column_names):
    """Return the given names without the ``_count_`` columns, order preserved."""
    return [name for name in column_names if "_count_" not in name]


# Columns that describe a sample rather than a profiled operation.
standard_column_names = [
    "query_type", "database_type",
    "number_of_write", "number_of_read",
    "profiling_level",
    "time_retrieval", "time",
]

# WASI calls.
wasi_column_names = _timed_and_counted("wasi", [
    "fd_close",
    "fd_fdstat_get",
    "fd_filestat_get",
    "fd_prestat_get",
    "fd_prestat_dir_name",
    "fd_read",
    "fd_seek",
    "fd_sync",
    "fd_tell",
    "fd_write",
    "path_filestat_get",
    "path_open",
    "clock_time_get",
])
wasi_wo_count_column_names = _timings_only(wasi_column_names)

# Trusted side: cryptography.
trusted_crypto_column_names = _timed_and_counted("trusted", [
    "crypto_static",
    "crypto_read_data_node",
    "crypto_read_mht_node",
])
trusted_crypto_wo_count_column_names = _timings_only(trusted_crypto_column_names)

# Trusted side: file I/O entry points.
trusted_io_column_names = _timed_and_counted("trusted", [
    "fopen_auto_key",
    "fwrite",
    "fread",
    "ftell",
    "fseek",
    "fflush",
    "feof",
    "fclose",
])
trusted_io_wo_count_column_names = _timings_only(trusted_io_column_names)

# Trusted side: data / MHT node handling.
trusted_node_column_names = _timed_and_counted("trusted", [
    "get_data_node",
    "append_data_node",
    "read_data_node",
    "get_mht_node",
    "append_mht_node",
    "read_mht_node",
    "internal_flush_node",
])
trusted_node_wo_count_column_names = _timings_only(trusted_node_column_names)

# Trusted side: memcpy / memset operations.
trusted_mem_column_names = _timed_and_counted("trusted", [
    "memcpy_write",
    "memcpy_read",
    "memset_get_data_node",
    "memset_append_data_node",
    "memset_read_data_node",
    "memset_append_mht_node",
    "memset_read_mht_node",
])
trusted_mem_wo_count_column_names = _timings_only(trusted_mem_column_names)

# Trusted side: cache operations.
trusted_cache_column_names = _timed_and_counted("trusted", [
    "cache_get_data_node_get",
    "cache_get_data_node_size",
    "cache_get_data_node_remove_last",
    "cache_get_data_node_get_last",
    "cache_append_data_node_add",
    "cache_read_data_node_get",
    "cache_read_data_node_add",
    "cache_read_mht_node_find",
    "cache_append_mht_node_find",
    "cache_append_mht_node_add",
    "cache_read_mht_node_add",
])
trusted_cache_wo_count_column_names = _timings_only(trusted_cache_column_names)

# Trusted side: fread_node ocalls.
trusted_ocall_column_names = _timed_and_counted("trusted", [
    "fread_node_read_data_node",
    "fread_node_read_mht_node",
])
trusted_ocall_wo_count_column_names = _timings_only(trusted_ocall_column_names)

# Untrusted side.
untrusted_column_names = _timed_and_counted("untrusted", [
    "check_if_file_exists",
    "do_file_recovery",
    "exclusive_file_open",
    "fclose",
    "fflush",
    "fread_node",
    "fwrite_node",
    "fwrite_recovery_node",
    "recovery_file_open",
    "remove",
])
untrusted_wo_count_column_names = _timings_only(untrusted_column_names)

# Every metric group, in the order the columns are laid out after the standard ones.
_metric_column_groups = [
    wasi_column_names,
    trusted_crypto_column_names,
    trusted_io_column_names,
    trusted_node_column_names,
    trusted_mem_column_names,
    trusted_cache_column_names,
    trusted_ocall_column_names,
    untrusted_column_names,
]

all_metric_column_names = [name for group in _metric_column_groups for name in group]
all_metric_wo_count_column_names = _timings_only(all_metric_column_names)

all_column_names = standard_column_names + all_metric_column_names
all_wo_count_column_names = standard_column_names + all_metric_wo_count_column_names

In [None]:
# Palette handed to the stacked plots: ten hues, each as a (dark, light) pair.
# NOTE(review): the values look like matplotlib's `tab20` palette - confirm before relying on that.
colors = [
    "#1F77B4", "#AEC7E8",
    "#FF7F0E", "#FFBB78",
    "#2CA02C", "#98DF8A",
    "#D62728", "#FF9896",
    "#9467BD", "#C5B0D5",
    "#8C564B", "#C49C94",
    "#E377C2", "#F7B6D2",
    "#7F7F7F", "#C7C7C7",
    "#BCBD22", "#DBDB8D",
    "#17BECF", "#9EDAE5",
]

def read_benchmark_data(filename):
    """Load a benchmark CSV into a DataFrame labelled with `all_column_names`.

    The file is read without a header row: because explicit names are supplied,
    its first line is treated as data.

    Args:
        filename: path (or any source accepted by `pd.read_csv`) of the CSV file.

    Returns:
        A DataFrame with one row per CSV line.
    """
    benchmark_frame = pd.read_csv(filename, header=None, names=all_column_names)
    return benchmark_frame

def get_by_query_type(data, query_type):
    """Return the rows of `data` whose 'query_type' column equals `query_type`.

    The original index labels and all columns are kept.
    """
    matches_query = data['query_type'].eq(query_type)
    return data.loc[matches_query]

def plot_values(plot_title, tracked_mean_time, numbers_of_write, total_mean_time):
    """Draw a stacked-area breakdown of the tracked timings, plus an "other" remainder.

    Args:
        plot_title: title of the figure.
        tracked_mean_time: DataFrame with one column per tracked operation and one
            row per x position.
        numbers_of_write: x values (one per row of `tracked_mean_time`).
        total_mean_time: Series of total times aligned with `tracked_mean_time`;
            whatever is not covered by the tracked columns is plotted as "other".

    The figure is shown with `plt.show()`; nothing is returned.
    """
    # Build a new frame instead of adding the column in place: callers pass a column
    # slice of a filtered frame, so assigning into it would mutate their data and
    # raise pandas' SettingWithCopyWarning.
    stacked = tracked_mean_time.assign(
        other=total_mean_time - tracked_mean_time.sum(axis=1)
    )

    fig, ax = plt.subplots()
    ax.stackplot(numbers_of_write, stacked.T, labels=stacked.columns, colors=colors)
    ax.set_title(plot_title)
    ax.set_xlabel("Number of records in the database")
    ax.set_ylabel("time [µs]")

    # Reverse the legend so its entries read top-down in the same order as the stack.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[::-1], labels[::-1], loc='lower left')

    plt.show()

In [None]:
# Load the profiling samples of the "regular" run (per the file name) and show them.
raw_data = read_benchmark_data('profiling-benchmark-wasm-sgx-regular.csv')
raw_data

In [None]:
#####
# 1. Remove the time dedicated to the ocalls to retrieve the current time.
#    Only apply for timing measured in the enclave.
#####

def remove_ocall_overhead(data, metric_column_names=None):
    """Subtract the time spent retrieving the clock from the enclave-side timings.

    For every ``*_count_*`` column that is not an ``untrusted_`` one, the matching
    timing column (same name without ``count_``) is reduced by
    ``time_retrieval * count``.

    Args:
        data: DataFrame holding a 'time_retrieval' column plus the timing and count
            columns.
        metric_column_names: column names to scan for count columns; defaults to
            `all_metric_column_names`.

    Returns:
        A new DataFrame with the adjusted timings. `data` itself is left untouched,
        so the input frame never silently changes under the caller.
    """
    if metric_column_names is None:
        metric_column_names = all_metric_column_names

    adjusted = data.copy()
    for count_column in metric_column_names:
        # Only enclave-side counters carry the clock-retrieval overhead.
        if "_count_" not in count_column or "untrusted_" in count_column:
            continue

        timing_column = count_column.replace("count_", "")
        adjusted[timing_column] = (
            adjusted[timing_column] - adjusted["time_retrieval"] * adjusted[count_column]
        )

    return adjusted
        
# Rebind raw_data to the overhead-corrected frame.
# NOTE(review): re-running this cell subtracts the overhead a second time, because its
# input is its own previous output - reload the CSV before re-running.
raw_data = remove_ocall_overhead(raw_data)
raw_data

In [None]:
#####
# 2. Remove all the ocalls counting columns.
#####

def remove_counting_columns(data):
    """Return `data` restricted to `all_wo_count_column_names`.

    That is the standard columns plus every timing column, without the ``*_count_*``
    ones. An explicit copy is returned because the next pipeline step assigns into the
    result; without it pandas reports a SettingWithCopyWarning on those writes.

    Raises:
        KeyError: if one of the expected columns is missing from `data`.
    """
    return data[all_wo_count_column_names].copy()

# From here on the pipeline works on the frame without the call-count columns.
raw_data_wo_count = remove_counting_columns(raw_data)
raw_data_wo_count

In [None]:
#####
# 3. Replace unspecified and biased timings with NaN, so that aggregate operations ignore them.
#####

def convert_unspecified_biased_to_nan(data, metric_column_names=None):
    """Blank out values that must not take part in the aggregations.

    Two kinds of values are replaced by NaN:

    * 'time' on every row whose 'profiling_level' is above 0 - that running time is
      biased because it includes the ocall timing;
    * every 0 in the metric timing columns - a 0 means the metric was not measured.

    Args:
        data: DataFrame with 'profiling_level', 'time' and the metric timing columns.
        metric_column_names: timing columns to clean; defaults to
            `all_metric_wo_count_column_names`.

    Returns:
        A new DataFrame; `data` is not modified.
    """
    if metric_column_names is None:
        metric_column_names = all_metric_wo_count_column_names

    cleaned = data.copy()

    # Void biased running time, since it includes ocall timing.
    # `mask` upcasts an integer column to float instead of assigning NaN into it.
    cleaned['time'] = cleaned['time'].mask(cleaned['profiling_level'] > 0)

    # Void unspecified time for the metrics. (`np.nan`: the `np.NaN` alias no longer
    # exists in NumPy 2.0.)
    cleaned[metric_column_names] = cleaned[metric_column_names].replace(0, np.nan)

    return cleaned

# Apply step 3 to the regular run and show the cleaned frame.
raw_data_wo_count = convert_unspecified_biased_to_nan(raw_data_wo_count)
raw_data_wo_count

In [None]:
#####
# 4. Drop unused columns and calculate the mean of multiple samples for the same configuration
#    (number_of_write, profiling_level, query_type).
#####

def drop_unused_columns_calculate_mean(data):
    """Average the samples that share the same configuration.

    The 'database_type', 'number_of_read' and 'time_retrieval' columns are dropped,
    then the remaining columns are averaged per
    (number_of_write, query_type, profiling_level). NaN values are skipped by the mean.

    Returns:
        A DataFrame indexed by (number_of_write, query_type, profiling_level).
    """
    # `columns=` instead of a positional axis: `drop(labels, 1)` is rejected by pandas >= 2.0.
    return (
        data
        .drop(columns=['database_type', 'number_of_read', 'time_retrieval'])
        .groupby(['number_of_write', 'query_type', 'profiling_level'])
        .mean()
    )

# Apply step 4; the result is indexed by (number_of_write, query_type, profiling_level).
raw_data_wo_count = drop_unused_columns_calculate_mean(raw_data_wo_count)
display(raw_data_wo_count)

In [None]:
#####
# 5. Aggregate all the profiling levels to create a single entry per number of write and query type.
#####

def aggregate_profiling_levels(data):
    """Collapse the profiling levels into one row per (number_of_write, query_type).

    The index of `data` is turned back into columns, 'profiling_level' is dropped and
    the remaining columns are summed per group. NaN values are skipped by the sum, so
    a column that is NaN for every level of a group ends up as 0.

    Returns:
        A DataFrame indexed by (number_of_write, query_type).
    """
    # `columns=` instead of a positional axis: `drop(labels, 1)` is rejected by pandas >= 2.0.
    return (
        data
        .reset_index()
        .drop(columns='profiling_level')
        .groupby(['number_of_write', 'query_type'])
        .sum()
    )

# Apply step 5; the result is indexed by (number_of_write, query_type).
raw_data_wo_count = aggregate_profiling_levels(raw_data_wo_count)
display(raw_data_wo_count)

In [None]:
# Turn the group keys back into ordinary columns; the filters and plots below select
# on the 'query_type' and 'number_of_write' columns.
raw_data_wo_count = raw_data_wo_count.reset_index()
raw_data_wo_count

In [None]:
#####
# Keep only the rows for the first number of records. They are the input of the report
# breakdown that highlights the important values (presumably rendered as a bar chart
# from the exported CSV files).
#####

def filter_on_first_number_of_records(data):
    """Keep the rows whose 'number_of_write' equals that of the first row of `data`.

    The first row is taken by position, so the function also works when the index of
    `data` does not contain the label 0 (for instance after filtering or grouping).

    Raises:
        IndexError: if `data` has no rows.
    """
    first_number_of_write = data["number_of_write"].iloc[0]
    return data[data["number_of_write"] == first_number_of_write]

# Rows of the regular run for the first number of records (one row per query type).
first_number_of_records_regular = filter_on_first_number_of_records(raw_data_wo_count)
first_number_of_records_regular

In [None]:
def convert_to_report_breakdown(data, query_type):
    """Build the four-component breakdown reported for one query type.

    Args:
        data: aggregated timings holding a 'query_type' column, 'time', the WASI
            timing columns and the trusted fread_node / memset timing columns.
        query_type: value of 'query_type' selecting the rows to break down.

    Returns:
        A DataFrame indexed like the selected rows, with the columns:
        * 'ocall_reads': fread_node time for MHT nodes plus data nodes;
        * 'memset': sum of the five trusted memset timings;
        * 'wasi_fd_read_other': 'wasi_fd_read' minus the two components above;
        * 'sqlite': 'time' minus the sum of all WASI timings.
    """
    rows = data[data["query_type"] == query_type]

    ocall_reads = (
        rows["trusted_fread_node_read_mht_node"]
        + rows["trusted_fread_node_read_data_node"]
    )
    memset = (
        rows["trusted_memset_read_mht_node"]
        + rows["trusted_memset_append_mht_node"]
        + rows["trusted_memset_read_data_node"]
        + rows["trusted_memset_append_data_node"]
        + rows["trusted_memset_get_data_node"]
    )
    wasi_fd_read_other = rows["wasi_fd_read"] - (ocall_reads + memset)
    sqlite = rows["time"] - rows[wasi_wo_count_column_names].sum(axis=1)

    return pd.DataFrame({
        "ocall_reads": ocall_reads,
        "memset": memset,
        "wasi_fd_read_other": wasi_fd_read_other,
        "sqlite": sqlite,
    })

# Breakdown of the random-reading query ("qr") for the regular run.
report_breakdown = convert_to_report_breakdown(first_number_of_records_regular, "qr")
report_breakdown

In [None]:
# Show the aggregated frame again before plotting it.
raw_data_wo_count

In [None]:
# One stacked plot per workload, broken down by WASI call.
for workload_code, workload_label in (
    ('i', "insertion"),
    ('qs', "sequential reading"),
    ('qr', "random reading"),
):
    r = get_by_query_type(raw_data_wo_count, workload_code)
    plot_values(f"Breakdown of {workload_label} in the WASI layer",
                r[wasi_wo_count_column_names],
                r["number_of_write"].values,
                r["time"])

In [None]:
# One stacked plot per workload, broken down by trusted I/O entry point.
for workload_code, workload_label in (
    ('i', "insertion"),
    ('qs', "sequential reading"),
    ('qr', "random reading"),
):
    r = get_by_query_type(raw_data_wo_count, workload_code)
    plot_values(f"Breakdown of {workload_label} in the Intel Protected FS layer",
                r[trusted_io_wo_count_column_names],
                r["number_of_write"].values,
                r["time"])

In [None]:
# One stacked plot per workload, broken down by untrusted-side operation.
for workload_code, workload_label in (
    ('i', "insertion"),
    ('qs', "sequential reading"),
    ('qr', "random reading"),
):
    r = get_by_query_type(raw_data_wo_count, workload_code)
    plot_values(f"Breakdown of {workload_label} in the Intel Protected FS (untrusted) layer",
                r[untrusted_wo_count_column_names],
                r["number_of_write"].values,
                r["time"])

In [None]:
# One stacked plot per workload, broken down by cryptographic operation.
for workload_code, workload_label in (
    ('i', "insertion"),
    ('qs', "sequential reading"),
    ('qr', "random reading"),
):
    r = get_by_query_type(raw_data_wo_count, workload_code)
    plot_values(f"Breakdown of {workload_label} in the cryptography of Intel Protected FS layer",
                r[trusted_crypto_wo_count_column_names],
                r["number_of_write"].values,
                r["time"])

In [None]:
# One stacked plot per workload, broken down by node operation.
for workload_code, workload_label in (
    ('i', "insertion"),
    ('qs', "sequential reading"),
    ('qr', "random reading"),
):
    r = get_by_query_type(raw_data_wo_count, workload_code)
    plot_values(f"Breakdown of {workload_label} in the nodes of Intel Protected FS layer",
                r[trusted_node_wo_count_column_names],
                r["number_of_write"].values,
                r["time"])

In [None]:
# One stacked plot per workload, broken down by memcpy / memset operation.
for workload_code, workload_label in (
    ('i', "insertion"),
    ('qs', "sequential reading"),
    ('qr', "random reading"),
):
    r = get_by_query_type(raw_data_wo_count, workload_code)
    plot_values(f"Breakdown of {workload_label} by memory operations of Intel Protected FS layer",
                r[trusted_mem_wo_count_column_names],
                r["number_of_write"].values,
                r["time"])

In [None]:
# One stacked plot per workload, broken down by cache operation.
for workload_code, workload_label in (
    ('i', "insertion"),
    ('qs', "sequential reading"),
    ('qr', "random reading"),
):
    r = get_by_query_type(raw_data_wo_count, workload_code)
    plot_values(f"Breakdown of {workload_label} in cache of Intel Protected FS layer",
                r[trusted_cache_wo_count_column_names],
                r["number_of_write"].values,
                r["time"])

In [None]:
# One stacked plot per workload, broken down by fread_node ocall.
# The titles are spelled out because the insertion one is worded differently.
for workload_code, workload_title in (
    ('i', "Breakdown of insertion for reading in fread_node ocall Intel Protected FS layer"),
    ('qs', "Breakdown of sequential reading in fread_node ocall Intel Protected FS layer"),
    ('qr', "Breakdown of random reading in fread_node ocall Intel Protected FS layer"),
):
    r = get_by_query_type(raw_data_wo_count, workload_code)
    plot_values(workload_title,
                r[trusted_ocall_wo_count_column_names],
                r["number_of_write"].values,
                r["time"])

In [None]:
#####
# Load the profiling samples of the "nocopy" run (per the file name); the cells below
# apply the same steps 1-5 to it as were applied to the regular run.
#####

raw_data_noncopy = read_benchmark_data('profiling-benchmark-wasm-sgx-nocopy.csv')
raw_data_noncopy

In [None]:
# 1. Remove the time dedicated to the ocalls to retrieve the current time.
#    Only applies to the timings measured in the enclave.
# NOTE(review): re-running this cell subtracts the overhead a second time, because its
# input is its own previous output - reload the CSV before re-running.
raw_data_noncopy = remove_ocall_overhead(raw_data_noncopy)
raw_data_noncopy

In [None]:
# 2. Remove all the call-counting columns; only the standard and timing columns remain.
raw_data_wo_count_noncopy = remove_counting_columns(raw_data_noncopy)
raw_data_wo_count_noncopy

In [None]:
# 3. Replace unspecified and biased timings with NaN, so that aggregate operations ignore them.
raw_data_wo_count_noncopy = convert_unspecified_biased_to_nan(raw_data_wo_count_noncopy)
raw_data_wo_count_noncopy

In [None]:
# 4. Drop the unused columns and average the samples that share the same configuration
#    (number_of_write, profiling_level, query_type).
raw_data_wo_count_noncopy = drop_unused_columns_calculate_mean(raw_data_wo_count_noncopy)
display(raw_data_wo_count_noncopy)

In [None]:
# 5. Aggregate all the profiling levels into a single entry per number of writes and query type.
raw_data_wo_count_noncopy = aggregate_profiling_levels(raw_data_wo_count_noncopy)
display(raw_data_wo_count_noncopy)

In [None]:
# Turn the group keys back into ordinary columns, as was done for the regular run.
raw_data_wo_count_noncopy = raw_data_wo_count_noncopy.reset_index()
raw_data_wo_count_noncopy

In [None]:
# Rows of the nocopy run for the first number of records (one row per query type).
first_number_of_records_noncopy = filter_on_first_number_of_records(raw_data_wo_count_noncopy)
first_number_of_records_noncopy

In [None]:
# Breakdown of the random-reading query ("qr") for the nocopy run.
report_breakdown_nocopy = convert_to_report_breakdown(first_number_of_records_noncopy, "qr")
report_breakdown_nocopy

In [None]:
# Stack both breakdowns: the regular rows come first, the nocopy rows second.
# export_to_files reads the rows by position, so this order matters.
concatened_reports = pd.concat([report_breakdown, report_breakdown_nocopy])
concatened_reports

In [None]:
# Export to CSV
#
# Files generated (one per column of the exported dataset):
# - profiling_<column>-formatted.csv
#
# File format: target, mean

def export_to_files(dataset):
    """Write one ``profiling_<column>-formatted.csv`` file per column of `dataset`.

    Each file has a ``target,mean`` header and two rows, "regular" then "nocopy",
    taken from the first and second value of the column and divided by 1024 twice.

    NOTE(review): assumes the rows of `dataset` are ordered regular, nocopy - verify
    against the caller. The 1024 * 1024 divisor is kept as is; confirm the intended unit.

    Raises:
        IndexError: if a column holds fewer than two values.
    """
    targets = ["regular", "nocopy"]

    for column in dataset:
        column_values = dataset[column].values
        means = [column_values[position] / 1024 / 1024 for position in range(len(targets))]

        table = pd.DataFrame({"target": targets, "mean": means})
        table.to_csv("profiling_" + column + "-formatted.csv", index=False)

# Write one CSV file per breakdown component into the current working directory.
export_to_files(concatened_reports)

In [None]:
#
## Values for the paper
#

def to_percent(value):
    """Format a fraction as a percentage string rounded to one decimal (no '%' sign)."""
    percentage = round(value * 100, 1)
    return str(percentage)

def to_ratio(value):
    """Format a ratio as a string rounded to one decimal."""
    rounded = round(value, 1)
    return str(rounded)

def get_time_by_type(dataset, query_type):
    """Return the 'time' of the first row of `dataset` whose 'query_type' matches.

    Raises:
        IndexError: if no row has the requested query type.
    """
    matches_query = dataset["query_type"] == query_type
    matching_times = dataset.loc[matches_query, "time"].to_numpy()
    return matching_times[0]

# Total time over all breakdown components, for each variant.
regular_total = report_breakdown.sum().sum()
nocopy_total = report_breakdown_nocopy.sum().sum()

# Share of each component in the regular breakdown, as percentage strings.
# (camelCase names mirror the LaTeX macros they are exported as.)
memsetRatio = to_percent(report_breakdown["memset"].values[0] / regular_total)
ocallRatio = to_percent(report_breakdown["ocall_reads"].values[0] / regular_total)
otherOperationsRatio = to_percent(report_breakdown["wasi_fd_read_other"].values[0] / regular_total)
sqliteRatio = to_percent(report_breakdown["sqlite"].values[0] / regular_total)

# Nocopy total relative to the regular total, and its complement.
copyInvertedRatio = to_percent(nocopy_total / regular_total)
copyRatio = to_percent(1 - nocopy_total / regular_total)

# Regular / nocopy running-time ratio for insertion, sequential and random reading.
insertRatio, seqReadingRatio, randReadingRatio = (
    to_ratio(
        get_time_by_type(first_number_of_records_regular, workload)
        / get_time_by_type(first_number_of_records_noncopy, workload)
    )
    for workload in ("i", "qs", "qr")
)

print(seqReadingRatio)

In [None]:
#
## Export to LaTeX
#

# Each value is written as a `\def\<name>{<value>}` LaTeX macro, one per line,
# in the order listed here.
latex_macros = {
    "memsetRatio": memsetRatio,
    "ocallRatio": ocallRatio,
    "otherOperationsRatio": otherOperationsRatio,
    "sqliteRatio": sqliteRatio,
    "copyRatio": copyRatio,
    "copyInvertedRatio": copyInvertedRatio,
    "insertRatio": insertRatio,
    "seqReadingRatio": seqReadingRatio,
    "randReadingRatio": randReadingRatio,
}

# The context manager closes the file even when a write fails.
with open("profiling-export.tex", "w") as f:
    for macro_name, macro_value in latex_macros.items():
        f.write(f"\\def\\{macro_name}{{{macro_value}}}\n")