# Interpretation of the data
This notebook produces the tables shown in the paper. It assumes that the scripts in `scripts/` have already been run and that their results have been written to the `results/` directory.

### Dependencies

In [None]:
import pandas as pd
import numpy as np
import glob

def create_table_of_results(directory):
    """Combine every CSV under ``results/<directory>`` into a single table.

    Each CSV is read without a header and transposed, so the first column of
    the file supplies the column names and every remaining column of the file
    becomes one row of the returned table.

    Parameters
    ----------
    directory : str
        Name of the sub-directory of ``results/`` whose ``*.csv`` files are read.

    Returns
    -------
    pandas.DataFrame
        The concatenation of all transposed files, with columns named after
        the entries found in the first column of the CSVs.

    Raises
    ------
    ValueError
        If no CSV file matches ``results/<directory>/*.csv``.
    """
    pattern = "results/" + directory + "/*.csv"
    # glob returns matches in arbitrary order; sorting makes the row order of
    # the resulting table reproducible across machines and runs.
    files = sorted(glob.glob(pattern))
    if not files:
        raise ValueError(
            "No CSV result files match '" + pattern + "'; "
            "run the scripts in scripts/ first."
        )

    tables = []
    for path in files:
        transposed = pd.read_csv(path, header=None).T
        # After transposing, row 0 holds the names; promote it to the header
        # and keep only the value rows (without mutating the frame in place).
        names = transposed.iloc[0].tolist()
        values = transposed.drop(index=0)
        values.columns = names
        tables.append(values)

    return pd.concat(tables)

def get_sig_figs(x, sig=1):
    """Round ``x`` to a number of decimal places derived from its magnitude.

    The number of decimals is ``sig - int(log10(|x|))``. Because ``int``
    truncates toward zero, values with ``|x| >= 1`` keep ``sig + 1``
    significant figures while values with ``|x| < 1`` keep ``sig``; this
    behaviour is preserved so previously produced numbers do not change.

    Parameters
    ----------
    x : int or float
        Value to round.
    sig : int, optional
        Offset added to the magnitude-derived number of decimals (default 1).

    Returns
    -------
    int or float
        The rounded value. Zero and non-finite values are returned unchanged.
    """
    # log10(0) is -inf and log10(nan/inf) is not convertible to int either,
    # so these inputs are passed through instead of raising.
    if x == 0 or not np.isfinite(x):
        return x
    # Use the magnitude so that negative inputs do not produce log10 -> nan.
    return round(x, -int(np.log10(abs(x))) + sig)

## Results for PIR with one plaintext payload

In [None]:
# Load every result file of the one-plaintext PIR experiment into one table.
pir_data = create_table_of_results('pir-one-plaintext')

# Derived database size: num_keywords * 2**log_poly_mod_degree * 20 / 8
# (presumably 20 bits per coefficient converted to bytes -- TODO confirm).
poly_mod_degree = 2 ** pir_data["log_poly_mod_degree"]
pir_data["db_size"] = pir_data["num_keywords"] * poly_mod_degree * 20 / 8

### Folklore with N=8192

In [None]:
# Rows for this table: eq_type 0, log_poly_mod_degree 13 (N = 8192), one thread.
row_filter = (
    (pir_data['eq_type'] == 0)
    & (pir_data['log_poly_mod_degree'] == 13)
    & (pir_data['num_threads'] == 1)
)
selected_data = (
    pir_data[row_filter]
    .reset_index()
    .drop(columns=["index"])
    .astype('int64')
)

# Average every column per num_keywords, then truncate the means to integers.
averaged_data = (
    selected_data
    .groupby(["num_keywords"], as_index=True)
    .mean()
    .astype('int64')
)

# Scale by 1e-6 (presumably microseconds -> seconds and bytes -> MB -- TODO
# confirm units), round, and render as a compact string.
scaled_columns = [
    "db_size",
    "time_query",
    "time_expansion",
    "time_sel_vec",
    "time_inner_prod",
    "time_server_latency",
]
for name in scaled_columns:
    averaged_data[name] = averaged_data[name].map(
        lambda value: "{:1g}".format(get_sig_figs(value / 1000000))
    )

table_columns = [
    "num_keywords",
    "db_size",
    "encoding_size",
    "time_expansion",
    "time_sel_vec",
    "time_inner_prod",
    "time_server_latency",
]
res = averaged_data.reset_index(level=0)[table_columns]

# Append " &" to every cell so the rows can be pasted as LaTeX table columns.
res.astype('str') + " &"

### Folklore with N=16384

In [None]:
# Rows for this table: eq_type 0, log_poly_mod_degree 14 (N = 16384), one thread.
row_filter = (
    (pir_data['eq_type'] == 0)
    & (pir_data['log_poly_mod_degree'] == 14)
    & (pir_data['num_threads'] == 1)
)
selected_data = (
    pir_data[row_filter]
    .reset_index()
    .drop(columns=["index"])
    .astype('int64')
)

# Average every column per num_keywords, then truncate the means to integers.
averaged_data = (
    selected_data
    .groupby(["num_keywords"], as_index=True)
    .mean()
    .astype('int64')
)

# Scale by 1e-6 (presumably microseconds -> seconds and bytes -> MB -- TODO
# confirm units), round, and render as a compact string.
scaled_columns = [
    "db_size",
    "time_query",
    "time_expansion",
    "time_sel_vec",
    "time_inner_prod",
    "time_server_latency",
]
for name in scaled_columns:
    averaged_data[name] = averaged_data[name].map(
        lambda value: "{:1g}".format(get_sig_figs(value / 1000000))
    )

table_columns = [
    "num_keywords",
    "db_size",
    "encoding_size",
    "time_expansion",
    "time_sel_vec",
    "time_inner_prod",
    "time_server_latency",
]
res = averaged_data.reset_index(level=0)[table_columns]

# Append " &" to every cell so the rows can be pasted as LaTeX table columns.
res.astype('str') + " &"

### Constant-weight k=2 Single Thread

In [None]:
# Rows for this table: hamming_weight 2, eq_type 1, one thread.
row_filter = (
    (pir_data['hamming_weight'] == 2)
    & (pir_data['eq_type'] == 1)
    & (pir_data['num_threads'] == 1)
)
selected_data = (
    pir_data[row_filter]
    .reset_index()
    .drop(columns=["index"])
    .astype('int64')
)

# Average every column per (hamming_weight, num_keywords), truncated to integers.
averaged_data = (
    selected_data
    .groupby(["hamming_weight", "num_keywords"], as_index=True)
    .mean()
    .astype('int64')
)

# These two columns are printed with one significant digit after scaling by 1e-6.
one_digit_columns = ["time_query", "time_expansion"]
for name in one_digit_columns:
    averaged_data[name] = averaged_data[name].map(
        lambda value: "{:.1g}".format(value / 1000000)
    )

# The remaining columns are scaled by 1e-6 and rounded through get_sig_figs.
rounded_columns = ["db_size", "time_sel_vec", "time_inner_prod", "time_server_latency"]
for name in rounded_columns:
    averaged_data[name] = averaged_data[name].map(
        lambda value: "{:1g}".format(get_sig_figs(value / 1000000))
    )

# Drop the hamming_weight level so the table is indexed by num_keywords only.
averaged_data.index = averaged_data.index.droplevel(level=0)
table_columns = [
    "db_size",
    "encoding_size",
    "time_expansion",
    "time_sel_vec",
    "time_inner_prod",
    "time_server_latency",
]
res = averaged_data[table_columns]

# Append " &" to every cell so the rows can be pasted as LaTeX table columns.
res.astype('str') + " &"

### Constant-weight k=2 Parallel

In [None]:
# Rows for this table: hamming_weight 2, eq_type 1, 64 threads.
row_filter = (
    (pir_data['hamming_weight'] == 2)
    & (pir_data['eq_type'] == 1)
    & (pir_data['num_threads'] == 64)
)
selected_data = (
    pir_data[row_filter]
    .reset_index()
    .drop(columns=["index"])
    .astype('int64')
)

# Average every column per (hamming_weight, num_keywords), truncated to integers.
averaged_data = (
    selected_data
    .groupby(["hamming_weight", "num_keywords"], as_index=True)
    .mean()
    .astype('int64')
)

# Scale by 1e-6 (presumably microseconds -> seconds and bytes -> MB -- TODO
# confirm units), round, and render as a compact string.
scaled_columns = [
    "db_size",
    "time_query",
    "time_expansion",
    "time_sel_vec",
    "time_inner_prod",
    "time_server_latency",
]
for name in scaled_columns:
    averaged_data[name] = averaged_data[name].map(
        lambda value: "{:1g}".format(get_sig_figs(value / 1000000))
    )

# Moving hamming_weight out of the index leaves num_keywords as the row label.
table_columns = [
    "db_size",
    "encoding_size",
    "time_expansion",
    "time_sel_vec",
    "time_inner_prod",
    "time_server_latency",
]
res = averaged_data.reset_index(level=0)[table_columns]

# Append " &" to every cell so the rows can be pasted as LaTeX table columns.
res.astype('str') + " &"

### Unary Single Thread

In [None]:
# Rows for this table: hamming_weight 1, eq_type 1, one thread.
row_filter = (
    (pir_data['hamming_weight'] == 1)
    & (pir_data['eq_type'] == 1)
    & (pir_data['num_threads'] == 1)
)
selected_data = (
    pir_data[row_filter]
    .reset_index()
    .drop(columns=["index"])
    .astype('int64')
)

# Average every column per (hamming_weight, num_keywords), truncated to integers.
averaged_data = (
    selected_data
    .groupby(["hamming_weight", "num_keywords"], as_index=True)
    .mean()
    .astype('int64')
)

# All six columns get the same treatment: scale by 1e-6, round through
# get_sig_figs, and render as a compact string.
scaled_columns = [
    "time_query",
    "time_expansion",
    "db_size",
    "time_sel_vec",
    "time_inner_prod",
    "time_server_latency",
]
for name in scaled_columns:
    averaged_data[name] = averaged_data[name].map(
        lambda value: "{:1g}".format(get_sig_figs(value / 1000000))
    )

# Moving hamming_weight out of the index leaves num_keywords as the row label.
table_columns = [
    "db_size",
    "encoding_size",
    "time_expansion",
    "time_sel_vec",
    "time_inner_prod",
    "time_server_latency",
]
res = averaged_data.reset_index(level=0)[table_columns]

# Append " &" to every cell so the rows can be pasted as LaTeX table columns.
res.astype('str') + " &"

## Results for Databases with Large Payloads (large databases)

In [None]:
# Reading data: combine every CSV under results/pir-large-payload into one table
large_pir_data = create_table_of_results("pir-large-payload")

In [None]:
# One formatted table per (hamming weight / keyword bitlength, database rows)
# combination, appended in loop order.
results = []
for weight, bitlength in [(2, 16), (3, 32), (4, 48)]:
    for row_count in [1000, 10000]:
        # Keep only valid responses for this combination.
        wanted = (
            (large_pir_data['num_keywords'] == row_count)
            & (large_pir_data['hamming_weight'] == weight)
            & (large_pir_data["valid_response"] == 1)
        )
        data = (
            large_pir_data[wanted]
            .reset_index()
            .drop(columns=["index"])
            .astype('int64')
        )

        # Sizes derived from the parameters (presumably 20 bits per coefficient;
        # db_size in 1e9 bytes and item_size in 1e6 bytes -- TODO confirm units).
        poly_mod_degree = 2 ** data["log_poly_mod_degree"]
        data["db_size"] = (
            data["num_keywords"] * poly_mod_degree * data["num_output_ciphers"] * 20 / 8000000000
        ).apply(get_sig_figs)
        data["item_size"] = (
            poly_mod_degree * data["num_output_ciphers"] * 20 / 8000000
        ).apply(get_sig_figs)

        # Discard databases below 0.5 and average the runs per payload size.
        data = (
            data[data["db_size"] >= 0.5]
            .groupby(["num_output_ciphers"], as_index=True)
            .mean()
        )

        timing_columns = ["time_server_latency", "time_expansion", "time_sel_vec", "time_inner_prod"]
        for name in timing_columns:
            data[name] = data[name].map(
                lambda value: "{:1g}".format(get_sig_figs(value / 1000000, 2))
            )

        data["keyword_bitlength"] = bitlength
        shown_columns = ["keyword_bitlength", "num_keywords", "db_size", "item_size", "time_server_latency"]
        # Sort by size, index by keyword bitlength, and append " &" to every
        # cell so the rows can be pasted as LaTeX table columns.
        res = (
            data[shown_columns]
            .sort_values(by=["db_size"])
            .reset_index(drop=True)
            .set_index('keyword_bitlength')
            .astype('str')
            .apply(lambda column: column + " &")
        )
        results.append(res)



In [None]:
# First table built by the loop over (hamming_weight, keyword bitlength) and row counts:
# hamming_weight = 2
# keyword bitlength = 16
# number of rows in database = 1000
results[0]

In [None]:
# Second table built by the loop over (hamming_weight, keyword bitlength) and row counts:
# hamming_weight = 2
# keyword bitlength = 16
# number of rows in database = 10000
results[1]

In [None]:
# Third table built by the loop over (hamming_weight, keyword bitlength) and row counts:
# hamming_weight = 3
# keyword bitlength = 32
# number of rows in database = 1000
results[2]

In [None]:
# Fourth table built by the loop over (hamming_weight, keyword bitlength) and row counts:
# hamming_weight = 3
# keyword bitlength = 32
# number of rows in database = 10000
results[3]

In [None]:
# Fifth table built by the loop over (hamming_weight, keyword bitlength) and row counts:
# hamming_weight = 4
# keyword bitlength = 48
# number of rows in database = 1000
results[4]

In [None]:
# Sixth table built by the loop over (hamming_weight, keyword bitlength) and row counts:
# hamming_weight = 4
# keyword bitlength = 48
# number of rows in database = 10000
results[5]