In [None]:
import pandas as pd
import numpy as np
import datetime as dt
import os
import re
import json
import copy

# 1. Data Preparation

In [None]:
# Location and names of the two log files analysed in this notebook.
log_path = os.path.abspath(r"/content/log")
setup_csv_filename = "setup_log.csv"
test_log_jsonl_filename = "test_log.jsonl"

# Registry of the DataFrames built below, keyed by log name.
data_dict = {}

In [None]:
# read cost data for setup.sh
# The log is parsed by hand (header on the first non-blank line, fields separated
# by ", "), so every value is kept as a string; dtypes are converted in the
# "Data Preprocessing" cell.
file_path = os.path.join(log_path, setup_csv_filename)
cols = []
data = []
header_seen = False
# `with` guarantees the handle is closed even if parsing raises.
with open(file_path, "r") as log_file:
    for line in log_file:
        # Remove only the line terminator. The previous `[:-1]` slice also dropped
        # the last real character of a final line that has no trailing newline.
        line = line.rstrip("\n")
        if not line:
            # Blank lines would otherwise become one-field rows that do not
            # match the header width.
            continue
        fields = line.split(", ")
        if not header_seen:
            cols = fields
            header_seen = True
        else:
            data.append(fields)
data_dict['setup'] = pd.DataFrame(data, columns = cols)
data_dict['setup']

Unnamed: 0,Datetime,nVoters,nVotingOptions,design,citcuit,nConstraints,t_compile,t_proofGen,provingKeySize
0,20221105153022,3,3,original,PublicKeyGen,1291,4630,12103,807556
1,20221105153022,3,3,original,encryptedVoteGen,5707,17889,25777,3289900
2,20221105153022,3,3,original,tallying,2675,9006,18136,1661620
3,20221105153415,4,3,original,PublicKeyGen,1291,4672,12048,807556
4,20221105153415,4,3,original,encryptedVoteGen,6249,20128,28375,3584788
5,20221105153415,4,3,original,tallying,3200,10662,20538,1949288
6,20221105153831,5,3,original,PublicKeyGen,1291,4803,12239,807556
7,20221105153831,5,3,original,encryptedVoteGen,6804,21843,30822,3886960
8,20221105153831,5,3,original,tallying,5965,19494,27349,3491780
9,20221105154304,6,3,original,PublicKeyGen,1291,4840,12071,807556


In [None]:
# Data Preprocessing
# The misspelled 'citcuit' column is renamed FIRST. DataFrame.rename ignores labels
# that are absent, so re-running this cell no longer fails with KeyError: 'citcuit'
# (the original read 'citcuit' and only renamed it afterwards).
numeric_columns = ['nVoters', 'nVotingOptions', 'nConstraints',
                   't_compile', 't_proofGen', 'provingKeySize']
# Category order used when sorting by circuit.
circuit_order = ['PublicKeyGen', 'encryptedVoteGen', 'tallying']

setup_clean = data_dict['setup'].rename(columns={'citcuit': 'circuit'})
setup_clean = setup_clean.assign(
    # Timestamps are logged as compact YYYYMMDDhhmmss strings.
    Datetime=pd.to_datetime(setup_clean['Datetime'], format="%Y%m%d%H%M%S"),
    circuit=pd.Categorical(setup_clean['circuit'],
                           categories=circuit_order,
                           ordered=True),
)
# Values were read as strings; convert the measurement columns to float.
setup_clean = setup_clean.astype({column: 'float' for column in numeric_columns})
data_dict['setup'] = (
    setup_clean
    .sort_values(['nVoters', 'nVotingOptions', 'Datetime', 'circuit'])
    .drop_duplicates(ignore_index=True)
)

In [None]:
# Display the preprocessed setup-cost table as this cell's output.
data_dict['setup']

Unnamed: 0,Datetime,nVoters,nVotingOptions,design,circuit,nConstraints,t_compile,t_proofGen,provingKeySize
0,2022-11-05 15:30:22,3.0,3.0,original,PublicKeyGen,1291.0,4630.0,12103.0,807556.0
1,2022-11-05 15:30:22,3.0,3.0,original,encryptedVoteGen,5707.0,17889.0,25777.0,3289900.0
2,2022-11-05 15:30:22,3.0,3.0,original,tallying,2675.0,9006.0,18136.0,1661620.0
3,2022-11-05 15:52:31,3.0,4.0,original,PublicKeyGen,1291.0,5391.0,12255.0,807556.0
4,2022-11-05 15:52:31,3.0,4.0,original,encryptedVoteGen,5710.0,21505.0,26531.0,3291784.0
5,2022-11-05 15:52:31,3.0,4.0,original,tallying,3635.0,17597.0,21012.0,2087092.0
6,2022-11-05 15:57:01,3.0,5.0,original,PublicKeyGen,1291.0,5597.0,12720.0,807556.0
7,2022-11-05 15:57:01,3.0,5.0,original,encryptedVoteGen,5713.0,22017.0,26436.0,3293668.0
8,2022-11-05 15:57:01,3.0,5.0,original,tallying,7475.0,28863.0,26208.0,4051036.0
9,2022-11-05 16:03:04,3.0,6.0,original,PublicKeyGen,1291.0,5513.0,12455.0,807556.0


In [None]:
# read cost data for test_log.jsonl (The cost data for each test voting)
# pandas parses the JSON-lines file directly (lines=True: one JSON record per line).
# The former manual open()/readlines() loop had an empty body, so it only read the
# file a second time and left the handle open if anything raised.
file_path = os.path.join(log_path, test_log_jsonl_filename)
jsonObj = pd.read_json(path_or_buf=file_path, lines=True)


# One row per executed step: explode() unpacks the list-like `steps` column so that
# each entry gets its own row (run-level columns are repeated).
data_dict['test'] = jsonObj.explode(['steps'], ignore_index=True)

# Materialise the step records once instead of iterating the frame row by row
# (the original used iterrows() plus a deepcopy of every record).
step_records = data_dict['test']['steps'].tolist()

# Ordered, de-duplicated union of the keys used by any step record; dict keys keep
# first-seen order. Entries that are not dicts contribute no keys - explode() emits
# NaN for an empty `steps` list, which previously raised a TypeError here.
field_names = list(dict.fromkeys(
    key
    for record in step_records if isinstance(record, dict)
    for key in record
))

# Promote every step field to its own `step__<field>` column; a record that lacks
# the field (or is not a dict) yields a missing value.
for col_suffix in field_names:
    data_dict['test'][f"step__{col_suffix}"] = pd.Series(
        [record.get(col_suffix) if isinstance(record, dict) else None
         for record in step_records],
        index=data_dict['test'].index,
    )

In [None]:
# Step names found in exploded rows 0..14; a label-based .loc slice includes both
# ends, so this picks 15 rows.
# NOTE(review): presumably these are the steps of one complete test run - confirm
# that a full run always has exactly 15 steps.
first_rows_step_names = data_dict['test'].loc[0:14, 'step__name']
full_run_steps = first_rows_step_names.tolist()

In [None]:
# Label every row with its vote-generation family followed by the string form of
# its parameter list, e.g. "independent[0.5]".
test_steps = data_dict['test']
params_as_text = test_steps['params'].astype('str')
test_steps['vote_gen_family__params'] = test_steps['vote_gen_family'] + params_as_text

In [None]:
# Show the first 20 step rows of the exploded test log, including the derived
# step__* and vote_gen_family__params columns.
display(data_dict['test'].head(20))

Unnamed: 0,start_time,n_voters,n_options,encoding_size,vote_gen_family,params,steps,step__name,step__start_time,step__duration,step__status_ok,step__message,step__cost,vote_gen_family__params
0,2022-11-05 15:30:46.112000+00:00,3,3,4,independent,[0.5],"{'name': 'Generate Testing Data', 'start_time'...",Generate Testing Data,1667662246178,6066,True,36.0,,independent[0.5]
1,2022-11-05 15:30:46.112000+00:00,3,3,4,independent,[0.5],"{'name': 'Deploy the contracts', 'start_time':...",Deploy the contracts,1667662252248,38,True,,,independent[0.5]
2,2022-11-05 15:30:46.112000+00:00,3,3,4,independent,[0.5],"{'name': 'Set Verification keys: PublicKey', '...",Set Verification keys: PublicKey,1667662252289,104,True,,462204.0,independent[0.5]
3,2022-11-05 15:30:46.112000+00:00,3,3,4,independent,[0.5],{'name': 'Set Verification keys: EncrpytedVote...,Set Verification keys: EncrpytedVote,1667662252393,146,True,,636788.0,independent[0.5]
4,2022-11-05 15:30:46.112000+00:00,3,3,4,independent,[0.5],"{'name': 'Set Verification keys: Tallying', 's...",Set Verification keys: Tallying,1667662252539,109,True,,,independent[0.5]
5,2022-11-05 15:30:46.112000+00:00,3,3,4,independent,[0.5],{'name': 'Register public keys for elligible u...,Register public keys for elligible users excep...,1667662252651,1318,True,,707556.0,independent[0.5]
6,2022-11-05 15:30:46.112000+00:00,3,3,4,independent,[0.5],{'name': 'Throw an error if non-elligable user...,Throw an error if non-elligable user tries to ...,1667662253972,203,True,,,independent[0.5]
7,2022-11-05 15:30:46.112000+00:00,3,3,4,independent,[0.5],{'name': 'Throw an error if sender already reg...,Throw an error if sender already registered,1667662254178,696,True,,,independent[0.5]
8,2022-11-05 15:30:46.112000+00:00,3,3,4,independent,[0.5],{'name': 'Register public key of the last vote...,Register public key of the last voter,1667662254880,338,True,,346847.0,independent[0.5]
9,2022-11-05 15:30:46.112000+00:00,3,3,4,independent,[0.5],{'name': 'Throw an error if an user tries to r...,Throw an error if an user tries to register bu...,1667662255221,48,True,,,independent[0.5]


In [None]:
import matplotlib
import plotly
import plotly.express as px

In [None]:
# Show the column labels of the setup-cost frame.
data_dict['setup'].columns

Index(['Datetime', 'nVoters', 'nVotingOptions', 'design', 'circuit',
       'nConstraints', 't_compile', 't_proofGen', 'provingKeySize'],
      dtype='object')

# 2. Cost of Setup

In [None]:
# One scatter plot per (circuit, metric): the metric is drawn against the number of
# voters, and the marker colour encodes the number of voting options.
setup_metrics = ('nConstraints', 't_compile', 't_proofGen', 'provingKeySize')
for circuit, df in data_dict['setup'].groupby("circuit"):
    for y in setup_metrics:
        fig = px.scatter(
            df,
            x='nVoters',
            y=y,
            color='nVotingOptions',
            opacity=0.9,
            color_continuous_scale='portland',
            title=circuit,
        )
        fig.show()
    

# 3. Cost of Test Run

In [None]:
# Display the exploded per-step test log as this cell's output.
data_dict['test']

Unnamed: 0,start_time,n_voters,n_options,encoding_size,vote_gen_family,params,steps,step__name,step__start_time,step__duration,step__status_ok,step__message,step__cost,vote_gen_family__params
0,2022-11-05 15:30:46.112000+00:00,3,3,4,independent,[0.5],"{'name': 'Generate Testing Data', 'start_time'...",Generate Testing Data,1667662246178,6066,True,36,,independent[0.5]
1,2022-11-05 15:30:46.112000+00:00,3,3,4,independent,[0.5],"{'name': 'Deploy the contracts', 'start_time':...",Deploy the contracts,1667662252248,38,True,,,independent[0.5]
2,2022-11-05 15:30:46.112000+00:00,3,3,4,independent,[0.5],"{'name': 'Set Verification keys: PublicKey', '...",Set Verification keys: PublicKey,1667662252289,104,True,,462204.0,independent[0.5]
3,2022-11-05 15:30:46.112000+00:00,3,3,4,independent,[0.5],{'name': 'Set Verification keys: EncrpytedVote...,Set Verification keys: EncrpytedVote,1667662252393,146,True,,636788.0,independent[0.5]
4,2022-11-05 15:30:46.112000+00:00,3,3,4,independent,[0.5],"{'name': 'Set Verification keys: Tallying', 's...",Set Verification keys: Tallying,1667662252539,109,True,,,independent[0.5]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
715,2022-11-11 20:07:28.546000+00:00,4,7,4,independent,[0.5],"{'name': 'Cast valid encrypted votes', 'start_...",Cast valid encrypted votes,1668197267872,3398,True,,1583101.0,independent[0.5]
716,2022-11-11 20:07:28.546000+00:00,4,7,4,independent,[0.5],{'name': 'Throw an error if elligable user pro...,Throw an error if elligable user provides inva...,1668197271276,689,True,,,independent[0.5]
717,2022-11-11 20:07:28.546000+00:00,4,7,4,independent,[0.5],"{'name': 'Malicious Administrator', 'start_tim...",Malicious Administrator,1668197271988,3347,True,,,independent[0.5]
718,2022-11-11 20:07:28.546000+00:00,4,7,4,independent,[0.5],"{'name': 'Honest Administrator', 'start_time':...",Honest Administrator,1668197275363,1961,True,,358019.0,independent[0.5]


In [None]:
# For every (number of voters, step name) pair, draw a bar chart of the mean step
# duration and of the mean step cost per number of voting options.
step_metrics = ("step__duration", "step__cost")
for (nVoters, step__name), df in data_dict['test'].groupby(["n_voters", "step__name"], as_index=False):
    for y in step_metrics:
        # Nothing to draw when this group has no value at all for the metric.
        if df[y].isna().all():
            continue
        metric_and_options = df[[y, 'n_options']]
        df_temp = metric_and_options.groupby("n_options", as_index=False).mean()
        fig = px.bar(df_temp, x='n_options', y=y, title=f"{nVoters}_{step__name}")
        fig.show()
    

In [None]:
# For every (number of voting options, step name) pair, draw a bar chart of the mean
# step duration and of the mean step cost per number of voters.
step_metrics = ("step__duration", "step__cost")
for (nOptions, step__name), df in data_dict['test'].groupby(["n_options", "step__name"], as_index=False):
    for y in step_metrics:
        # Nothing to draw when this group has no value at all for the metric.
        if df[y].isna().all():
            continue
        metric_and_voters = df[[y, 'n_voters']]
        df_temp = metric_and_voters.groupby("n_voters", as_index=False).mean()
        fig = px.bar(df_temp, x='n_voters', y=y, title=f"{nOptions}_{step__name}")
        fig.show()
    