In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import os
import re
import json
import copy

# 1. Data Preparation

In [16]:
# Directory and file names of the two log files read by this notebook.
log_path = os.path.abspath("/content/log")
setup_csv_filename = "setup_log.csv"
test_log_jsonl_filename = "test_log.jsonl"

# Holds the DataFrames built from the logs, one entry per log.
data_dict = {}

In [17]:
# read cost data for setup.sh
# The log is a text file whose first line holds the column names and whose
# remaining lines hold one record each; fields are separated by ", ".
file_path = os.path.join(log_path, setup_csv_filename)

# `with` guarantees the handle is closed even if parsing raises.
with open(file_path, "r") as log_file:
    # Strip only the line terminator (so a final line without a trailing
    # newline keeps its last character) and ignore blank lines.
    rows = [line.rstrip("\n").split(", ") for line in log_file if line.strip()]

if not rows:
    raise ValueError(f"{file_path} is empty: expected a header line")

# First line is the header, everything after it is data. All values are kept
# as strings here.
cols, data = rows[0], rows[1:]
data_dict['setup'] = pd.DataFrame(data, columns=cols)
data_dict['setup']

Unnamed: 0,Datetime,nVoters,nVotingOptions,design,citcuit,nConstraints,t_compile,t_proofGen,provingKeySize
0,20221105102648,3,3,original,PublicKeyGen,1291,4624,12632,807556
1,20221105102648,3,3,original,encryptedVoteGen,5707,16655,25515,3289900
2,20221105102648,3,3,original,tallying,2675,7713,17683,1661620
3,20221105103035,4,3,original,PublicKeyGen,1291,4331,11972,807556
4,20221105103035,4,3,original,encryptedVoteGen,6249,16591,27818,3584788
5,20221105103035,4,3,original,tallying,3200,9032,19926,1949288
6,20221105103447,5,3,original,PublicKeyGen,1291,3965,11898,807556
7,20221105103447,5,3,original,encryptedVoteGen,6804,18081,29921,3886960
8,20221105103447,5,3,original,tallying,5965,16489,26660,3491780
9,20221105103907,6,3,original,PublicKeyGen,1291,3986,11558,807556


In [18]:
# Data Preprocessing
# Builds the cleaned setup frame as one chain of non-mutating steps and stores
# it back under data_dict['setup'].
circuit_order = ['PublicKeyGen', 'encryptedVoteGen', 'tallying']
float_columns = ['nVoters', 'nVotingOptions', 'nConstraints',
                 't_compile', 't_proofGen', 'provingKeySize']

setup_raw = data_dict['setup']
data_dict['setup'] = (
    setup_raw
    .assign(
        # Timestamps are logged as compact YYYYmmddHHMMSS strings.
        Datetime=pd.to_datetime(setup_raw['Datetime'], format="%Y%m%d%H%M%S"),
        # Ordered categorical so that sorting follows circuit_order.
        citcuit=pd.Categorical(setup_raw['citcuit'],
                               categories=circuit_order,
                               ordered=True),
    )
    .astype({column: 'float' for column in float_columns})
    # The log header misspells the column name; fix it here.
    .rename({'citcuit': 'circuit'}, axis=1)
    .sort_values(['nVoters', 'nVotingOptions', 'Datetime', 'circuit'])
    .drop_duplicates(ignore_index=True)
)

In [None]:
# Display the DataFrame stored under the 'setup' key.
data_dict['setup']

Unnamed: 0,Datetime,nVoters,design,circuit,nConstraints,t_compile,t_proofGen,provingKeySize
0,2022-10-23 05:10:39,4.0,original,PublicKeyGen,1291.0,7663.0,29809.0,807556.0
1,2022-10-23 05:10:39,4.0,original,encryptedVoteGen,4723.0,25446.0,42921.0,2861092.0
2,2022-10-23 05:10:39,4.0,original,tallying,2900.0,13423.0,33628.0,1816328.0
3,2022-10-23 06:59:36,100.0,original,PublicKeyGen,1291.0,8232.0,26969.0,807556.0
4,2022-10-23 06:59:36,100.0,original,encryptedVoteGen,57755.0,150248.0,233339.0,35398044.0
5,2022-10-23 06:59:36,100.0,original,tallying,53780.0,140391.0,229771.0,33577352.0
6,2022-10-25 23:51:40,100.0,original,PublicKeyGen,1291.0,5959.0,29518.0,807556.0
7,2022-10-25 23:51:40,100.0,original,encryptedVoteGen,57755.0,172499.0,239465.0,35398044.0
8,2022-10-25 23:51:40,100.0,original,tallying,53780.0,136876.0,234274.0,33577352.0
9,2022-10-26 15:38:36,100.0,original,PublicKeyGen,1291.0,5256.0,22303.0,807556.0


In [19]:
# read cost data for test_log.jsonl (The cost data for each test voting)
# The file is parsed as JSON Lines: one JSON record per line.
file_path = os.path.join(log_path, test_log_jsonl_filename)
jsonObj = pd.read_json(path_or_buf=file_path, lines=True)

# 'steps' holds a list per record; explode it so that every row describes a
# single step, with the record-level columns repeated on each row.
data_dict['test'] = jsonObj.explode(['steps'], ignore_index=True)

# Collect the keys used by the step dicts, in first-seen order. Entries that
# are not dicts (explode yields NaN for an empty list) are skipped instead of
# raising.
field_names = list(dict.fromkeys(
    key
    for step in data_dict['test']['steps']
    if isinstance(step, dict)
    for key in step
))

# Promote every step field to its own "step__<field>" column; a step that
# lacks the field (or is not a dict) contributes None.
for col_suffix in field_names:
    data_dict['test'][f"step__{col_suffix}"] = data_dict['test'][['steps']].apply(
        lambda x: x['steps'].get(col_suffix) if isinstance(x['steps'], dict) else None,
        axis=1,
    )

In [20]:
# Step names found on index labels 0 through 14 (label slicing with .loc is
# inclusive on both ends, so this is up to 15 rows).
first_rows_step_names = data_dict['test'].loc[0:14, 'step__name']
full_run_steps = first_rows_step_names.tolist()

In [21]:
# Combined label: the generator family followed by the string form of its
# parameters (e.g. "independent[0.5]").
family_labels = data_dict['test']['vote_gen_family']
params_as_text = data_dict['test']['params'].astype('str')
data_dict['test']['vote_gen_family__params'] = family_labels + params_as_text

In [22]:
# Preview the first 20 rows of the exploded test-run DataFrame.
display(data_dict['test'].head(20))

Unnamed: 0,start_time,n_voters,n_options,encoding_size,vote_gen_family,params,steps,step__name,step__start_time,step__duration,step__status_ok,step__message,step__cost,vote_gen_family__params
0,2022-11-05 10:27:09.391000+00:00,3,3,4,independent,[0.5],"{'name': 'Generate Testing Data', 'start_time'...",Generate Testing Data,1667644029491,6320,True,9.0,,independent[0.5]
1,2022-11-05 10:27:09.391000+00:00,3,3,4,independent,[0.5],"{'name': 'Deploy the contracts', 'start_time':...",Deploy the contracts,1667644035822,65,True,,,independent[0.5]
2,2022-11-05 10:27:09.391000+00:00,3,3,4,independent,[0.5],"{'name': 'Set Verification keys: PublicKey', '...",Set Verification keys: PublicKey,1667644035890,139,True,,462204.0,independent[0.5]
3,2022-11-05 10:27:09.391000+00:00,3,3,4,independent,[0.5],{'name': 'Set Verification keys: EncrpytedVote...,Set Verification keys: EncrpytedVote,1667644036029,136,True,,636812.0,independent[0.5]
4,2022-11-05 10:27:09.391000+00:00,3,3,4,independent,[0.5],"{'name': 'Set Verification keys: Tallying', 's...",Set Verification keys: Tallying,1667644036165,132,True,,,independent[0.5]
5,2022-11-05 10:27:09.391000+00:00,3,3,4,independent,[0.5],{'name': 'Register public keys for elligible u...,Register public keys for elligible users excep...,1667644036300,1542,True,,707554.0,independent[0.5]
6,2022-11-05 10:27:09.391000+00:00,3,3,4,independent,[0.5],{'name': 'Throw an error if non-elligable user...,Throw an error if non-elligable user tries to ...,1667644037855,249,True,,,independent[0.5]
7,2022-11-05 10:27:09.391000+00:00,3,3,4,independent,[0.5],{'name': 'Throw an error if sender already reg...,Throw an error if sender already registered,1667644038112,679,True,,,independent[0.5]
8,2022-11-05 10:27:09.391000+00:00,3,3,4,independent,[0.5],{'name': 'Register public key of the last vote...,Register public key of the last voter,1667644038806,344,True,,346835.0,independent[0.5]
9,2022-11-05 10:27:09.391000+00:00,3,3,4,independent,[0.5],{'name': 'Throw an error if an user tries to r...,Throw an error if an user tries to register bu...,1667644039153,50,True,,,independent[0.5]


In [23]:
import matplotlib
import plotly
import plotly.express as px

In [24]:
# List the column names of the setup DataFrame.
data_dict['setup'].columns

Index(['Datetime', 'nVoters', 'nVotingOptions', 'design', 'circuit',
       'nConstraints', 't_compile', 't_proofGen', 'provingKeySize'],
      dtype='object')

# 2. Cost of Setup

In [39]:
# One scatter plot per (circuit, metric): the metric against the number of
# voters, coloured by the number of voting options.
setup_metrics = ['nConstraints', 't_compile', 't_proofGen', 'provingKeySize']

for circuit_name, circuit_df in data_dict['setup'].groupby("circuit"):
    for metric in setup_metrics:
        scatter_fig = px.scatter(
            circuit_df,
            x='nVoters',
            y=metric,
            color='nVotingOptions',
            opacity=0.9,
            color_continuous_scale='portland',
            title=circuit_name,
        )
        scatter_fig.show()
    

# 3. Cost of Test Run

In [26]:
# Display the DataFrame stored under the 'test' key.
data_dict['test']

Unnamed: 0,start_time,n_voters,n_options,encoding_size,vote_gen_family,params,steps,step__name,step__start_time,step__duration,step__status_ok,step__message,step__cost,vote_gen_family__params
0,2022-11-05 10:27:09.391000+00:00,3,3,4,independent,[0.5],"{'name': 'Generate Testing Data', 'start_time'...",Generate Testing Data,1667644029491,6320,True,9,,independent[0.5]
1,2022-11-05 10:27:09.391000+00:00,3,3,4,independent,[0.5],"{'name': 'Deploy the contracts', 'start_time':...",Deploy the contracts,1667644035822,65,True,,,independent[0.5]
2,2022-11-05 10:27:09.391000+00:00,3,3,4,independent,[0.5],"{'name': 'Set Verification keys: PublicKey', '...",Set Verification keys: PublicKey,1667644035890,139,True,,462204.0,independent[0.5]
3,2022-11-05 10:27:09.391000+00:00,3,3,4,independent,[0.5],{'name': 'Set Verification keys: EncrpytedVote...,Set Verification keys: EncrpytedVote,1667644036029,136,True,,636812.0,independent[0.5]
4,2022-11-05 10:27:09.391000+00:00,3,3,4,independent,[0.5],"{'name': 'Set Verification keys: Tallying', 's...",Set Verification keys: Tallying,1667644036165,132,True,,,independent[0.5]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
400,2022-11-05 11:09:38.260000+00:00,3,7,4,independent,[0.5],"{'name': 'Cast valid encrypted votes', 'start_...",Cast valid encrypted votes,1667646599241,3821,True,,1137256.0,independent[0.5]
401,2022-11-05 11:09:38.260000+00:00,3,7,4,independent,[0.5],{'name': 'Throw an error if elligable user pro...,Throw an error if elligable user provides inva...,1667646603080,669,True,,,independent[0.5]
402,2022-11-05 11:09:38.260000+00:00,3,7,4,independent,[0.5],"{'name': 'Malicious Administrator', 'start_tim...",Malicious Administrator,1667646603764,4543,True,,,independent[0.5]
403,2022-11-05 11:09:38.260000+00:00,3,7,4,independent,[0.5],"{'name': 'Honest Administrator', 'start_time':...",Honest Administrator,1667646608338,4669,True,,342960.0,independent[0.5]


In [31]:
# For every (n_voters, step name) group, draw one bar chart per metric showing
# the metric's mean for each number of voting options.
step_metrics = ["step__duration", "step__cost"]
grouped_by_voters = data_dict['test'].groupby(["n_voters", "step__name"], as_index=False)

for (nVoters, step__name), step_df in grouped_by_voters:
    for metric in step_metrics:
        # Nothing to plot when the metric is missing on every row of the group.
        if step_df[metric].isna().all():
            continue
        mean_per_option = (
            step_df[[metric, 'n_options']]
            .groupby("n_options", as_index=False)
            .mean()
        )
        bar_fig = px.bar(mean_per_option, x='n_options', y=metric,
                         title=f"{nVoters}_{step__name}")
        bar_fig.show()
    

In [32]:
# For every (n_options, step name) group, draw one bar chart per metric showing
# the metric's mean for each number of voters.
for (nOptions, step__name), df in data_dict['test'].groupby(["n_options", "step__name"], as_index=False):
    for y in ["step__duration", "step__cost"]:
        # Skip metrics that are missing on every row of this group.
        if not df[y].isna().all():
            df_temp = df[[y, 'n_voters']].groupby("n_voters", as_index=False).mean()
            # The title must carry this group's own key (nOptions); no name
            # from another cell is used, so the cell also runs on its own.
            fig = px.bar(df_temp, x='n_voters', y=y, title=f"{nOptions}_{step__name}")
            fig.show()
    