In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import os
import re
import json
import copy

# 1. Data Preparation

In [17]:
# Where the logs live and what the two log files are called.
log_path = os.path.abspath(r"./TestingBatchRunData/log")
setup_csv_filename = "setup_log.csv"
test_log_jsonl_filename = "test_log.jsonl"

# Holds the loaded DataFrames, keyed by dataset name ('setup', 'test').
data_dict = {}

In [18]:
# read cost data for setup.sh
# The setup log is a text file with ", "-separated fields; its first line is
# the header and every following line is one record. All values are kept as
# strings here.
file_path = os.path.join(log_path, setup_csv_filename)
cols = []   # header fields; reset on every run so no stale value is reused
data = []   # one list of string fields per record
# `with` closes the handle even if parsing raises part-way through.
with open(file_path, "r") as log_file:
    for idx, line in enumerate(log_file):
        # Strip only the line terminator: slicing off the last character
        # unconditionally would truncate the final field of a file that does
        # not end with a newline.
        fields = line.rstrip("\n").split(", ")
        if idx == 0:
            cols = fields
        elif fields != [""]:
            # Blank lines are skipped; they would otherwise produce a
            # one-field row that does not match the header width.
            data.append(fields)
data_dict['setup'] = pd.DataFrame(data, columns = cols)

In [19]:
# Data Preprocessing
# Build the cleaned setup table in a single chain so that re-running this cell
# gives the same result. The misspelled source column 'citcuit' is renamed
# first; `rename` ignores missing labels, so a second run (where the column is
# already called 'circuit') does not fail.
data_dict['setup'] = (
    data_dict['setup']
    .rename(columns={'citcuit': 'circuit'})
    .assign(
        # Timestamps are logged as compact YYYYmmddHHMMSS strings.
        Datetime=lambda frame: pd.to_datetime(frame['Datetime'], format="%Y%m%d%H%M%S"),
        # Ordered categorical so that sorting follows this circuit order
        # rather than alphabetical order.
        circuit=lambda frame: pd.Categorical(frame['circuit'],
                                             categories=['PublicKeyGen',
                                                         'encryptedVoteGen',
                                                         'tallying'],
                                             ordered=True),
    )
    # The reader keeps every field as a string; convert the numeric columns.
    .astype({column: 'float'
             for column in ['nVoters', 'nConstraints', 't_compile', 't_proofGen', 'provingKeySize']})
    .sort_values(['nVoters', 'Datetime', 'circuit'])
    # Drop fully identical rows and renumber the index from 0.
    .drop_duplicates(ignore_index = True)
)

In [21]:
# Display the setup-cost table stored under the 'setup' key.
data_dict['setup']

Unnamed: 0,Datetime,nVoters,design,circuit,nConstraints,t_compile,t_proofGen,provingKeySize
0,2022-10-23 05:10:39,4.0,original,PublicKeyGen,1291.0,7663.0,29809.0,807556.0
1,2022-10-23 05:10:39,4.0,original,encryptedVoteGen,4723.0,25446.0,42921.0,2861092.0
2,2022-10-23 05:10:39,4.0,original,tallying,2900.0,13423.0,33628.0,1816328.0
3,2022-10-23 06:59:36,100.0,original,PublicKeyGen,1291.0,8232.0,26969.0,807556.0
4,2022-10-23 06:59:36,100.0,original,encryptedVoteGen,57755.0,150248.0,233339.0,35398044.0
5,2022-10-23 06:59:36,100.0,original,tallying,53780.0,140391.0,229771.0,33577352.0
6,2022-10-25 23:51:40,100.0,original,PublicKeyGen,1291.0,5959.0,29518.0,807556.0
7,2022-10-25 23:51:40,100.0,original,encryptedVoteGen,57755.0,172499.0,239465.0,35398044.0
8,2022-10-25 23:51:40,100.0,original,tallying,53780.0,136876.0,234274.0,33577352.0
9,2022-10-26 15:38:36,100.0,original,PublicKeyGen,1291.0,5256.0,22303.0,807556.0


In [43]:
# read cost data for test_log.jsonl (The cost data for each test voting)
# The file is parsed as JSON Lines (one JSON record per line) in a single
# pandas call; no separate manual pass over the file is needed.
file_path = os.path.join(log_path, test_log_jsonl_filename)
jsonObj = pd.read_json(path_or_buf=file_path, lines=True)

# One row per step: explode each record's `steps` list and renumber rows from 0.
data_dict['test'] = jsonObj.explode(['steps'], ignore_index =True)

# Treat any `steps` entry that is not a dict (e.g. the NaN produced when a
# record has an empty step list) as a step with no fields.
step_records = [
    step if isinstance(step, dict) else {}
    for step in data_dict['test']['steps']
]

# Distinct step keys in first-seen order.
field_names = list(dict.fromkeys(key for step in step_records for key in step))

# Expose every step key as its own `step__<key>` column; a step that lacks the
# key contributes None for that row.
for col_suffix in field_names:
    data_dict['test'][f"step__{col_suffix}"] = pd.Series(
        [step.get(col_suffix) for step in step_records],
        index=data_dict['test'].index,
    )

In [44]:
# Step names of the rows labelled 0 through 14 (label slicing is inclusive).
full_run_steps = data_dict['test'].loc[0:14, 'step__name'].tolist()

In [45]:
# Combined label: the generator family followed by the string form of its
# parameters.
params_as_text = data_dict['test']['params'].astype(str)
data_dict['test']['vote_gen_family__params'] = data_dict['test']['vote_gen_family'].add(params_as_text)

In [46]:
# Preview the first 20 rows of the test-run table.
display(data_dict['test'].head(20))

Unnamed: 0,start_time,n_voters,vote_gen_family,params,steps,step__name,step__start_time,step__duration,step__status_ok,step__message,step__cost,vote_gen_family__params
0,2022-10-23 05:11:28.083000+00:00,4,independent,[0.5],"{'name': 'Generate Testing Data', 'start_time'...",Generate Testing Data,1666501888290,14424,True,,,independent[0.5]
1,2022-10-23 05:11:28.083000+00:00,4,independent,[0.5],"{'name': 'Deploy the contracts', 'start_time':...",Deploy the contracts,1666501902727,69,True,,,independent[0.5]
2,2022-10-23 05:11:28.083000+00:00,4,independent,[0.5],"{'name': 'Set Verification keys: PublicKey', '...",Set Verification keys: PublicKey,1666501902819,233,True,,462216.0,independent[0.5]
3,2022-10-23 05:11:28.083000+00:00,4,independent,[0.5],{'name': 'Set Verification keys: EncrpytedVote...,Set Verification keys: EncrpytedVote,1666501903052,252,True,,680479.0,independent[0.5]
4,2022-10-23 05:11:28.083000+00:00,4,independent,[0.5],"{'name': 'Set Verification keys: Tallying', 's...",Set Verification keys: Tallying,1666501903304,285,True,,,independent[0.5]
5,2022-10-23 05:11:28.083000+00:00,4,independent,[0.5],{'name': 'Register public keys for elligible u...,Register public keys for elligible users excep...,1666501903608,4541,True,,1050159.0,independent[0.5]
6,2022-10-23 05:11:28.083000+00:00,4,independent,[0.5],{'name': 'Throw an error if non-elligable user...,Throw an error if non-elligable user tries to ...,1666501908178,462,True,,,independent[0.5]
7,2022-10-23 05:11:28.083000+00:00,4,independent,[0.5],{'name': 'Throw an error if elligable user pro...,Throw an error if elligable user provides inva...,1666501908675,880,False,error in verifying invalid user,,independent[0.5]
8,2022-10-23 05:11:28.083000+00:00,4,independent,[0.5],{'name': 'Register public key of the last vote...,Register public key of the last voter,1666501909577,878,True,,345057.0,independent[0.5]
9,2022-10-23 05:11:28.083000+00:00,4,independent,[0.5],{'name': 'Throw an error if an user tries to r...,Throw an error if an user tries to register bu...,1666501910505,99,True,,,independent[0.5]


In [47]:
import matplotlib
import plotly
import plotly.express as px

In [48]:
# List the column labels of the setup table.
data_dict['setup'].columns

Index(['Datetime', 'nVoters', 'design', 'circuit', 'nConstraints', 't_compile',
       't_proofGen', 'provingKeySize'],
      dtype='object')

# 2. Cost of Setup

In [49]:
# One scatter plot per (circuit, metric) pair, plotting the metric against the
# number of voters.
# `circuit` is a categorical column: observed=True limits the groups to the
# categories that actually occur in the data, so no empty figure is drawn for
# an absent circuit and pandas does not warn about the changing default.
for circuit, df in data_dict['setup'].groupby("circuit", observed=True):
    for y in ['nConstraints', 't_compile','t_proofGen', 'provingKeySize']:
        fig = px.scatter(df, x = 'nVoters', y = y, title = circuit)
        fig.show()
    

# 3. Cost of Test Run

In [50]:
# Display the test-run table stored under the 'test' key.
data_dict['test']

Unnamed: 0,start_time,n_voters,vote_gen_family,params,steps,step__name,step__start_time,step__duration,step__status_ok,step__message,step__cost,vote_gen_family__params
0,2022-10-23 05:11:28.083000+00:00,4,independent,[0.5],"{'name': 'Generate Testing Data', 'start_time'...",Generate Testing Data,1666501888290,14424,True,,,independent[0.5]
1,2022-10-23 05:11:28.083000+00:00,4,independent,[0.5],"{'name': 'Deploy the contracts', 'start_time':...",Deploy the contracts,1666501902727,69,True,,,independent[0.5]
2,2022-10-23 05:11:28.083000+00:00,4,independent,[0.5],"{'name': 'Set Verification keys: PublicKey', '...",Set Verification keys: PublicKey,1666501902819,233,True,,462216.0,independent[0.5]
3,2022-10-23 05:11:28.083000+00:00,4,independent,[0.5],{'name': 'Set Verification keys: EncrpytedVote...,Set Verification keys: EncrpytedVote,1666501903052,252,True,,680479.0,independent[0.5]
4,2022-10-23 05:11:28.083000+00:00,4,independent,[0.5],"{'name': 'Set Verification keys: Tallying', 's...",Set Verification keys: Tallying,1666501903304,285,True,,,independent[0.5]
...,...,...,...,...,...,...,...,...,...,...,...,...
2718,2022-10-29 17:36:45.909000+00:00,300,independent,[0.01],{'name': 'Throw an error if elligable user pro...,Throw an error if elligable user provides inva...,1667091414352,122,False,error in verifying invalid user,,independent[0.01]
2719,2022-10-29 17:36:45.909000+00:00,300,independent,[0.01],{'name': 'Throw an error if an user tries to r...,Throw an error if an user tries to register bu...,1667091414605,101,False,error in verifying max number of voters,,independent[0.01]
2720,2022-10-29 17:36:45.909000+00:00,300,independent,[0.01],{'name': 'Throw an error if elligable user pro...,Throw an error if elligable user provides inva...,1667091431240,14462,True,,,independent[0.01]
2721,2022-10-29 17:36:45.909000+00:00,300,independent,[0.01],"{'name': 'Malicious Administrator', 'start_tim...",Malicious Administrator,1667091445922,10323,True,,,independent[0.01]


In [52]:
# For every (voter count, step) pair, draw one bar chart per metric showing the
# mean of that metric for each vote-generation family/parameter combination.
metrics = ("step__duration", "step__cost")
grouped_runs = data_dict['test'].groupby(["n_voters","step__name"], as_index = False)
for (nVoters, step__name), group_df in grouped_runs:
    for y in metrics:
        # Nothing to plot when the metric is missing on every row of the group.
        if group_df[y].isna().all():
            continue
        mean_by_family = group_df.groupby("vote_gen_family__params", as_index=False)[y].mean()
        fig = px.bar(
            mean_by_family,
            x = 'vote_gen_family__params',
            y = y,
            title = f"{nVoters}_{step__name}",
        )
        fig.show()
    