In [169]:
import pandas as pd
import numpy as np

from influxdb_client import InfluxDBClient, Point, Dialect

import re
import time
import datetime

import warnings
from influxdb_client.client.warnings import MissingPivotFunction

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.colors as colors

import pandasql as ps
import sqlite3

import csv

import warnings 
from influxdb_client.client.warnings import MissingPivotFunction
warnings.simplefilter("ignore", MissingPivotFunction)

import gc

# Show up to 500 rows before pandas truncates a rendered table.
pd.options.display.max_rows = 500

In [170]:
def _parse_line(line):

    rx_dict = {
    'token': re.compile(r'var token = "(?P<token>.*)"\n'),
    'url': re.compile(r'var url = "(?P<url>.*)"\n'),
    'org': re.compile(r'var org = "(?P<org>.*)"\n'),
    'bucket': re.compile(r'var bucket = "(?P<bucket>.*)"\n'),
    }   

    """
    Do a regex search against all defined regexes and
    return the key and match result of the first matching regex

    """
    for key, rx in rx_dict.items():
        match = rx.search(line)
        if match:
            return key, match
    # if there are no matches
    return None, None

filepath = '/root/flexi-pipe/config.go'
# Walk the config file line by line and keep the value of every recognised
# `var <name> = "<value>"` declaration in a module-level variable of that name.
with open(filepath, 'r') as file_object:
    for line in file_object:
        key, match = _parse_line(line)
        if key is None:
            continue

        if key == 'token':
            token = match.group('token')
        elif key == 'url':
            url = match.group('url')
        elif key == 'org':
            org = match.group('org')
        elif key == 'bucket':
            bucket = match.group('bucket')

# The URL read from the config is deliberately replaced by the local endpoint.
# url="http://192.168.20.58:8086"
url = "http://localhost:8086"

In [171]:
# Analysis window as Unix timestamps in seconds; it is handed to experiment(),
# which keeps the runs whose start time falls inside it (bounds included).
start, end = 1696445523, 1696710919

In [172]:
def _to_unix_seconds(timestamps):
    """Convert a Series of timestamps (or timestamp strings) to whole Unix seconds.

    Uses integer floor division of the offset from the epoch, so the result is
    exact and does not depend on the resolution pandas picked for the column.
    """
    parsed = pd.to_datetime(timestamps, format="mixed")
    return (parsed - pd.Timestamp("1970-01-01")) // pd.Timedelta(seconds=1)


def experiment(start_time, end_time, filepath):
    """Load the experiment log and return the runs that started inside a window.

    Parameters
    ----------
    start_time, end_time : int
        Inclusive bounds, in Unix seconds, compared against each run's start.
    filepath : str
        Header-less CSV whose 14 columns are, in order: start, end, topology,
        runtime, parameter, d, dlo, dhi, dscore, dlazy, dout, gossipFactor,
        initialDelay, interval.

    Returns
    -------
    pandas.DataFrame
        The selected runs ordered by their ``start`` string, without the
        ``runtime`` and ``initialDelay`` columns, with ``startUnix`` and
        ``endUnix`` (whole Unix seconds) added and an ``experiment`` column
        holding each run's row position in the CSV.

    Raises
    ------
    KeyError
        If the CSV does not provide all 14 positional columns.
    """
    column_names = {0: "start", 1: "end", 2: "topology", 3: "runtime", 4: "parameter", 5: "d", 6: "dlo",
                    7: "dhi", 8: "dscore", 9: "dlazy", 10: "dout", 11: "gossipFactor", 12: "initialDelay",
                    13: "interval"}
    # read_csv already returns a DataFrame; errors='raise' rejects short files.
    experiments = pd.read_csv(filepath, header=None).rename(columns=column_names, errors='raise')

    # Keep only the first 27 characters (date, time and seven fractional
    # digits); whatever follows in the logged value is discarded.
    # NOTE(review): presumably this strips a zone/monotonic-clock suffix - confirm against the logger.
    for column in ("start", "end"):
        experiments[column] = experiments[column].str.slice(0, 27)

    # Exact integer conversion: the previous float round-trip could round a
    # timestamp lying within ~120 ns of the next second up by one second.
    experiments['startUnix'] = _to_unix_seconds(experiments["start"])
    experiments['endUnix'] = _to_unix_seconds(experiments["end"])

    # Drop fields we don't need for the moment
    exp = experiments.drop(columns=["runtime", "initialDelay"]).sort_values(by=["start"])

    expTime = exp[exp['startUnix'].between(start_time, end_time)]
    # The pre-filter row index becomes the experiment number.
    return expTime.reset_index().rename(columns={'index': 'experiment'})

# Load every run that started inside the [start, end] window and show the tail.
experiments = experiment(start_time=start, end_time=end, filepath='../experiments.csv')
experiments.tail(500)

Unnamed: 0,experiment,start,end,topology,parameter,d,dlo,dhi,dscore,dlazy,dout,gossipFactor,interval,startUnix,endUnix
0,390,2023-10-04 18:52:03.0807730,2023-10-04 19:22:03.0866518,unl,reference,8,6,12,4,8,2,0.25,1.0,1696445523,1696447323
1,391,2023-10-04 19:25:37.1533399,2023-10-04 19:55:37.1555856,unl,reference,8,6,12,4,8,2,0.25,1.0,1696447537,1696449337
2,392,2023-10-04 19:59:12.6589825,2023-10-04 20:29:12.6608307,unl,reference,8,6,12,4,8,2,0.25,1.0,1696449552,1696451352
3,393,2023-10-04 20:32:47.3098616,2023-10-04 21:02:47.3149178,unl,informed,8,6,12,4,8,2,0.25,3.0,1696451567,1696453367
4,394,2023-10-04 21:06:22.7755634,2023-10-04 21:36:22.7788800,unl,informed,8,6,12,4,8,2,0.25,3.0,1696453582,1696455382
5,395,2023-10-04 21:39:58.6528165,2023-10-04 22:09:58.6545536,unl,informed,8,6,12,4,8,2,0.25,3.0,1696455598,1696457398
6,396,2023-10-04 22:13:33.8146704,2023-10-04 22:43:33.8174094,unl,informed0,16,8,20,16,8,4,0.5,1.0,1696457613,1696459413
7,397,2023-10-04 22:47:09.3837110,2023-10-04 23:17:09.3868442,unl,informed0,16,8,20,16,8,4,0.5,1.0,1696459629,1696461429
8,398,2023-10-04 23:20:44.6001719,2023-10-04 23:50:44.6033804,unl,informed0,16,8,20,16,8,4,0.5,1.0,1696461644,1696463444
9,399,2023-10-04 23:54:20.0569526,2023-10-05 00:24:20.0600345,unl,informed1,16,8,20,16,8,4,0.5,3.0,1696463660,1696465460


In [173]:
def from_influx(url, token, org, measurement, start_time, end_time,grouping_key):
    """Fetch the ``ledger`` field of one measurement from bucket "gs" for a time window.

    Parameters
    ----------
    url, token, org : str
        InfluxDB connection settings.
    measurement : str
        Value the records' ``_measurement`` must equal.
    start_time, end_time
        Bounds inserted verbatim into the Flux ``range(start:, stop:)`` call.
    grouping_key
        Accepted for call-site compatibility; the query does not use it.

    Returns
    -------
    pandas.DataFrame
        Columns ``_time``, ``ledger`` and ``node``, ordered by ``_time`` with a
        fresh 0..n-1 index.  When the query yields no rows an empty frame with
        those three columns is returned, mirroring how ``from_influx_count``
        treats an empty result.

    Raises
    ------
    KeyError
        If the query returns rows that lack one of the three columns.
    """
    flux = (
        'from(bucket: "gs") '
        f' |> range(start: {start_time}, stop:{end_time}) '
        f' |> filter(fn: (r) => r._measurement == "{measurement}") '
        ' |> group(columns: ["_measurement", "_field", "_tag"], mode: "by") '
        ' |> pivot(rowKey:["_time", "node"], columnKey: ["_field"], valueColumn: "_value")'
    )

    client = InfluxDBClient(url=url, token=token, org=org,  timeout=900_000)
    try:
        data_frame = client.query_api().query_data_frame(flux)
    finally:
        # Release the connection even when the query raises.
        client.close()

    # query_data_frame hands back a list of frames when the result tables do
    # not share one schema; fold them into a single frame.
    if isinstance(data_frame, list):
        data_frame = pd.concat(data_frame, ignore_index=True) if data_frame else pd.DataFrame()

    wanted = ['_time', 'ledger', 'node']
    if data_frame.empty:
        empty = pd.DataFrame(columns=wanted)
        empty["_time"] = pd.to_datetime(empty["_time"], utc=True)
        return empty

    df = data_frame[wanted].sort_values(by=["_time"]).reset_index(drop=True)
    df["_time"] = pd.to_datetime(df["_time"])

    return df

In [174]:
def from_influx_count(url, token, org, start_time, end_time,grouping_key):
    """Count the ``deliverMessage`` records stored in bucket "gs" for a time window.

    The query counts records per ``_field``; the smallest of those counts is
    returned.

    Parameters
    ----------
    url, token, org : str
        InfluxDB connection settings.
    start_time, end_time
        Bounds inserted verbatim into the Flux ``range(start:, stop:)`` call.
    grouping_key
        Accepted for call-site compatibility; the query does not use it.

    Returns
    -------
    int
        The smallest per-field count, or 0 when the query returns nothing.
    """
    flux = (
        'from(bucket: "gs") '
        f' |> range(start: {start_time}, stop:{end_time}) '
        '|> filter(fn: (r) => r._measurement == "deliverMessage") '
        '|> group(columns: ["_measurement", "_field"], mode: "by") '
        '|> count()'
    )

    client = InfluxDBClient(url=url, token=token, org=org,  timeout=900_000)
    try:
        data_frame = client.query_api().query_data_frame(flux)
    finally:
        # Release the connection even when the query raises.
        client.close()

    # query_data_frame hands back a list of frames when the result tables do
    # not share one schema; fold them into a single frame.
    if isinstance(data_frame, list):
        data_frame = pd.concat(data_frame, ignore_index=True) if data_frame else pd.DataFrame()

    if data_frame.empty:
        return 0
    return int(data_frame["_value"].min())



In [175]:
#Validate data
# Ask InfluxDB how many deliverMessage records each run produced.  The rows
# are gathered in a plain list and turned into a frame once, instead of
# re-copying a growing DataFrame with pd.concat on every iteration.
param_columns = ['topology', 'd', 'dhi', 'dlo', 'dlazy', 'dscore', 'dout', 'gossipFactor', 'interval']

records = []
for index, row in experiments.iterrows():
    count = from_influx_count(url, token, org, row["startUnix"], row["endUnix"],"_measurement")
    record = {'experiment': row['experiment'], 'count': count}
    record.update({column: row[column] for column in param_columns})
    records.append(record)

validate = pd.DataFrame(records, columns=['experiment', 'count'] + param_columns)

# validate.head(200)

# Attach the counts to the run table and keep only runs with more than 1000 records.
exps = experiments.merge(validate, on=['experiment'] + param_columns)
exps = exps.loc[exps["count"]>1000]
exps.to_csv('exp_filtered.csv')
exps.head(100)

Unnamed: 0,experiment,start,end,topology,parameter,d,dlo,dhi,dscore,dlazy,dout,gossipFactor,interval,startUnix,endUnix,count
0,390,2023-10-04 18:52:03.0807730,2023-10-04 19:22:03.0866518,unl,reference,8,6,12,4,8,2,0.25,1.0,1696445523,1696447323,344487
1,391,2023-10-04 19:25:37.1533399,2023-10-04 19:55:37.1555856,unl,reference,8,6,12,4,8,2,0.25,1.0,1696447537,1696449337,244016
2,392,2023-10-04 19:59:12.6589825,2023-10-04 20:29:12.6608307,unl,reference,8,6,12,4,8,2,0.25,1.0,1696449552,1696451352,130775
3,393,2023-10-04 20:32:47.3098616,2023-10-04 21:02:47.3149178,unl,informed,8,6,12,4,8,2,0.25,3.0,1696451567,1696453367,194668
4,394,2023-10-04 21:06:22.7755634,2023-10-04 21:36:22.7788800,unl,informed,8,6,12,4,8,2,0.25,3.0,1696453582,1696455382,165538
5,395,2023-10-04 21:39:58.6528165,2023-10-04 22:09:58.6545536,unl,informed,8,6,12,4,8,2,0.25,3.0,1696455598,1696457398,64772
6,396,2023-10-04 22:13:33.8146704,2023-10-04 22:43:33.8174094,unl,informed0,16,8,20,16,8,4,0.5,1.0,1696457613,1696459413,141092
7,397,2023-10-04 22:47:09.3837110,2023-10-04 23:17:09.3868442,unl,informed0,16,8,20,16,8,4,0.5,1.0,1696459629,1696461429,125279
8,398,2023-10-04 23:20:44.6001719,2023-10-04 23:50:44.6033804,unl,informed0,16,8,20,16,8,4,0.5,1.0,1696461644,1696463444,12799
11,401,2023-10-05 01:01:29.7404689,2023-10-05 01:31:29.7447830,unl,informed1,16,8,20,16,8,4,0.5,3.0,1696467689,1696469489,24429


In [176]:
#Remove faulty executions

# Per parameter combination: median, standard deviation and mean of the
# message counts of its runs (identical rows are collapsed first).
median = (
    exps
    .drop(columns=['start', 'end', 'startUnix', 'endUnix', 'parameter', 'experiment'])
    .drop_duplicates()
    .groupby(['topology', 'd', 'dhi', 'dlo', 'dlazy', 'dscore', 'dout', 'gossipFactor', 'interval'])['count']
    .agg(['median', 'std', 'mean'])
    .reset_index()
)
# Keep only combinations whose median count is positive.
median = median[median['median'] > 0]

median.head(50)

Unnamed: 0,topology,d,dhi,dlo,dlazy,dscore,dout,gossipFactor,interval,median,std,mean
0,general,6,8,4,2,2,2,0.25,1.0,67562.0,43190.754964,61750.333333
1,general,6,8,4,2,2,4,0.25,0.5,38578.0,4038.273063,40861.333333
2,general,6,8,4,2,2,4,4.25,3.0,6168.0,,6168.0
3,general,8,12,6,8,4,2,0.25,1.0,146083.0,18854.905498,144964.333333
4,general,8,12,6,8,4,2,0.25,3.0,126305.0,13436.713003,129080.333333
5,general,8,16,6,8,4,2,0.25,0.25,12154.0,8620.271999,12467.333333
6,general,8,16,6,8,4,2,0.25,1.0,129363.0,72481.597674,103803.666667
7,general,10,16,8,8,4,4,0.25,1.0,54994.0,21867.153366,42821.666667
8,general,16,20,8,8,4,2,0.25,0.5,1360.0,,1360.0
9,general,16,20,8,8,4,2,0.25,1.0,90864.0,24190.730711,89020.666667


In [97]:
#Validate the data

# Pair every run with the count statistics of its parameter combination.
validation = exps.merge(median, on=['topology','d','dhi','dlo','dlazy','dscore','dout','gossipFactor','interval'])
# Keep the runs whose count is no more than 0.15 standard deviations below the
# mean of their combination.
validated = validation[validation['count'] >= validation['mean'] - 0.15 * validation['std']]
validated = validated.drop(columns=['startUnix', 'endUnix'])
# The original called dropna() without keeping its result, so it did nothing.
# A combination with a single run has a NaN std; the comparison above is False
# for NaN, so such runs are already excluded.  Dropping on the statistics
# columns states that rule explicitly without touching rows that merely have
# a missing value in an unrelated column.
validated = validated.dropna(subset=['median', 'std', 'mean'])

validated.head(100)

Unnamed: 0,experiment,start,end,topology,parameter,d,dlo,dhi,dscore,dlazy,dout,gossipFactor,interval,count,median,std,mean
0,390,2023-10-04 18:52:03.0807730,2023-10-04 19:22:03.0866518,unl,reference,8,6,12,4,8,2,0.25,1.0,344487,244016.0,106919.568575,239759.333333
1,391,2023-10-04 19:25:37.1533399,2023-10-04 19:55:37.1555856,unl,reference,8,6,12,4,8,2,0.25,1.0,244016,244016.0,106919.568575,239759.333333
3,393,2023-10-04 20:32:47.3098616,2023-10-04 21:02:47.3149178,unl,informed,8,6,12,4,8,2,0.25,3.0,194668,165538.0,68160.73463,141659.333333
4,394,2023-10-04 21:06:22.7755634,2023-10-04 21:36:22.7788800,unl,informed,8,6,12,4,8,2,0.25,3.0,165538,165538.0,68160.73463,141659.333333
6,396,2023-10-04 22:13:33.8146704,2023-10-04 22:43:33.8174094,unl,informed0,16,8,20,16,8,4,0.5,1.0,141092,125279.0,69953.431198,93056.666667
7,397,2023-10-04 22:47:09.3837110,2023-10-04 23:17:09.3868442,unl,informed0,16,8,20,16,8,4,0.5,1.0,125279,125279.0,69953.431198,93056.666667
12,404,2023-10-05 02:42:12.1044873,2023-10-05 03:12:12.1564872,unl,informed2,16,8,20,16,8,4,0.5,0.5,156688,108285.0,34060.042166,118649.666667
13,405,2023-10-05 03:15:44.6418112,2023-10-05 03:45:44.6440687,unl,informed3,6,4,8,2,2,2,0.25,1.0,307237,307237.0,158819.787177,239980.0
14,406,2023-10-05 03:49:18.1204295,2023-10-05 04:19:18.1229836,unl,informed3,6,4,8,2,2,2,0.25,1.0,354105,307237.0,158819.787177,239980.0
18,412,2023-10-05 07:10:41.3942831,2023-10-05 07:40:41.4330557,unl,informed5,6,4,8,2,2,4,0.25,0.5,129844,129844.0,40185.142379,118570.0


In [100]:
#experiment,n_nodes,topology,d,dlo,dhi,dscore,dlazy,dout,gossipFactor,interval,start,end,overhead,propTime,bandwidth,totalMessages,totalrpc

# One row per distinct parameter combination among the validated runs; the
# index label of the first run of each combination becomes its identifier.
final = (
    validated
    .drop(columns=['experiment', 'start', 'end', 'parameter', 'count', 'std', 'median', 'mean'])
    .drop_duplicates()
    .assign(n_nodes=24)
    .reset_index()
    .rename(columns={'index': 'identifier'})
)
final.head(100)
 

Unnamed: 0,identifier,topology,d,dlo,dhi,dscore,dlazy,dout,gossipFactor,interval,n_nodes
0,0,unl,8,6,12,4,8,2,0.25,1.0,24
1,3,unl,8,6,12,4,8,2,0.25,3.0,24
2,6,unl,16,8,20,16,8,4,0.5,1.0,24
3,12,unl,16,8,20,16,8,4,0.5,0.5,24
4,13,unl,6,4,8,2,2,2,0.25,1.0,24
5,18,unl,6,4,8,2,2,4,0.25,0.5,24
6,21,unl,20,16,24,8,16,8,0.5,1.0,24
7,23,unl,16,8,20,4,8,2,0.5,1.0,24
8,27,unl,16,8,20,8,8,4,0.25,1.0,24
9,29,unl,16,8,20,4,8,2,0.25,1.0,24


In [110]:
#Limit extraction to 15 minutes

# Amount cut from each end of a run: start moves forward and end moves back by
# 7 min 30 s.
trim = pd.Timedelta(minutes=7, seconds=30)
unix_epoch = pd.Timestamp("1970-01-01")
one_second = pd.Timedelta(seconds=1)

experiments = validated.drop(columns=['count','median', 'mean','std']).rename(columns={'start':'originalStart','end':'originalEnd'})

experiments['originalStart'] = pd.to_datetime(experiments["originalStart"], format='mixed')
experiments['originalEnd'] = pd.to_datetime(experiments["originalEnd"], format='mixed')

experiments['start'] = experiments['originalStart'] + trim
experiments['end'] = experiments['originalEnd'] - trim

# Whole Unix seconds by exact integer floor division.  The earlier
# int -> float -> timedelta -> float -> int chain could round a timestamp lying
# within ~120 ns of the next second up by one, and it assumed nanosecond
# resolution for the datetime column.
experiments['startUnix'] = (experiments['start'] - unix_epoch) // one_second
experiments['endUnix'] = (experiments['end'] - unix_epoch) // one_second

experiments.to_csv('experiments_filtered.csv')
experiments.head(10)

Unnamed: 0,experiment,originalStart,originalEnd,topology,parameter,d,dlo,dhi,dscore,dlazy,dout,gossipFactor,interval,start,end,startUnix,endUnix
0,390,2023-10-04 18:52:03.080773000,2023-10-04 19:22:03.086651800,unl,reference,8,6,12,4,8,2,0.25,1.0,2023-10-04 18:59:33.080773000,2023-10-04 19:14:33.086651800,1696445973,1696446873
1,391,2023-10-04 19:25:37.153339900,2023-10-04 19:55:37.155585600,unl,reference,8,6,12,4,8,2,0.25,1.0,2023-10-04 19:33:07.153339900,2023-10-04 19:48:07.155585600,1696447987,1696448887
3,393,2023-10-04 20:32:47.309861600,2023-10-04 21:02:47.314917800,unl,informed,8,6,12,4,8,2,0.25,3.0,2023-10-04 20:40:17.309861600,2023-10-04 20:55:17.314917800,1696452017,1696452917
4,394,2023-10-04 21:06:22.775563400,2023-10-04 21:36:22.778880000,unl,informed,8,6,12,4,8,2,0.25,3.0,2023-10-04 21:13:52.775563400,2023-10-04 21:28:52.778880000,1696454032,1696454932
6,396,2023-10-04 22:13:33.814670400,2023-10-04 22:43:33.817409400,unl,informed0,16,8,20,16,8,4,0.5,1.0,2023-10-04 22:21:03.814670400,2023-10-04 22:36:03.817409400,1696458063,1696458963
7,397,2023-10-04 22:47:09.383711000,2023-10-04 23:17:09.386844200,unl,informed0,16,8,20,16,8,4,0.5,1.0,2023-10-04 22:54:39.383711000,2023-10-04 23:09:39.386844200,1696460079,1696460979
12,404,2023-10-05 02:42:12.104487300,2023-10-05 03:12:12.156487200,unl,informed2,16,8,20,16,8,4,0.5,0.5,2023-10-05 02:49:42.104487300,2023-10-05 03:04:42.156487200,1696474182,1696475082
13,405,2023-10-05 03:15:44.641811200,2023-10-05 03:45:44.644068700,unl,informed3,6,4,8,2,2,2,0.25,1.0,2023-10-05 03:23:14.641811200,2023-10-05 03:38:14.644068700,1696476194,1696477094
14,406,2023-10-05 03:49:18.120429500,2023-10-05 04:19:18.122983600,unl,informed3,6,4,8,2,2,2,0.25,1.0,2023-10-05 03:56:48.120429500,2023-10-05 04:11:48.122983600,1696478208,1696479108
18,412,2023-10-05 07:10:41.394283100,2023-10-05 07:40:41.433055700,unl,informed5,6,4,8,2,2,4,0.25,0.5,2023-10-05 07:18:11.394283100,2023-10-05 07:33:11.433055700,1696490291,1696491191


In [111]:
# final = final.loc[final["identifier"]<9]
# final.head(10)

In [125]:
#Consensus records
# For every parameter combination in `final`, query the "consensus"
# measurement over the window of each matching run and tag the returned rows
# with the combination's identifier and the run's experiment number and window.

exps = experiments.drop(columns=['originalStart', 'originalEnd'])
config_columns = ["topology", "d", "dlo", "dhi", "dscore", "dlazy", "dout", "gossipFactor", "interval"]

# Per-run frames are collected and concatenated once at the end; concatenating
# inside the loop re-copies everything gathered so far on every iteration.
frames = []

for index, row in final.iterrows():
    # Runs whose parameters all equal this combination's.
    same_config = pd.Series(True, index=exps.index)
    for column in config_columns:
        same_config &= exps[column] == row[column]
    execs = exps.loc[same_config]

    # `run` rather than `exec`, which would shadow the builtin of that name.
    for idx, run in execs.iterrows():
        query_con = from_influx(url, token, org, "consensus", run["startUnix"], run["endUnix"],"_measurement")

        query_con["identifier"] = row["identifier"]
        query_con["experiment"] = run["experiment"]
        query_con["start"] = run["start"]
        query_con["end"] = run["end"]

        # from_influx already returns `_time` as datetimes, so no re-parse here.
        frames.append(query_con)

message = pd.concat(frames) if frames else pd.DataFrame()

message.head(10)


Unnamed: 0,_time,ledger,node,identifier,experiment,start,end
0,2023-10-04 18:59:33.736232+00:00,540,sminardi,0,390,2023-10-04 18:59:33.080773,2023-10-04 19:14:33.086651800
1,2023-10-04 18:59:33.855173+00:00,540,caterham,0,390,2023-10-04 18:59:33.080773,2023-10-04 19:14:33.086651800
2,2023-10-04 18:59:33.885739+00:00,540,sligier,0,390,2023-10-04 18:59:33.080773,2023-10-04 19:14:33.086651800
3,2023-10-04 18:59:33.939139+00:00,540,toleman,0,390,2023-10-04 18:59:33.080773,2023-10-04 19:14:33.086651800
4,2023-10-04 18:59:33.960842+00:00,540,minardi,0,390,2023-10-04 18:59:33.080773,2023-10-04 19:14:33.086651800
5,2023-10-04 18:59:33.997923+00:00,540,vsauber,0,390,2023-10-04 18:59:33.080773,2023-10-04 19:14:33.086651800
6,2023-10-04 18:59:34.023833+00:00,540,benetton,0,390,2023-10-04 18:59:33.080773,2023-10-04 19:14:33.086651800
7,2023-10-04 18:59:34.042268+00:00,540,vbrawn,0,390,2023-10-04 18:59:33.080773,2023-10-04 19:14:33.086651800
8,2023-10-04 18:59:34.146305+00:00,540,lotus,0,390,2023-10-04 18:59:33.080773,2023-10-04 19:14:33.086651800
9,2023-10-04 18:59:34.159556+00:00,540,sauber,0,390,2023-10-04 18:59:33.080773,2023-10-04 19:14:33.086651800


In [114]:
# Write the collected consensus records to a CSV file.
message.to_csv(path_or_buf='consensusMessage.csv')

In [127]:
#Time delta
dfNoIndex = message.reset_index()
dfNoIndex = dfNoIndex.sort_values(by=['identifier', 'experiment', 'node', '_time'])
dfTime = dfNoIndex.drop(columns=['start', 'end'])
dfTime['_time'] = pd.to_datetime(dfTime["_time"], format='mixed')#.tz_localize(None)

dfTime['delta'] = dfTime['_time'].diff().fillna(datetime.timedelta(0)).apply(lambda x: x.total_seconds())

dfTime.head(100)


Unnamed: 0,index,_time,ledger,node,identifier,experiment,delta
6,6,2023-10-04 18:59:34.023833+00:00,540,benetton,0,390,0.0
29,29,2023-10-04 18:59:37.035846+00:00,541,benetton,0,390,3.012013
52,52,2023-10-04 18:59:40.041041+00:00,542,benetton,0,390,3.005195
75,75,2023-10-04 18:59:43.049388+00:00,543,benetton,0,390,3.008347
98,98,2023-10-04 18:59:46.059680+00:00,544,benetton,0,390,3.010292
121,121,2023-10-04 18:59:49.066575+00:00,545,benetton,0,390,3.006895
144,144,2023-10-04 18:59:52.066386+00:00,546,benetton,0,390,2.999811
167,167,2023-10-04 18:59:55.077639+00:00,547,benetton,0,390,3.011253
190,190,2023-10-04 18:59:58.081975+00:00,548,benetton,0,390,3.004336
213,213,2023-10-04 19:00:01.099488+00:00,549,benetton,0,390,3.017513


In [165]:
# Mean delta per run, using only deltas of at least 0.1 s and below 20 s.
df = dfTime.drop(columns=["_time", "node", "ledger", "index"])
avgPropExp = (
    df.loc[(df['delta'] < 20) & (df['delta'] >= 0.1)]
    .groupby(['experiment', 'identifier'])
    .mean()
    .reset_index()
)
avgPropExp.head(500)

Unnamed: 0,experiment,identifier,delta
0,390,0,3.018746
1,391,0,3.026998
2,393,3,3.025818
3,394,3,3.027125
4,396,6,3.028129
5,397,6,3.026944
6,404,12,3.073917
7,405,13,3.01648
8,406,13,3.061136
9,412,18,3.009081


In [166]:
# Mean and standard deviation of the per-run mean deltas, per identifier.
avgProp = avgPropExp.drop(columns=['experiment'])
avgBand = avgProp.groupby(['identifier'])['delta'].agg(['mean', 'std']).reset_index()

avgBand.head(100)

Unnamed: 0,identifier,mean,std
0,0,3.022872,0.005835
1,3,3.026471,0.000925
2,6,3.027537,0.000838
3,12,3.073917,
4,13,3.038808,0.031576
5,18,3.014395,0.007515
6,21,3.029904,0.002301
7,23,3.021019,0.000279
8,27,3.025704,
9,29,3.044116,0.033411


In [177]:
# Attach each combination's mean delta as `consensus` and write the table out.
finalBandwidth = (
    final
    .merge(avgBand, on='identifier')
    .drop(columns=['std'])
    .rename(columns={'mean': 'consensus'})
)

finalBandwidth.to_csv('consensus.csv')
finalBandwidth.head(100)

Unnamed: 0,identifier,topology,d,dlo,dhi,dscore,dlazy,dout,gossipFactor,interval,n_nodes,consensus
0,0,unl,8,6,12,4,8,2,0.25,1.0,24,3.022872
1,3,unl,8,6,12,4,8,2,0.25,3.0,24,3.026471
2,6,unl,16,8,20,16,8,4,0.5,1.0,24,3.027537
3,12,unl,16,8,20,16,8,4,0.5,0.5,24,3.073917
4,13,unl,6,4,8,2,2,2,0.25,1.0,24,3.038808
5,18,unl,6,4,8,2,2,4,0.25,0.5,24,3.014395
6,21,unl,20,16,24,8,16,8,0.5,1.0,24,3.029904
7,23,unl,16,8,20,4,8,2,0.5,1.0,24,3.021019
8,27,unl,16,8,20,8,8,4,0.25,1.0,24,3.025704
9,29,unl,16,8,20,4,8,2,0.25,1.0,24,3.044116
