In [1]:
import pandas as pd
import numpy as np

from influxdb_client import InfluxDBClient, Point, Dialect

import re
import time
import datetime

import warnings
from influxdb_client.client.warnings import MissingPivotFunction

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.colors as colors

import pandasql as ps
import sqlite3

import csv

import warnings 
from influxdb_client.client.warnings import MissingPivotFunction
warnings.simplefilter("ignore", MissingPivotFunction)

import gc

pd.set_option('display.max_rows', 500)

In [14]:
def _parse_line(line):

    rx_dict = {
    'token': re.compile(r'var token = "(?P<token>.*)"\n'),
    'url': re.compile(r'var url = "(?P<url>.*)"\n'),
    'org': re.compile(r'var org = "(?P<org>.*)"\n'),
    'bucket': re.compile(r'var bucket = "(?P<bucket>.*)"\n'),
    }   

    """
    Do a regex search against all defined regexes and
    return the key and match result of the first matching regex

    """
    for key, rx in rx_dict.items():
        match = rx.search(line)
        if match:
            return key, match
    # if there are no matches
    return None, None

filepath = '/root/flexi-pipe/config.go'
# open the file and read through it line by line
with open(filepath, 'r') as file_object:
    line = file_object.readline()
    while line:
        # at each line check for a match with a regex
        key, match = _parse_line(line)

        if key == 'token':
            token = match.group('token')
        elif key == 'url':
            url = match.group('url')
        elif key == 'org':
            org = match.group('org')
        elif key == 'bucket':
            bucket = match.group('bucket')
        
        line = file_object.readline()
# url="http://192.168.20.58:8086"
url = "http://localhost:8086"

In [2]:
start = 1696426715
end = 1700624814

In [3]:
def experiment(start_time, end_time, filepath):
    # Retrieve experiments data from csv
    data = pd.read_csv(filepath, header=None)
    df = pd.DataFrame(data)

    #Rename columns
    experiments = df.rename(columns={0: "start", 1: "end", 2: "topology", 3: "runtime", 4: "parameter", 5: "d", 6: "dlo", 7: "dhi", 8: "dscore", 9: "dlazy", 10: "dout", 11: "gossipFactor", 12: "initialDelay", 13: "interval"}, errors='raise')

    #Correct timestamp
    experiments["start"] = experiments["start"].str.slice(0, 27)
    experiments["end"] = experiments["end"].str.slice(0, 27)

    #String to timestamp
    # experiments['startUnix'] = pd.to_datetime(experiments["start"],format="%Y-%m-%d %H:%M:%S.%f").astype('int64') / 10**9
    # experiments['endUnix'] = pd.to_datetime(experiments["end"],format="%Y-%m-%d %H:%M:%S.%f").astype('int64') / 10**9
    experiments['startUnix'] = pd.to_datetime(experiments["start"],format="mixed", infer_datetime_format=True).astype('int64') / 10**9
    experiments['endUnix'] = pd.to_datetime(experiments["end"],format="mixed", infer_datetime_format=True).astype('int64') / 10**9


    experiments['startUnix'] = pd.to_timedelta(experiments['startUnix'], unit='s').dt.total_seconds().astype(int)#.astype(str)
    experiments['endUnix'] = pd.to_timedelta(experiments['endUnix'], unit='s').dt.total_seconds().astype(int)#.astype(str)

    #Drop fields we don't mneed for the moment
    exp = experiments.drop(columns=["runtime", "initialDelay"]).sort_values(by=["start"])

    #Get times for different intervals
    # intervals = exp["interval"].drop_duplicates().sort_values().reset_index(drop=True)
    # intervals.head(10)

    expTime = exp[exp['startUnix'].astype(int).between(start_time, end_time)]
    # expTime['experiment'] = expTime.index
    expTime = expTime.reset_index().rename({'index':'experiment'}, axis = 'columns')

    return expTime

experiments = experiment(start, end, '../../datasets/experiments.csv')
experiments.tail(5)

  experiments['startUnix'] = pd.to_datetime(experiments["start"],format="mixed", infer_datetime_format=True).astype('int64') / 10**9
  experiments['endUnix'] = pd.to_datetime(experiments["end"],format="mixed", infer_datetime_format=True).astype('int64') / 10**9


Unnamed: 0,experiment,start,end,topology,parameter,d,dlo,dhi,dscore,dlazy,dout,gossipFactor,interval,startUnix,endUnix
274,660,2023-11-22 01:29:06.3602456,2023-11-22 01:59:06.3648203,general,informed15,6,3,8,2,2,4,0.25,0.5,1700616546,1700618346
275,661,2023-11-22 02:02:40.2343885,2023-11-22 02:32:40.2368451,general,informed15,6,3,8,2,2,4,0.25,0.5,1700618560,1700620360
276,662,2023-11-22 02:36:14.2823737,2023-11-22 03:06:14.2932442,general,informed15,6,3,8,2,2,4,0.25,0.5,1700620574,1700622374
277,663,2023-11-22 03:09:48.5789046,2023-11-22 03:39:48.5860069,general,informed16,21,16,24,5,16,8,0.5,1.0,1700622588,1700624388
278,664,2023-11-22 03:43:22.2716142,2023-11-22 04:13:22.2769946,general,informed16,21,16,24,5,16,8,0.5,1.0,1700624602,1700626402


In [18]:
def from_influx(url, token, org, measurement, start_time, end_time,grouping_key):
    client = InfluxDBClient(url=url, token=token, org=org,  timeout=900_000)

    # write_api = client.write_api(write_options=SYNCHRONOUS)
    query_api = client.query_api()

    data_frame = query_api.query_data_frame('from(bucket: "gs") '
                                        ' |> range(start: '+str(start_time)+', stop:'+str(end_time)+') '
                                        ' |> filter(fn: (r) => r._measurement == "'+measurement+'") '
                                        ' |> group(columns: ["_measurement", "_field"], mode: "by") '
                                        ' |> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")')
    client.close()

    # df = data_frame.drop(columns=['result', 'table','_start', '_stop', '_measurement', 'topic', 'receivedFrom']).sort_values(by=["_time"]).reset_index(drop=True)
    data_frame.reset_index(inplace=True)
    df = data_frame[['_time', grouping_key]].sort_values(by=["_time"]).reset_index(drop=True)
    df["_time"] = pd.to_datetime(df["_time"])

    return df

In [19]:
def from_influx_count(url, token, org, start_time, end_time,grouping_key):
    # start_time = 1693222601 
    # end_time = 1693228430    
    client = InfluxDBClient(url=url, token=token, org=org,  timeout=900_000)
        
        # write_api = client.write_api(write_options=SYNCHRONOUS)
    query_api = client.query_api()
        
    data_frame = query_api.query_data_frame('from(bucket: "gs") '
                       ' |> range(start: '+str(start_time)+', stop:'+str(end_time)+') '
                        '|> filter(fn: (r) => r._measurement == "deliverMessage") '
                        '|> group(columns: ["_measurement", "_field"], mode: "by") '
                        '|> count()')
    client.close()

    if data_frame.empty:
        count = 0
    else:
        count = data_frame["_value"].min().astype(int)
    return count



In [4]:
#Validate data
# validate = pd.DataFrame()

# for index, row in experiments.iterrows():
#     count = from_influx_count(url, token, org, row["startUnix"], row["endUnix"],"_measurement")
#     d = {'experiment': [row['experiment']], 'count': [count], 'topology': [row["topology"]], 'd': [row["d"]],'dhi': [row["dhi"]],'dlo': [row["dlo"]],'dlazy': [row["dlazy"]],
#         'dscore': [row["dscore"]],'dout': [row["dout"]],'gossipFactor': [row["gossipFactor"]],'interval': [row["interval"]],}
#     aux = pd.DataFrame(data=d)
#     validate = pd.concat([validate, aux])

# # validate.head(200)

# exps = experiments.merge(validate, on=['experiment', 'topology','d','dhi','dlo','dlazy','dscore','dout','gossipFactor','interval'])
# exps = exps.loc[exps["count"]>1000]
# exps.to_csv('exp_filtered.csv')
data = pd.read_csv('./exp_filtered.csv', header=0,  index_col=0)
exps = pd.DataFrame(data)

exps.head(100)

Unnamed: 0,experiment,start,end,topology,parameter,d,dlo,dhi,dscore,dlazy,dout,gossipFactor,interval,startUnix,endUnix,count
0,386,2023-10-04 14:18:48.4409855,2023-10-04 14:33:48.4430498,unl,reference,8,6,12,4,8,2,0.25,1.0,1696429128,1696430028,128631
3,389,2023-10-04 17:00:47.8475357,2023-10-04 17:15:47.8502305,unl,reference,8,6,12,4,8,2,0.25,1.0,1696438847,1696439747,18248
4,390,2023-10-04 18:52:03.0807730,2023-10-04 19:22:03.0866518,unl,reference,8,6,12,4,8,2,0.25,1.0,1696445523,1696447323,344487
5,391,2023-10-04 19:25:37.1533399,2023-10-04 19:55:37.1555856,unl,reference,8,6,12,4,8,2,0.25,1.0,1696447537,1696449337,244016
6,392,2023-10-04 19:59:12.6589825,2023-10-04 20:29:12.6608307,unl,reference,8,6,12,4,8,2,0.25,1.0,1696449552,1696451352,130775
7,393,2023-10-04 20:32:47.3098616,2023-10-04 21:02:47.3149178,unl,informed,8,6,12,4,8,2,0.25,3.0,1696451567,1696453367,194668
8,394,2023-10-04 21:06:22.7755634,2023-10-04 21:36:22.7788800,unl,informed,8,6,12,4,8,2,0.25,3.0,1696453582,1696455382,165538
9,395,2023-10-04 21:39:58.6528165,2023-10-04 22:09:58.6545536,unl,informed,8,6,12,4,8,2,0.25,3.0,1696455598,1696457398,64772
10,396,2023-10-04 22:13:33.8146704,2023-10-04 22:43:33.8174094,unl,informed0,16,8,20,16,8,4,0.5,1.0,1696457613,1696459413,141092
11,397,2023-10-04 22:47:09.3837110,2023-10-04 23:17:09.3868442,unl,informed0,16,8,20,16,8,4,0.5,1.0,1696459629,1696461429,125279


In [5]:
data = pd.read_csv('./overhead.csv', header=0,  index_col=0)
final = pd.DataFrame(data)

final.head(10)

Unnamed: 0,experiment,start,end,topology,d,dlo,dhi,dscore,dlazy,dout,gossipFactor,interval,bandwidth,messageOverhead
0,390,2023-10-04 18:52:03.0807730,2023-10-04 19:22:03.0866518,unl,8,6,12,4,8,2,0.25,1.0,285.933511,2473589
1,391,2023-10-04 19:25:37.1533399,2023-10-04 19:55:37.1555856,unl,8,6,12,4,8,2,0.25,1.0,195.675311,1681977
2,392,2023-10-04 19:59:12.6589825,2023-10-04 20:29:12.6608307,unl,8,6,12,4,8,2,0.25,1.0,86.2688,1379082
3,393,2023-10-04 20:32:47.3098616,2023-10-04 21:02:47.3149178,unl,8,6,12,4,8,2,0.25,3.0,108.486362,1948016
4,394,2023-10-04 21:06:22.7755634,2023-10-04 21:36:22.7788800,unl,8,6,12,4,8,2,0.25,3.0,94.085382,1843504
5,395,2023-10-04 21:39:58.6528165,2023-10-04 22:09:58.6545536,unl,8,6,12,4,8,2,0.25,3.0,36.928853,687571
6,396,2023-10-04 22:13:33.8146704,2023-10-04 22:43:33.8174094,unl,16,8,20,16,8,4,0.5,1.0,79.777656,1428674
7,397,2023-10-04 22:47:09.3837110,2023-10-04 23:17:09.3868442,unl,16,8,20,16,8,4,0.5,1.0,69.534879,1428254
8,398,2023-10-04 23:20:44.6001719,2023-10-04 23:50:44.6033804,unl,16,8,20,16,8,4,0.5,1.0,7.107765,141417
9,402,2023-10-05 01:35:05.2471450,2023-10-05 02:05:05.2498822,unl,16,8,20,16,8,4,0.5,0.5,75.661552,289177


In [6]:
#Remove fault executions

#get median
median = exps.drop(columns=['start','end','startUnix','endUnix', 'parameter', 'experiment']).drop_duplicates()
median = median.groupby(['topology','d','dhi','dlo','dlazy','dscore','dout','gossipFactor','interval']).agg({'count':['median', 'std', 'mean']})
median.columns = median.columns.droplevel(0)
median.reset_index(inplace=True)
median = median.loc[median['median'] > 0]

median.head(50)

Unnamed: 0,topology,d,dhi,dlo,dlazy,dscore,dout,gossipFactor,interval,median,std,mean
0,general,5,7,4,2,3,2,0.25,1.0,84236.0,33491.11197,95775.0
1,general,6,8,3,2,2,4,0.25,0.5,37796.5,42383.273358,37796.5
2,general,6,8,4,2,2,2,0.25,1.0,62045.5,31217.900784,63183.833333
3,general,6,8,4,2,2,4,0.25,0.5,38482.0,19105.694839,27177.6
4,general,6,8,4,2,2,4,0.25,3.0,61654.5,70799.066466,61654.5
5,general,6,8,4,2,2,4,4.25,3.0,6168.0,,6168.0
6,general,6,8,4,5,7,4,0.25,3.0,38317.0,45556.061485,38317.0
7,general,8,12,6,6,4,2,0.15,3.0,94376.0,13164.063823,100061.666667
8,general,8,12,6,8,4,2,0.25,1.0,129832.0,29062.181435,126888.166667
9,general,8,12,6,8,4,2,0.25,3.0,125585.0,19714.69003,124042.833333


In [7]:
#Validate the data

validation = exps.merge(median, on=['topology','d','dhi','dlo','dlazy','dscore','dout','gossipFactor','interval'])
validated = validation[(validation['count'] >= validation['mean']-(0.15*validation['std']))]
validated = validated.drop(columns=['startUnix', 'endUnix'])
validated.dropna()

validated.head(100)

Unnamed: 0,experiment,start,end,topology,parameter,d,dlo,dhi,dscore,dlazy,dout,gossipFactor,interval,count,median,std,mean
2,390,2023-10-04 18:52:03.0807730,2023-10-04 19:22:03.0866518,unl,reference,8,6,12,4,8,2,0.25,1.0,344487,193219.5,146176.585534,228177.0
3,391,2023-10-04 19:25:37.1533399,2023-10-04 19:55:37.1555856,unl,reference,8,6,12,4,8,2,0.25,1.0,244016,193219.5,146176.585534,228177.0
6,505,2023-11-07 18:12:32.6142390,2023-11-07 18:42:32.6167070,unl,reference,8,6,12,4,8,2,0.25,1.0,408150,193219.5,146176.585534,228177.0
7,506,2023-11-07 18:46:01.6133827,2023-11-07 19:16:01.6158393,unl,reference,8,6,12,4,8,2,0.25,1.0,408686,193219.5,146176.585534,228177.0
8,393,2023-10-04 20:32:47.3098616,2023-10-04 21:02:47.3149178,unl,informed,8,6,12,4,8,2,0.25,3.0,194668,162285.0,90760.8064,175292.5
9,394,2023-10-04 21:06:22.7755634,2023-10-04 21:36:22.7788800,unl,informed,8,6,12,4,8,2,0.25,3.0,165538,162285.0,90760.8064,175292.5
11,507,2023-11-07 19:19:31.9016456,2023-11-07 19:49:31.9049682,unl,informed,8,6,12,4,8,2,0.25,3.0,337252,162285.0,90760.8064,175292.5
14,396,2023-10-04 22:13:33.8146704,2023-10-04 22:43:33.8174094,unl,informed0,16,8,20,16,8,4,0.5,1.0,141092,131035.5,74389.509136,106159.333333
15,397,2023-10-04 22:47:09.3837110,2023-10-04 23:17:09.3868442,unl,informed0,16,8,20,16,8,4,0.5,1.0,125279,131035.5,74389.509136,106159.333333
17,510,2023-11-07 21:00:03.5598397,2023-11-07 21:30:03.5622902,unl,informed0,16,8,20,16,8,4,0.5,1.0,136792,131035.5,74389.509136,106159.333333


In [23]:
#Limit extraction to 15 minutes

experiments = validated.drop(columns=['count','median', 'mean','std']).rename(columns={'start':'originalStart','end':'originalEnd'})

experiments['originalStart'] = pd.to_datetime(experiments["originalStart"], format='mixed')
experiments['originalEnd'] = pd.to_datetime(experiments["originalEnd"], format='mixed')

experiments['start'] = experiments['originalStart'] + pd.Timedelta(hours=0, minutes=7, seconds=30)
experiments['end'] = experiments['originalEnd'] - pd.Timedelta(hours=0, minutes=7, seconds=30)

experiments['startUnix'] = pd.to_datetime(experiments["start"],format="mixed").astype('int64') / 10**9
experiments['endUnix'] = pd.to_datetime(experiments["end"],format="mixed").astype('int64') / 10**9

experiments['startUnix'] = pd.to_timedelta(experiments['startUnix'], unit='s').dt.total_seconds().astype(int)#.astype(str)
experiments['endUnix'] = pd.to_timedelta(experiments['endUnix'], unit='s').dt.total_seconds().astype(int)#.astype(str)

experiments.to_csv('experiments_filtered.csv')
experiments.head(10)
 

Unnamed: 0,experiment,originalStart,originalEnd,topology,parameter,d,dlo,dhi,dscore,dlazy,dout,gossipFactor,interval,start,end,startUnix,endUnix
2,390,2023-10-04 18:52:03.080773000,2023-10-04 19:22:03.086651800,unl,reference,8,6,12,4,8,2,0.25,1.0,2023-10-04 18:59:33.080773000,2023-10-04 19:14:33.086651800,1696445973,1696446873
3,391,2023-10-04 19:25:37.153339900,2023-10-04 19:55:37.155585600,unl,reference,8,6,12,4,8,2,0.25,1.0,2023-10-04 19:33:07.153339900,2023-10-04 19:48:07.155585600,1696447987,1696448887
6,505,2023-11-07 18:12:32.614239000,2023-11-07 18:42:32.616707000,unl,reference,8,6,12,4,8,2,0.25,1.0,2023-11-07 18:20:02.614239000,2023-11-07 18:35:02.616707000,1699381202,1699382102
7,506,2023-11-07 18:46:01.613382700,2023-11-07 19:16:01.615839300,unl,reference,8,6,12,4,8,2,0.25,1.0,2023-11-07 18:53:31.613382700,2023-11-07 19:08:31.615839300,1699383211,1699384111
8,393,2023-10-04 20:32:47.309861600,2023-10-04 21:02:47.314917800,unl,informed,8,6,12,4,8,2,0.25,3.0,2023-10-04 20:40:17.309861600,2023-10-04 20:55:17.314917800,1696452017,1696452917
9,394,2023-10-04 21:06:22.775563400,2023-10-04 21:36:22.778880000,unl,informed,8,6,12,4,8,2,0.25,3.0,2023-10-04 21:13:52.775563400,2023-10-04 21:28:52.778880000,1696454032,1696454932
11,507,2023-11-07 19:19:31.901645600,2023-11-07 19:49:31.904968200,unl,informed,8,6,12,4,8,2,0.25,3.0,2023-11-07 19:27:01.901645600,2023-11-07 19:42:01.904968200,1699385221,1699386121
14,396,2023-10-04 22:13:33.814670400,2023-10-04 22:43:33.817409400,unl,informed0,16,8,20,16,8,4,0.5,1.0,2023-10-04 22:21:03.814670400,2023-10-04 22:36:03.817409400,1696458063,1696458963
15,397,2023-10-04 22:47:09.383711000,2023-10-04 23:17:09.386844200,unl,informed0,16,8,20,16,8,4,0.5,1.0,2023-10-04 22:54:39.383711000,2023-10-04 23:09:39.386844200,1696460079,1696460979
17,510,2023-11-07 21:00:03.559839700,2023-11-07 21:30:03.562290200,unl,informed0,16,8,20,16,8,4,0.5,1.0,2023-11-07 21:07:33.559839700,2023-11-07 21:22:33.562290200,1699391253,1699392153


In [24]:
publishMessage = pd.DataFrame()
deliverMessage = pd.DataFrame()

for index, row in final.iterrows():
    # print(row["start"], row["end"], row["experiment"])
    execs = exps.loc[(exps["topology"] == row["topology"]) & (exps["d"] == row["d"]) &
                            (exps["dlo"] == row["dlo"]) & (exps["dhi"] == row["dhi"]) &
                            (exps["dscore"] == row["dscore"]) & (exps["dlazy"] == row["dlazy"]) &
                            (exps["dout"] == row["dout"]) & (exps["gossipFactor"] == row["gossipFactor"]) &                    
                            (exps["interval"] == row["interval"])]

    for idx, exec in execs.iterrows():
        # print(exec["startUnix"], exec["endUnix"])
        query_mess = from_influx(url, token, org, "deliverMessage", exec["startUnix"], exec["endUnix"],"messageID")
        query_pub = from_influx(url, token, org, "publishMessage", exec["startUnix"], exec["endUnix"],"messageID")

        query_mess["experiment"] = exec["experiment"]
        query_pub["experiment"] = exec["experiment"]
    
        query_mess["_time"] = pd.to_datetime(query_mess["_time"])
        query_pub["_time"] = pd.to_datetime(query_pub["_time"])
    
        deliverMessage = pd.concat([query_mess, deliverMessage])
        publishMessage = pd.concat([publishMessage, query_pub])

        query_mess.to_csv('./mess2_'+str(exec["experiment"])+'.csv')
        query_pub.to_csv('./pub_'+str(exec["experiment"])+'.csv')

        del query_pub
        del query_mess

        gc.collect()

deliverMessage.head(10)


Unnamed: 0,_time,messageID,experiment
0,2023-11-22 03:43:22.259545+00:00,ACQIARIgyvYjy7hyNGKdR2St/W+uynGD3jRp3L9MblBO1t...,664
1,2023-11-22 03:43:22.259663+00:00,ACQIARIgyvYjy7hyNGKdR2St/W+uynGD3jRp3L9MblBO1t...,664
2,2023-11-22 03:43:22.259694+00:00,ACQIARIgyvYjy7hyNGKdR2St/W+uynGD3jRp3L9MblBO1t...,664
3,2023-11-22 03:43:22.260048+00:00,ACQIARIgyvYjy7hyNGKdR2St/W+uynGD3jRp3L9MblBO1t...,664
4,2023-11-22 03:43:22.260194+00:00,ACQIARIgyvYjy7hyNGKdR2St/W+uynGD3jRp3L9MblBO1t...,664
5,2023-11-22 03:43:22.260626+00:00,ACQIARIgyvYjy7hyNGKdR2St/W+uynGD3jRp3L9MblBO1t...,664
6,2023-11-22 03:43:22.260715+00:00,ACQIARIgyvYjy7hyNGKdR2St/W+uynGD3jRp3L9MblBO1t...,664
7,2023-11-22 03:43:22.260880+00:00,ACQIARIgyvYjy7hyNGKdR2St/W+uynGD3jRp3L9MblBO1t...,664
8,2023-11-22 03:43:22.260915+00:00,ACQIARIgyvYjy7hyNGKdR2St/W+uynGD3jRp3L9MblBO1t...,664
9,2023-11-22 03:43:22.260997+00:00,ACQIARIgyvYjy7hyNGKdR2St/W+uynGD3jRp3L9MblBO1t...,664


In [9]:
publishMessage = pd.DataFrame()
deliverMessage = pd.DataFrame()

for index, row in final.iterrows():
    data = pd.read_csv('./mess2_'+str(row['experiment'])+'.csv', header=0,  index_col=0)
    query_mess = pd.DataFrame(data)

    data = pd.read_csv('./pub_'+str(row['experiment'])+'.csv', header=0,  index_col=0)
    query_pub = pd.DataFrame(data)
    
    deliverMessage = pd.concat([query_mess, deliverMessage])
    publishMessage = pd.concat([publishMessage, query_pub])

    del query_pub
    del query_mess

    gc.collect()

deliverMessage.head(10)


Unnamed: 0,_time,messageID,experiment
0,2023-11-22 03:43:22.259545+00:00,ACQIARIgyvYjy7hyNGKdR2St/W+uynGD3jRp3L9MblBO1t...,664
1,2023-11-22 03:43:22.259663+00:00,ACQIARIgyvYjy7hyNGKdR2St/W+uynGD3jRp3L9MblBO1t...,664
2,2023-11-22 03:43:22.259694+00:00,ACQIARIgyvYjy7hyNGKdR2St/W+uynGD3jRp3L9MblBO1t...,664
3,2023-11-22 03:43:22.260048+00:00,ACQIARIgyvYjy7hyNGKdR2St/W+uynGD3jRp3L9MblBO1t...,664
4,2023-11-22 03:43:22.260194+00:00,ACQIARIgyvYjy7hyNGKdR2St/W+uynGD3jRp3L9MblBO1t...,664
5,2023-11-22 03:43:22.260626+00:00,ACQIARIgyvYjy7hyNGKdR2St/W+uynGD3jRp3L9MblBO1t...,664
6,2023-11-22 03:43:22.260715+00:00,ACQIARIgyvYjy7hyNGKdR2St/W+uynGD3jRp3L9MblBO1t...,664
7,2023-11-22 03:43:22.260880+00:00,ACQIARIgyvYjy7hyNGKdR2St/W+uynGD3jRp3L9MblBO1t...,664
8,2023-11-22 03:43:22.260915+00:00,ACQIARIgyvYjy7hyNGKdR2St/W+uynGD3jRp3L9MblBO1t...,664
9,2023-11-22 03:43:22.260997+00:00,ACQIARIgyvYjy7hyNGKdR2St/W+uynGD3jRp3L9MblBO1t...,664


In [11]:
publishMessage['_time'] = pd.to_datetime(publishMessage["_time"], format='mixed')#.tz_localize(None)
deliverMessage['_time'] = pd.to_datetime(deliverMessage["_time"], format='mixed')#.tz_localize(None)

# def calcAverageTime(publish, received, expTime, parameter):
expTime = exps
publish = publishMessage
received = deliverMessage

publish = publish[['_time', 'messageID','experiment']]
received = received[['_time', 'messageID', 'experiment']]

joined = publish.merge(received, on=['messageID', 'experiment'])
joined['diff'] = ((joined['_time_y'] - joined['_time_x'])/ pd.Timedelta(microseconds=1)).astype(int)
joined = joined.loc[joined["diff"] >= 0].dropna()
joined.head(10)


Unnamed: 0,_time_x,messageID,experiment,_time_y,diff
0,2023-10-04 18:52:04.134150+00:00,ACQIARIgD0L9DFUrEMrmskLqnXInFpXWSpAe/CjcDO6NgA...,390,2023-10-04 18:52:04.134903+00:00,753
1,2023-10-04 18:52:04.134150+00:00,ACQIARIgD0L9DFUrEMrmskLqnXInFpXWSpAe/CjcDO6NgA...,390,2023-10-04 18:52:04.136298+00:00,2148
2,2023-10-04 18:52:04.134150+00:00,ACQIARIgD0L9DFUrEMrmskLqnXInFpXWSpAe/CjcDO6NgA...,390,2023-10-04 18:52:04.136886+00:00,2736
3,2023-10-04 18:52:04.134150+00:00,ACQIARIgD0L9DFUrEMrmskLqnXInFpXWSpAe/CjcDO6NgA...,390,2023-10-04 18:52:04.137090+00:00,2940
4,2023-10-04 18:52:04.134150+00:00,ACQIARIgD0L9DFUrEMrmskLqnXInFpXWSpAe/CjcDO6NgA...,390,2023-10-04 18:52:04.137205+00:00,3055
5,2023-10-04 18:52:04.134150+00:00,ACQIARIgD0L9DFUrEMrmskLqnXInFpXWSpAe/CjcDO6NgA...,390,2023-10-04 18:52:04.137276+00:00,3126
6,2023-10-04 18:52:04.134150+00:00,ACQIARIgD0L9DFUrEMrmskLqnXInFpXWSpAe/CjcDO6NgA...,390,2023-10-04 18:52:04.137353+00:00,3203
7,2023-10-04 18:52:04.134150+00:00,ACQIARIgD0L9DFUrEMrmskLqnXInFpXWSpAe/CjcDO6NgA...,390,2023-10-04 18:52:04.137377+00:00,3227
8,2023-10-04 18:52:04.134667+00:00,ACQIARIgD0L9DFUrEMrmskLqnXInFpXWSpAe/CjcDO6NgA...,390,2023-10-04 18:52:04.134975+00:00,308
9,2023-10-04 18:52:04.134667+00:00,ACQIARIgD0L9DFUrEMrmskLqnXInFpXWSpAe/CjcDO6NgA...,390,2023-10-04 18:52:04.135169+00:00,502


In [16]:
#Average propagation time per interval
df = joined.drop(columns=['_time_x', '_time_y', 'messageID'])

avgProp = df.groupby(['experiment']).agg('mean')
avgProp.reset_index(inplace=True)
# avgProp = avgPropExp.drop(columns=['experiment'])
# avgPropExp.head(10)

# avgProp = avgPropExp.groupby(['identifier']).agg({'diff':['mean','std']}).fillna(0)
# avgProp.columns = avgProp.columns.droplevel(0)
# avgProp.reset_index(inplace=True)

avgProp.head(10)

Unnamed: 0,experiment,diff
0,390,2150.636202
1,391,1886.452761
2,392,1796.105503
3,393,2027.536181
4,394,2079.683079
5,395,1934.724939
6,396,2048.156118
7,397,1808.903694
8,398,384.884444
9,402,2075.841833


In [18]:
finalProp = final.merge(avgProp, on='experiment')
finalProp = finalProp.rename(columns={'diff' : 'propagationTime'})#.drop(columns=['std'])
finalProp.to_csv('propagation.csv')
finalProp.head(10)

Unnamed: 0,experiment,start,end,topology,d,dlo,dhi,dscore,dlazy,dout,gossipFactor,interval,bandwidth,messageOverhead,propagationTime
0,390,2023-10-04 18:52:03.0807730,2023-10-04 19:22:03.0866518,unl,8,6,12,4,8,2,0.25,1.0,285.933511,2473589,2150.636202
1,391,2023-10-04 19:25:37.1533399,2023-10-04 19:55:37.1555856,unl,8,6,12,4,8,2,0.25,1.0,195.675311,1681977,1886.452761
2,392,2023-10-04 19:59:12.6589825,2023-10-04 20:29:12.6608307,unl,8,6,12,4,8,2,0.25,1.0,86.2688,1379082,1796.105503
3,393,2023-10-04 20:32:47.3098616,2023-10-04 21:02:47.3149178,unl,8,6,12,4,8,2,0.25,3.0,108.486362,1948016,2027.536181
4,394,2023-10-04 21:06:22.7755634,2023-10-04 21:36:22.7788800,unl,8,6,12,4,8,2,0.25,3.0,94.085382,1843504,2079.683079
5,395,2023-10-04 21:39:58.6528165,2023-10-04 22:09:58.6545536,unl,8,6,12,4,8,2,0.25,3.0,36.928853,687571,1934.724939
6,396,2023-10-04 22:13:33.8146704,2023-10-04 22:43:33.8174094,unl,16,8,20,16,8,4,0.5,1.0,79.777656,1428674,2048.156118
7,397,2023-10-04 22:47:09.3837110,2023-10-04 23:17:09.3868442,unl,16,8,20,16,8,4,0.5,1.0,69.534879,1428254,1808.903694
8,398,2023-10-04 23:20:44.6001719,2023-10-04 23:50:44.6033804,unl,16,8,20,16,8,4,0.5,1.0,7.107765,141417,384.884444
9,402,2023-10-05 01:35:05.2471450,2023-10-05 02:05:05.2498822,unl,16,8,20,16,8,4,0.5,0.5,75.661552,289177,2075.841833
