In [1]:
import pandas as pd
import numpy as np

from influxdb_client import InfluxDBClient, Point, Dialect

import re
import time
import datetime

import warnings
from influxdb_client.client.warnings import MissingPivotFunction

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.colors as colors

import pandasql as ps
import sqlite3

import csv

# Allow up to 100 rows when a DataFrame is rendered as cell output
pd.set_option("display.max_rows", 100)

In [2]:
def _parse_line(line):

    rx_dict = {
    'token': re.compile(r'var token = "(?P<token>.*)"\n'),
    'url': re.compile(r'var url = "(?P<url>.*)"\n'),
    'org': re.compile(r'var org = "(?P<org>.*)"\n'),
    'bucket': re.compile(r'var bucket = "(?P<bucket>.*)"\n'),
    }   

    """
    Do a regex search against all defined regexes and
    return the key and match result of the first matching regex

    """
    for key, rx in rx_dict.items():
        match = rx.search(line)
        if match:
            return key, match
    # if there are no matches
    return None, None


filepath = '/root/flexi-pipe/config.go'
# Walk the config file one line at a time and pick up the connection settings.
# A name is only bound when its declaration is actually found in the file.
with open(filepath, 'r') as file_object:
    for line in file_object:
        key, match = _parse_line(line)
        if match is None:
            # not one of the declarations we care about
            continue

        # every pattern names its capture group after its key
        value = match.group(key)
        if key == 'token':
            token = value
        elif key == 'url':
            url = value
        elif key == 'org':
            org = value
        elif key == 'bucket':
            bucket = value
# url="http://192.168.20.58:8086"
# The url read from the file is deliberately replaced by the local endpoint.
url = "http://localhost:8086"

In [3]:
# Bounds (Unix seconds) used below to select experiments by their start time
start_time = 1693222601
end_time = 1693373479

filepath = "../experiments.csv"

# Load the header-less experiments file; columns arrive numbered 0..13
data = pd.read_csv(filepath, header=None)
df = pd.DataFrame(data)

# Give the positional columns their names (raises if an expected position is absent)
column_names = [
    "start", "end", "topology", "runtime", "parameter",
    "d", "dlo", "dhi", "dscore", "dlazy", "dout",
    "gossipFactor", "initialDelay", "interval",
]
experiments = df.rename(columns=dict(enumerate(column_names)), errors='raise')

# Keep only the first 27 characters of each raw timestamp string
for stamp in ("start", "end"):
    experiments[stamp] = experiments[stamp].str.slice(0, 27)

# Parse the strings, view the datetimes as int64 and scale by 10**9 to get seconds
for stamp in ("start", "end"):
    experiments[stamp + "Unix"] = pd.to_datetime(experiments[stamp], format="mixed").astype('int64') / 10**9

# Round-trip through timedelta to end up with whole seconds as integers
for stamp in ("start", "end"):
    as_timedelta = pd.to_timedelta(experiments[stamp + "Unix"], unit='s')
    experiments[stamp + "Unix"] = as_timedelta.dt.total_seconds().astype(int)

# Drop fields we don't need for the moment
exp = experiments.drop(columns=["runtime", "initialDelay"]).sort_values(by=["start"])

# Keep experiments that started inside [start_time, end_time]; the original row
# label is preserved as the "experiment" column.
started_in_window = exp['startUnix'].astype(int).between(start_time, end_time)
expTime = exp[started_in_window].reset_index().rename(columns={'index': 'experiment'})

expTime.head(100)

Unnamed: 0,experiment,start,end,topology,parameter,d,dlo,dhi,dscore,dlazy,dout,gossipFactor,interval,startUnix,endUnix
0,225,2023-08-28 11:36:41.3956182,2023-08-28 12:06:41.3980282,general,reference,8,6,12,4,8,2,0.25,1.0,1693222601,1693224401
1,226,2023-08-28 12:10:15.7332456,2023-08-28 12:40:15.7376061,general,reference,8,6,12,4,8,2,0.25,1.0,1693224615,1693226415
2,227,2023-08-28 12:43:50.4559273,2023-08-28 13:13:50.4587305,general,reference,8,6,12,4,8,2,0.25,1.0,1693226630,1693228430
3,228,2023-08-28 13:17:25.0094743,2023-08-28 13:47:25.0663662,general,interval,8,6,12,4,8,2,0.25,0.5,1693228645,1693230445
4,229,2023-08-28 13:50:58.9294778,2023-08-28 14:20:58.9480353,general,interval,8,6,12,4,8,2,0.25,0.5,1693230658,1693232458
5,230,2023-08-28 14:24:33.1981004,2023-08-28 14:54:33.2858529,general,interval,8,6,12,4,8,2,0.25,0.5,1693232673,1693234473
6,231,2023-08-28 14:58:07.0446170,2023-08-28 15:28:07.0528423,general,interval,8,6,12,4,8,2,0.25,30.0,1693234687,1693236487
7,232,2023-08-28 15:31:41.2282073,2023-08-28 16:01:41.2341023,general,interval,8,6,12,4,8,2,0.25,30.0,1693236701,1693238501
8,233,2023-08-28 16:05:15.0353471,2023-08-28 16:35:15.0382452,general,interval,8,6,12,4,8,2,0.25,30.0,1693238715,1693240515
9,234,2023-08-28 16:38:48.6882669,2023-08-28 17:08:48.6898789,general,interval,8,6,12,4,8,2,0.25,3.0,1693240728,1693242528


In [4]:
# From here on `experiments` names the time-filtered, re-indexed frame `expTime`
# (the same object, not a copy).
experiments = expTime
experiments.head(10)

Unnamed: 0,experiment,start,end,topology,parameter,d,dlo,dhi,dscore,dlazy,dout,gossipFactor,interval,startUnix,endUnix
0,225,2023-08-28 11:36:41.3956182,2023-08-28 12:06:41.3980282,general,reference,8,6,12,4,8,2,0.25,1.0,1693222601,1693224401
1,226,2023-08-28 12:10:15.7332456,2023-08-28 12:40:15.7376061,general,reference,8,6,12,4,8,2,0.25,1.0,1693224615,1693226415
2,227,2023-08-28 12:43:50.4559273,2023-08-28 13:13:50.4587305,general,reference,8,6,12,4,8,2,0.25,1.0,1693226630,1693228430
3,228,2023-08-28 13:17:25.0094743,2023-08-28 13:47:25.0663662,general,interval,8,6,12,4,8,2,0.25,0.5,1693228645,1693230445
4,229,2023-08-28 13:50:58.9294778,2023-08-28 14:20:58.9480353,general,interval,8,6,12,4,8,2,0.25,0.5,1693230658,1693232458
5,230,2023-08-28 14:24:33.1981004,2023-08-28 14:54:33.2858529,general,interval,8,6,12,4,8,2,0.25,0.5,1693232673,1693234473
6,231,2023-08-28 14:58:07.0446170,2023-08-28 15:28:07.0528423,general,interval,8,6,12,4,8,2,0.25,30.0,1693234687,1693236487
7,232,2023-08-28 15:31:41.2282073,2023-08-28 16:01:41.2341023,general,interval,8,6,12,4,8,2,0.25,30.0,1693236701,1693238501
8,233,2023-08-28 16:05:15.0353471,2023-08-28 16:35:15.0382452,general,interval,8,6,12,4,8,2,0.25,30.0,1693238715,1693240515
9,234,2023-08-28 16:38:48.6882669,2023-08-28 17:08:48.6898789,general,interval,8,6,12,4,8,2,0.25,3.0,1693240728,1693242528


In [5]:
def _window_for(parameter_name):
    """Return (rows, earliest startUnix, latest endUnix) for one value of "parameter"."""
    rows = experiments.loc[experiments["parameter"] == parameter_name]
    return rows, rows["startUnix"].min().astype(int), rows["endUnix"].max().astype(int)


# One (rows, start, end) triple per varied parameter
ref, start_reference, end_reference = _window_for("reference")
d, start_d, end_d = _window_for("d")
dlo, start_dlo, end_dlo = _window_for("dlo")
dhi, start_dhi, end_dhi = _window_for("dhi")
dscore, start_dscore, end_dscore = _window_for("dscore")
dlazy, start_dlazy, end_dlazy = _window_for("dlazy")
dout, start_dout, end_dout = _window_for("dout")
gf, start_gf, end_gf = _window_for("gossipFactor")
interval, start_intv, end_intv = _window_for("interval")

print(start_reference,end_reference)

1693222601 1693228430


In [13]:
def from_influx(url, token, org, measurement, start_time, end_time, grouping_key, bucket="gs"):
    """Fetch one measurement from InfluxDB and return its timestamps plus one field.

    Parameters
    ----------
    url, token, org : str
        Connection settings handed to ``InfluxDBClient``.
    measurement : str
        Value compared against ``r._measurement`` in the Flux filter.
    start_time, end_time
        Bounds interpolated into the Flux ``range(start:, stop:)`` call via ``str()``.
    grouping_key : str
        Name of the pivoted field column to keep next to ``_time``.
    bucket : str, optional
        Bucket to read from. Defaults to ``"gs"``, the value that used to be
        hard-coded, so existing calls build the same query.

    Returns
    -------
    pandas.DataFrame
        Columns ``_time`` (converted with ``pd.to_datetime``) and ``grouping_key``,
        sorted by ``_time`` with a fresh 0-based index.

    Raises
    ------
    KeyError
        If the query result lacks ``_time`` or ``grouping_key`` (for example when
        the query returns no rows).
    """
    flux_query = ('from(bucket: "'+bucket+'") '
                  ' |> range(start: '+str(start_time)+', stop:'+str(end_time)+') '
                  ' |> filter(fn: (r) => r._measurement == "'+measurement+'") '
                  ' |> group(columns: ["_measurement", "_field"], mode: "by") '
                  ' |> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")')

    client = InfluxDBClient(url=url, token=token, org=org,  timeout=900_000)
    try:
        data_frame = client.query_api().query_data_frame(flux_query)
    finally:
        # release the connection even when the query fails
        client.close()

    # query_data_frame hands back a list of frames when the result holds several
    # tables; stitch them together so the column selection below works.
    if isinstance(data_frame, list):
        data_frame = pd.concat(data_frame, ignore_index=True)

    df = data_frame[['_time', grouping_key]].sort_values(by=["_time"]).reset_index(drop=True)
    df["_time"] = pd.to_datetime(df["_time"])

    return df

In [14]:
def _fetch_messages(window_start, window_end):
    """Query the "message" measurement for one window, keeping the "type" field."""
    return from_influx(url, token, org, "message", window_start, window_end, "type")


# One query per parameter group, each over that group's own time window
reference = _fetch_messages(start_reference, end_reference)
d = _fetch_messages(start_d, end_d)
dlo = _fetch_messages(start_dlo, end_dlo)
dhi = _fetch_messages(start_dhi, end_dhi)
dscore = _fetch_messages(start_dscore, end_dscore)
dlazy = _fetch_messages(start_dlazy, end_dlazy)
dout = _fetch_messages(start_dout, end_dout)
gf = _fetch_messages(start_gf, end_gf)
interval = _fetch_messages(start_intv, end_intv)

reference.head(10)

Unnamed: 0,_time,type
0,2023-08-28 11:51:01.576407+00:00,9.0
1,2023-08-28 11:51:01.590947+00:00,9.0
2,2023-08-28 11:51:01.599709+00:00,4.0
3,2023-08-28 11:51:01.599876+00:00,6.0
4,2023-08-28 11:51:01.600282+00:00,4.0
5,2023-08-28 11:51:01.600827+00:00,6.0
6,2023-08-28 11:51:01.605203+00:00,9.0
7,2023-08-28 11:51:01.611808+00:00,4.0
8,2023-08-28 11:51:01.612753+00:00,4.0
9,2023-08-28 11:51:01.612867+00:00,6.0


In [26]:
# Tag every row with the parameter group it was queried for (adds a column in place).
reference["parameter"] = "reference"
reference.head(10)

Unnamed: 0,_time,type,parameter
0,2023-08-28 11:51:01.576407+00:00,9.0,reference
1,2023-08-28 11:51:01.590947+00:00,9.0,reference
2,2023-08-28 11:51:01.599709+00:00,4.0,reference
3,2023-08-28 11:51:01.599876+00:00,6.0,reference
4,2023-08-28 11:51:01.600282+00:00,4.0,reference
5,2023-08-28 11:51:01.600827+00:00,6.0,reference
6,2023-08-28 11:51:01.605203+00:00,9.0,reference
7,2023-08-28 11:51:01.611808+00:00,4.0,reference
8,2023-08-28 11:51:01.612753+00:00,4.0,reference
9,2023-08-28 11:51:01.612867+00:00,6.0,reference


In [15]:
# Tag every row with the parameter group it was queried for (adds a column in place).
d["parameter"] = "d"
d.head(10)

Unnamed: 0,_time,type,parameter
0,2023-08-28 18:19:32.077435+00:00,7.0,d
1,2023-08-28 18:19:32.077458+00:00,7.0,d
2,2023-08-28 18:19:32.077474+00:00,7.0,d
3,2023-08-28 18:19:32.077479+00:00,7.0,d
4,2023-08-28 18:19:32.077486+00:00,7.0,d
5,2023-08-28 18:19:32.077703+00:00,6.0,d
6,2023-08-28 18:19:32.078498+00:00,6.0,d
7,2023-08-28 18:19:32.078565+00:00,6.0,d
8,2023-08-28 18:19:32.079500+00:00,6.0,d
9,2023-08-28 18:19:32.129985+00:00,7.0,d


In [16]:
# Tag every row with the parameter group it was queried for (adds a column in place).
dlo["parameter"] = "dlo"
dlo.head(10)

Unnamed: 0,_time,type,parameter
0,2023-08-28 23:21:39.182835+00:00,6.0,dlo
1,2023-08-28 23:21:39.183326+00:00,6.0,dlo
2,2023-08-28 23:21:39.183684+00:00,6.0,dlo
3,2023-08-28 23:21:39.184362+00:00,6.0,dlo
4,2023-08-28 23:21:39.184436+00:00,6.0,dlo
5,2023-08-28 23:21:39.191222+00:00,7.0,dlo
6,2023-08-28 23:21:39.191240+00:00,7.0,dlo
7,2023-08-28 23:21:39.191246+00:00,7.0,dlo
8,2023-08-28 23:21:39.191251+00:00,7.0,dlo
9,2023-08-28 23:21:39.191272+00:00,7.0,dlo


In [17]:
# Tag every row with the parameter group it was queried for (adds a column in place).
dhi["parameter"] = "dhi"
dhi.head(10)

Unnamed: 0,_time,type,parameter
0,2023-08-29 04:23:51.143990+00:00,0.0,dhi
1,2023-08-29 04:23:51.144233+00:00,3.0,dhi
2,2023-08-29 04:23:51.144246+00:00,7.0,dhi
3,2023-08-29 04:23:51.144249+00:00,7.0,dhi
4,2023-08-29 04:23:51.144252+00:00,7.0,dhi
5,2023-08-29 04:23:51.144254+00:00,7.0,dhi
6,2023-08-29 04:23:51.144256+00:00,7.0,dhi
7,2023-08-29 04:23:51.144258+00:00,7.0,dhi
8,2023-08-29 04:23:51.144260+00:00,7.0,dhi
9,2023-08-29 04:23:51.144377+00:00,6.0,dhi


In [18]:
# Tag every row with the parameter group it was queried for (adds a column in place).
dscore["parameter"] = "dscore"
dscore.head(10)

Unnamed: 0,_time,type,parameter
0,2023-08-29 09:26:00.053731+00:00,6.0,dscore
1,2023-08-29 09:26:00.053939+00:00,6.0,dscore
2,2023-08-29 09:26:00.054156+00:00,6.0,dscore
3,2023-08-29 09:26:00.054294+00:00,6.0,dscore
4,2023-08-29 09:26:00.054902+00:00,6.0,dscore
5,2023-08-29 09:26:00.062482+00:00,6.0,dscore
6,2023-08-29 09:26:00.062701+00:00,6.0,dscore
7,2023-08-29 09:26:00.062727+00:00,3.0,dscore
8,2023-08-29 09:26:00.062771+00:00,7.0,dscore
9,2023-08-29 09:26:00.062776+00:00,7.0,dscore


In [19]:
# Tag every row with the parameter group it was queried for (adds a column in place).
dlazy["parameter"] = "dlazy"
dlazy.head(10)

Unnamed: 0,_time,type,parameter
0,2023-08-29 14:28:15.208294+00:00,6.0,dlazy
1,2023-08-29 14:28:15.208908+00:00,6.0,dlazy
2,2023-08-29 14:28:15.229992+00:00,6.0,dlazy
3,2023-08-29 14:28:15.231072+00:00,6.0,dlazy
4,2023-08-29 14:28:15.260792+00:00,6.0,dlazy
5,2023-08-29 14:28:15.261260+00:00,6.0,dlazy
6,2023-08-29 14:28:15.261482+00:00,6.0,dlazy
7,2023-08-29 14:28:15.263930+00:00,7.0,dlazy
8,2023-08-29 14:28:15.263950+00:00,7.0,dlazy
9,2023-08-29 14:28:15.263956+00:00,7.0,dlazy


In [20]:
# Tag every row with the parameter group it was queried for (adds a column in place).
dout["parameter"] = "dout"
dout.head(10)

Unnamed: 0,_time,type,parameter
0,2023-08-29 19:30:28.036376+00:00,6.0,dout
1,2023-08-29 19:30:28.558433+00:00,6.0,dout
2,2023-08-29 19:30:28.558574+00:00,7.0,dout
3,2023-08-29 19:30:28.558599+00:00,7.0,dout
4,2023-08-29 19:30:28.558607+00:00,7.0,dout
5,2023-08-29 19:30:28.558616+00:00,7.0,dout
6,2023-08-29 19:30:28.558656+00:00,7.0,dout
7,2023-08-29 19:30:28.558663+00:00,7.0,dout
8,2023-08-29 19:30:28.558669+00:00,7.0,dout
9,2023-08-29 19:30:28.558677+00:00,7.0,dout


In [21]:
# Tag every row with the parameter group it was queried for (adds a column in place).
# NOTE(review): the label written here is "gf", while the experiments table calls
# this parameter "gossipFactor" — confirm the mismatch is intended before matching
# the two on this column.
gf["parameter"] = "gf"
gf.head(10)

Unnamed: 0,_time,type,parameter
0,2023-08-30 00:32:40.012967+00:00,6.0,gf
1,2023-08-30 00:32:40.012993+00:00,6.0,gf
2,2023-08-30 00:32:40.013045+00:00,6.0,gf
3,2023-08-30 00:32:40.013088+00:00,6.0,gf
4,2023-08-30 00:32:40.013197+00:00,3.0,gf
5,2023-08-30 00:32:40.013206+00:00,3.0,gf
6,2023-08-30 00:32:40.013234+00:00,7.0,gf
7,2023-08-30 00:32:40.013236+00:00,7.0,gf
8,2023-08-30 00:32:40.013240+00:00,7.0,gf
9,2023-08-30 00:32:40.013242+00:00,7.0,gf


In [22]:
# Tag every row with the parameter group it was queried for (adds a column in place).
interval["parameter"] = "interval"
interval.head(10)

Unnamed: 0,_time,type,parameter
0,2023-08-28 13:17:25.076892+00:00,0.0,interval
1,2023-08-28 13:17:25.077097+00:00,3.0,interval
2,2023-08-28 13:17:25.077133+00:00,7.0,interval
3,2023-08-28 13:17:25.077139+00:00,7.0,interval
4,2023-08-28 13:17:25.077143+00:00,7.0,interval
5,2023-08-28 13:17:25.078306+00:00,6.0,interval
6,2023-08-28 13:17:25.078423+00:00,6.0,interval
7,2023-08-28 13:17:25.078505+00:00,3.0,interval
8,2023-08-28 13:17:25.078520+00:00,7.0,interval
9,2023-08-28 13:17:25.078523+00:00,7.0,interval


In [27]:
# Stack the per-parameter message frames in a fixed order. Row labels are not
# renumbered, so labels repeat across the stacked frames.
message_frames = [reference, d, dlo, dhi, dscore, dlazy, dout, gf, interval]
df = pd.concat(message_frames)

df.head(10)

Unnamed: 0,_time,type,parameter
0,2023-08-28 11:51:01.576407+00:00,9.0,reference
1,2023-08-28 11:51:01.590947+00:00,9.0,reference
2,2023-08-28 11:51:01.599709+00:00,4.0,reference
3,2023-08-28 11:51:01.599876+00:00,6.0,reference
4,2023-08-28 11:51:01.600282+00:00,4.0,reference
5,2023-08-28 11:51:01.600827+00:00,6.0,reference
6,2023-08-28 11:51:01.605203+00:00,9.0,reference
7,2023-08-28 11:51:01.611808+00:00,4.0,reference
8,2023-08-28 11:51:01.612753+00:00,4.0,reference
9,2023-08-28 11:51:01.612867+00:00,6.0,reference


In [1]:
# df=reference
# Working variables for the cells that follow.
# `exp`, `start_reference`, `end_reference` and `df` are created by earlier cells;
# running this cell in a fresh kernel without them fails with NameError.
expRaw=exp
start = start_reference
end = end_reference
grouping_key = "type"  # column of `df` that the SQL join in the following cell selects
parameter = "d"

# expTime = expRaw.loc[expRaw['startUnix']>= int(start)].loc[expRaw['endUnix'] <= int(end)]
# expTime.head(20)
df.head(100)

NameError: name 'exp' is not defined

In [None]:
 # Make the db in memory
conn = sqlite3.connect(':memory:')
# Write both frames as tables so they can be joined with SQL
df.to_sql('df', conn, index=False)
expTime.to_sql('expTime', conn, index=False)

# Pair every message with each experiment whose [start, end] range contains the
# message timestamp, keeping the experiment number next to the message columns.
qry = '''
        select  
            df._time,
            df.'''+grouping_key+''',
            expTime.experiment,
            df.parameter
        from
            df join expTime on
            df._time between expTime.start and expTime.end
        '''
dfNew = pd.read_sql_query(qry, conn)

dfNew = dfNew.set_index('experiment')

# The query returns only _time, <grouping_key> and parameter besides the index.
# The former conversions of "min" and "max" referenced columns that do not exist
# in this result and raised KeyError, so only _time is converted.
dfNew['_time'] = pd.to_datetime(dfNew["_time"], format='mixed')

dfNew.head(100)

In [None]:
# Number of non-null entries per column for each experiment
n_messages = dfNew.groupby(['experiment']).count()
n_messages.head(100)