In [1]:
# from flightsql import FlightSQLClient
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from influxdb_client import InfluxDBClient, Point, Dialect

import re

from influxdb_client.client.write_api import SYNCHRONOUS

import time
import datetime

import requests

import warnings
from influxdb_client.client.warnings import MissingPivotFunction


In [2]:
#Read config file and load data into the variables

rx_dict = {
    'token': re.compile(r'var token = "(?P<token>.*)"\n'),
    'url': re.compile(r'var url = "(?P<url>.*)"\n'),
    'org': re.compile(r'var org = "(?P<org>.*)"\n'),
    'bucket': re.compile(r'var bucket = "(?P<bucket>.*)"\n'),
}

def _parse_line(line):
    """
    Do a regex search against all defined regexes and
    return the key and match result of the first matching regex

    """
    for key, rx in rx_dict.items():
        match = rx.search(line)
        if match:
            return key, match
    # if there are no matches
    return None, None



filepath = '/root/flexi-pipe/config.go'

# Scan the Go config file line by line and remember every recognised
# `var <name> = "<value>"` assignment. If a name appears more than once,
# the last occurrence wins.
settings = {}
with open(filepath, 'r') as file_object:
    for line in file_object:
        key, match = _parse_line(line)
        if key is not None:
            # every regex names its capture group after its rx_dict key
            settings[key] = match.group(key)

# Fail here, with a clear message, rather than later with a NameError when the
# client is created from `token` and `org`.
missing = [name for name in ('token', 'org') if name not in settings]
if missing:
    raise ValueError(
        "missing setting(s) in " + filepath + ": " + ", ".join(missing)
    )

token = settings['token']
org = settings['org']
# None when the config file does not define a bucket
bucket = settings.get('bucket')

# NOTE(review): any `url` found in the config file is ignored; the address
# below is hard-coded and always used. Confirm this override is intentional.
url="http://192.168.20.58:8086"

In [10]:
# Connect to InfluxDB with the settings loaded from the config cell.
# `timeout` is the HTTP request timeout in milliseconds (30 s here).
client = InfluxDBClient(
    url=url,
    token=token,
    org=org,
    timeout=30_000,
)

# Only reads are performed in this notebook; a synchronous writer can be
# enabled with:
# write_api = client.write_api(write_options=SYNCHRONOUS)
query_api = client.query_api()

In [11]:
# Retrieve experiments data from csv (the file has no header row)
data = pd.read_csv('./experiments.csv', header=None)
df = data  # read_csv already returns a DataFrame; `df` is kept as an alias


def _epoch_seconds(stamps):
    """Convert timestamp strings to whole Unix seconds, returned as strings.

    `stamps` is a Series of strings in "%Y-%m-%d %H:%M:%S.%f" format.
    The seconds are obtained by integer floor division of the parsed
    timestamps, which is exact. Going through float64 (nanoseconds / 10**9)
    loses precision for present-day dates and can round a timestamp lying
    just below a second boundary up to the next second.

    Raises if a value cannot be parsed or is missing.
    """
    parsed = pd.to_datetime(stamps, format="%Y-%m-%d %H:%M:%S.%f")
    whole_seconds = (parsed - pd.Timestamp("1970-01-01")) // pd.Timedelta("1s")
    return whole_seconds.astype('int64').astype(str)


#Rename columns: the csv columns are identified by position
column_names = [
    "start", "end", "topology", "runtime",
    "d", "dlo", "dhi", "dscore", "dlazy", "dout",
    "gossipFactor", "initialDelay", "interval",
]
# errors='raise' makes a csv with fewer columns than expected fail loudly
experiments = df.rename(columns=dict(enumerate(column_names)), errors='raise')

#Correct timestamp: keep only the first 27 characters
#(date, time and 7 fractional digits, as in the displayed values)
experiments["start"] = experiments["start"].str.slice(0, 27)
experiments["end"] = experiments["end"].str.slice(0, 27)

#String to Unix timestamp (whole seconds, stored as strings)
experiments['startUnix'] = _epoch_seconds(experiments["start"])
experiments['endUnix'] = _epoch_seconds(experiments["end"])

#Drop fields we don't need for the moment
exp = experiments.drop(columns=["runtime", "initialDelay"])

exp.head(10)

Unnamed: 0,start,end,topology,d,dlo,dhi,dscore,dlazy,dout,gossipFactor,interval,startUnix,endUnix
0,2023-07-21 11:33:25.8681227,2023-07-21 11:48:25.8815367,unl,8,6,12,4,8,2,0.25,0.5,1689939205,1689940105
1,2023-07-21 11:51:57.6626049,2023-07-21 12:06:57.7216522,unl,8,6,12,4,8,2,0.25,0.5,1689940317,1689941217
2,2023-07-21 12:10:30.3045413,2023-07-21 12:25:30.3056339,unl,8,6,12,4,8,2,0.25,0.5,1689941430,1689942330
3,2023-07-21 12:29:02.5299110,2023-07-21 12:44:02.5886491,unl,8,6,12,4,8,2,0.25,1.0,1689942542,1689943442
4,2023-07-21 12:47:35.4834622,2023-07-21 13:02:35.4862777,unl,8,6,12,4,8,2,0.25,1.0,1689943655,1689944555
5,2023-07-21 13:06:08.6071564,2023-07-21 13:21:08.7077403,unl,8,6,12,4,8,2,0.25,1.0,1689944768,1689945668
6,2023-07-21 13:24:41.7356835,2023-07-21 13:39:41.7368319,unl,8,6,12,4,8,2,0.25,30.0,1689945881,1689946781
7,2023-07-21 13:43:15.0470143,2023-07-21 13:58:15.0484766,unl,8,6,12,4,8,2,0.25,30.0,1689946995,1689947895
8,2023-07-21 14:01:48.0040406,2023-07-21 14:16:48.0065267,unl,8,6,12,4,8,2,0.25,30.0,1689948108,1689949008
9,2023-07-21 14:20:21.0317126,2023-07-21 14:35:21.0329588,unl,8,6,12,4,8,2,0.25,3.0,1689949221,1689950121


In [12]:
# Development shortcut: restrict the analysis to the 6 most recent rows.

# REMOVE THIS AFTER FINISH DEV
exp = exp.iloc[-6:]

exp.head(6)

Unnamed: 0,start,end,topology,d,dlo,dhi,dscore,dlazy,dout,gossipFactor,interval,startUnix,endUnix
13,2023-07-21 14:38:54.2010769,2023-07-21 14:53:54.2031584,unl,8,6,12,4,8,2,0.25,3.0,1689950334,1689951234
14,2023-07-21 15:16:01.8757798,2023-07-21 15:31:01.8790008,unl,12,6,12,4,8,2,0.25,1.0,1689952561,1689953461
15,2023-07-21 18:21:40.8773507,2023-07-21 18:36:40.8958100,unl,8,6,12,6,8,2,0.25,1.0,1689963700,1689964600
16,2023-07-21 14:57:28.0962312,2023-07-21 15:12:28.1510119,unl,8,6,12,4,8,2,0.25,3.0,1689951448,1689952348
17,2023-07-21 16:30:17.6535234,2023-07-21 16:45:17.6691699,unl,6,6,12,4,8,2,0.25,1.0,1689957017,1689957917
18,2023-07-21 20:31:39.3030571,2023-07-21 20:46:39.3911994,unl,8,6,12,4,4,2,0.25,1.0,1689971499,1689972399


In [14]:
# The time range below is hard-coded; it equals startUnix/endUnix of the first
# row displayed for `exp`. Draft for reading it from `exp` instead (inactive):
# exp1 = exp.head(1).reset_index(drop=True)
# start = exp1["startUnix"]
# end = exp1["endUnix"]
# warnings.warn(message, MissingPivotFunction)

# Flux query: every "message" measurement inside the time range, grouped by
# measurement/field and pivoted so that each field becomes its own column.
flux_query = (
    'from(bucket: "gs") '
    ' |> range(start: 1689950334, stop:1689951234) '
    ' |> filter(fn: (r) => r._measurement == "message") '
    ' |> group(columns: ["_measurement", "_field"], mode: "by") '
    ' |> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")'
)
data_frame = query_api.query_data_frame(flux_query)
# print(data_frame.to_string())

# The client is closed right after the query, so the client cell has to be
# re-run before this cell can be executed again.
client.close()

In [15]:
# Preview the first rows (at most 15) of the query result.
data_frame.head(15)

Unnamed: 0,result,table,_start,_stop,_time,_measurement,type
0,_result,0,2023-07-21 14:38:54+00:00,2023-07-21 14:53:54+00:00,2023-07-21 14:38:54.240597+00:00,message,7.0
1,_result,0,2023-07-21 14:38:54+00:00,2023-07-21 14:53:54+00:00,2023-07-21 14:38:54.240645+00:00,message,7.0
2,_result,0,2023-07-21 14:38:54+00:00,2023-07-21 14:53:54+00:00,2023-07-21 14:38:54.240666+00:00,message,7.0
3,_result,0,2023-07-21 14:38:54+00:00,2023-07-21 14:53:54+00:00,2023-07-21 14:38:54.240683+00:00,message,7.0
4,_result,0,2023-07-21 14:38:54+00:00,2023-07-21 14:53:54+00:00,2023-07-21 14:38:54.240698+00:00,message,7.0
5,_result,0,2023-07-21 14:38:54+00:00,2023-07-21 14:53:54+00:00,2023-07-21 14:38:54.240704+00:00,message,7.0
6,_result,0,2023-07-21 14:38:54+00:00,2023-07-21 14:53:54+00:00,2023-07-21 14:38:54.240745+00:00,message,7.0
7,_result,0,2023-07-21 14:38:54+00:00,2023-07-21 14:53:54+00:00,2023-07-21 14:38:54.240758+00:00,message,7.0
8,_result,0,2023-07-21 14:38:54+00:00,2023-07-21 14:53:54+00:00,2023-07-21 14:38:54.240775+00:00,message,7.0
9,_result,0,2023-07-21 14:38:54+00:00,2023-07-21 14:53:54+00:00,2023-07-21 14:38:54.240782+00:00,message,7.0


In [50]:
# Drop the bookkeeping columns returned by the query and make sure `_time`
# holds datetimes.
df = (
    data_frame
    .drop(columns=["result", "table", "_start", "_stop", "_measurement"])
    .assign(_time=lambda frame: pd.to_datetime(frame["_time"]))
)

#All message types: distinct values of `type`, ascending, as a NumPy array
types = (
    df["type"]
    .drop_duplicates()
    .sort_values()
    .reset_index(drop=True)
    .to_numpy()
)
# types.head(100)
print(types)

# Drafts for the next step (inactive) -------------------------------------
#Group
# dfGroup = df.groupby([df["_time"].dt.second, "type"]).count()

# dfGroup = df.groupby([pd.Grouper(key='_time', freq='5s'), df["type"]])

#Reshape
# dfPivot = df.pivot(index="_time", columns="location", values="value")

#Resample to group info at each 3s
# NOTE(review): the draft below resamples with "S" (1 second), not 3 seconds
# df2 = df.resample("S").count()

# dfGroup.head(10)

[ 0.  2.  3.  4.  5.  6.  7.  9. 11. 12.]
