In [1]:
!rm df-*-checkpoint.csv

zsh:1: no matches found: df-*-checkpoint.csv


In [2]:
import os
import glob

In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Load all datasets

In [4]:
all_files = glob.glob("*.csv")
df = pd.concat((pd.read_csv(f) for f in all_files))
df = df.drop_duplicates()
print(df.shape)

(555, 19)


In [5]:
df.head()

Unnamed: 0,Latency,Requests,Throughput,Measured_Throughput,Baseline,Orchestrator,Benchmark,Infinite,Exp,Availability,Horizontal,Vertical,Threads,Connections,Duration,Mean,Max,StdDev,ReqSec
0,10.06,500.0,14814.0,9899,1,k8s,sn,1,exp15,1,1,1,4,512,30,3.983,52.864,2.127,493.26
1,11.61,500.0,14706.0,9908,1,k8s,hr,1,exp15,1,1,1,4,512,30,4.298,22.256,2.219,490.0
2,11.85,500.0,14813.0,9898,1,k8s,mm,1,exp15,1,1,1,4,512,30,4.902,19.696,1.883,493.64
3,7.66,1000.0,29634.0,19875,1,k8s,hr,1,exp15,1,1,1,4,512,30,3.181,11.592,1.41,987.61
4,19600.0,1000.0,12128.0,8079,1,k8s,mm,1,exp15,1,1,1,4,512,30,11164.807,22691.84,3871.293,404.04


In [6]:
# df['Exp'] = df['Exp'].replace(['exp12','exp13','exp14','exp15'],
                              # ['baseline', 'vertical', 'horizontal', 'availability'])

# Overview experiment settings

In [7]:
df.columns

Index(['Latency', 'Requests', 'Throughput', 'Measured_Throughput', 'Baseline',
       'Orchestrator', 'Benchmark', 'Infinite', 'Exp', 'Availability',
       'Horizontal', 'Vertical', 'Threads', 'Connections', 'Duration', 'Mean',
       'Max', 'StdDev', 'ReqSec'],
      dtype='object')

In [8]:
df.loc[:,['Exp','Baseline','Orchestrator','Availability','Horizontal','Vertical', 'Throughput']]

Unnamed: 0,Exp,Baseline,Orchestrator,Availability,Horizontal,Vertical,Throughput
0,exp15,1,k8s,1,1,1,14814.0
1,exp15,1,k8s,1,1,1,14706.0
2,exp15,1,k8s,1,1,1,14813.0
3,exp15,1,k8s,1,1,1,29634.0
4,exp15,1,k8s,1,1,1,12128.0
...,...,...,...,...,...,...,...
23,exp14,1,nomad,0,0,1,449699.0
24,exp14,1,nomad,0,0,1,104179.0
25,exp14,1,nomad,0,0,1,106307.0
26,exp14,1,nomad,0,0,1,105577.0


In [9]:
df['Orchestrator'] = df['Orchestrator'].astype('category')
df['Benchmark'] = df['Benchmark'].astype('category')

df['Exp'] = df['Exp'].astype('category')
# df['Latency'] = df['Latency']
df['Requests'] = df['Requests'].astype(int)

In [10]:
df_0 = df.loc[df['Exp'] == 'exp0']
df_1 = df.loc[df['Exp'] == 'exp1']
df_2 = df.loc[df['Exp'] == 'exp2']
df_3 = df.loc[df['Exp'] == 'exp3']
df_4 = df.loc[df['Exp'] == 'exp4']
df_5 = df.loc[df['Exp'] == 'exp5']
df_6 = df.loc[df['Exp'] == 'exp6']
df_7 = df.loc[df['Exp'] == 'exp7']
df_8 = df.loc[df['Exp'] == 'exp8']
df_9 = df.loc[df['Exp'] == 'exp9']
df_10 = df.loc[df['Exp'] == 'exp10']
df_11 = df.loc[df['Exp'] == 'exp11']

In [11]:
df_12 = df.loc[(df['Exp'] == 'exp12')]
df_13 = df.loc[(df['Exp'] == 'exp13')]
df_14 = df.loc[(df['Exp'] == 'exp14')]
df_15 = df.loc[(df['Exp'] == 'exp15')]
df_all_orch = pd.concat([df_12, df_13, df_14, df_15])
df_all_orch['Exp'] = df_all_orch['Exp'].cat.remove_unused_categories()
df_12_swarm = df.loc[(df['Exp'] == 'exp12') & (df['Orchestrator'] == 'swarm')]
df_13_swarm = df.loc[(df['Exp'] == 'exp13') & (df['Orchestrator'] == 'swarm')]
df_14_swarm = df.loc[(df['Exp'] == 'exp14') & (df['Orchestrator'] == 'swarm')]
df_15_swarm = df.loc[(df['Exp'] == 'exp15') & (df['Orchestrator'] == 'swarm')]
df_12_nomad = df.loc[(df['Exp'] == 'exp12') & (df['Orchestrator'] == 'nomad')]
df_13_nomad = df.loc[(df['Exp'] == 'exp13') & (df['Orchestrator'] == 'nomad')]
df_14_nomad = df.loc[(df['Exp'] == 'exp14') & (df['Orchestrator'] == 'nomad')]
df_15_nomad = df.loc[(df['Exp'] == 'exp15') & (df['Orchestrator'] == 'nomad')]
df_12_k8s = df.loc[(df['Exp'] == 'exp12') & (df['Orchestrator'] == 'k8s')]
df_13_k8s = df.loc[(df['Exp'] == 'exp13') & (df['Orchestrator'] == 'k8s')]
df_14_k8s = df.loc[(df['Exp'] == 'exp14') & (df['Orchestrator'] == 'k8s')]
df_15_k8s = df.loc[(df['Exp'] == 'exp15') & (df['Orchestrator'] == 'k8s')]

In [12]:
# Global settings
fontsize = 20
legend_location = "lower right"

# Exp12 Swarm Tables


In [13]:
df.columns

Index(['Latency', 'Requests', 'Throughput', 'Measured_Throughput', 'Baseline',
       'Orchestrator', 'Benchmark', 'Infinite', 'Exp', 'Availability',
       'Horizontal', 'Vertical', 'Threads', 'Connections', 'Duration', 'Mean',
       'Max', 'StdDev', 'ReqSec'],
      dtype='object')

In [14]:
df_12_swarm.iloc[:,[8,0,1,8,5,2,3,-1,-4,-3,-2]]

Unnamed: 0,Exp,Latency,Requests,Exp.1,Orchestrator,Throughput,Measured_Throughput,ReqSec,Mean,Max,StdDev
20,exp12,3.98,500,exp12,swarm,14922.0,9876,497.36,2.878,26.64,0.741
27,exp12,6.94,500,exp12,swarm,14815.0,9900,493.4,2.594,24.256,1.731
29,exp12,7.75,500,exp12,swarm,14815.0,9900,493.3,2.571,11.296,1.642
35,exp12,21640.0,1000,exp12,swarm,10356.0,7033,344.89,12884.337,24788.992,4122.938
38,exp12,6.39,1000,exp12,swarm,29634.0,19874,987.74,2.083,13.208,1.316
49,exp12,6.22,1500,exp12,swarm,43993.0,29658,1466.15,2.849,13.312,1.57
52,exp12,6.07,1500,exp12,swarm,43994.0,29658,1465.96,1.982,9.904,1.247
66,exp12,24970.0,2000,exp12,swarm,10920.0,7520,363.52,16016.705,26968.064,4742.046
74,exp12,6.33,2000,exp12,swarm,58715.0,39649,1957.01,2.001,12.56,1.302
76,exp12,6.74,2000,exp12,swarm,59189.0,39689,1972.69,3.106,13.048,1.546


In [15]:
df_12.shape

(69, 19)

In [16]:
df_12 = df.loc[(df['Exp'] == 'exp12')]
df_12 = df_12.sort_values(['Orchestrator','Benchmark','Requests']).reset_index(drop=True)
df_12['pct_change_latency'] = df_12.sort_values(['Orchestrator','Benchmark','Requests']).loc[:,['Latency']].pct_change()

In [17]:
index = df_12[df_12['pct_change_latency'] > 100].sort_values(['Orchestrator','Benchmark','Requests']).index
index_new = index.append(index-1)

In [18]:
df_req_after = df_12.loc[index,['Requests','Orchestrator','Benchmark','Exp']]
df_req_before = df_12.loc[index-1,['Requests','Orchestrator','Benchmark','Exp']]

In [20]:
df_req_after['Orchestrator']

5       k8s
10      k8s
18      k8s
36    nomad
52    nomad
58    swarm
61    swarm
67    swarm
Name: Orchestrator, dtype: category
Categories (3, object): ['k8s', 'nomad', 'swarm']

In [21]:
df_req_after.loc[:,['Orchestrator']].reset_index(drop=True).equals(df_req_before.loc[:,['Orchestrator']].reset_index(drop=True))

True

In [161]:
def return_val(i):
    return create_pct_change_latency(globals()['df_%s' % (12+i)])
    
def create_pct_change_latency(df):
    df = df.sort_values(['Orchestrator','Benchmark','Requests']).reset_index(drop=True)
    df['pct_change_latency'] = df.sort_values(['Orchestrator','Benchmark','Requests']).loc[:,['Latency']].pct_change()
    return df

def return_index(df):
    return df[(df['pct_change_latency'] > 100) & (df['Latency'] > 1000) ].sort_values(['Orchestrator','Benchmark','Requests']).index

def req_after_before(df, index):
    return df.loc[index,['Requests','Orchestrator','Benchmark','Exp']], df.loc[index-1,['Requests','Orchestrator','Benchmark','Exp']]

def return_breaking_point(df_req_after, df_req_before):
    return df_req_before.loc[:,'Requests'].astype(str).reset_index(drop=True)  + " - " + df_req_after.loc[:,'Requests'].astype(str).reset_index(drop=True) 

def update_df(df_new, df_old):
    # df_new = pd.DataFrame([])
    df_new['Orchestrator'] = df_req_after.loc[:,['Orchestrator']].reset_index(drop=True)
    df_new['Benchmark'] = df_req_after.loc[:,['Benchmark']].reset_index(drop=True)
    df_new['Exp'] = df_req_after.loc[:,['Exp']].reset_index(drop=True)
    return df_new

# def add_missing_values(df):
#     print(df[(df['pct_change_latency'] > 100)])
#     return(df)
    
    
    

In [158]:
df_12_break = pd.DataFrame([])
df_13_break = pd.DataFrame([])
df_14_break = pd.DataFrame([])
df_15_break = pd.DataFrame([])
df_bp_list = [df_12_break, df_13_break, df_14_break, df_15_break]
for i, curr_df_break in enumerate([df_12_break, df_13_break, df_14_break, df_15_break]):
    val = return_val(i)
    index = return_index(val)
    df_req_after, df_req_before = req_after_before(val, index)
    curr_df_break['breaking_point'] = return_breaking_point(df_req_after, df_req_before)
    curr_df_break = update_df(curr_df_break, val)
    # curr_df_break = add_missing_values(curr_df_break)
    
df_bp = pd.concat([df_12_break, df_13_break, df_14_break, df_15_break])
df_bp = df_bp.reset_index(drop=True)

In [160]:
df_bp

Unnamed: 0,breaking_point,Orchestrator,Benchmark,Exp
0,2500 - 3000,k8s,hr,exp12
1,500 - 1000,k8s,mm,exp12
2,2000 - 3000,k8s,sn,exp12
3,500 - 3000,nomad,mm,exp12
4,10000 - 15000,nomad,sn,exp12
5,2000 - 2500,swarm,hr,exp12
6,500 - 1000,swarm,mm,exp12
7,2000 - 3000,swarm,sn,exp12
8,4000 - 6000,k8s,hr,exp13
9,1000 - 2000,k8s,mm,exp13


In [112]:
df_bp = create_df_bp()

=  breaking_point Orchestrator Benchmark    Exp
0    2500 - 3000          k8s        hr  exp12
1     500 - 1000          k8s        mm  exp12
2    2000 - 3000          k8s        sn  exp12
3     500 - 3000        nomad        mm  exp12
4  10000 - 15000        nomad        sn  exp12
5    2000 - 2500        swarm        hr  exp12
6     500 - 1000        swarm        mm  exp12
7    2000 - 3000        swarm        sn  exp12
=  breaking_point Orchestrator Benchmark    Exp
0    4000 - 6000          k8s        hr  exp13
1    1000 - 2000          k8s        mm  exp13
2    3000 - 4000          k8s        sn  exp13
3    4000 - 6000        swarm        hr  exp13
4    1000 - 2000        swarm        mm  exp13
5    3000 - 4000        swarm        sn  exp13
=  breaking_point Orchestrator Benchmark    Exp
0    20000 - 500        swarm        hr  exp14
1    1000 - 2000        swarm        mm  exp14
2    3000 - 4000        swarm        sn  exp14
=  breaking_point Orchestrator Benchmark    Exp
0    2500

In [None]:
df_bp

Unnamed: 0,breaking_point,Orchestrator,Benchmark,Exp
0,2500 - 3000,k8s,hr,exp15
1,500 - 1000,k8s,mm,exp15
2,2000 - 3000,k8s,sn,exp15
3,500 - 3000,nomad,mm,exp15
4,10000 - 15000,nomad,sn,exp15
5,2000 - 2500,swarm,hr,exp15
6,500 - 1000,swarm,mm,exp15
7,2000 - 3000,swarm,sn,exp15
8,4000 - 6000,k8s,hr,exp15
9,1000 - 2000,k8s,mm,exp15


In [94]:
df_bp.shape

(25, 4)

In [40]:
df_bp.head()

In [341]:
df_bp.shape

(25, 4)

In [278]:
df_new['breaking_point'] = df_req_before.loc[:,'Requests'].astype(str).reset_index(drop=True)  + " - " + df_req_after.loc[:,'Requests'].astype(str).reset_index(drop=True) 



In [280]:
df_new.loc[:,['breaking_point','Orchestrator','Benchmark','Exp']].T

Unnamed: 0,0,1,2,3,4,5,6,7
breaking_point,2500 - 3000,500 - 1000,2000 - 3000,500 - 3000,10000 - 15000,2000 - 2500,500 - 1000,2000 - 3000
Orchestrator,k8s,k8s,k8s,nomad,nomad,swarm,swarm,swarm
Benchmark,hr,mm,sn,mm,sn,hr,mm,sn
Exp,exp12,exp12,exp12,exp12,exp12,exp12,exp12,exp12


In [229]:
df_req_before.loc[:,['Requests']]

Unnamed: 0,Requests
4,2500
9,500
17,2000
35,500
51,10000
57,2000
60,500
66,2000


In [226]:
df_req_after

Unnamed: 0,Requests,Orchestrator,Benchmark,Exp
5,3000,k8s,hr,exp12
10,1000,k8s,mm,exp12
18,3000,k8s,sn,exp12
36,3000,nomad,mm,exp12
52,15000,nomad,sn,exp12
58,2500,swarm,hr,exp12
61,1000,swarm,mm,exp12
67,3000,swarm,sn,exp12


In [211]:
index_new

Int64Index([5, 10, 18, 36, 52, 58, 61, 67, 4, 9, 17, 35, 51, 57, 60, 66], dtype='int64')

In [186]:
df_12[df_12['pct_change_latency'] > 100].shape

(8, 20)

In [None]:
df_new = pd.DataFrame()

In [208]:
df_12[df_12['pct_change_latency'] > 100].sort_values(['Orchestrator','Benchmark','Requests']).T

Unnamed: 0,5,10,18,36,52,58,61,67
Latency,17420.0,21860.0,16730.0,3410.0,2450.0,14970.0,21640.0,22400.0
Requests,3000,1000,3000,3000,15000,2500,1000,3000
Throughput,41852.0,7785.0,45482.0,84696.0,433454.0,40342.0,10356.0,40314.0
Measured_Throughput,28856,3896,29177,55282,287451,22088,7033,25971
Baseline,0,0,0,0,0,0,0,0
Orchestrator,k8s,k8s,k8s,nomad,nomad,swarm,swarm,swarm
Benchmark,hr,mm,sn,mm,sn,hr,mm,sn
Infinite,1,1,1,1,1,1,1,1
Exp,exp12,exp12,exp12,exp12,exp12,exp12,exp12,exp12
Availability,0,0,0,0,0,0,0,0


In [98]:
df_12_swarm_sn = df_12_swarm[df_12_swarm['Benchmark'] == 'sn']
df_12_swarm_mm = df_12_swarm[df_12_swarm['Benchmark'] == 'mm']
df_12_swarm_hr = df_12_swarm[df_12_swarm['Benchmark'] == 'hr']

In [107]:
df_df_12_swarm_sn.loc[:,['Latency']].pct_change()

Unnamed: 0,Latency
27,
49,-0.103746
76,0.083601
91,3322.442136
107,0.028571


In [103]:
df_12_swarm_sn.T.loc[['Latency'],:].apply(lambda: (current-previous)/previous)

TypeError: <lambda>() takes 0 positional arguments but 1 was given

In [99]:
df_12_swarm_mm

Unnamed: 0,Latency,Requests,Throughput,Measured_Throughput,Baseline,Orchestrator,Benchmark,Infinite,Exp,Availability,Horizontal,Vertical,Threads,Connections,Duration,Mean,Max,StdDev,ReqSec
20,3.98,500,14922.0,9876,0,swarm,mm,1,exp12,0,1,1,4,512,30,2.878,26.64,0.741,497.36
35,21640.0,1000,10356.0,7033,0,swarm,mm,1,exp12,0,1,1,4,512,30,12884.337,24788.992,4122.938,344.89
66,24970.0,2000,10920.0,7520,0,swarm,mm,1,exp12,0,1,1,4,512,30,16016.705,26968.064,4742.046,363.52
103,26610.0,3000,10587.0,7248,0,swarm,mm,1,exp12,0,1,1,4,512,30,17559.775,28229.632,5047.048,352.73


In [100]:
df_12_swarm_hr

Unnamed: 0,Latency,Requests,Throughput,Measured_Throughput,Baseline,Orchestrator,Benchmark,Infinite,Exp,Availability,Horizontal,Vertical,Threads,Connections,Duration,Mean,Max,StdDev,ReqSec
29,7.75,500,14815.0,9900,0,swarm,hr,1,exp12,0,1,1,4,512,30,2.571,11.296,1.642,493.3
38,6.39,1000,29634.0,19874,0,swarm,hr,1,exp12,0,1,1,4,512,30,2.083,13.208,1.316,987.74
52,6.07,1500,43994.0,29658,0,swarm,hr,1,exp12,0,1,1,4,512,30,1.982,9.904,1.247,1465.96
74,6.33,2000,58715.0,39649,0,swarm,hr,1,exp12,0,1,1,4,512,30,2.001,12.56,1.302,1957.01
84,14970.0,2500,40342.0,22088,0,swarm,hr,1,exp12,0,1,1,4,512,30,7689.57,16957.44,3511.712,1341.06
102,19760.0,3000,33760.0,22569,0,swarm,hr,1,exp12,0,1,1,4,512,30,12156.241,21659.648,3831.499,1122.31


# All Orch tables

In [None]:
df_all_orch['Exp'].cat.remove_unused_categories()