In [29]:
import pandas as pd;
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest
from matplotlib.dates import DateFormatter

import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.float_format', lambda x: '%.2f' % x)
path = '../../../k6/results/run-2/ssrh/1000/'

features = ['cpu_percent','mem_usage','mem_limit','mem_percent']
services = ["monolith", "recommendations", "teasers", "cdn", "discovery"]
interval = 5


In [30]:
cols = ['metric_name','timestamp','metric_value','check','error','error_code','expected_response','group','method','name','proto','scenario','service','status','subproto','tls_version','url','extra_tags','metadata']
df = pd.read_csv(f'{path}results.csv', skiprows=1, names=cols, sep=',')

# REMOVE WARMUP AND COOLDOWN
start_time = df['timestamp'].min() + 60    
end_time = df['timestamp'].max() - 30     
df = df[(df['timestamp'] >= start_time) & (df['timestamp'] <= end_time)]

# IMPUTE MISSING
df["status"] = df["status"].fillna(-1).astype(int)

dfs = {}
min = int(df['timestamp'].min())
max = int(df['timestamp'].max())

In [31]:
duration_df = df[(df['metric_name'] == 'http_req_duration') & (df['status'] == 200)].copy()
duration_bins = pd.cut(duration_df['timestamp'], 
    bins=range(min, max + interval, interval),
    labels=range(min, max, interval))

dfs['dur_homepage'] = duration_df.groupby(duration_bins)['metric_value'].agg(['mean', 'min', 'max', 'count']).reset_index()
dfs['dur_homepage'].columns = ['timestamp', 'dur_mean', 'dur_min', 'dur_max', 'dur_count']

In [32]:
dur_redirect_df = df[(df['metric_name'] == 'http_req_duration') & (df['status'] == 302)].copy()
dur_redirect_bins = pd.cut(dur_redirect_df['timestamp'], 
              bins=range(min, max + interval, interval),
              labels=range(min, max, interval))

dfs['dur_redirect'] = dur_redirect_df.groupby(dur_redirect_bins)['metric_value'].agg(['mean', 'min', 'max', 'count']).reset_index()
dfs['dur_redirect'].columns = ['timestamp', 'dur_redirect_mean', 'dur_redirect_min', 'dur_redirect_max', 'dur_redirect_count']

In [33]:
throughput_df = df[(df['metric_name'] == 'http_reqs')].copy()
tp_per_sec_bins = pd.cut(throughput_df['timestamp'], 
              bins=range(min, max + interval + 1, interval),
              labels=range(min, max, interval))

tp_per_sec_df = throughput_df.groupby(tp_per_sec_bins)['metric_value'].agg(['sum']).reset_index()
tp_per_sec_df.columns = ['timestamp', 'nrequests']

throughput_bins = pd.cut(tp_per_sec_df['timestamp'], 
              bins=range(min, max + interval + 1, interval),
              labels=range(min, max, interval))
dfs['throughput'] = tp_per_sec_df.groupby(throughput_bins)['nrequests'].agg(['mean', 'min', 'max', 'sum']).reset_index()
dfs['throughput'].columns = ['timestamp', 'throughput_mean', 'throughput_min', 'throughput_max', 'throughput_count']
dfs['throughput'].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 115 entries, 0 to 114
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype   
---  ------            --------------  -----   
 0   timestamp         115 non-null    category
 1   throughput_mean   114 non-null    float64 
 2   throughput_min    114 non-null    float64 
 3   throughput_max    114 non-null    float64 
 4   throughput_count  115 non-null    float64 
dtypes: category(1), float64(4)
memory usage: 4.0 KB


In [34]:
error_df = df[df['metric_name'] == 'http_req_failed']
error_bins = pd.cut(error_df['timestamp'], 
              bins=range(min, max + interval + 1, interval),
              labels=range(min, max, interval))
dfs['errors'] = error_df.groupby(error_bins)['metric_value'].agg(['mean', 'sum']).reset_index()
dfs['errors'].columns = ['timestamp', 'err_mean', 'err_count']
dfs['errors']['err_count'].fillna(0)

0     0.00
1     0.00
2     0.00
3     0.00
4     0.00
      ... 
110   0.00
111   0.00
112   0.00
113   0.00
114   0.00
Name: err_count, Length: 115, dtype: float64

In [35]:
merged_df = dfs['dur_homepage'].copy()
for name in ['dur_redirect', 'throughput', 'errors']:
    if name != 'dur_homepage':
        merged_df = merged_df.merge(dfs[name], on='timestamp', how='outer')

merged_df = merged_df.dropna()
merged_df

Unnamed: 0,timestamp,dur_mean,dur_min,dur_max,dur_count,dur_redirect_mean,dur_redirect_min,dur_redirect_max,dur_redirect_count,throughput_mean,throughput_min,throughput_max,throughput_count,err_mean,err_count
0,1735110707,0.65,0.49,1.61,167,7.52,6.02,10.16,167,332.00,332.00,332.00,332.00,0.00,0.00
1,1735110712,0.65,0.49,1.92,166,7.57,4.99,11.89,166,334.00,334.00,334.00,334.00,0.00,0.00
2,1735110717,0.67,0.50,2.89,167,7.68,6.18,13.28,167,334.00,334.00,334.00,334.00,0.00,0.00
3,1735110722,0.66,0.51,2.26,167,7.60,6.40,12.73,167,333.00,333.00,333.00,333.00,0.00,0.00
4,1735110727,0.65,0.51,1.80,166,7.63,5.11,13.17,167,333.00,333.00,333.00,333.00,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109,1735111252,0.65,0.50,2.10,166,7.66,6.02,11.10,166,334.00,334.00,334.00,334.00,0.00,0.00
110,1735111257,0.65,0.49,3.73,167,7.75,5.99,25.78,167,334.00,334.00,334.00,334.00,0.00,0.00
111,1735111262,0.62,0.49,1.53,167,7.49,5.91,14.45,167,332.00,332.00,332.00,332.00,0.00,0.00
112,1735111267,0.67,0.48,2.28,166,7.76,6.21,12.64,166,334.00,334.00,334.00,334.00,0.00,0.00


In [36]:
merged_df.to_csv(f'{path}metrics.csv', index=False)