In [9]:
import pandas as pd;

import warnings
# Suppress every warning for the remainder of the session.
warnings.filterwarnings('ignore')


def _two_decimals(x):
    """Render a float with exactly two decimal places for DataFrame display."""
    return '%.2f' % x


pd.set_option('display.float_format', _two_decimals)

# Directory the notebook reads docker.csv / raw_k6.csv from and writes metrics.csv to.
path = '../../../k6/results/ssrh/5000/'

features = [
    'cpu_percent',
    'mem_usage',
    'mem_limit',
    'mem_percent',
]
services = [
    "monolith",
    "recommendations",
    "teasers",
    "cdn",
    "discovery",
]

# Registry of intermediate frames, keyed by short name; populated by the cells below.
dfs = dict()

In [10]:
def convert_to_MiB(value):
    """Convert a size string with a binary unit suffix to a MiB figure.

    Recognised suffixes are ``GiB``, ``MiB``, ``KiB`` and plain ``B``.
    The result is returned as a string (callers cast it with
    ``.astype(float)``).

    Args:
        value: Size such as ``'1.5GiB'``, ``'27.42MiB'``, ``'512KiB'`` or
            ``'2048B'``. Non-string inputs (e.g. NaN from a missing CSV
            cell) are passed through untouched.

    Returns:
        The size in MiB as a string, or ``value`` unchanged when it is not a
        string or carries no recognised suffix.

    Raises:
        ValueError: If the text before a recognised ``GiB``/``KiB``/``B``
            suffix is not a valid number.
    """
    # Missing cells arrive as float NaN; `'GiB' in nan` would raise TypeError.
    if not isinstance(value, str):
        return value
    if 'GiB' in value:
        # Kept truncated to whole MiB so existing GiB results do not change.
        return str(int(float(value.replace('GiB', '')) * 1024))
    if 'MiB' in value:
        return value.replace('MiB', '')
    # KiB must be tested before the bare 'B' suffix, which it also contains.
    if 'KiB' in value:
        return str(float(value.replace('KiB', '')) / 1024)
    if 'B' in value:
        # Bytes -> MiB needs two factors of 1024 (a single one yields KiB).
        return str(float(value.replace('B', '')) / (1024 * 1024))
    return value

# Column names applied to docker.csv; the file's first line is skipped on load
# (presumably its own header row — confirm against the file).
cols = ['timestamp', 'name', 'cpu_percent', 'mem_usage', 'mem_limit', 'mem_percent']
docker_df = pd.read_csv(f'{path}docker.csv', names=cols, skiprows=1, sep=',')

# --- SETTINGS ---
# NOTE(review): `min` and `max` shadow the Python builtins. Later cells read
# these exact names for their bin edges, so they are kept as they are here.
docker_timestamps = docker_df['timestamp']
min = int(docker_timestamps.min())
max = int(docker_timestamps.max())
# Bin width, expressed in the same unit as the `timestamp` column.
interval = 5
# --- SETTINGS ---

# Memory columns carry a unit suffix: normalise to MiB first, then cast.
for mem_col in ('mem_usage', 'mem_limit'):
    docker_df[mem_col] = docker_df[mem_col].apply(convert_to_MiB).astype(float)

# Percentage columns only need the numeric cast.
for pct_col in ('cpu_percent', 'mem_percent'):
    docker_df[pct_col] = docker_df[pct_col].astype(float)

# Keep only the listed containers, then reshape from long to wide: one row per
# timestamp and one column per (metric, container) pair. Duplicate
# (timestamp, name) pairs are combined by pivot_table's default aggregation (mean).
tracked_rows = docker_df['name'].isin(['monolith','teasers','recommendations','cdn','discovery'])
docker_wide = docker_df[tracked_rows].pivot_table(
    index='timestamp',
    columns='name',
    values=['cpu_percent', 'mem_usage', 'mem_percent'],
)
dfs['docker'] = docker_wide.reset_index()

# Flatten the two-level header to '<container>_<metric>'. The former index
# column has an empty second level and therefore keeps its plain name.
dfs['docker'].columns = [
    metric if not container else f'{container}_{metric}'
    for metric, container in dfs['docker'].columns
]

# --- Align bins with K6 ---
# Each bin is `interval` wide and is labelled with its left edge.
# NOTE(review): pd.cut defaults to right-closed bins without include_lowest,
# so a sample stamped exactly `min` lands in no bin — confirm this is intended.
docker_edges = range(min, max + interval, interval)
docker_labels = range(min, max, interval)

dfs['docker']['binned_timestamp'] = pd.cut(
    dfs['docker']['timestamp'],
    bins=docker_edges,
    labels=docker_labels,
)

# Collapse to one row per bin, taking the first non-null value of each column.
docker_binned = dfs['docker'].groupby('binned_timestamp').first().reset_index()

# The bin label replaces the raw sample timestamp; the helper column is dropped.
docker_binned['timestamp'] = docker_binned['binned_timestamp']
dfs['docker'] = docker_binned.drop(columns='binned_timestamp')

dfs['docker']

Unnamed: 0,timestamp,cdn_cpu_percent,discovery_cpu_percent,monolith_cpu_percent,recommendations_cpu_percent,teasers_cpu_percent,cdn_mem_percent,discovery_mem_percent,monolith_mem_percent,recommendations_mem_percent,teasers_mem_percent,cdn_mem_usage,discovery_mem_usage,monolith_mem_usage,recommendations_mem_usage,teasers_mem_usage
0,1735294733,0.00,3.00,208.89,36.16,31.63,0.35,0.47,26.35,2.06,1.88,27.42,36.49,2064.00,161.70,147.30
1,1735294738,0.00,4.71,166.85,39.11,30.80,0.35,0.53,36.07,2.65,2.10,27.42,41.18,2827.00,207.90,164.40
2,1735294743,0.00,2.13,144.83,25.67,25.36,0.35,0.53,36.26,2.68,2.19,27.42,41.59,2841.00,210.40,172.00
3,1735294748,0.01,3.80,128.81,28.06,26.16,0.35,0.62,36.33,3.07,2.13,27.42,48.73,2847.00,240.40,166.70
4,1735294753,0.00,2.66,161.99,27.58,22.67,0.35,0.63,36.82,2.73,1.99,27.42,49.25,2885.00,214.10,156.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
247,1735295968,0.00,2.36,114.33,22.11,19.36,0.27,0.85,65.04,2.00,1.73,21.36,66.30,5097.00,157.10,135.20
248,1735295973,0.00,1.84,116.14,29.28,26.33,0.27,0.84,65.13,1.92,1.72,21.36,66.11,5104.00,150.40,135.20
249,1735295978,0.00,1.74,115.27,25.80,20.97,0.27,0.84,65.28,1.92,1.73,21.36,66.17,5115.00,150.60,135.40
250,1735295983,0.00,1.66,115.09,20.10,23.18,0.28,0.87,65.20,1.99,1.73,22.21,67.96,5109.00,155.90,135.40


In [11]:
# Column names applied to raw_k6.csv; the file's first line is skipped on load.
cols = [
    'metric_name', 'timestamp', 'metric_value',
    'check', 'error', 'error_code', 'expected_response',
    'group', 'method', 'name', 'proto', 'scenario', 'service',
    'status', 'subproto', 'tls_version', 'url',
    'extra_tags', 'metadata',
]
df = pd.read_csv(f'{path}raw_k6.csv', names=cols, skiprows=1, sep=',')

# IMPUTE MISSING
# Rows without a status get the sentinel -1 so the column can be cast to int.
status_filled = df["status"].fillna(-1)
df["status"] = status_filled.astype(int)

In [12]:
# Keep only the request-duration samples.
duration_df = df[(df['metric_name'] == 'http_req_duration')].copy()

# Assign every sample to an `interval`-wide bin labelled with its left edge,
# using the same edges as the docker frame so the two can be merged later.
duration_bins = pd.cut(duration_df['timestamp'], 
    bins=range(min, max + interval, interval),
    labels=range(min, max, interval))

# reset_index() must keep the bin label as a column: it becomes 'timestamp'
# below. With drop=True the frame has only the four aggregate columns and the
# five-name assignment fails with a length-mismatch ValueError.
dfs['duration'] = duration_df.groupby(duration_bins)['metric_value'].agg(
    ['mean', 'min', 'max', 'count']
).reset_index()
dfs['duration'].columns = ['timestamp', 'duration_mean', 'duration_min', 'duration_max', 'duration_count']
dfs['duration']

Unnamed: 0,timestamp,duration_mean,duration_min,duration_max,duration_count
0,1735294733,26.58,7.79,572.70,436
1,1735294738,8.89,6.64,31.56,417
2,1735294743,8.35,6.34,24.83,417
3,1735294748,7.44,5.36,16.81,416
4,1735294753,7.37,5.23,27.46,417
...,...,...,...,...,...
247,1735295968,7.24,5.52,15.72,417
248,1735295973,6.88,4.52,15.41,416
249,1735295978,7.00,5.39,61.39,417
250,1735295983,6.71,5.51,14.40,417


In [13]:
# Keep only the request-counter samples.
throughput_df = df[df['metric_name'].eq('http_reqs')].copy()

# Stage 1: total requests per one-unit timestamp bin, labelled with the left edge.
second_edges = range(min, max + 1, 1)
second_labels = range(min, max, 1)
tp_per_sec_bins = pd.cut(
    throughput_df['timestamp'],
    bins=second_edges,
    labels=second_labels,
)
tp_per_sec_df = (
    throughput_df.groupby(tp_per_sec_bins)['metric_value']
    .agg(['sum'])
    .reset_index()
)
tp_per_sec_df.columns = ['timestamp', 'nrequests']

# Stage 2: summarise those totals over `interval`-wide bins, using the same
# edges as the other frames. Note that 'throughput_count' holds the *sum* of
# requests in the bin.
window_edges = range(min, max + interval, interval)
window_labels = range(min, max, interval)
throughput_bins = pd.cut(
    tp_per_sec_df['timestamp'],
    bins=window_edges,
    labels=window_labels,
)
dfs['throughput'] = (
    tp_per_sec_df.groupby(throughput_bins)['nrequests']
    .agg(['mean', 'min', 'max', 'sum'])
    .reset_index()
)
dfs['throughput'].columns = ['timestamp', 'throughput_mean', 'throughput_min', 'throughput_max', 'throughput_count']
dfs['throughput']


Unnamed: 0,timestamp,throughput_mean,throughput_min,throughput_max,throughput_count
0,1735294733,83.40,83.00,84.00,417.00
1,1735294738,83.20,83.00,84.00,416.00
2,1735294743,83.40,83.00,84.00,417.00
3,1735294748,83.40,83.00,84.00,417.00
4,1735294753,83.20,83.00,84.00,416.00
...,...,...,...,...,...
247,1735295968,83.40,83.00,84.00,417.00
248,1735295973,83.40,83.00,84.00,417.00
249,1735295978,83.20,83.00,84.00,416.00
250,1735295983,83.40,83.00,84.00,417.00


In [14]:
# Keep only the request-failure samples.
error_df = df[df['metric_name'] == 'http_req_failed']

# Same `interval`-wide, left-edge-labelled bins as the other frames.
error_bins = pd.cut(error_df['timestamp'], 
              bins=range(min, max + interval, interval),
              labels=range(min, max, interval))

# Per bin: mean of the failure samples and their sum.
dfs['errors'] = error_df.groupby(error_bins)['metric_value'].agg(['mean', 'sum']).reset_index()
dfs['errors'].columns = ['timestamp', 'err_mean', 'err_count']

# fillna returns a new Series; assign it back, otherwise the call has no effect.
dfs['errors']['err_count'] = dfs['errors']['err_count'].fillna(0)
dfs['errors']

Unnamed: 0,timestamp,err_mean,err_count
0,1735294733,0.00,0.00
1,1735294738,0.00,0.00
2,1735294743,0.00,0.00
3,1735294748,0.00,0.00
4,1735294753,0.00,0.00
...,...,...,...
247,1735295968,0.00,0.00
248,1735295973,0.00,0.00
249,1735295978,0.00,0.00
250,1735295983,0.00,0.00


In [15]:
# Start from a copy of the docker frame and outer-join each k6 frame onto it by
# bin label, so bins present in only one source are still kept.
merged_df = dfs['docker'].copy()
for name in ('duration', 'throughput', 'errors'):
    merged_df = pd.merge(merged_df, dfs[name], how='outer', on='timestamp')

# Rebase timestamps so the earliest bin becomes 0.
# NOTE(review): this rebinds the notebook-wide `min` that earlier cells use for
# their bin edges (and which shadows the builtin).
min = merged_df['timestamp'].min()
merged_df['timestamp'] = merged_df['timestamp'].astype(int).sub(min)

In [16]:
# Write the merged table into the results directory, without the row index.
metrics_csv = f'{path}metrics.csv'
merged_df.to_csv(metrics_csv, index=False)