In [19]:
import warnings

import pandas as pd

# Suppress every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# Render floats in displayed frames with exactly two decimals.
pd.set_option('display.float_format', '{:.2f}'.format)

# Directory holding the input CSVs of the run being analysed; the merged
# metrics file is written back to the same place.
path = '../../../k6/results/ssrv/10000/'

# Per-container metric columns found in docker.csv.
features = [
    'cpu_percent',
    'mem_usage',
    'mem_limit',
    'mem_percent',
]

# Container names that are kept when the docker stats are pivoted.
services = [
    "monolith",
    "recommendations",
    "teasers",
    "homepage",
    "cdn",
    "discovery",
]

# Registry of intermediate frames, keyed by metric family.
dfs = dict()

In [20]:
def convert_to_MiB(value):
    """Convert a memory string with a binary unit suffix to a MiB number string.

    Recognised suffixes are ``GiB``, ``MiB``, ``KiB`` and plain ``B``; the
    result is the numeric part expressed in MiB, returned as a string so the
    caller can cast the whole column with ``.astype(float)``.

    Args:
        value: A string such as ``'1.5GiB'``, ``'24.89MiB'``, ``'512KiB'`` or
            ``'0B'``. Non-string values (e.g. NaN from a missing CSV cell) are
            returned unchanged instead of raising.

    Returns:
        The size in MiB as a string, or ``value`` unchanged when it is not a
        string or carries none of the recognised suffixes.

    Raises:
        ValueError: If the text before a recognised suffix is not a number.
    """
    if not isinstance(value, str):
        # Missing cells arrive as float NaN; `'GiB' in value` would raise.
        return value
    if 'GiB' in value:
        # Truncation to whole MiB is kept from the original implementation.
        return str(int(float(value.replace('GiB', '')) * 1024))
    if 'MiB' in value:
        return value.replace('MiB', '')
    # 'KiB' must be tested before the bare 'B' suffix, which it also contains.
    if 'KiB' in value:
        return str(float(value.replace('KiB', '')) / 1024)
    if 'B' in value:
        # Bytes -> MiB needs two factors of 1024 (one alone only yields KiB).
        # Kept fractional: truncating would turn every sub-MiB value into 0.
        return str(float(value.replace('B', '')) / (1024 * 1024))
    return value

# Column names applied to docker.csv; the file's first row is skipped and
# replaced by these names.
cols = ['timestamp','name','cpu_percent','mem_usage','mem_limit','mem_percent']
docker_df = pd.read_csv(f'{path}docker.csv', skiprows=1, names=cols, sep=',')

# --- SETTINGS ---
# NOTE: `min` / `max` shadow the Python builtins. The later binning cells read
# these exact names, so they are intentionally kept here.
min = int(docker_df['timestamp'].min())
max = int(docker_df['timestamp'].max())
interval = 5  # bin width in `timestamp` units (presumably seconds - TODO confirm)
# --- SETTINGS ---

# Normalise the memory columns to MiB and make every metric numeric.
docker_df['mem_usage'] = docker_df['mem_usage'].apply(convert_to_MiB).astype(float)
docker_df['mem_limit'] = docker_df['mem_limit'].apply(convert_to_MiB).astype(float)

docker_df['cpu_percent'] = docker_df['cpu_percent'].astype(float)
docker_df['mem_percent'] = docker_df['mem_percent'].astype(float)

# One row per timestamp, one column per (metric, service). pivot_table's
# default aggregation (mean) applies if a service has several rows for the
# same timestamp.
dfs['docker'] = pd.pivot_table(
    docker_df[docker_df['name'].isin(services)],
    index='timestamp',
    columns='name',
    values=['cpu_percent', 'mem_usage', 'mem_percent']
).reset_index()

# Flatten the (metric, service) column MultiIndex to '<service>_<metric>';
# 'timestamp' has an empty second level and keeps its plain name.
dfs['docker'].columns = [
    f'{col[1]}_{col[0]}' if col[1] else col[0]
    for col in dfs['docker'].columns
]

# --- Align bins with K6 ---
# Same edges and labels as the K6 cells so the frames can be merged on the
# bin label.
# NOTE(review): pd.cut is right-closed by default, so each bin is
# (edge, edge + interval] labelled with its left edge, and a sample exactly at
# `min` falls into no bin. Confirm this is the intended alignment.
dfs['docker']['binned_timestamp'] = pd.cut(
    dfs['docker']['timestamp'],
    bins=range(min, max + interval, interval),
    labels=range(min, max, interval)
)

# Keep the first non-null value of each column per bin. `observed=False` is
# passed explicitly: the implicit default for categorical groupers is
# deprecated (and the resulting FutureWarning is hidden by the global warning
# filter); with observed=True empty bins would silently lose their row.
dfs['docker'] = (
    dfs['docker']
    .groupby('binned_timestamp', observed=False)
    .first()
    .reset_index()
)

# Replace the raw sample timestamp with the bin label.
dfs['docker']['timestamp'] = dfs['docker']['binned_timestamp']
dfs['docker'] = dfs['docker'].drop('binned_timestamp', axis=1)

dfs['docker']

Unnamed: 0,timestamp,cdn_cpu_percent,discovery_cpu_percent,homepage_cpu_percent,monolith_cpu_percent,recommendations_cpu_percent,teasers_cpu_percent,cdn_mem_percent,discovery_mem_percent,homepage_mem_percent,monolith_mem_percent,recommendations_mem_percent,teasers_mem_percent,cdn_mem_usage,discovery_mem_usage,homepage_mem_usage,monolith_mem_usage,recommendations_mem_usage,teasers_mem_usage
0,1735477518,0.00,4.58,85.44,30.20,2.05,2.16,0.32,0.47,2.78,5.16,0.56,0.58,24.89,36.82,218.20,404.60,43.51,45.54
1,1735477523,0.00,3.99,83.46,59.51,0.00,0.00,0.32,0.55,2.81,5.87,0.53,0.57,24.89,43.05,220.20,459.80,41.78,44.32
2,1735477528,0.00,3.77,78.37,48.39,0.00,0.00,0.32,0.68,2.85,5.91,0.53,0.57,24.89,53.64,223.30,463.00,41.78,44.32
3,1735477533,0.00,4.05,79.52,24.82,0.00,0.00,0.32,0.78,2.85,6.37,0.53,0.57,24.80,60.86,223.40,499.10,41.77,44.32
4,1735477538,0.00,3.59,78.96,30.36,0.00,0.00,0.32,0.81,2.87,6.51,0.53,0.57,24.80,63.20,225.10,510.00,41.77,44.32
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
247,1735478753,0.00,3.73,74.07,11.02,0.00,0.00,0.32,0.85,3.21,36.37,0.53,0.56,24.82,66.96,251.70,2850.00,41.91,44.14
248,1735478758,0.00,3.17,73.82,11.11,0.00,0.00,0.32,0.85,3.23,36.37,0.53,0.56,24.82,66.74,252.90,2850.00,41.91,44.14
249,1735478763,0.00,3.05,74.05,11.11,0.00,0.00,0.32,0.85,3.24,36.37,0.53,0.56,24.82,66.82,253.90,2849.00,41.91,44.14
250,1735478768,0.01,3.18,74.11,11.60,0.12,0.10,0.32,0.85,3.25,36.37,0.53,0.56,24.83,66.80,254.50,2849.00,41.91,44.14


In [21]:
# Column names applied to raw_k6.csv, in file order; the file's own first row
# is skipped in favour of these.
cols = [
    'metric_name', 'timestamp', 'metric_value', 'check', 'error',
    'error_code', 'expected_response', 'group', 'method', 'name',
    'proto', 'scenario', 'service', 'status', 'subproto',
    'tls_version', 'url', 'extra_tags', 'metadata',
]
k6_csv = f'{path}raw_k6.csv'
df = pd.read_csv(k6_csv, names=cols, skiprows=1, sep=',')

# IMPUTE MISSING
# Rows without a status get the sentinel -1 so the column can be cast to int.
status_filled = df["status"].fillna(-1)
df["status"] = status_filled.astype(int)

In [22]:
# Request-duration samples only.
duration_df = df[df['metric_name'] == 'http_req_duration'].copy()

# Assign each sample to an `interval`-wide bin, labelled with the bin's left
# edge (same edges/labels as the docker frame so both merge on 'timestamp').
duration_bins = pd.cut(
    duration_df['timestamp'],
    bins=range(min, max + interval, interval),
    labels=range(min, max, interval),
)

# Per-bin summary statistics. `observed=False` is passed explicitly: the
# implicit default for categorical groupers is deprecated, and with
# observed=True bins without any request would silently lose their row
# instead of appearing with count 0.
dfs['duration'] = (
    duration_df
    .groupby(duration_bins, observed=False)['metric_value']
    .agg(['mean', 'min', 'max', 'count'])
    .reset_index()
)

dfs['duration'].columns = ['timestamp', 'duration_mean', 'duration_min', 'duration_max', 'duration_count']

In [23]:
# Request-count samples only.
throughput_df = df[df['metric_name'] == 'http_reqs'].copy()

# Stage 1: total requests per 1-unit bin, labelled with the bin's left edge.
tp_per_sec_bins = pd.cut(
    throughput_df['timestamp'],
    bins=range(min, max + 1, 1),
    labels=range(min, max, 1),
)

# `observed=False` is passed explicitly (the implicit default for categorical
# groupers is deprecated): bins without traffic must stay in the frame with a
# sum of 0, otherwise the per-interval min/mean below would ignore idle bins.
tp_per_sec_df = (
    throughput_df
    .groupby(tp_per_sec_bins, observed=False)['metric_value']
    .agg(['sum'])
    .reset_index()
)
tp_per_sec_df.columns = ['timestamp', 'nrequests']

# Stage 2: summarise the per-bin totals over `interval`-wide bins that share
# edges and labels with the other frames.
throughput_bins = pd.cut(
    tp_per_sec_df['timestamp'],
    bins=range(min, max + interval, interval),
    labels=range(min, max, interval),
)
dfs['throughput'] = (
    tp_per_sec_df
    .groupby(throughput_bins, observed=False)['nrequests']
    .agg(['mean', 'min', 'max', 'sum'])
    .reset_index()
)
# 'throughput_count' holds the 'sum' aggregate, i.e. total requests per interval.
dfs['throughput'].columns = ['timestamp', 'throughput_mean', 'throughput_min', 'throughput_max', 'throughput_count']
dfs['throughput']


Unnamed: 0,timestamp,throughput_mean,throughput_min,throughput_max,throughput_count
0,1735477518,166.60,166.00,167.00,833.00
1,1735477523,166.60,166.00,167.00,833.00
2,1735477528,166.80,166.00,167.00,834.00
3,1735477533,166.60,166.00,167.00,833.00
4,1735477538,166.60,166.00,167.00,833.00
...,...,...,...,...,...
247,1735478753,166.80,166.00,167.00,834.00
248,1735478758,166.60,166.00,167.00,833.00
249,1735478763,166.60,166.00,167.00,833.00
250,1735478768,166.80,166.00,167.00,834.00


In [24]:
# Request-failure samples only (the frame is only read, so no copy is needed).
error_df = df[df['metric_name'] == 'http_req_failed']

# Same `interval`-wide bins and left-edge labels as the other frames.
error_bins = pd.cut(
    error_df['timestamp'],
    bins=range(min, max + interval, interval),
    labels=range(min, max, interval),
)

# Per-bin mean and sum of the failure metric. `observed=False` is passed
# explicitly (the implicit default for categorical groupers is deprecated) so
# bins without samples keep their row.
dfs['errors'] = (
    error_df
    .groupby(error_bins, observed=False)['metric_value']
    .agg(['mean', 'sum'])
    .reset_index()
)
dfs['errors'].columns = ['timestamp', 'err_mean', 'err_count']

# fillna returns a new Series; assign it back, otherwise the call is a no-op.
dfs['errors']['err_count'] = dfs['errors']['err_count'].fillna(0)
dfs['errors']

Unnamed: 0,timestamp,err_mean,err_count
0,1735477518,0.00,0.00
1,1735477523,0.00,0.00
2,1735477528,0.00,0.00
3,1735477533,0.00,0.00
4,1735477538,0.00,0.00
...,...,...,...
247,1735478753,0.00,0.00
248,1735478758,0.00,0.00
249,1735478763,0.00,0.00
250,1735478768,0.00,0.00


In [25]:
# Join every K6-derived frame onto the docker frame by bin label. The outer
# join keeps bins that exist in only some of the frames.
merged_df = dfs['docker'].copy()
for name in ['duration', 'throughput', 'errors']:
    merged_df = merged_df.merge(dfs[name], on='timestamp', how='outer')

# Re-base the bin labels so the first bin is 0. The offset is computed without
# rebinding `min`: the binning cells read `min` as the bin start, so
# overwriting it here would silently change their result on a re-run.
timestamps = merged_df['timestamp'].astype(int)
merged_df['timestamp'] = timestamps - timestamps.min()

In [26]:
# Write the merged per-interval metrics into the run's result directory,
# omitting the row index.
metrics_csv = f'{path}metrics.csv'
merged_df.to_csv(metrics_csv, index=False)