In [40]:
import pandas as pd;

import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.float_format', lambda x: '%.2f' % x)
path = '../../../k6/results/monolith/10000/'

features = ['cpu_percent','mem_usage','mem_limit','mem_percent']
services = ["monolith", "recommendations", "teasers", "cdn", "discovery"]

dfs = {}

In [41]:
def convert_to_MiB(value):
    if 'GiB' in value:
        return str(int(float(value.replace('GiB', '')) * 1024))
    if 'MiB' in value:
        return value.replace('MiB', '')
    if('B') in value:
        return str(int(float(value.replace('B', '')) / 1024))
    return value

cols = ['timestamp','name','cpu_percent','mem_usage','mem_limit','mem_percent']
docker_df = pd.read_csv(f'{path}docker.csv', skiprows=1, names=cols, sep=',')

# --- SETTINGS ---
min = int(docker_df['timestamp'].min())
max = int(docker_df['timestamp'].max())
interval = 5
# --- SETTINGS ---

docker_df['mem_usage'] = docker_df['mem_usage'].apply(convert_to_MiB).astype(float)
docker_df['mem_limit'] = docker_df['mem_limit'].apply(convert_to_MiB).astype(float)

docker_df['cpu_percent'] = docker_df['cpu_percent'].astype(float)
docker_df['mem_percent'] = docker_df['mem_percent'].astype(float)

dfs['docker'] = pd.pivot_table(
    docker_df[docker_df['name'].isin(['monolith','teasers','recommendations','cdn','discovery'])],
    index='timestamp',
    columns='name',
    values=['cpu_percent', 'mem_usage', 'mem_percent']
).reset_index()

dfs['docker'].columns = [
    f'{col[1]}_{col[0]}' if col[1] else col[0] 
    for col in dfs['docker'].columns
]

# --- Align bins with K6 ---
dfs['docker']['binned_timestamp'] = pd.cut(
    dfs['docker']['timestamp'], 
    bins=range(min, max + interval, interval),
    labels=range(min, max, interval)
)

dfs['docker'] = dfs['docker'].groupby('binned_timestamp').first().reset_index()

dfs['docker']['timestamp'] = dfs['docker']['binned_timestamp']
dfs['docker'] = dfs['docker'].drop('binned_timestamp', axis=1)

dfs['docker']

Unnamed: 0,timestamp,cdn_cpu_percent,monolith_cpu_percent,cdn_mem_percent,monolith_mem_percent,cdn_mem_usage,monolith_mem_usage
0,1735383696,0.00,127.45,0.83,23.15,64.88,1814.00
1,1735383701,0.00,138.20,0.83,33.83,64.88,2651.00
2,1735383706,0.00,116.54,0.83,35.46,64.88,2779.00
3,1735383711,0.00,132.62,0.83,35.72,64.88,2799.00
4,1735383716,0.00,109.37,0.83,35.88,64.88,2811.00
...,...,...,...,...,...,...,...
247,1735384931,0.00,116.51,0.59,74.42,46.52,5831.00
248,1735384936,0.00,276.52,0.59,74.49,46.52,5838.00
249,1735384941,0.00,111.11,0.59,74.91,46.52,5871.00
250,1735384946,0.00,121.55,0.59,75.47,46.52,5914.00


In [42]:
cols = ['metric_name','timestamp','metric_value','check','error','error_code','expected_response','group','method','name','proto','scenario','service','status','subproto','tls_version','url','extra_tags','metadata']
df = pd.read_csv(f'{path}raw_k6.csv', skiprows=1, names=cols, sep=',')

# IMPUTE MISSING
df["status"] = df["status"].fillna(-1).astype(int)

In [43]:
duration_df = df[(df['metric_name'] == 'http_req_duration')].copy()
duration_bins = pd.cut(duration_df['timestamp'], 
    bins=range(min, max + interval, interval),
    labels=range(min, max, interval))

dfs['duration'] = duration_df.groupby(duration_bins)['metric_value'].agg(
    ['mean', 'min', 'max', 'count']
).reset_index()
dfs['duration']
# dfs['duration'].columns = ['timestamp', 'duration_mean', 'duration_min', 'duration_max', 'duration_count']
# dfs['duration']

Unnamed: 0,timestamp,mean,min,max,count
0,1735383696,8.07,0.91,347.69,816
1,1735383701,1.09,0.78,10.07,833
2,1735383706,1.02,0.69,17.32,833
3,1735383711,0.88,0.66,11.32,834
4,1735383716,0.90,0.66,13.56,833
...,...,...,...,...,...
247,1735384931,0.74,0.59,9.78,833
248,1735384936,0.81,0.57,10.44,834
249,1735384941,0.95,0.63,19.73,833
250,1735384946,0.85,0.59,14.28,833


In [44]:
throughput_df = df[(df['metric_name'] == 'http_reqs')].copy()
tp_per_sec_bins = pd.cut(throughput_df['timestamp'], 
              bins=range(min, max + 1, 1),
              labels=range(min, max, 1))

tp_per_sec_df = throughput_df.groupby(tp_per_sec_bins)['metric_value'].agg(['sum']).reset_index()
tp_per_sec_df.columns = ['timestamp', 'nrequests']

throughput_bins = pd.cut(tp_per_sec_df['timestamp'], 
              bins=range(min, max + interval, interval),
              labels=range(min, max, interval))
dfs['throughput'] = tp_per_sec_df.groupby(throughput_bins)['nrequests'].agg(['mean', 'min', 'max', 'sum']).reset_index()
dfs['throughput'].columns = ['timestamp', 'throughput_mean', 'throughput_min', 'throughput_max', 'throughput_count']
dfs['throughput']


Unnamed: 0,timestamp,throughput_mean,throughput_min,throughput_max,throughput_count
0,1735383696,166.60,166.00,167.00,833.00
1,1735383701,166.80,166.00,167.00,834.00
2,1735383706,166.60,166.00,167.00,833.00
3,1735383711,166.60,166.00,167.00,833.00
4,1735383716,166.80,166.00,167.00,834.00
...,...,...,...,...,...
247,1735384931,166.60,166.00,167.00,833.00
248,1735384936,166.60,166.00,167.00,833.00
249,1735384941,166.80,166.00,167.00,834.00
250,1735384946,166.60,166.00,167.00,833.00


In [45]:
error_df = df[df['metric_name'] == 'http_req_failed']
error_bins = pd.cut(error_df['timestamp'], 
              bins=range(min, max + interval, interval),
              labels=range(min, max, interval))
dfs['errors'] = error_df.groupby(error_bins)['metric_value'].agg(['mean', 'sum']).reset_index()
dfs['errors'].columns = ['timestamp', 'err_mean', 'err_count']
dfs['errors']['err_count'].fillna(0)
dfs['errors']

Unnamed: 0,timestamp,err_mean,err_count
0,1735383696,0.00,0.00
1,1735383701,0.00,0.00
2,1735383706,0.00,0.00
3,1735383711,0.00,0.00
4,1735383716,0.00,0.00
...,...,...,...
247,1735384931,0.00,0.00
248,1735384936,0.00,0.00
249,1735384941,0.00,0.00
250,1735384946,0.00,0.00


In [46]:
merged_df = dfs['docker'].copy()
for name in ['duration', 'throughput', 'errors']:
    merged_df = merged_df.merge(dfs[name], on='timestamp', how='outer')

min = merged_df['timestamp'].min()
merged_df['timestamp'] = merged_df['timestamp'].astype(int) - min

In [47]:
merged_df.to_csv(f'{path}metrics.csv', index=False)