In [20]:
import pandas as pd;

import warnings
warnings.filterwarnings('ignore')

# --- Notebook configuration ---
# Render every float in displayed frames with exactly two decimals.
pd.set_option('display.float_format', '{:.2f}'.format)

# Directory prefix (with trailing slash) that the later cells prepend to
# 'docker.csv', 'raw_k6.csv' and 'metrics.csv'.
path = '../../../k6/results/csr/10000/'

# Names of the per-container metric columns.
features = [
    'cpu_percent',
    'mem_usage',
    'mem_limit',
    'mem_percent',
]

# Names of the containers of interest.
services = [
    "monolith",
    "recommendations",
    "teasers",
    "cdn",
    "discovery",
]

# Registry of intermediate DataFrames, keyed by a short name; starts empty
# every time this cell is run.
dfs = {}

In [21]:
def convert_to_MiB(value):
    """Convert a memory size string with a binary unit suffix to MiB.

    Recognised suffixes are ``TiB``, ``GiB``, ``MiB``, ``KiB`` and plain
    ``B``. The result is returned as a string (callers cast it to float).

    * ``TiB`` / ``GiB`` values are scaled up and truncated to whole MiB
      (``GiB`` behaves exactly as before).
    * ``MiB`` values are returned with the suffix stripped, unchanged.
    * ``KiB`` and ``B`` values are scaled *down* to fractional MiB. Before
      this fix plain bytes were divided by 1024 only (yielding KiB, not MiB)
      and ``KiB`` values raised ``ValueError`` because only the trailing
      ``B`` was removed before calling ``float``.

    Args:
        value: Size text such as ``'1.5GiB'`` or ``'17.83MiB'``. Non-string
            input (for example a NaN from a missing CSV cell) is returned
            untouched so a later ``astype(float)`` can deal with it.

    Returns:
        The size in MiB as a string, or ``value`` itself when it carries no
        recognised suffix.

    Raises:
        ValueError: If the text before a recognised suffix is not a number.
    """
    if not isinstance(value, str):
        return value

    text = value.strip()

    # Longer suffixes are tested first: every unit ends in 'B', so the bare
    # byte check has to come last.
    if text.endswith('TiB'):
        return str(int(float(text[:-3]) * 1024 * 1024))
    if text.endswith('GiB'):
        return str(int(float(text[:-3]) * 1024))
    if text.endswith('MiB'):
        return text[:-3]
    if text.endswith('KiB'):
        return str(float(text[:-3]) / 1024)
    if text.endswith('B'):
        return str(float(text[:-1]) / (1024 * 1024))
    return value

# Column names applied to docker.csv; the file's first row is skipped and
# these names are used instead.
cols = ['timestamp', 'name', 'cpu_percent', 'mem_usage', 'mem_limit', 'mem_percent']
docker_df = pd.read_csv(f'{path}docker.csv', sep=',', names=cols, skiprows=1)

# --- SETTINGS ---
# NOTE: `min` and `max` shadow the Python built-ins from here on. The names
# are kept because the binning cells further down read them as the first and
# last docker timestamp.
timestamps = docker_df['timestamp']
min = int(timestamps.min())
max = int(timestamps.max())
interval = 5  # bin width, in the same unit as the timestamps
# --- SETTINGS ---

# Memory columns go through convert_to_MiB (which strips the unit suffix)
# before being cast to float.
for mem_col in ('mem_usage', 'mem_limit'):
    docker_df[mem_col] = docker_df[mem_col].apply(convert_to_MiB).astype(float)

# Percentage columns only need the numeric cast.
for pct_col in ('cpu_percent', 'mem_percent'):
    docker_df[pct_col] = docker_df[pct_col].astype(float)

# Keep only the containers listed in `services` (set in the configuration
# cell) rather than repeating the same five names here, then spread them out
# to one column per (metric, container) pair. pivot_table uses its default
# aggregation (mean) when a container has several rows for one timestamp.
dfs['docker'] = pd.pivot_table(
    docker_df[docker_df['name'].isin(services)],
    index='timestamp',
    columns='name',
    values=['cpu_percent', 'mem_usage', 'mem_percent'],
).reset_index()

# Flatten the two-level column index into '<container>_<metric>' names. The
# 'timestamp' column has an empty second level and keeps its plain name.
dfs['docker'].columns = [
    f'{container}_{metric}' if container else metric
    for metric, container in dfs['docker'].columns
]

# --- Align bins with K6 ---
# pd.cut builds right-closed intervals by default, so the bin labelled t
# covers timestamps in (t, t + interval]. A sample stamped exactly `min`
# therefore falls outside every bin and is dropped by the groupby below.
dfs['docker']['binned_timestamp'] = pd.cut(
    dfs['docker']['timestamp'],
    bins=range(min, max + interval, interval),
    labels=range(min, max, interval),
)

# Keep the first sample of every bin. `observed=False` is spelled out so that
# bins without any sample still produce a (NaN) row regardless of the pandas
# version's default; the deprecation warning about that default is hidden by
# the global warnings filter at the top of the notebook.
dfs['docker'] = (
    dfs['docker']
    .groupby('binned_timestamp', observed=False)
    .first()
    .reset_index()
)

# Replace the raw timestamp with the bin label and drop the helper column.
dfs['docker']['timestamp'] = dfs['docker']['binned_timestamp']
dfs['docker'] = dfs['docker'].drop(columns='binned_timestamp')

dfs['docker']

Unnamed: 0,timestamp,cdn_cpu_percent,discovery_cpu_percent,monolith_cpu_percent,cdn_mem_percent,discovery_mem_percent,monolith_mem_percent,cdn_mem_usage,discovery_mem_usage,monolith_mem_usage
0,1735378191,0.00,0.00,127.40,0.23,0.23,21.16,17.83,17.70,1658.00
1,1735378196,0.00,0.00,126.81,0.23,0.23,28.21,17.83,17.70,2210.00
2,1735378201,0.00,0.00,112.84,0.23,0.23,34.40,17.83,17.70,2696.00
3,1735378206,0.00,0.00,111.00,0.23,0.23,34.59,17.83,17.70,2710.00
4,1735378211,0.00,0.00,111.16,0.23,0.23,34.66,17.83,17.70,2716.00
...,...,...,...,...,...,...,...,...,...,...
247,1735379426,0.01,0.00,104.10,0.23,0.23,61.74,17.87,17.74,4838.00
248,1735379431,0.00,0.00,104.49,0.23,0.23,61.87,17.87,17.74,4848.00
249,1735379436,0.00,0.00,103.91,0.23,0.23,62.01,17.87,17.74,4859.00
250,1735379441,0.00,0.00,104.25,0.23,0.23,62.11,17.87,17.74,4868.00


In [22]:
# Column names applied to raw_k6.csv; the file's first row is skipped and
# these names are used instead.
cols = [
    'metric_name', 'timestamp', 'metric_value',
    'check', 'error', 'error_code', 'expected_response',
    'group', 'method', 'name', 'proto', 'scenario', 'service',
    'status', 'subproto', 'tls_version', 'url',
    'extra_tags', 'metadata',
]
df = pd.read_csv(f'{path}raw_k6.csv', sep=',', names=cols, skiprows=1)

# IMPUTE MISSING
# Rows without a status get the sentinel -1 so the column can be cast to int.
df["status"] = df["status"].fillna(value=-1).astype(int)

In [23]:
# Keep only the 'http_req_duration' samples.
duration_df = df[df['metric_name'] == 'http_req_duration'].copy()

# Right-closed bins: label t covers timestamps in (t, t + interval].
duration_bins = pd.cut(
    duration_df['timestamp'],
    bins=range(min, max + interval, interval),
    labels=range(min, max, interval),
)

# `observed=False` is spelled out so that empty bins still yield a row
# (count 0, NaN statistics) regardless of the pandas version's default.
dfs['duration'] = (
    duration_df
    .groupby(duration_bins, observed=False)['metric_value']
    .agg(['mean', 'min', 'max', 'count'])
    .reset_index()
)

# NOTE(review): the columns keep the generic names mean/min/max/count, and
# that is how they end up in the merged table. The rename below was left
# disabled -- confirm what downstream readers expect before enabling it.
# dfs['duration'].columns = ['timestamp', 'duration_mean', 'duration_min', 'duration_max', 'duration_count']
dfs['duration']

Unnamed: 0,timestamp,mean,min,max,count
0,1735378191,1.05,0.67,5.94,834
1,1735378196,0.78,0.56,4.07,833
2,1735378201,0.68,0.46,5.62,834
3,1735378206,0.64,0.49,4.14,833
4,1735378211,0.63,0.49,5.24,833
...,...,...,...,...,...
247,1735379426,0.60,0.40,5.97,833
248,1735379431,0.62,0.45,9.69,833
249,1735379436,0.61,0.42,6.04,834
250,1735379441,0.63,0.46,8.12,833


In [24]:
# Keep only the 'http_reqs' samples.
throughput_df = df[df['metric_name'] == 'http_reqs'].copy()

# Stage 1: requests per second.
# pd.cut bins are right-closed, so with integer timestamps the bin (t - 1, t]
# holds exactly the samples stamped t. Each bin is therefore labelled with its
# RIGHT edge. Labelling with the left edge (as before) stamped every count one
# second early, which shifted the windows of stage 2 by one second relative to
# the duration/error windows.
# Assumes timestamps are whole seconds -- TODO confirm against the k6 export.
tp_per_sec_bins = pd.cut(
    throughput_df['timestamp'],
    bins=range(min, max + 1, 1),
    labels=range(min + 1, max + 1, 1),
)

# `observed=False` keeps seconds without any request as rows with sum 0, so
# they count towards the per-window mean/min below.
tp_per_sec_df = (
    throughput_df
    .groupby(tp_per_sec_bins, observed=False)['metric_value']
    .agg(['sum'])
    .reset_index()
)
tp_per_sec_df.columns = ['timestamp', 'nrequests']

# Stage 2: summarise the per-second counts per `interval`-wide window, using
# the same edges and labels as the other cells. The per-second label column is
# categorical after the groupby, so it is cast back to int before binning.
throughput_bins = pd.cut(
    tp_per_sec_df['timestamp'].astype(int),
    bins=range(min, max + interval, interval),
    labels=range(min, max, interval),
)
dfs['throughput'] = (
    tp_per_sec_df
    .groupby(throughput_bins, observed=False)['nrequests']
    .agg(['mean', 'min', 'max', 'sum'])
    .reset_index()
)
# 'throughput_count' is the sum of the per-second counts inside the window.
dfs['throughput'].columns = ['timestamp', 'throughput_mean', 'throughput_min', 'throughput_max', 'throughput_count']
dfs['throughput']


Unnamed: 0,timestamp,throughput_mean,throughput_min,throughput_max,throughput_count
0,1735378191,166.60,166.00,167.00,833.00
1,1735378196,166.80,166.00,167.00,834.00
2,1735378201,166.60,166.00,167.00,833.00
3,1735378206,166.80,166.00,167.00,834.00
4,1735378211,166.60,166.00,167.00,833.00
...,...,...,...,...,...
247,1735379426,166.80,166.00,167.00,834.00
248,1735379431,166.60,166.00,167.00,833.00
249,1735379436,166.60,166.00,167.00,833.00
250,1735379441,166.80,166.00,167.00,834.00


In [25]:
# Keep only the 'http_req_failed' samples.
error_df = df[df['metric_name'] == 'http_req_failed']

# Right-closed bins: label t covers timestamps in (t, t + interval].
error_bins = pd.cut(
    error_df['timestamp'],
    bins=range(min, max + interval, interval),
    labels=range(min, max, interval),
)

# `observed=False` is spelled out so that empty bins still yield a row
# regardless of the pandas version's default.
dfs['errors'] = (
    error_df
    .groupby(error_bins, observed=False)['metric_value']
    .agg(['mean', 'sum'])
    .reset_index()
)
dfs['errors'].columns = ['timestamp', 'err_mean', 'err_count']

# fillna returns a new Series; the result has to be assigned back, otherwise
# the call has no effect (which is what the previous bare call did).
dfs['errors']['err_count'] = dfs['errors']['err_count'].fillna(0)
dfs['errors']

Unnamed: 0,timestamp,err_mean,err_count
0,1735378191,0.00,0.00
1,1735378196,0.00,0.00
2,1735378201,0.00,0.00
3,1735378206,0.00,0.00
4,1735378211,0.00,0.00
...,...,...,...
247,1735379426,0.00,0.00
248,1735379431,0.00,0.00
249,1735379436,0.00,0.00
250,1735379441,0.00,0.00


In [26]:
# Start from the docker table and outer-join every k6 aggregate on the shared
# 'timestamp' bin label, so bins present in only some tables are kept.
merged_df = dfs['docker'].copy()
for name in ('duration', 'throughput', 'errors'):
    merged_df = pd.merge(merged_df, dfs[name], how='outer', on='timestamp')

# Re-base the timestamps so the earliest bin becomes 0.
# NOTE: this rebinds the notebook-level `min` that the binning cells use.
min = merged_df['timestamp'].min()
merged_df['timestamp'] = merged_df['timestamp'].astype(int).sub(min)

In [27]:
# Write the merged table next to the raw inputs, without the row index.
merged_df.to_csv(path + 'metrics.csv', index=False)