In [11]:
import pandas as pd;

import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.float_format', lambda x: '%.2f' % x)
path = '../../../k6/results/ssrv/10000/'

features = ['cpu_percent','mem_usage','mem_limit','mem_percent']
services = ["monolith", "recommendations", "teasers", "cdn", "discovery"]

dfs = {}

In [12]:
def convert_to_MiB(value):
    if 'GiB' in value:
        return str(int(float(value.replace('GiB', '')) * 1024))
    if 'MiB' in value:
        return value.replace('MiB', '')
    if('B') in value:
        return str(int(float(value.replace('B', '')) / 1024))
    return value

cols = ['timestamp','name','cpu_percent','mem_usage','mem_limit','mem_percent']
docker_df = pd.read_csv(f'{path}docker.csv', skiprows=1, names=cols, sep=',')

# --- SETTINGS ---
min = int(docker_df['timestamp'].min())
max = int(docker_df['timestamp'].max())
interval = 5
# --- SETTINGS ---

docker_df['mem_usage'] = docker_df['mem_usage'].apply(convert_to_MiB).astype(float)
docker_df['mem_limit'] = docker_df['mem_limit'].apply(convert_to_MiB).astype(float)

docker_df['cpu_percent'] = docker_df['cpu_percent'].astype(float)
docker_df['mem_percent'] = docker_df['mem_percent'].astype(float)

dfs['docker'] = pd.pivot_table(
    docker_df[docker_df['name'].isin(['monolith','teasers','recommendations','cdn','discovery'])],
    index='timestamp',
    columns='name',
    values=['cpu_percent', 'mem_usage', 'mem_percent']
).reset_index()

dfs['docker'].columns = [
    f'{col[1]}_{col[0]}' if col[1] else col[0] 
    for col in dfs['docker'].columns
]

# --- Align bins with K6 ---
dfs['docker']['binned_timestamp'] = pd.cut(
    dfs['docker']['timestamp'], 
    bins=range(min, max + interval, interval),
    labels=range(min, max, interval)
)

dfs['docker'] = dfs['docker'].groupby('binned_timestamp').first().reset_index()

dfs['docker']['timestamp'] = dfs['docker']['binned_timestamp']
dfs['docker'] = dfs['docker'].drop('binned_timestamp', axis=1)

dfs['docker']

Unnamed: 0,timestamp,cdn_cpu_percent,discovery_cpu_percent,monolith_cpu_percent,recommendations_cpu_percent,teasers_cpu_percent,cdn_mem_percent,discovery_mem_percent,monolith_mem_percent,recommendations_mem_percent,teasers_mem_percent,cdn_mem_usage,discovery_mem_usage,monolith_mem_usage,recommendations_mem_usage,teasers_mem_usage
0,1735465480,0.00,3.82,33.15,0.00,0.00,0.34,0.47,4.92,0.57,0.57,26.61,36.54,385.30,44.40,44.30
1,1735465485,0.00,3.45,31.15,0.00,0.00,0.34,0.55,5.73,0.57,0.57,26.61,43.37,449.20,44.40,44.30
2,1735465490,0.00,3.62,40.14,0.00,0.00,0.34,0.60,5.94,0.57,0.57,26.61,47.11,465.50,44.40,44.30
3,1735465495,0.00,3.43,15.17,0.00,0.00,0.34,0.76,6.04,0.57,0.57,26.61,59.72,473.50,44.40,44.30
4,1735465500,0.00,4.15,18.99,0.00,0.00,0.34,0.81,6.07,0.57,0.57,26.61,63.12,475.90,44.40,44.30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
247,1735466715,0.00,2.83,11.20,0.00,0.00,0.34,0.86,35.05,0.57,0.57,26.43,67.04,2746.00,44.42,44.32
248,1735466720,0.00,3.49,11.45,0.00,0.00,0.34,0.86,35.05,0.57,0.57,26.43,67.08,2746.00,44.42,44.32
249,1735466725,0.00,2.99,11.41,0.00,0.00,0.34,0.86,35.05,0.57,0.57,26.43,67.02,2747.00,44.42,44.32
250,1735466730,0.00,2.91,11.25,0.00,0.00,0.34,0.86,35.05,0.57,0.57,26.43,67.12,2746.00,44.42,44.32


In [13]:
cols = ['metric_name','timestamp','metric_value','check','error','error_code','expected_response','group','method','name','proto','scenario','service','status','subproto','tls_version','url','extra_tags','metadata']
df = pd.read_csv(f'{path}raw_k6.csv', skiprows=1, names=cols, sep=',')

# IMPUTE MISSING
df["status"] = df["status"].fillna(-1).astype(int)

In [14]:
duration_df = df[(df['metric_name'] == 'http_req_duration')].copy()
duration_bins = pd.cut(duration_df['timestamp'], 
    bins=range(min, max + interval, interval),
    labels=range(min, max, interval))

dfs['duration'] = duration_df.groupby(duration_bins)['metric_value'].agg(
    ['mean', 'min', 'max', 'count']
).reset_index()
dfs['duration']
# dfs['duration'].columns = ['timestamp', 'duration_mean', 'duration_min', 'duration_max', 'duration_count']
# dfs['duration']

Unnamed: 0,timestamp,mean,min,max,count
0,1735465480,14.22,7.18,77.93,839
1,1735465485,8.96,7.06,15.92,834
2,1735465490,8.52,6.06,15.51,833
3,1735465495,8.35,6.43,15.51,834
4,1735465500,8.25,5.67,17.80,833
...,...,...,...,...,...
247,1735466715,8.13,6.25,18.88,834
248,1735466720,8.01,5.47,13.54,833
249,1735466725,8.00,5.23,17.31,833
250,1735466730,7.94,5.37,13.23,834


In [15]:
throughput_df = df[(df['metric_name'] == 'http_reqs')].copy()
tp_per_sec_bins = pd.cut(throughput_df['timestamp'], 
              bins=range(min, max + 1, 1),
              labels=range(min, max, 1))

tp_per_sec_df = throughput_df.groupby(tp_per_sec_bins)['metric_value'].agg(['sum']).reset_index()
tp_per_sec_df.columns = ['timestamp', 'nrequests']

throughput_bins = pd.cut(tp_per_sec_df['timestamp'], 
              bins=range(min, max + interval, interval),
              labels=range(min, max, interval))
dfs['throughput'] = tp_per_sec_df.groupby(throughput_bins)['nrequests'].agg(['mean', 'min', 'max', 'sum']).reset_index()
dfs['throughput'].columns = ['timestamp', 'throughput_mean', 'throughput_min', 'throughput_max', 'throughput_count']
dfs['throughput']


Unnamed: 0,timestamp,throughput_mean,throughput_min,throughput_max,throughput_count
0,1735465480,167.00,166.00,168.00,835.00
1,1735465485,166.60,166.00,167.00,833.00
2,1735465490,166.60,166.00,167.00,833.00
3,1735465495,166.60,166.00,167.00,833.00
4,1735465500,166.60,166.00,167.00,833.00
...,...,...,...,...,...
247,1735466715,166.60,166.00,167.00,833.00
248,1735466720,166.80,166.00,167.00,834.00
249,1735466725,166.60,166.00,167.00,833.00
250,1735466730,166.60,166.00,167.00,833.00


In [16]:
error_df = df[df['metric_name'] == 'http_req_failed']
error_bins = pd.cut(error_df['timestamp'], 
              bins=range(min, max + interval, interval),
              labels=range(min, max, interval))
dfs['errors'] = error_df.groupby(error_bins)['metric_value'].agg(['mean', 'sum']).reset_index()
dfs['errors'].columns = ['timestamp', 'err_mean', 'err_count']
dfs['errors']['err_count'].fillna(0)
dfs['errors']

Unnamed: 0,timestamp,err_mean,err_count
0,1735465480,0.00,0.00
1,1735465485,0.00,0.00
2,1735465490,0.00,0.00
3,1735465495,0.00,0.00
4,1735465500,0.00,0.00
...,...,...,...
247,1735466715,0.00,0.00
248,1735466720,0.00,0.00
249,1735466725,0.00,0.00
250,1735466730,0.00,0.00


In [17]:
merged_df = dfs['docker'].copy()
for name in ['duration', 'throughput', 'errors']:
    merged_df = merged_df.merge(dfs[name], on='timestamp', how='outer')

min = merged_df['timestamp'].min()
merged_df['timestamp'] = merged_df['timestamp'].astype(int) - min

In [18]:
merged_df.to_csv(f'{path}metrics.csv', index=False)