In [1]:
import pandas as pd
pd.set_option("display.max_rows", None)
pd.set_option('display.max_colwidth', None)
#import os
#import re
import matplotlib.pyplot as plt
#import pickle
from IPython.display import display, Markdown
import seaborn as sns
from math import floor
import ast
import json

#import dbmsbenchmarker
#import bexhoma
from bexhoma import evaluators

%matplotlib inline

In [9]:
# Root folder of the benchmark results; each experiment lives in a
# sub-folder named after its experiment code.
# Windows alternative (kept for reference): r"C:\data\benchmarks"
path = r"/home/perdelt/benchmarks"

In [94]:
def get_workload(code):
    """Load the workload properties stored for one experiment.

    Reads ``<path>/<code>/queries.config`` (``path`` is the notebook-level
    results folder) and parses the file content as a Python literal.

    :param code: Experiment code, i.e. the name of the experiment's sub-folder.
    :return: The parsed content of ``queries.config``.
    """
    config_file = path + "/" + code + "/queries.config"
    with open(config_file, 'r') as handle:
        raw_config = handle.read()
    # literal_eval only accepts Python literals, so the file cannot run code.
    return ast.literal_eval(raw_config)


def get_performance(evaluation):
    """Aggregate the benchmarking results of an experiment per client.

    :param evaluation: Object providing ``get_df_benchmarking()``, which must
        return a DataFrame with the columns ``experiment_run``, ``client`` and
        the two metric columns aggregated below.
    :return: DataFrame with one row per ``client`` holding the summed goodput
        and the mean of the average latency.
    """
    aggregations = {
        'Goodput (requests/second)': 'sum',
        'Latency Distribution.Average Latency (microseconds)': 'mean',
    }
    benchmarks = evaluation.get_df_benchmarking()
    # Keep a deterministic row order before the rows are grouped.
    ordered = benchmarks.sort_values(['experiment_run', 'client'])
    per_client = ordered.groupby('client').agg(aggregations)
    return per_client.reset_index()

def get_loading_time_max(evaluation):
    """Collect loading times and loading parameters per connection.

    Reads ``<path>/<experiment code>/connections.config`` (``path`` is the
    notebook-level results folder), parses it as a Python literal and builds
    one row per connection.

    The experiment code is taken from ``evaluation.code`` when that attribute
    exists, so the result belongs to the evaluation that was passed in. If the
    attribute is missing, the notebook-level ``code`` variable is used.
    NOTE(review): assumes the evaluator exposes its experiment code as
    ``evaluation.code`` -- confirm against the bexhoma evaluator class.

    :param evaluation: Evaluator of the experiment whose connections are read.
    :return: DataFrame indexed by connection name (sorted by name) with the
        columns ``time_load``, ``time_ingest``, ``time_check``, ``terminals``,
        ``pods``, ``tenant`` (empty string if the connection has no ``TENANT``
        parameter) and ``client``.
    :raises FileNotFoundError: If ``connections.config`` does not exist.
    :raises KeyError: If a connection lacks one of the required entries.
    """
    experiment_code = getattr(evaluation, 'code', None)
    if experiment_code is None:
        # No code on the object: fall back to the notebook-level variable.
        experiment_code = code
    config_file = path + "/" + str(experiment_code) + "/connections.config"
    with open(config_file, 'r') as inf:
        connections = ast.literal_eval(inf.read())
    result = dict()
    for c in sorted(connections, key=lambda c: c['name']):
        parameter = c['parameter']
        loading_parameters = parameter['connection_parameter']['loading_parameters']
        result[c['name']] = {
            'time_load': c['timeLoad'],
            'time_ingest': c['timeIngesting'],
            'time_check': c['timeIndex'],
            'terminals': loading_parameters['BENCHBASE_TERMINALS'],
            'pods': parameter['parallelism'],
            'tenant': parameter.get('TENANT', ''),
            'client': parameter['client'],
        }
    # The dict is keyed by connection name, so transpose to get one row per
    # connection and one column per property.
    return pd.DataFrame(result).T

def show_summary_monitoring_table(evaluate, component):
    """Build one single-column summary table per monitoring metric.

    Three metrics are queried through ``evaluate.get_monitoring_metric`` for
    the given component, in this order:

    * ``total_cpu_util_s``: per-column maximum minus minimum, labelled
      ``CPU [CPUs]``.
    * ``total_cpu_memory``: values divided by 1024, per-column maximum,
      rounded to 2 decimals, labelled ``Max RAM [Gb]``.
    * ``total_cpu_memory_cached``: values divided by 1024, per-column maximum,
      labelled ``Max RAM Cached [Gb]``.

    :param evaluate: Object providing ``get_monitoring_metric(metric=..., component=...)``.
    :param component: Component name passed through to the metric query.
    :return: List of one-column DataFrames; tables without rows are left out.
    """
    summaries = []

    def _collect(values, label, decimals=None):
        # Wrap the per-column values in a labelled one-column frame and keep
        # it only when it actually contains rows.
        table = pd.DataFrame(values)
        if decimals is not None:
            table = table.round(decimals)
        table.columns = [label]
        if not table.empty:
            summaries.append(table.copy())

    cpu_counter = evaluate.get_monitoring_metric(metric='total_cpu_util_s', component=component)
    # The metric is a counter, so the amount used is its maximum minus its minimum.
    _collect(cpu_counter.max().sort_index() - cpu_counter.min().sort_index(), "CPU [CPUs]")

    memory = evaluate.get_monitoring_metric(metric='total_cpu_memory', component=component) / 1024
    _collect(memory.max().sort_index(), "Max RAM [Gb]", decimals=2)

    memory_cached = evaluate.get_monitoring_metric(metric='total_cpu_memory_cached', component=component) / 1024
    _collect(memory_cached.max().sort_index(), "Max RAM Cached [Gb]")

    return summaries

def get_monitoring_stream(evaluation):
    """Sum the monitoring summary of the "stream" component per client.

    The single-column tables from ``show_summary_monitoring_table`` are joined
    side by side, rounded to 2 decimals and ordered with
    ``evaluators.natural_sort``. The client is the part of the index label
    after the last ``-`` (e.g. ``PostgreSQL-1-1-1024-0-1`` -> ``1``).

    Only metric columns that are actually present are aggregated, because the
    summary helper leaves out metrics without data. A missing metric therefore
    yields a result without that column instead of raising a ``KeyError``.

    Prints the heading ``### Stream SUT`` when monitoring data exists.

    :param evaluation: Evaluator handed on to ``show_summary_monitoring_table``.
    :return: DataFrame with a ``client`` column (string labels) and one summed
        column per available metric, or ``None`` if there is no monitoring data.
    """
    tables = show_summary_monitoring_table(evaluation, "stream")
    if len(tables) == 0:
        return None
    print("\n### Stream SUT")
    summary = pd.concat(tables, axis=1).round(2)
    summary = summary.reindex(index=evaluators.natural_sort(summary.index))
    # Index labels look like 'PostgreSQL-1-1-1024-0-1'; the last part is the client.
    summary['client'] = summary.index.str.rsplit('-', n=1).str[-1]
    metric_columns = ['CPU [CPUs]', 'Max RAM [Gb]', 'Max RAM Cached [Gb]']
    aggregations = {
        column: 'sum' for column in metric_columns if column in summary.columns
    }
    return summary.groupby('client').agg(aggregations).reset_index()


# Container

In [121]:
# Experiment code of the run analysed in this section; it names the
# experiment's sub-folder below `path`.
code = '1750686487'
# Evaluator for this experiment's results.
evaluation = evaluators.benchbase(code=code, path=path)
# Workload properties parsed from the experiment's queries.config.
workload = get_workload(code)

In [122]:
# Per-client performance: summed goodput and mean of the average latency.
df=get_performance(evaluation)
# Annotate every row with the workload's `tenant_per` and `num_tenants` values.
df['type']=workload['tenant_per']
df['num_tenants']=workload['num_tenants']
df

Unnamed: 0,client,Goodput (requests/second),Latency Distribution.Average Latency (microseconds),type,num_tenants
0,1,6206.906833,31782.0,container,2
1,2,7300.489242,26982.5,container,2


In [123]:
# Loading times and loading parameters, one row per connection.
get_loading_time_max(evaluation)

Unnamed: 0,time_load,time_ingest,time_check,terminals,pods,tenant,client
PostgreSQL-1-1-16384-0-1,276.0,116.0,159.0,1,1,0,1
PostgreSQL-1-1-16384-0-2,276.0,116.0,159.0,1,1,0,2
PostgreSQL-1-1-16384-1-1,313.0,124.0,188.0,1,1,1,1
PostgreSQL-1-1-16384-1-2,313.0,124.0,188.0,1,1,1,2


In [124]:
# Monitoring summary of the "stream" component, summed per client.
get_monitoring_stream(evaluation)


### Stream SUT


Unnamed: 0,client,CPU [CPUs],Max RAM [Gb],Max RAM Cached [Gb]
0,1,4296.2,9.13,11.89
1,2,4561.3,9.51,12.71


# Database

In [125]:
# Experiment code of the run analysed in this section; it names the
# experiment's sub-folder below `path`.
# NOTE(review): `workload` is not reloaded for this experiment; the cells of
# this section do not use it.
code = '1750685768'
# Evaluator for this experiment's results.
evaluation = evaluators.benchbase(code=code, path=path)

In [126]:
# Per-client performance: summed goodput and mean of the average latency.
get_performance(evaluation)

Unnamed: 0,client,Goodput (requests/second),Latency Distribution.Average Latency (microseconds)
0,1,3497.94765,56481.5
1,2,8565.863305,22906.5


In [127]:
# Loading times and loading parameters, one row per connection.
get_loading_time_max(evaluation)

Unnamed: 0,time_load,time_ingest,time_check,terminals,pods,tenant,client
PostgreSQL-1-1-16384-1,279.0,121.0,157.0,1,2,,1
PostgreSQL-1-1-16384-2,279.0,121.0,157.0,1,2,,2


In [128]:
# Monitoring summary of the "stream" component, summed per client.
get_monitoring_stream(evaluation)


### Stream SUT


Unnamed: 0,client,CPU [CPUs],Max RAM [Gb],Max RAM Cached [Gb]
0,1,2146.64,6.58,9.06
1,2,3130.01,7.56,10.74


# Schema

In [118]:
# Experiment code of the run analysed in this section; it names the
# experiment's sub-folder below `path`.
code = '1750685007'
# Evaluator for this experiment's results.
evaluation = evaluators.benchbase(code=code, path=path)
# Workload properties parsed from the experiment's queries.config.
workload = get_workload(code)

In [119]:
# Per-client performance: summed goodput and mean of the average latency.
df=get_performance(evaluation)
# Annotate every row with the workload's `tenant_per` and `num_tenants` values.
df['type']=workload['tenant_per']
df['num_tenants']=workload['num_tenants']
df

Unnamed: 0,client,Goodput (requests/second),Latency Distribution.Average Latency (microseconds),type,num_tenants
0,1,3364.007487,58816.5,schema,2
1,2,5860.348171,33683.5,schema,2


In [120]:
# Loading times and loading parameters, one row per connection.
get_loading_time_max(evaluation)

Unnamed: 0,time_load,time_ingest,time_check,terminals,pods,tenant,client
PostgreSQL-1-1-16384-1,327.0,134.0,192.0,1,2,,1
PostgreSQL-1-1-16384-2,327.0,134.0,192.0,1,2,,2


In [115]:
# Monitoring summary of the "stream" component, summed per client.
get_monitoring_stream(evaluation)


### Stream SUT


Unnamed: 0,client,CPU [CPUs],Max RAM [Gb],Max RAM Cached [Gb]
0,1,2266.72,6.74,9.18
1,2,2229.89,7.32,10.2
