### Imports

In [28]:
import sys
sys.path.append(r"") # NOTE: put the directory to add to sys.path here (presumably the one that contains the visu_script package imported below — confirm)
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [14]:
from visu_script import parse_files as pf

### Parameters

In [15]:
FILES_PATH = r"" # NOTE: put the files path here; it is handed to pf.get_usecase_instances in the Code section

### Code

In [16]:
# Load the run instances found under FILES_PATH via the visu_script parser.
# Each element is read below for its database, timestamp, usecase,
# execution time and CPU / memory metrics.
case_db_run_instances = pf.get_usecase_instances(FILES_PATH)

In [17]:
# Flatten every run instance into one plain dict (one row per run).
# The key order below fixes the column order of the resulting frame.
records = [
    {
        "database": run.database,
        "timestamp": run.timestamp,
        "usecase": run.usecase,
        "usecase_full_name": run.usecase_full_name,
        "execution_time_s": run.execution_time,
        "avg_cpu_percent": run.avg_cpu_percent,
        "peak_cpu_percent": run.peak_cpu_percent,
        "avg_mem_mb": run.avg_mem_mb,
        "peak_mem_mb": run.peak_mem_mb,
    }
    for run in case_db_run_instances
]

# Build the frame, order it chronologically and renumber the rows 0..n-1.
df = (
    pd.DataFrame(records)
    .sort_values(by="timestamp")
    .reset_index(drop=True)
)

In [18]:
# Preview the assembled, timestamp-sorted frame
df

Unnamed: 0,database,timestamp,usecase,usecase_full_name,execution_time_s,avg_cpu_percent,peak_cpu_percent,avg_mem_mb,peak_mem_mb
0,mongodb,2025-06-02 14:15:01.310435,6,usecase6_filter_by_bedrooms_and_size,5.0,45.40,86.36,1896.69,1896.69
1,mongodb,2025-06-02 14:15:01.310435,5,usecase5_average_price_per_city,10.0,102.13,103.20,1839.60,1844.11
2,postgres,2025-06-02 14:15:01.310435,6,usecase6_filter_by_bedrooms_and_size,3.0,92.26,92.26,1705.26,1705.26
3,postgres,2025-06-02 14:15:01.310435,5,usecase5_average_price_per_city,2.0,179.17,179.17,1766.34,1766.34
4,postgres,2025-06-02 14:15:01.310435,4,usecase4_price_analysis,0.0,0.00,0.00,0.00,0.00
...,...,...,...,...,...,...,...,...,...
450,clickhouse,2025-06-04 08:00:01.625527,4,usecase4_price_analysis,0.0,2.56,2.56,2254.61,2254.61
451,clickhouse,2025-06-04 08:00:01.625527,3,usecase3_add_solar_panels,0.0,3.82,3.82,2248.06,2248.06
452,clickhouse,2025-06-04 08:00:01.625527,1,usecase1_filter_properties,4.0,13.02,23.10,2288.48,2297.78
453,postgres,2025-06-04 08:00:01.625527,6,usecase6_filter_by_bedrooms_and_size,3.0,83.09,83.09,2220.67,2220.67


### Plot1: Box‐Plot: Distribution of Execution Time by Database

In [26]:
# Box plot of execution time with one box (and one colour) per database;
# points="all" additionally draws every individual run as a point.
box_options = dict(
    x="database",
    y="execution_time_s",
    color="database",
    points="all",
    title="Execution‐Time Distribution by Database",
)
fig = px.box(df, **box_options)

# Replace the raw column names on the axes with readable titles.
fig.update_layout(
    xaxis={"title": {"text": "Database System"}},
    yaxis={"title": {"text": "Execution Time (seconds)"}},
)
fig.show()

### Plot2: Line‐Plot Over Time: How Execution Time Evolves

In [35]:
uc = 7  # NOTE: change the usecase id here and re-execute this cell

# Keep only the runs of the selected usecase (copy, so adding a column
# below does not touch the shared `df`).
df_uc = df[df["usecase"] == uc].copy()

if df_uc.empty:
    # An unknown / unmeasured usecase id would otherwise render an empty,
    # but fully titled, chart — report it explicitly instead.
    print(f"[!] No data for usecase = {uc}")
else:
    # Aggregate per calendar day rather than per exact timestamp.
    df_uc["date_only"] = df_uc["timestamp"].dt.date

    # Mean execution time per database per day.
    grouped = (
        df_uc
        .groupby(["date_only", "database"])["execution_time_s"]
        .mean()
        .reset_index(name="mean_exec_time_s")
    )

    # One line per database, with a marker on every daily mean.
    fig = px.line(
        grouped,
        x="date_only",
        y="mean_exec_time_s",
        color="database",
        markers=True,
        title=f"Average Execution Time for Usecase {uc} Over Time"
    )
    fig.update_layout(
        xaxis_title="Date",
        yaxis_title="Average Execution Time (s)"
    )
    fig.show()

### Plot3: Line‐Plot Over Time: How Execution Time Evolves per Operation Type

In [42]:
# Work on a derived frame instead of rebinding `df`: the shared `df` must keep
# its `usecase`, CPU and memory columns, because the per-usecase line plot and
# the CPU / memory box plots in this notebook read them.
df_ops = df[
    ["database", "timestamp", "usecase", "usecase_full_name", "execution_time_s"]
].copy()

# Calendar date of each run, so runs can be averaged per day.
df_ops["date_only"] = df_ops["timestamp"].dt.date

# Usecase ids grouped into operation types:
#   Read operations:    usecases 1, 4, 5, 6
#   Update operations:  usecases 2, 3
#   Bulk insert:        usecase 7
operation_map = {
    "Read":       [1, 4, 5, 6],
    "Update":     [2, 3],
    "BulkInsert": [7]
}

def categorize_usecase(uid: int) -> str:
    """Return the operation type that lists usecase id `uid`.

    Looks `uid` up in `operation_map` and returns the first category whose
    id list contains it, or "Other" when no category does.
    """
    for category, ids in operation_map.items():
        if uid in ids:
            return category
    return "Other"

df_ops["operation_type"] = df_ops["usecase"].apply(categorize_usecase)

# One figure per operation type, in the order the types are declared above.
for op_type in operation_map:
    df_op = df_ops[df_ops["operation_type"] == op_type]
    if df_op.empty:
        print(f"[!] No data for operation type = {op_type}")
        continue

    # Daily mean execution time per database and per (full) usecase name.
    grouped = (
        df_op
        .groupby(["date_only", "database", "usecase_full_name"])["execution_time_s"]
        .mean()
        .reset_index(name="mean_exec_time_s")
    )

    # Faceted line plot: one facet per usecase of this operation type.
    # Axis titles are supplied through `labels` so that every facet gets
    # them; update_layout(xaxis_title=...) would only retitle the first
    # subplot and leave the raw column names on the others.
    fig = px.line(
        grouped,
        x="date_only",
        y="mean_exec_time_s",
        color="database",
        line_dash="usecase_full_name",
        markers=True,
        facet_col="usecase_full_name",
        facet_col_wrap=2,
        labels={
            "date_only": "Date",
            "mean_exec_time_s": "Mean Execution Time (s)",
        },
        title=f"Daily Mean Execution Time for {op_type} Operations"
    )
    fig.update_layout(
        showlegend=True,
        height=600
    )

    # Facet captions come as "usecase_full_name=<value>"; keep only the value.
    fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
    fig.show()

### Plot4: Faceted Bar‐Chart: Execution Time by Usecase & Database

In [None]:
# Mean execution time across all runs for every (usecase, database) pair.
mean_per_pair = df.groupby(["usecase_full_name", "database"])["execution_time_s"].mean()
agg_exec = mean_per_pair.rename("mean_execution_time_s").reset_index()

# Grouped bars: usecases along the x axis, one bar per database inside each group.
fig = px.bar(
    agg_exec,
    x="usecase_full_name",
    y="mean_execution_time_s",
    color="database",
    barmode="group",
    title="Average Execution Time per Usecase by Database",
)

# Readable axis titles; tilt the long usecase names so they do not overlap.
fig.update_layout(
    xaxis={"title": {"text": "Usecase"}, "tickangle": -45},
    yaxis={"title": {"text": "Avg. Execution Time (s)"}},
)
fig.show()


### Plot5: Box‐Plot of Peak CPU Usage by Database

In [37]:
# Distribution of peak CPU usage per database, with every run drawn as a point.
# Reads the `database` and `peak_cpu_percent` columns of `df`.
fig = px.box(
    df, x="database", y="peak_cpu_percent", color="database",
    points="all", title="Peak CPU (%) Distribution by Database",
)

# The figure has a single pair of axes; give each a readable title.
fig.update_xaxes(title_text="Database")
fig.update_yaxes(title_text="Peak CPU Percent (%)")
fig.show()

### Plot6: Box‐Plot of Average Memory (MB) by Database

In [38]:
# Distribution of average memory usage per database, with every run drawn
# as a point. Reads the `database` and `avg_mem_mb` columns of `df`.
memory_box_kwargs = {
    "x": "database",
    "y": "avg_mem_mb",
    "color": "database",
    "points": "all",
    "title": "Average Memory Usage (MB) by Database",
}
fig = px.box(df, **memory_box_kwargs)

# Swap the raw column names on the axes for readable titles.
fig.update_layout(
    {
        "xaxis": {"title": {"text": "Database"}},
        "yaxis": {"title": {"text": "Average Memory (MB)"}},
    }
)
fig.show()