In [None]:
%load_ext autoreload
%autoreload 2

import plotly.express as px
import pandas as pd
import numpy as np
from scipy.stats import (
    bootstrap,
    gmean,
)
from IPython.display import display

from duet.process import *
from duet.constants import *
from common import *

# Load the raw data frame once via the project helper `load_raw`; the analysis
# cells in this notebook read from `df_raw` rather than reloading.
df_raw = load_raw()
# Distinct values of the suite column; the plotting cells iterate over these
# to draw one figure per suite.
unique_suites = df_raw[RF.suite].unique()

In [None]:
# How to shuffle df
# Demo: take the "chi-square" rows measured on the first hostname found in the
# raw data, shuffle their times, and subtract the unshuffled times
# position by position.
df = df_raw[
    (df_raw[RF.benchmark] == "chi-square")
    & (df_raw[AF.hostname] == df_raw[AF.hostname].iloc[0])
][PAIR_ID_COL + TIME_D_NS_COL]
# A fixed seed keeps the shuffle (and the displayed differences) identical
# across "Restart Kernel -> Run All"; change the value to draw another permutation.
# reset_index(drop=True) discards the original labels so that `a - b` aligns by
# position instead of re-matching every value with itself.
a = df[RF.time_ns].sample(frac=1, random_state=0).reset_index(drop=True)
b = df[RF.time_ns].reset_index(drop=True)
diff = a - b
diff

# CV - Relative Standard Deviation

In [None]:
# Per-group summary of the measured times (count, mean, variance, standard
# deviation), grouped by the artifact and run-id columns, plus the coefficient
# of variation CV = std / mean.
df = (
    preprocess_data(df_raw)
    .groupby(ARTIFACT_COL + RUN_ID_COL)
    .agg(
        time_count=pd.NamedAgg(column=RF.time_ns, aggfunc=len),
        time_mean=pd.NamedAgg(column=RF.time_ns, aggfunc="mean"),
        time_var=pd.NamedAgg(column=RF.time_ns, aggfunc="var"),
        time_std=pd.NamedAgg(column=RF.time_ns, aggfunc="std"),
    )
    .reset_index()
    .assign(CV=lambda stats: stats["time_std"] / stats["time_mean"])
)

# One box-plot figure per suite: CV per benchmark, one facet row per
# environment, coloured by measurement type.
for suite in unique_suites:
    fig = (
        px.box(
            data_frame=df.loc[df[RF.suite] == suite],
            title=f"Benchmark Time Relative Deviation - {suite}",
            x=RF.benchmark,
            y="CV",
            color=RF.type,
            facet_row=DF.env,
        )
        .update_xaxes(categoryorder="category ascending")
        .update_layout(height=1000)
    )
    fig.show()

# CI

## Synchronized duet

In [None]:
# Worked example for a single benchmark (dacapo / avrora, syncduet runs):
# pairwise speedup per iteration -> geometric mean per run -> bootstrap CI.
if "syncduet" not in df_raw[RF.type].unique():
    print("No syncduet run")
    # NOTE(review): StopExecution is not defined in this cell; presumably it is
    # provided by one of the star imports - verify.
    raise StopExecution

df = df_raw[
    (df_raw[RF.suite] == "dacapo")
    & (df_raw[RF.benchmark] == "avrora")
    & (df_raw[RF.type] == "syncduet")
]
df = preprocess_data(df)
# Put the times of the pair members side by side: one row per
# (artifact, run, iteration), one time column per value of the pair column
# (pivot_table averages duplicates by default).
df = df.pivot_table(
    index=ARTIFACT_COL + RUN_ID_COL + [RF.iteration],
    columns=RF.pair,
    values=[RF.time_ns],
).reset_index()
# Flatten the two-level column index produced by the pivot: "<value>_<pair>"
# for the time columns, the plain name for the former index columns.
df.columns = [f"{i}_{j}" if j else i for i, j in df.columns]
# Per-iteration ratio of the "A" time to the "B" time.
df[DF.pair_speedup] = df[RF.time_ns + "_A"] / df[RF.time_ns + "_B"]
# Geometric mean of the per-iteration ratios within each run.
df_gmsr = df.groupby(ARTIFACT_COL + RUN_ID_COL).agg(gmsr=(DF.pair_speedup, gmean))
# display(df_gmsr)

# Geometric mean of the per-run values within each benchmark-level group.
df_ggmsr = df_gmsr.groupby(ARTIFACT_COL + BENCHMARK_ID_COL).agg(ggmsr=("gmsr", gmean))
# display(df_ggmsr)

# Bootstrap
# Confidence interval of the geometric mean of the per-run values, with SciPy's
# default bootstrap settings. The fixed seed makes the displayed interval
# reproducible across kernel restarts.
bootstrap(data=(df_gmsr["gmsr"],), statistic=gmean, random_state=0)

In [None]:
# Bind the name to an empty frame first so it exists even when this cell
# stops early because the data contains no syncduet measurements.
df_ci_syncduet = pd.DataFrame()
if not df_raw[RF.type].eq("syncduet").any():
    print("No syncduet run")
    raise StopExecution

df_ci_syncduet = compute_ci_syncduet(df_raw, sample_type="run_means")

# One scatter figure per suite: the "mid" value per benchmark with "err" as
# the vertical error bar, coloured by environment.
for suite in unique_suites:
    fig = px.scatter(
        data_frame=df_ci_syncduet.loc[df_ci_syncduet[RF.suite] == suite],
        title=f"Syncduet pairwise speedup CI - {suite}",
        x=RF.benchmark,
        y="mid",
        error_y="err",
        color=DF.env,
    )
    fig.show()

## Sequential

In [None]:
# Confidence intervals for the sequential measurements, computed by the
# project helper with the "run_means" sample type.
df_ci_seqn = compute_ci_seqn(df_raw, sample_type="run_means")

# One scatter figure per suite: the "mid" value per benchmark with "err" as
# the vertical error bar, coloured by environment.
for suite in unique_suites:
    fig = px.scatter(
        data_frame=df_ci_seqn.loc[df_ci_seqn[RF.suite] == suite],
        title=f"Seqn duration CI - {suite}",
        x=RF.benchmark,
        y="mid",
        error_y="err",
        color=DF.env,
    )
    fig.show()

## Asynchronous duet

In [None]:
# Confidence intervals for the duet measurements (no-overlap variant),
# computed by the project helper with the "run_means" sample type.
df_ci_duet = compute_ci_duet_no_overlaps(df_raw, sample_type="run_means")

# One scatter figure per suite: the "mid" value per benchmark with "err" as
# the vertical error bar, coloured by environment.
for suite in unique_suites:
    fig = px.scatter(
        data_frame=df_ci_duet.loc[df_ci_duet[RF.suite] == suite],
        title=f"Duet no-overlap duration CI - {suite}",
        x=RF.benchmark,
        y="mid",
        error_y="err",
        color=DF.env,
    )
    fig.show()

## CI Width Comparison

In [None]:
# Stack the per-method CI tables so the CI widths can be compared side by side.
# `df_ci_syncduet` is an empty placeholder frame when the data has no syncduet
# run. Empty entries are left out of the concatenation because pandas has
# deprecated concatenating them (they may influence result dtypes in future
# versions). If every table is empty, fall back to the full list so the
# result is the same as before.
ci_frames = [df_ci_seqn, df_ci_syncduet, df_ci_duet]
non_empty_ci_frames = [frame for frame in ci_frames if not frame.empty]
df_input = pd.concat(non_empty_ci_frames or ci_frames)

# One grouped bar chart per suite: CI width per benchmark, one bar per
# measurement type, one facet row per environment.
for suite in unique_suites:
    fig = px.bar(
        df_input[df_input[RF.suite] == suite],
        x=RF.benchmark,
        y=DF.ci_width,
        color=RF.type,
        facet_row=DF.env,
        barmode="group",
        title=f"Relative CI Width comparison - {suite}",
    )
    fig.show()