In [None]:
%load_ext autoreload
%autoreload 2

import plotly.express as px
import pandas as pd
import numpy as np
from scipy.stats import (
    bootstrap,
    gmean,
)
from IPython.display import display

from duet.process import *
from duet.constants import *
from common import *

# Load the raw measurements, then pass them through preprocess_data.
# Loading and preprocessing are kept as separate steps so the raw frame
# stays inspectable.
df_raw = load_raw()
df_prep = preprocess_data(df_raw)

# Distinct values of the suite column; the per-suite plotting loops in the
# cells below iterate over this array.
unique_suites = df_prep[RF.suite].unique()

# CV - Coefficient of Variation (Relative Standard Deviation)

In [None]:
# Summary statistics of the measured time for every group defined by
# BENCHMARK_ENV_COL: number of samples, mean, variance and standard deviation.
time_stats = {
    "time_count": (RF.time_ns, len),
    "time_mean": (RF.time_ns, "mean"),
    "time_var": (RF.time_ns, "var"),
    "time_std": (RF.time_ns, "std"),
}
df = df_prep.groupby(BENCHMARK_ENV_COL).agg(**time_stats).reset_index()

# Coefficient of variation: standard deviation relative to the mean.
df["CV"] = df["time_std"] / df["time_mean"]

# One grouped bar chart per suite, one facet column per environment.
for suite in unique_suites:
    suite_rows = df[df[RF.suite] == suite]
    fig = px.bar(
        suite_rows,
        title=f"Benchmark Time Relative Deviation - {suite}",
        x=RF.benchmark,
        y="CV",
        color=RF.type,
        facet_col=DF.env,
        barmode="group",
    )
    # Sort benchmarks alphabetically so the facets are comparable.
    fig.update_xaxes(categoryorder="category ascending")
    fig.show()

# CI - Confidence Intervals

### Determine best duet `overlap_rate`

In [None]:
# Compute CIs for a sweep of overlap rates: from 0.1 up to (but excluding) 1
# in steps of 0.1, then let the CI arbiter label every row.
df = compute_ci(df_prep, np.arange(0.1, 1, 0.1))
df = arbiter_ci_contains_zero(df)

# Aggregate per (env, suite, type, overlap_rate):
#   total_count - number of non-null arbiter verdicts
#   match_count - sum of the verdict column (number of matches)
#   miss_err    - mean of the CI error column
df = (
    df.groupby(by=[DF.env, RF.suite, RF.type, DF.overlap_rate])
    .agg(
        total_count=(DF.match_ci, "count"),
        match_count=(DF.match_ci, "sum"),
        # Use the string alias rather than the np.mean callable: pandas >= 2.1
        # emits a FutureWarning for np.mean in groupby.agg, and the alias keeps
        # the current (groupby-native mean) behaviour. It also matches the
        # "mean" alias used in the CV cell.
        miss_err=(DF.err_ci, "mean"),
    )
    .reset_index()
)
df[DF.match_ratio_ci] = df["match_count"] / df["total_count"]

# Match ratio as a function of overlap rate; last expression so the figure is
# rendered as the cell output.
px.line(
    df,
    x=DF.overlap_rate,
    y=DF.match_ratio_ci,
    color=RF.suite,
    facet_col=DF.env,
    markers=True,
)

In [None]:
# Confidence intervals for a single, fixed overlap rate of 0.5
# (presumably the value chosen from the sweep above - TODO confirm).
df_ci = compute_ci(
    df_prep,
    overlap_rates=[0.5],
)
# Bare expression: show the resulting frame as the cell output.
df_ci

In [None]:
# Name of the derived column that combines measurement type and overlap rate.
# NOTE(review): `type` shadows the Python builtin; the name is kept because
# the following cell reads it. Rename in both cells together if cleaned up.
type = "type:pairing"
df_ci[type] = df_ci[RF.type] + ":" + df_ci[DF.overlap_rate].astype(str)

# One scatter plot (interval midpoint with error bars) per suite; suites that
# have no rows in df_ci are skipped.
for suite in unique_suites:
    suite_rows = df_ci[df_ci[RF.suite] == suite]
    if len(suite_rows) == 0:
        continue
    fig = px.scatter(
        suite_rows,
        title=f"CI - {suite}",
        x="benchmark",
        y="mid",
        error_y="err",
        color=type,
        facet_col=DF.env,
    )
    # Sort benchmarks alphabetically so the facets are comparable.
    fig.update_xaxes(categoryorder="category ascending")
    fig.show()

In [None]:
# Per-suite grouped bars of the CI width per benchmark, one facet row per
# environment. Suites with no rows in df_ci are skipped so that no empty
# figure is rendered.
for suite in unique_suites:
    suite_mask = df_ci[RF.suite] == suite
    if not suite_mask.any():
        continue
    fig = px.bar(
        df_ci[suite_mask],
        x=RF.benchmark,
        y=DF.ci_width,
        color=type,
        facet_row=DF.env,
        barmode="group",
        title=f"Relative CI Width comparison - {suite}",
    )
    fig.show()

# Mean CI width per (env, suite, type:pairing).
# The "mean" string alias replaces the np.mean callable: pandas >= 2.1 emits a
# FutureWarning for np.mean in groupby.agg, and the alias preserves the
# current behaviour.
df = (
    df_ci.groupby(by=[DF.env, RF.suite, type])
    .agg(ci_width_mean=(DF.ci_width, "mean"))
    .reset_index()
)

# display() is used explicitly because two figures are shown from one cell.
display(
    px.bar(
        df,
        x=RF.suite,
        y="ci_width_mean",
        color=type,
        barmode="group",
        facet_row=DF.env,
        title="Mean relative CI width",
    )
)
# Distribution of the "err" column per suite, split by measurement type.
display(
    px.box(
        df_ci,
        x=RF.suite,
        y="err",
        facet_col=DF.env,
        color=RF.type,
        title="CI error rate per suite",
    )
)

## Arbitrage

In [None]:
# Label every CI row with the CI arbiter, then aggregate the predictions
# with the U-test part switched off.
df_pred_ci = arbiter_ci_contains_zero(df_ci)
df = group_predictions(df_pred_ci, utest=False)

# Grouped bars of the CI match ratio per suite, one facet column per
# environment; last expression so the figure is the cell output.
px.bar(
    df,
    title="Correct A/A detection ratio",
    x=RF.suite,
    y=DF.match_ratio_ci,
    color=RF.type,
    facet_col=DF.env,
    barmode="group",
)

In [None]:
# Label the preprocessed data with the U-test arbiter, then aggregate the
# predictions with the CI part switched off.
df_pred_utest = arbiter_utest(df_prep)
df = group_predictions(df_pred_utest, ci=False)

# NOTE(review): this plots DF.match_ratio_ci and reuses the title of the CI
# arbitrage cell even though the predictions come from the U-test arbiter
# with ci=False. This looks like a copy-paste leftover - confirm which column
# group_predictions produces here and adjust y/title accordingly.
px.bar(
    df,
    title="Correct A/A detection ratio",
    x=RF.suite,
    y=DF.match_ratio_ci,
    color=RF.type,
    facet_col=DF.env,
    barmode="group",
)

---

Backup

---

In [None]:
# NOTE(review): df_prep is already the output of preprocess_data, so this
# preprocesses the data a second time. Kept as-is; confirm that
# preprocess_data is idempotent or replace this with a plain use of df_prep.
df = preprocess_data(df_prep)

# Per-run summary statistics of the measured time, grouped by the artifact
# columns plus the run-id columns.
per_run_stats = {
    "time_count": (RF.time_ns, len),
    "time_mean": (RF.time_ns, "mean"),
    "time_var": (RF.time_ns, "var"),
    "time_std": (RF.time_ns, "std"),
}
df = df.groupby(ARTIFACT_COL + RUN_ID_COL).agg(**per_run_stats).reset_index()

# Coefficient of variation: standard deviation relative to the mean.
df["CV"] = df["time_std"] / df["time_mean"]

# One box plot per suite showing the spread of the per-run CV values,
# one facet row per environment.
for suite in unique_suites:
    suite_rows = df[df[RF.suite] == suite]
    fig = px.box(
        suite_rows,
        title=f"Benchmark Time Relative Deviation per run - {suite}",
        x=RF.benchmark,
        y="CV",
        color=RF.type,
        facet_row=DF.env,
    )
    # Sort benchmarks alphabetically so the facets are comparable.
    fig.update_xaxes(categoryorder="category ascending")
    fig.show()

In [None]:
# Restrict the data to rows whose type is "syncduet".
is_syncduet = df_prep[RF.type] == "syncduet"
df_ci_syncduet = df_prep[is_syncduet]

# Nothing to analyse: report it and abort the cell.
# StopExecution is not defined in this notebook; presumably it is provided by
# one of the star imports (TODO confirm).
if not is_syncduet.any():
    print("No runs")
    raise StopExecution

# Pairwise speedup confidence intervals, computed on run means.
df_ci_syncduet = compute_ci_pair_speedup(df_ci_syncduet, sample_type="run_means")

# One scatter plot (interval midpoint with error bars) per suite, coloured by
# environment; suites without rows are skipped.
for suite in unique_suites:
    suite_mask = df_ci_syncduet[RF.suite] == suite
    if not suite_mask.any():
        continue
    fig = px.scatter(
        df_ci_syncduet[suite_mask],
        title=f"Syncduet pairwise speedup CI - {suite}",
        x=RF.benchmark,
        y="mid",
        error_y="err",
        color=DF.env,
    )
    fig.show()