In [1]:
%load_ext autoreload
%autoreload 2

In [110]:
import plotly.express as px
import pandas as pd
import numpy as np
from scipy.stats import (
    bootstrap,
    gmean,
)
from IPython.display import display

from duet.process import (
    compute_overlaps,
    compute_ci_seqn,
    compute_ci_syncduet,
    compute_ci_duet_no_overlaps,
    expand_confidence_interval,
    preprocess_data,
)

from duet.constants import (
    RF,
    AF,
    DF,
    ARTIFACT_COL,
    BASE_COL,
    BENCHMARK_ID_COL,
    PAIR_ID_COL,
    RUN_ID_COL,
    ITER_ID_COL,
    TIME_COL,
    TIME_D_COL,
    TIME_COL,
    TIME_NS_COL,
    OVERLAP_NS_COL,
    OVERLAP_D_NS_COL,
    OVERLAP_COL,
    NS_PER_S,
)

In [3]:
df_raw = pd.read_csv("../results.dacaposcala.csv")
df_raw[RF.start] = pd.to_datetime(df_raw[RF.start_ns], unit="ns")
df_raw[RF.end] = pd.to_datetime(df_raw[RF.end_ns], unit="ns")
df_raw[RF.time] = (df_raw[RF.end] - df_raw[RF.start]).dt.seconds

df_iterations = df_raw[ITER_ID_COL + ARTIFACT_COL + TIME_D_COL]
df_iterations

Unnamed: 0,suite,benchmark,type,runid,pair,iteration,date,hostname,lscpu,meminfo,uname,iteration_start,iteration_end,iteration_time
0,scalabench,scalac,syncduet,2,A,1,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-...,2022-06-26 20:11:38.324999936,2022-06-26 20:11:51.592631040,13
1,scalabench,scalac,syncduet,2,A,2,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-...,2022-06-26 20:11:51.713999872,2022-06-26 20:11:55.827837952,4
2,scalabench,scalac,syncduet,2,A,3,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-...,2022-06-26 20:11:55.900000000,2022-06-26 20:11:59.570012416,3
3,scalabench,scalac,syncduet,2,A,4,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-...,2022-06-26 20:11:59.640999936,2022-06-26 20:12:02.940681984,3
4,scalabench,scalac,syncduet,2,A,5,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-...,2022-06-26 20:12:03.028999936,2022-06-26 20:12:06.074457088,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16315,dacapo,h2,seqn,1,A,16,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-...,2022-06-26 15:17:30.704999936,2022-06-26 15:17:36.653568512,5
16316,dacapo,h2,seqn,1,A,17,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-...,2022-06-26 15:17:36.945999872,2022-06-26 15:17:42.153336320,5
16317,dacapo,h2,seqn,1,A,18,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-...,2022-06-26 15:17:42.430000128,2022-06-26 15:17:48.089451776,5
16318,dacapo,h2,seqn,1,A,19,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-...,2022-06-26 15:17:48.367000064,2022-06-26 15:17:53.384856576,5


# Timeline

In [4]:
df = df_iterations[
    (
        (df_iterations["suite"].isin(["dacapo"]))
        # & (df_iterations["benchmark"].isin(["avrora"]))
        # & (df_iterations["runid"].isin([0]))
        # & (df_iterations["type"].isin(["duet", "syncduet", "seqn"]))
        # & (df_iterations["iteration"] <= 10)
        # & (df_iterations["hostname"].isin(["cirrus-1.graal.d3s.hide.ms.mff.cuni.cz"]))
        & (
            df_iterations["iteration_start"]
            < df_iterations[RF.start].min() + pd.DateOffset(minutes=30)
        )
    )
]
if df.shape[0] < 1000:
    px.timeline(
        df,
        x_start=RF.start,
        x_end=RF.end,
        y=RF.pair,
        color=RF.type,
        hover_data=[RF.benchmark, RF.time],
    ).show()
    px.timeline(
        df,
        x_start=RF.start,
        x_end=RF.end,
        y=RF.iteration,
        color=RF.type,
        hover_data=[RF.benchmark, RF.time],
    ).show()
else:
    print(f"Data too large to display properly {df.shape[0]}")

# Iteration count, duration, preprocessing, and variation

In [5]:
unique_suites = df_iterations[RF.suite].unique()

df_prep = preprocess_data(df_iterations)
df_prep["prep"] = "prep"
df_iterations["prep"] = "raw"
df_input = pd.concat([df_prep, df_iterations])

color_type = "TYPE"
df_input[color_type] = df_input[RF.type] + ":" + df_input["prep"]

groupby_col = RUN_ID_COL + [color_type]
groupby_col.remove(RF.type)

df_grouped = (
    df_input.groupby(groupby_col)
    .agg(
        time_count=(RF.time, len),
        time_mean=(RF.time, "mean"),
        time_var=(RF.time, "var"),
        time_std=(RF.time, "std"),
    )
    .reset_index()
)
df_grouped["time_std_relative"] = df_grouped["time_std"] / df_grouped["time_mean"]
df_grouped

for suite in unique_suites:
    df = df_input[df_input[RF.suite] == suite]
    df = df.sort_values(by=color_type)

    df_itercount = df.groupby(groupby_col)[RF.iteration].count().reset_index()

    fig = px.bar(
        df_itercount,
        x=RF.benchmark,
        y=RF.iteration,
        color=color_type,
        barmode="group",
        title=f"Benchmark Iteration Count - {suite}",
    )
    fig.update_xaxes(categoryorder="category ascending")
    fig.show()

    fig = px.bar(
        df_grouped[df_grouped[RF.suite] == suite],
        x=RF.benchmark,
        y="time_std_relative",
        color=color_type,
        barmode="group",
        log_y=True,
        title=f"Benchmark Time Relative Deviation - {suite} (log y axis)",
    )
    fig.update_xaxes(categoryorder="category ascending")
    fig.show()

    fig = px.box(
        df,
        y=RF.time,
        x=RF.benchmark,
        color=color_type,
        title=f"Benchmark Iteration Duration - {suite}",
    )
    fig.update_xaxes(categoryorder="category ascending")
    fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



# Seqn CI computation

In [44]:
df = df_raw[
    (df_raw[RF.suite] == "dacapo")
    & (df_raw[RF.benchmark] == "avrora")
    & (df_raw[RF.type] == "seqn")
]
df = preprocess_data(df)
# df.groupby(ARTIFACT_COL + PAIR_ID_COL).agg(gmsr=(RF.time_ns, np.mean))
bootstrap(data=(df[RF.time_ns],), statistic=np.mean)

BootstrapResult(confidence_interval=ConfidenceInterval(low=4841889484.585316, high=4903924010.962948), standard_error=15673582.433379425)

In [134]:
df_ci_seqn = compute_ci_seqn(df_raw, sample_type="run_means")

df = expand_confidence_interval(df_ci_seqn)
for suite in unique_suites:
    fig = px.scatter(
        df[df[RF.suite] == suite],
        x=RF.benchmark,
        y="mid",
        error_y="err",
        color=RF.pair,
        title=f"Seqn duration CI - {suite}",
    )
    fig.show()

# Syncduet CI computation


In [117]:
df = df_raw[
    (df_raw[RF.suite] == "dacapo")
    & (df_raw[RF.benchmark] == "avrora")
    & (df_raw[RF.type] == "syncduet")
]
df = preprocess_data(df)
df = df.pivot_table(
    index=ARTIFACT_COL + RUN_ID_COL + [RF.iteration],
    columns=RF.pair,
    values=[RF.time_ns],
).reset_index()
df.columns = [f"{i}_{j}" if j else i for i, j in df.columns]
df[DF.pair_speedup] = df[RF.time_ns + "_A"] / df[RF.time_ns + "_B"]
df_gmsr = df.groupby(ARTIFACT_COL + RUN_ID_COL).agg(gmsr=(DF.pair_speedup, gmean))
display(df_gmsr)
df_ggmsr = df_gmsr.groupby(ARTIFACT_COL + BENCHMARK_ID_COL).agg(ggmsr=("gmsr", gmean))
display(df_ggmsr)

# Bootstrap
bootstrap(data=(df_gmsr["gmsr"],), statistic=gmean)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,gmsr
date,hostname,lscpu,meminfo,uname,suite,benchmark,type,runid,Unnamed: 9_level_1
Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-200.fc34.x86_64 #1 SMP Thu Sep 30 11:55:35 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux,dacapo,avrora,syncduet,0,1.011722
Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-200.fc34.x86_64 #1 SMP Thu Sep 30 11:55:35 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux,dacapo,avrora,syncduet,1,0.993442
Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-200.fc34.x86_64 #1 SMP Thu Sep 30 11:55:35 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux,dacapo,avrora,syncduet,2,1.001229
Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-200.fc34.x86_64 #1 SMP Thu Sep 30 11:55:35 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux,dacapo,avrora,syncduet,3,1.016211


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,ggmsr
date,hostname,lscpu,meminfo,uname,suite,benchmark,type,Unnamed: 8_level_1
Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-200.fc34.x86_64 #1 SMP Thu Sep 30 11:55:35 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux,dacapo,avrora,syncduet,1.005611


BootstrapResult(confidence_interval=ConfidenceInterval(low=0.9953829125676842, high=1.0128422472884415), standard_error=0.004498241147943019)

In [132]:
df_ci_syncduet = compute_ci_syncduet(df_raw, sample_type="run_means")

df = expand_confidence_interval(df_ci_syncduet)
for suite in unique_suites:
    fig = px.scatter(
        df[df[RF.suite] == suite],
        x=RF.benchmark,
        y="mid",
        error_y="err",
        title=f"Syncduet pairwise speedup CI - {suite}",
    )
    fig.show()

# Duet CI computation

In [133]:
df_duet_ci = compute_ci_duet_no_overlaps(df_raw, sample_type="run_means")

df = expand_confidence_interval(df_duet_ci)
for suite in unique_suites:
    fig = px.scatter(
        df[df[RF.suite] == suite],
        x=RF.benchmark,
        y="mid",
        error_y="err",
        color=RF.pair,
        title=f"Duet no-overlap duration CI - {suite}",
    )
    fig.show()

# Runtime analysis

In [None]:
# TODO: this is not accurate, rather compute run duration of seqn and duet runs separately then merge the data frames

# df = (
#    df.groupby(by=RUN_ID_COL + [RF.pair])
#    .agg(pair_start=(RF.start, min), pair_end=(RF.end, max))
#    .reset_index()
# )
# df["pair_duration"] = (df["pair_end"] - df["pair_start"]).dt.seconds
# df["pair_duration"] = np.where(
#    df[RF.type].isin(["duet", "syncduet"]),
#    df["pair_duration"] / 2,
#    df["pair_duration"],
# )
# fig = px.box(
#    df,
#    y="pair_duration",
#    x=RF.benchmark,
#    color=RF.type,
#    title=f"Benchmark Run Duration - {suite}",
# )
# fig.show()

# df = (
#    df_iterations.groupby(by=["suite", "type"])
#    .agg(suite_duration=("pair_duration", sum))
#    .reset_index()
# )
# fig = px.bar(
#    df,
#    x="suite_duration",
#    y=RF.suite,
#    color=RF.type,
#    barmode="group",
#    orientation="h",
#    title="Suite Run Duration",
# )
# fig.show()
#
# df = df.groupby(by=[RF.type]).agg(type_duration=("suite_duration", sum)).reset_index()
# fig = px.bar(
#    df, x="type_duration", y=RF.type, orientation="h", title="Type Run Duration"
# )
# fig.show()

# Duet overlaps

In [95]:
# df_over_raw = pd.read_csv("../results.private-cloud.overlaps.csv")
df_over_raw = compute_overlaps(df_raw)


def filter_overlaps(df):
    return df[
        (
            (df[RF.suite].isin(["speccpu", "renaissance"]))
            # & (df[RF.benchmark].isin(["avrora"]))
            # & (df[RF.runid].isin([1]))
            # & (df[RF.type].isin(["duet"]))
            # & (df[RF.overlap_time_ns] >= NS_PER_S)
            # & (df["hostname"].isin(["cirrus-1.graal.d3s.hide.ms.mff.cuni.cz"]))
        )
    ]


df_over = df_over_raw  # filter_overlaps(df_over_raw)
df_over

Unnamed: 0,suite,benchmark,type,runid,iteration_A,iteration_start_ns_A,iteration_end_ns_A,date,hostname,lscpu,...,iteration_start_ns_B,iteration_end_ns_B,date_B,hostname_B,lscpu_B,meminfo_B,uname_B,overlap_start_ns,overlap_end_ns,overlap_time_ns
0,dacapo,avrora,duet,0,1,1.656259e+18,1.656259e+18,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,...,1.656259e+18,1.656259e+18,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-...,1.656259e+18,1.656259e+18,7.755249e+09
1,dacapo,avrora,duet,0,1,1.656259e+18,1.656259e+18,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,...,1.656259e+18,1.656259e+18,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-...,1.656259e+18,1.656259e+18,7.163379e+08
2,dacapo,avrora,duet,0,2,1.656259e+18,1.656259e+18,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,...,1.656259e+18,1.656259e+18,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-...,1.656259e+18,1.656259e+18,6.387418e+09
3,dacapo,avrora,duet,0,2,1.656259e+18,1.656259e+18,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,...,1.656259e+18,1.656259e+18,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-...,1.656259e+18,1.656259e+18,3.503588e+08
4,dacapo,avrora,duet,0,3,1.656259e+18,1.656259e+18,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,...,1.656259e+18,1.656259e+18,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-...,1.656259e+18,1.656259e+18,6.380385e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4622,scalabench,xalan,duet,3,18,1.656297e+18,1.656297e+18,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,...,1.656297e+18,1.656297e+18,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-...,1.656297e+18,1.656297e+18,1.227697e+09
4623,scalabench,xalan,duet,3,18,1.656297e+18,1.656297e+18,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,...,1.656297e+18,1.656297e+18,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-...,1.656297e+18,1.656297e+18,1.657277e+08
4624,scalabench,xalan,duet,3,19,1.656297e+18,1.656297e+18,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,...,1.656297e+18,1.656297e+18,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-...,1.656297e+18,1.656297e+18,1.161444e+09
4625,scalabench,xalan,duet,3,19,1.656297e+18,1.656297e+18,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,...,1.656297e+18,1.656297e+18,Sun Jun 26 04:57:57 PM CEST 2022,teaching.d3s.hide.ms.mff.cuni.cz,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz,16375728 kB,Linux teaching.d3s.hide.ms.mff.cuni.cz 5.14.9-...,1.656297e+18,1.656297e+18,3.090427e+08


In [96]:
for suite in df_over[RF.suite].unique():
    fig = px.violin(
        df_over[df_over[RF.suite] == suite],
        x=RF.overlap_time_ns,
        y=RF.benchmark,
        color=RF.type,
        points="all",
        orientation="h",
        title=f"Overlap time: {suite}",
    )
    fig.update_layout(height=3000)
    fig.update_xaxes(categoryorder="category ascending")
    fig.show()