In [9]:
import pandas as pd
import numpy as np

from bokeh.plotting import figure, output_notebook, show
from bokeh.models import ColumnDataSource, LinearColorMapper, LogColorMapper
from bokeh.transform import factor_cmap
from bokeh.palettes import Colorblind, Magma256

output_notebook()
palette = Colorblind[8]
palette_gradient = Magma256

In [10]:
df_iterations = pd.read_csv("../results.renaissance.test.csv")

In [11]:
df_iterations["iteration_start_ns"] = df_iterations.groupby(["benchmark", "pair"])[
    "iteration_time_ns"
].transform(pd.Series.cumsum)
df_iterations["iteration_start_ns"] = df_iterations.groupby(["benchmark", "pair"])[
    "iteration_start_ns"
].shift(1, fill_value=0)
df_iterations["iteration_start_ns"] = (
    df_iterations["iteration_start_ns"] + df_iterations["epoch_start_ms"] * 1000
)

df_iterations["iteration_end_ns"] = (
    df_iterations["iteration_start_ns"] + df_iterations["iteration_time_ns"]
)

df_iterations.describe()

Unnamed: 0,runid,iteration,epoch_start_ms,iteration_time_ns,machine,provider,iteration_start_ns,iteration_end_ns
count,40.0,40.0,40.0,40.0,0.0,0.0,40.0,40.0
mean,0.5,4.5,1647775000000.0,2609017000.0,,,1647804000000000.0,1647807000000000.0
std,0.50637,2.908872,23508.72,1934521000.0,,,14755960000.0,13977090000.0
min,0.0,0.0,1647775000000.0,1535864000.0,,,1647775000000000.0,1647784000000000.0
25%,0.0,2.0,1647775000000.0,1753671000.0,,,1647793000000000.0,1647795000000000.0
50%,0.5,4.5,1647775000000.0,1957464000.0,,,1647802000000000.0,1647806000000000.0
75%,1.0,7.0,1647775000000.0,2348786000.0,,,1647817000000000.0,1647819000000000.0
max,1.0,9.0,1647775000000.0,9081739000.0,,,1647826000000000.0,1647828000000000.0


In [12]:
source = ColumnDataSource(df_iterations[df_iterations.iteration < 10])
pair_cmap = factor_cmap(
    "pair", palette=palette, factors=sorted(df_iterations["pair"].unique())
)
p = figure()
p.circle(x="iteration_start_ns", y="iteration", color=pair_cmap, size=9, source=source)
p.cross(x="iteration_end_ns", y="iteration", color=pair_cmap, size=9, source=source)

show(p)

In [13]:
columns_of_interest = [
    "benchmark",
    "pair",
    "runid",
    "iteration",
    "iteration_time_ns",
    "iteration_start_ns",
    "iteration_end_ns",
]
df = df_iterations[columns_of_interest]

# Compute overlap interval
df_overlap = df[df.pair == "A"].merge(
    df[df.pair == "B"], on=["benchmark", "runid"], suffixes=["_A", "_B"], how="inner"
)
df_overlap["overlap_start_ns"] = df_overlap[
    ["iteration_start_ns_A", "iteration_start_ns_B"]
].max(axis=1)
df_overlap["overlap_end_ns"] = df_overlap[
    ["iteration_end_ns_A", "iteration_end_ns_B"]
].min(axis=1)

df_overlap.drop(["pair_A", "pair_B"], axis=1, inplace=True)

# Filter out non overlapping intervals
df_overlap["overlap"] = df_overlap["overlap_start_ns"].lt(df_overlap["overlap_end_ns"])
df_overlap = df_overlap[df_overlap["overlap"] == True]
df_overlap.reset_index(drop=True, inplace=True)

# Compute overlap properties
df_overlap["overlap_size"] = (
    df_overlap["overlap_end_ns"] - df_overlap["overlap_start_ns"]
)
df_overlap["overlap_proportion_A"] = (
    df_overlap["overlap_size"] / df_overlap["iteration_time_ns_A"]
)
df_overlap["overlap_proportion_B"] = (
    df_overlap["overlap_size"] / df_overlap["iteration_time_ns_B"]
)
df_overlap["iteration_difference_time_ns"] = (
    df_overlap["iteration_time_ns_A"] - df_overlap["iteration_time_ns_B"]
)
df_overlap["iteration_difference_time_ns_size_weighted"] = (
    df_overlap["iteration_time_ns_A"] - df_overlap["iteration_time_ns_B"]
) * df_overlap["overlap_size"]
df_overlap["iteration_difference_time_ns_proportion_weighted"] = (
    df_overlap["iteration_time_ns_A"] - df_overlap["iteration_time_ns_B"]
) * df_overlap["overlap_proportion_A"]
df_overlap["iteration_difference_time_ns_scaled"] = (
    df_overlap["iteration_time_ns_A"] - df_overlap["iteration_time_ns_B"]
) / df_overlap["iteration_time_ns_A"]
df_overlap["iteration_difference_time_ns_scaled_size_weighted"] = (
    (df_overlap["iteration_time_ns_A"] - df_overlap["iteration_time_ns_B"])
    / df_overlap["iteration_time_ns_A"]
) * df_overlap["overlap_size"]
df_overlap["iteration_difference_time_ns_scaled_proportion_weighted"] = (
    (df_overlap["iteration_time_ns_A"] - df_overlap["iteration_time_ns_B"])
    / df_overlap["iteration_time_ns_A"]
) * df_overlap["overlap_proportion_A"]

df_overlap

Unnamed: 0,benchmark,runid,iteration_A,iteration_time_ns_A,iteration_start_ns_A,iteration_end_ns_A,iteration_B,iteration_time_ns_B,iteration_start_ns_B,iteration_end_ns_B,...,overlap,overlap_size,overlap_proportion_A,overlap_proportion_B,iteration_difference_time_ns,iteration_difference_time_ns_size_weighted,iteration_difference_time_ns_proportion_weighted,iteration_difference_time_ns_scaled,iteration_difference_time_ns_scaled_size_weighted,iteration_difference_time_ns_scaled_proportion_weighted
0,chi-square,0,0,9052543216,1647774987700000,1647784040243216,0,9081738837,1647774987700000,1647784069438837,...,True,9052543216,1.0,0.996785,-29195621,-264294620820457136,-29195620.0,-0.003225,-29195620.0,-0.003225
1,chi-square,0,1,1960601313,1647784040243216,1647786000844529,0,9081738837,1647774987700000,1647784069438837,...,True,29195621,0.014891,0.003215,-7121137524,-207906032239582404,-106042000.0,-3.632119,-106042000.0,-0.054086
2,chi-square,0,1,1960601313,1647784040243216,1647786000844529,1,2394048981,1647784069438837,1647786463487818,...,True,1931405692,0.985109,0.806753,-433447668,-837163293159326256,-426993100.0,-0.221079,-426993100.0,-0.217787
3,chi-square,0,2,2594764894,1647786000844529,1647788595609423,1,2394048981,1647784069438837,1647786463487818,...,True,462643289,0.178299,0.193247,200715913,92859870144957857,35787390.0,0.077354,35787390.0,0.013792
4,chi-square,0,2,2594764894,1647786000844529,1647788595609423,2,2662040185,1647786463487818,1647789125528003,...,True,2132121605,0.821701,0.800935,-67275291,-143439101423762055,-55280190.0,-0.025927,-55280190.0,-0.021305
5,chi-square,0,3,2469688466,1647788595609423,1647791065297889,2,2662040185,1647786463487818,1647789125528003,...,True,529918580,0.214569,0.199065,-192351719,-101930749793039020,-41272720.0,-0.077885,-41272720.0,-0.016712
6,chi-square,0,3,2469688466,1647788595609423,1647791065297889,3,2949463436,1647789125528003,1647792074991439,...,True,1939769886,0.785431,0.657669,-479774970,-930653038862553420,-376830100.0,-0.194265,-376830100.0,-0.152582
7,chi-square,0,4,1941899087,1647791065297889,1647793007196976,3,2949463436,1647789125528003,1647792074991439,...,True,1009693550,0.519952,0.342331,-1007564349,-1017331224395248950,-523884700.0,-0.518855,-523884700.0,-0.26978
8,chi-square,0,4,1941899087,1647791065297889,1647793007196976,4,1906221669,1647792074991439,1647793981213108,...,True,932205537,0.480048,0.489033,35677418,33258686605463466,17126890.0,0.018372,17126890.0,0.00882
9,chi-square,0,5,2020537827,1647793007196976,1647795027734803,4,1906221669,1647792074991439,1647793981213108,...,True,974016132,0.482058,0.510967,114316158,111345782040260856,55107000.0,0.056577,55107000.0,0.027273


In [14]:
df_overlap[
    [
        "iteration_time_ns_A",
        "iteration_time_ns_B",
        "overlap_size",
        "iteration_difference_time_ns",
        "iteration_difference_time_ns_size_weighted",
        "iteration_difference_time_ns_proportion_weighted",
        "iteration_difference_time_ns_scaled_size_weighted",
        "iteration_difference_time_ns_scaled_proportion_weighted",
    ]
].describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
iteration_time_ns_A,38.0,2280175000.0,1456050000.0,1535864000.0,1754025000.0,1941899000.0,2099392000.0,9052543000.0
iteration_time_ns_B,38.0,2700905000.0,1962720000.0,1602075000.0,1794969000.0,2024387000.0,2500059000.0,9081739000.0
overlap_size,38.0,1320279000.0,1630331000.0,22608850.0,512999000.0,1031699000.0,1526829000.0,9052543000.0
iteration_difference_time_ns,38.0,-420730300.0,1387741000.0,-7121138000.0,-276942700.0,-41982690.0,34326710.0,305707800.0
iteration_difference_time_ns_size_weighted,38.0,-3.034683e+17,9.801497e+17,-5.822087e+18,-2.760628e+17,-4.717705e+16,1.074245e+16,5.093165e+17
iteration_difference_time_ns_proportion_weighted,38.0,-145378800.0,424298000.0,-2494790000.0,-144715900.0,-21215710.0,5529463.0,151508600.0
iteration_difference_time_ns_scaled_size_weighted,38.0,-145378800.0,424298000.0,-2494790000.0,-144715900.0,-21215710.0,5529463.0,151508600.0
iteration_difference_time_ns_scaled_proportion_weighted,38.0,-0.07026682,0.1867112,-1.069029,-0.07412223,-0.005651569,0.002846487,0.0711372


In [15]:
source = ColumnDataSource(df_overlap)
p = figure()
p.hbar(
    y="index",
    left="overlap_start_ns",
    right="overlap_end_ns",
    source=source,
    color={
        "field": "overlap_size",
        "transform": LogColorMapper(palette=palette_gradient),
    },
)

show(p)

In [16]:
source = ColumnDataSource(df_overlap)
p = figure()
p.vbar(x="overlap_start_ns", top="overlap_size", source=source)

show(p)