In [13]:
import pandas as pd
import plotly.graph_objects as go

In [27]:
def load_and_process_data(path="runs.csv"):
    """Read the benchmark CSV and attach a per-row ``speedup`` column.

    Args:
        path: Location of the CSV file. The file must provide the columns
            ``timestamp`` (parsed as dates), ``sequential_time`` and
            ``parallel_time``.

    Returns:
        The loaded DataFrame with an additional ``speedup`` column equal to
        ``sequential_time / parallel_time``.
    """
    return pd.read_csv(path, parse_dates=["timestamp"]).assign(
        # Ratio > 1 means the parallel run was faster than the sequential one.
        speedup=lambda runs: runs["sequential_time"] / runs["parallel_time"]
    )

def create_speedup_plot(df, model_name):
    """Show an interactive 3-D surface of the median speedup for one model.

    The rows belonging to ``model_name`` are grouped by ``num_workers`` and
    ``num_rows``; the median ``speedup`` of each group becomes the surface
    height (x: number of rows, y: number of workers, z: median speedup).

    Args:
        df: DataFrame with the columns ``ml_model_name``, ``num_workers``,
            ``num_rows`` and ``speedup``.
        model_name: Value of ``ml_model_name`` whose runs should be plotted.

    Returns:
        None. The figure is displayed through ``fig.show()``.

    Raises:
        ValueError: If ``df`` has no rows for ``model_name``.
    """
    # Keep only the runs of the requested model.
    model_runs = df[df["ml_model_name"] == model_name]
    if model_runs.empty:
        # Fail loudly instead of silently rendering an empty surface.
        available = df["ml_model_name"].unique().tolist()
        raise ValueError(
            "No rows found for model {!r}; available models: {}".format(
                model_name, available
            )
        )

    # Median speedup per (num_workers, num_rows) combination.
    # The "speedup" column is selected *before* aggregating so that unrelated
    # non-numeric columns are never fed to median(); with pandas >= 2.0
    # (numeric_only defaults to False) aggregating the whole frame can raise
    # a TypeError.
    # Resulting layout: index = num_workers (y-axis), columns = num_rows
    # (x-axis), values = median speedup (z-axis).
    pivot = (
        model_runs.groupby(["num_workers", "num_rows"])["speedup"]
        .median()
        .unstack("num_rows")
    )

    fig = go.Figure(
        data=[
            go.Surface(
                x=pivot.columns.values,        # num_rows
                y=pivot.index.values,          # num_workers
                z=pivot.values,                # median speedup
                colorscale="Viridis"
            )
        ]
    )

    fig.update_layout(
        title="Speedup plot for {}".format(model_name),
        autosize=True,
        scene=dict(
            xaxis_title="Number of Rows",
            yaxis_title="Number of Workers",
            zaxis_title="Median Speedup",
        ),
        margin=dict(l=65, r=50, b=65, t=90)
    )

    fig.show()


In [30]:
# Load the benchmark runs; the loader also derives the "speedup" column.
df = load_and_process_data("runs.csv")

# Distinct model names present in the data, in order of first appearance.
model_options = df["ml_model_name"].unique().tolist()
print(f"Available models: {model_options}")

# Render the median-speedup surface for one of the available models.
create_speedup_plot(df, "LightGBM")

Available models: ['LogisticRegression', 'LightGBM']
