In [98]:
import pandas as pd
import plotly.graph_objects as go

In [99]:
def load_and_process_data(path="runs.csv"):
    """Read the benchmark runs CSV and append the derived ratio columns.

    Parameters
    ----------
    path : str
        Location of the CSV file. It must provide a ``timestamp`` column
        (parsed as datetimes) plus ``sequential_time``, ``parallel_time``,
        ``sequential_memory`` and ``parallel_memory``.

    Returns
    -------
    pandas.DataFrame
        The loaded rows with two extra columns:
        ``speedup`` (``sequential_time / parallel_time``) and
        ``memory_saving`` (``sequential_memory / parallel_memory``).
    """
    runs = pd.read_csv(path, parse_dates=["timestamp"])
    # Both ratios are "sequential over parallel", so a value above 1 means
    # the parallel run used less of that resource.
    return runs.assign(
        speedup=lambda frame: frame["sequential_time"] / frame["parallel_time"],
        memory_saving=lambda frame: frame["sequential_memory"] / frame["parallel_memory"],
    )

def _median_pivot(df, model_name, variable_name):
    """Return a num_workers x num_rows table of median ``variable_name`` for one model."""
    model_runs = df[df["ml_model_name"] == model_name]
    # Select the column *before* aggregating. Taking the median of the whole
    # frame first computes medians nobody uses and, on pandas versions where
    # ``numeric_only`` defaults to False, raises TypeError as soon as the
    # frame contains any non-numeric, non-key column.
    medians = (
        model_runs
        .groupby(["num_workers", "num_rows"])[variable_name]
        .median()
        .reset_index()
    )
    return medians.pivot(
        index="num_workers",
        columns="num_rows",
        values=variable_name,
    )


def create_median_plot(df, model_name, variable_name="speedup"):
    """Show a 3D surface of the median of ``variable_name`` for one model.

    The rows of ``df`` whose ``ml_model_name`` equals ``model_name`` are
    grouped by ``num_workers`` and ``num_rows``; the median of
    ``variable_name`` in each group becomes the surface height.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``ml_model_name``, ``num_workers``, ``num_rows`` and the
        column named by ``variable_name``.
    model_name : str
        Value of ``ml_model_name`` to plot.
    variable_name : str, default "speedup"
        Column whose per-group median is plotted on the z-axis.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()`` and deliberately not
        returned, so a cell that ends with this call does not display the
        figure twice.
    """
    # rows: num_workers (y-axis), columns: num_rows (x-axis), cells: median (z-axis)
    pivot = _median_pivot(df, model_name, variable_name)

    # One label shared by the hover box, the title and the z-axis so they
    # always name the variable that is actually plotted.
    label = variable_name.capitalize()

    fig = go.Figure(
        data=[
            go.Surface(
                x=pivot.columns.values,        # num_rows
                y=pivot.index.values,          # num_workers
                z=pivot.values,                # median of variable_name
                colorscale="Viridis",
                hovertemplate=
                    "<b>Median " + label + ":</b> %{z:.3f}<br>" +
                    "<b>Num Rows:</b> %{x}<br>" +
                    "<b>Num Workers:</b> %{y}<br>" +
                    "<extra></extra>"   # hides trace name box
            )
        ]
    )

    fig.update_layout(
        title="{} plot for {}".format(label, model_name),
        autosize=True,
        scene=dict(
            xaxis_title="Number of Rows",
            yaxis_title="Number of Workers",
            zaxis_title="Median {}".format(label),
        ),
        margin=dict(l=65, r=50, b=65, t=90)
    )

    fig.show()

In [100]:
# Load the benchmark runs once; every plotting cell below reads this frame.
df = load_and_process_data("runs.csv")
# Distinct model names in order of first appearance in the file.
model_options = df["ml_model_name"].drop_duplicates().tolist()
print("Available models:", model_options)

Available models: ['LogisticRegression', 'LightGBM']


# Logistic Regression

## Time Analysis

In [101]:
# Median sequential/parallel time ratio per (num_workers, num_rows).
create_median_plot(df, model_name="LogisticRegression", variable_name="speedup")

## Memory Analysis

In [102]:
# Median memory of the sequential run, then of the parallel run, for comparison.
create_median_plot(df, model_name="LogisticRegression", variable_name="sequential_memory")
create_median_plot(df, model_name="LogisticRegression", variable_name="parallel_memory")


# LightGBM

## Time Analysis

In [103]:
# Median sequential/parallel time ratio per (num_workers, num_rows).
create_median_plot(df, model_name="LightGBM", variable_name="speedup")

## Memory Analysis

In [104]:
# Median memory of the sequential run, then of the parallel run, for comparison.
create_median_plot(df, model_name="LightGBM", variable_name="sequential_memory")
create_median_plot(df, model_name="LightGBM", variable_name="parallel_memory")