In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from utils import *

In [None]:
# Location of the raw latency measurements for the baseline run, relative to
# the notebook's working directory.
file_path = "../data/raw/run_0/baseline/latencies.csv"

# Options handed to process_data (pulled in by the star import from utils).
# NOTE(review): the key names suggest outlier filtering on the "latency"
# column and microsecond units -- confirm against utils.process_data.
# "latency_unit" is also read when the y-axis label is built in the plotting cell.
config = dict(
    remove_outliers=True,
    outlier_column="latency",
    latency_unit="us",
)

# The plotting cell expects the result to be a DataFrame with
# 'configuration', 'work_size' and 'latency' columns.
df = process_data(file_path, config)

In [None]:
# Set the theme
sns.set_theme(style="darkgrid")

# Compute the mean and sample standard deviation of latency for each
# work_size within each configuration (grouping once, aggregating twice).
group_keys = ['configuration', 'work_size']
latency_by_group = df.groupby(group_keys)['latency']
mean_df = latency_by_group.mean().reset_index()
std_df = latency_by_group.std().reset_index()

# Merge the mean and std dataframes; the clashing 'latency' columns become
# 'latency_mean' and 'latency_std'.
plot_df = pd.merge(mean_df, std_df, on=group_keys, suffixes=('_mean', '_std'))

# Assign one explicit colour per configuration so that every line and its
# shaded band are guaranteed to share a colour, instead of relying on two
# independent colour cycles happening to line up.
configurations = list(plot_df['configuration'].unique())
palette = dict(zip(configurations, sns.color_palette(n_colors=len(configurations))))

# Create the lineplot
fig, ax = plt.subplots(figsize=(10, 6))
# plot_df already holds exactly one row per (configuration, work_size), so
# seaborn has nothing to aggregate: estimator=None draws the rows as they are.
# (The former ci='sd' / err_style="bars" arguments could not produce error
# bars from single observations, and `ci` is deprecated in seaborn >= 0.12.)
lineplot = sns.lineplot(
    data=plot_df,
    x='work_size',
    y='latency_mean',
    hue='configuration',
    style='configuration',
    dashes=False,
    palette=palette,
    estimator=None,
    ax=ax,
)

# Add shaded area of +/- one standard deviation around each line
for configuration in configurations:
    subset = plot_df[plot_df['configuration'] == configuration]
    # std is NaN for a group with a single sample; draw a zero-width band
    # there rather than leaving a hole in the shaded region.
    spread = subset['latency_std'].fillna(0.0)
    ax.fill_between(
        subset['work_size'],
        subset['latency_mean'] - spread,
        subset['latency_mean'] + spread,
        color=palette[configuration],
        alpha=0.2,
    )

ax.set_title('Latency vs Work Size by Configuration')
ax.set_xlabel('Work Size')
# Was `lt.ylabel(...)`, which raised NameError and aborted the cell.
ax.set_ylabel(f'Latency ({config["latency_unit"]})')
ax.legend(title='Configuration')

# Anchor both axes at zero, leaving the upper limits automatic.
ax.set_xlim(left=0)
ax.set_ylim(bottom=0)

# Lay out last so the final limits and labels are taken into account.
fig.tight_layout()

plt.show()