In [None]:
# %% [markdown]
# # Linux Vector Operation Performance Benchmark Analysis
# Comparing Kali Linux, Ubuntu Desktop, and Ubuntu Server

# %%
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Notebook-wide plot styling: apply the seaborn whitegrid theme with the
# "muted" palette first, then set the default figure size used by any figure
# that does not pass its own figsize.
sns.set(style="whitegrid", palette="muted")
plt.rc('figure', figsize=(12, 6))

# %%
# Prepare the data
# Layout: for every system, 3 runs x 3 vector sizes = 9 timings, stored
# run-major (all sizes of run 1, then run 2, then run 3).
systems = ["Kali Linux", "Ubuntu Server", "Ubuntu Desktop"]

# NOTE(review): these kernel strings are paired with `systems` by position.
# "-generic" is normally an Ubuntu kernel flavour and "-amd64" a Debian/Kali
# one, so the pairing below looks swapped - confirm against the benchmark hosts.
kernels = ["6.11.0-26-generic", "6.12.25-amd64", "5.15.0-142-generic"]

run_ids = [1, 2, 3]
vector_sizes = [1e6, 5e6, 2e7]
samples_per_system = len(run_ids) * len(vector_sizes)

# Measured wall-clock seconds, one row per run, one column per vector size.
timings = {
    "Kali Linux": [
        0.004082, 0.094043, 0.375515,
        0.003267, 0.016281, 0.084113,
        0.004828, 0.019527, 0.073148,
    ],
    "Ubuntu Server": [
        0.015288, 0.092678, 0.558598,
        0.005406, 0.025753, 0.086896,
        0.004480, 0.021111, 0.113999,
    ],
    "Ubuntu Desktop": [
        0.003565, 0.236839, 0.882077,
        0.003425, 0.013455, 0.051185,
        0.003044, 0.012636, 0.049898,
    ],
}

data = {
    "System": [name for name in systems for _ in range(samples_per_system)],
    "OS_Version": [kernel for kernel in kernels for _ in range(samples_per_system)],
    "Run": [run for _ in systems for run in run_ids for _ in vector_sizes],
    "Vector_Size": [size for _ in systems for _ in run_ids for size in vector_sizes],
    "Time_s": [seconds for name in systems for seconds in timings[name]],
}

# Vector size expressed in millions of elements, for more readable axes.
df = pd.DataFrame(data).assign(
    Vector_Size_M=lambda frame: frame['Vector_Size'] / 1e6
)

# %%
# Plot 1: Performance comparison across systems (average of runs)
# Mean execution time per (system, vector size); `avg_times` is also the
# input of the speedup plot further down, so the name is kept.
avg_times = df.groupby(['System', 'Vector_Size_M'])['Time_s'].mean().reset_index()

# Same system -> colour assignment as the per-run and boxplot figures, keyed by
# name so it does not depend on the (alphabetical) row order of `avg_times`.
system_colors = {
    'Kali Linux': '#0072B2',
    'Ubuntu Server': '#009E73',
    'Ubuntu Desktop': '#D55E00',
}
size_ticks = [1, 5, 20]

fig, ax = plt.subplots(figsize=(14, 7))
sns.lineplot(data=avg_times, x='Vector_Size_M', y='Time_s', hue='System',
             style='System', markers=['o', 's', 'D'], palette=system_colors,
             linewidth=2.5, markersize=10, ax=ax)

ax.set_title('Vector Operation Performance Comparison (Average Across Runs)', fontsize=16)
ax.set_xlabel('Vector Size (millions)', fontsize=12)
ax.set_ylabel('Execution Time (seconds)', fontsize=12)

# Switch to log scales BEFORE placing ticks: changing the scale installs the
# default log locator/formatter and would throw away ticks set earlier.
ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xticks(size_ticks)
# Explicit labels, because the log formatter leaves non-decade ticks (5, 20) blank.
ax.set_xticklabels([str(tick) for tick in size_ticks])

ax.grid(True, which="both", ls="--")
ax.legend(title='Linux Distribution', fontsize=11)
fig.tight_layout()
plt.show()

# %%
# Plot 2: Performance by run number (facets)
# One panel per run, one line per system, log-log axes.
# Colours are keyed by system name so they stay stable regardless of row order.
system_colors = {
    'Kali Linux': '#0072B2',
    'Ubuntu Server': '#009E73',
    'Ubuntu Desktop': '#D55E00',
}
size_ticks = [1, 5, 20]

g = sns.FacetGrid(df, col='Run', hue='System', height=5, aspect=1.2,
                  palette=system_colors)
g.map(sns.lineplot, 'Vector_Size_M', 'Time_s', marker='o', linewidth=2.5)
g.add_legend(title='Linux Distribution')
g.set(xscale='log', yscale='log')
g.set_titles("Run {col_name}")
g.fig.suptitle('Performance Across Different Runs', y=1.05, fontsize=16)
g.set_axis_labels("Vector Size (millions)", "Time (seconds)")

# Ticks go on after the log scale is active. Labels are given explicitly
# because the default log formatter only labels decade ticks on an axis this
# wide, which would leave the ticks at 5 and 20 unlabeled.
for ax in g.axes.flat:
    ax.set_xticks(size_ticks)
    ax.set_xticklabels([str(tick) for tick in size_ticks])
plt.show()

# %%
# Plot 3: Boxplot showing performance distribution
# One box per (vector size, system), built from the individual run timings
# and drawn on a log-scaled time axis.
box_colors = ['#0072B2', '#009E73', '#D55E00']

fig, ax = plt.subplots(figsize=(12, 7))
sns.boxplot(data=df, x='Vector_Size_M', y='Time_s', hue='System',
            palette=box_colors, ax=ax)
ax.set_title('Performance Distribution by Vector Size and System', fontsize=16)
ax.set_xlabel('Vector Size (millions)', fontsize=12)
ax.set_ylabel('Execution Time (seconds)', fontsize=12)
ax.set_yscale('log')
ax.legend(title='Linux Distribution', fontsize=11)
fig.tight_layout()
plt.show()

# %%
# Plot 4: Speedup comparison (relative to Kali Linux)
# Speedup = Kali mean time / system mean time at the same vector size, so
# Kali itself is 1.0 and values above 1 mean "faster than Kali".
kali_avg = (
    avg_times.loc[avg_times['System'] == 'Kali Linux']
    .set_index('Vector_Size_M')['Time_s']
)
# Vectorised baseline lookup: map each row's vector size to the Kali mean.
speedup_df = avg_times.assign(
    Speedup=lambda frame: frame['Vector_Size_M'].map(kali_avg) / frame['Time_s']
)

# `avg_times` is ordered alphabetically by the groupby, so a positional colour
# list would give Ubuntu Desktop/Server the opposite colours from the per-run
# and boxplot figures. Key colours by name and fix the bar order instead.
system_colors = {
    'Kali Linux': '#0072B2',
    'Ubuntu Server': '#009E73',
    'Ubuntu Desktop': '#D55E00',
}

fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=speedup_df, x='Vector_Size_M', y='Speedup', hue='System',
            hue_order=list(system_colors), palette=system_colors, ax=ax)
ax.set_title('Speedup Relative to Kali Linux', fontsize=16)
ax.set_xlabel('Vector Size (millions)', fontsize=12)
ax.set_ylabel('Speedup Factor (Higher is better)', fontsize=12)
# Reference line at parity with the Kali baseline.
ax.axhline(1, color='gray', linestyle='--')
ax.legend(title='Linux Distribution', fontsize=11)
fig.tight_layout()
plt.show()

# %%
# Table: Summary statistics
# Mean / std / min / max of the run timings for each (system, vector size).
timings_by_case = df.groupby(['System', 'Vector_Size_M'])['Time_s']
summary = timings_by_case.agg(['mean', 'std', 'min', 'max'])

# Render with five decimals and a per-column heat gradient; the styler is the
# cell's last expression so the notebook displays it.
summary_styled = summary.style.format("{:.5f}").background_gradient(cmap='YlOrRd')
summary_styled


ModuleNotFoundError: No module named 'pandas'