In [None]:

# %% [markdown]
# # Linux Distribution Performance Benchmark
# ## Comparative Analysis: Kali Linux vs Ubuntu Desktop vs Ubuntu Server

# %%
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Configure visualization
# Apply matplotlib's built-in 'ggplot' style sheet to all figures created below.
plt.style.use('ggplot')
# Set the default colour cycle to seaborn's "husl" palette; this runs after the
# style sheet, so the palette chosen here is the one left in effect.
sns.set_palette("husl")
# IPython line magic (not plain Python): render figures inline in the cell output.
%matplotlib inline

# %%
# Build the benchmark table: 3 distributions x 3 runs x 3 vector sizes = 27 rows.
# Every column is a plain list of length 27, ordered distribution -> run -> size.
# NOTE(review): "Version" values are paired with distributions purely by position
# (Ubuntu Server, Kali Linux, Ubuntu Desktop) -- confirm that ordering is correct.
data = {
    "Distribution": [
        distro
        for distro in ("Ubuntu Server", "Kali Linux", "Ubuntu Desktop")
        for _row in range(9)
    ],
    "Version": [
        version
        for version in ("6.12.25", "6.11.0-26", "5.15.0-142")
        for _row in range(9)
    ],
    # Each run number covers three consecutive rows (one per vector size).
    "Run": [
        run
        for _distro in range(3)
        for run in (1, 2, 3)
        for _size in range(3)
    ],
    # The three vector sizes cycle fastest: 1e6, 5e6, 2e7 elements.
    "Vector_Size": [
        size
        for _block in range(9)
        for size in (1e6, 5e6, 2e7)
    ],
    # Measured wall time in seconds; one line per run, columns = 1e6 / 5e6 / 2e7.
    "Time_s": [
        # Ubuntu Server (runs 1-3)
        0.014202, 0.123924, 0.507625,
        0.009982, 0.026007, 0.295844,
        0.011415, 0.038723, 0.096485,
        # Kali Linux (runs 1-3)
        0.016461, 0.088866, 0.431518,
        0.004097, 0.018172, 0.137146,
        0.006525, 0.021580, 0.134737,
        # Ubuntu Desktop (runs 1-3)
        0.046846, 0.213789, 0.987970,
        0.006832, 0.012070, 0.050252,
        0.003078, 0.012690, 0.051866,
    ],
}

# Vector_Size_M expresses the size in millions of elements for readable axis labels.
df = pd.DataFrame(data).assign(
    Vector_Size_M=lambda frame: frame["Vector_Size"] / 1e6
)

# %% [markdown]
# ## 1. Performance Overview

# %%
# Scaling curves: execution time against vector size, one line per distribution.
# The repeated runs at each size are aggregated by seaborn's lineplot defaults.
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(
    data=df,
    x='Vector_Size_M',
    y='Time_s',
    hue='Distribution',
    style='Distribution',
    markers=True,
    dashes=False,
    ax=ax,
)
ax.set_title('Execution Time by Vector Size', fontsize=14)
ax.set_xlabel('Vector Size (millions)')
ax.set_ylabel('Time (seconds)')
# Log-log axes: sizes span 1M-20M and timings span several orders of magnitude.
ax.set_xscale('log')
ax.set_yscale('log')
# Draw both major and minor grid lines so the log ticks stay readable.
ax.grid(True, which="both", ls="--")
ax.legend(title='Linux Distribution')
plt.show()

# %% [markdown]
# ## 2. Performance Consistency Across Runs

# %%
# Run-to-run spread: one box per (vector size, distribution) over the repeated runs.
fig, ax = plt.subplots(figsize=(14, 6))
sns.boxplot(data=df, x='Vector_Size_M', y='Time_s', hue='Distribution', ax=ax)
ax.set_title('Performance Distribution Across Runs', fontsize=14)
ax.set_xlabel('Vector Size (millions)')
ax.set_ylabel('Time (seconds)')
# Log scale keeps the small-vector boxes visible next to the slow outlier runs.
ax.set_yscale('log')
# Park the legend just outside the right edge of the axes. `loc` names the legend
# corner that is pinned to `bbox_to_anchor`; without it matplotlib falls back to
# loc='best', which may pin a different corner and pull the legend over the plot.
ax.legend(
    title='Linux Distribution',
    loc='upper left',
    bbox_to_anchor=(1.05, 1),
)
# Re-fit the layout so the outside legend is not clipped.
fig.tight_layout()
plt.show()

# %% [markdown]
# ## 3. Speedup Relative to Ubuntu Desktop

# %%
# Calculate speedup factors
# Baseline: mean Ubuntu Desktop time for each vector size (indexed by Vector_Size_M).
desktop_avg = (
    df.loc[df['Distribution'] == 'Ubuntu Desktop']
    .groupby('Vector_Size_M')['Time_s']
    .mean()
)

# Mean time of each (distribution, vector size) group, broadcast back onto every row.
group_avg = df.groupby(['Distribution', 'Vector_Size_M'])['Time_s'].transform('mean')

# Speedup = baseline mean time / group mean time (a ratio of means).
# Dividing the baseline mean by each individual run and letting the bar plot
# average those ratios is biased upward by fast runs: the Ubuntu Desktop bars
# would not land on the 1.0 reference line even though they are the baseline.
# With a ratio of means the baseline is exactly 1 for every vector size.
# The frame keeps one row per run; Speedup is constant within each group.
speedup_df = df.assign(
    Speedup=df['Vector_Size_M'].map(desktop_avg) / group_avg
)

# Plot speedup
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(data=speedup_df, x='Vector_Size_M', y='Speedup', hue='Distribution', ax=ax)
ax.set_title('Speedup Relative to Ubuntu Desktop', fontsize=14)
ax.set_xlabel('Vector Size (millions)')
ax.set_ylabel('Speedup Factor (Higher is better)')
# Reference line at parity with the Ubuntu Desktop baseline.
ax.axhline(1, color='gray', linestyle='--')
ax.legend(title='Linux Distribution')
plt.show()

# %% [markdown]
# ## 4. Statistical Summary

# %%
# Descriptive statistics of the timings for every (distribution, vector size) pair,
# rounded to 6 decimal places.
stat_names = ['mean', 'std', 'min', 'max', 'median']
timings_by_group = df.groupby(['Distribution', 'Vector_Size_M'])['Time_s']
summary = timings_by_group.agg(stat_names).round(6)

# Last expression of the cell: display the table with a colour gradient applied
# to the 'mean' and 'median' columns.
summary.style.background_gradient(cmap='YlOrRd', subset=['mean', 'median'])



ModuleNotFoundError: No module named 'pandas'