# FinCompress — Notebook 05: Analysis and Final Plots

**RUN ON: Local/CPU**

This notebook generates all final publication-quality visualizations from benchmark results.
Run this AFTER completing benchmarking (`python -m fincompress.evaluation.benchmark`).

All plots are saved to `fincompress/results/plots/`.

In [None]:
import os, sys, json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
from pathlib import Path

# Setup
# Locate the project root without hard-wiring one machine's directory layout:
#   1. $FINCOMPRESS_ROOT, when set (a wrong value fails loudly in os.chdir);
#   2. the original default checkout under the home directory, when it exists;
#   3. otherwise keep the current working directory (e.g. launched from the repo).
_default_root = Path.home() / 'Desktop/UMD - MSML/Sem 4/FinCompress'  # adjust as needed
_env_root = os.environ.get('FINCOMPRESS_ROOT')
if _env_root:
    os.chdir(str(Path(_env_root).expanduser()))
elif _default_root.is_dir():
    os.chdir(str(_default_root))
# Guarded so that re-running this cell does not keep growing sys.path.
if '.' not in sys.path:
    sys.path.insert(0, '.')

RESULTS_DIR = Path('fincompress/results')
PLOTS_DIR = RESULTS_DIR / 'plots'

seaborn_style = 'whitegrid'
sns.set_style(seaborn_style)
FIGSIZE = (10, 6)
DPI = 150

# Load benchmark results
# Check for the input before creating any output directory, so that running from
# the wrong folder fails with an actionable message instead of leaving stray dirs.
bench_csv = RESULTS_DIR / 'benchmark_results.csv'
if not bench_csv.exists():
    raise FileNotFoundError(
        f'{bench_csv} not found (cwd={Path.cwd()}). Run '
        '`python -m fincompress.evaluation.benchmark` first, or set the '
        'FINCOMPRESS_ROOT environment variable to the project root.'
    )
PLOTS_DIR.mkdir(parents=True, exist_ok=True)

df = pd.read_csv(bench_csv)
print(f'Loaded {len(df)} models from benchmark results')
print(df[['model_name', 'size_mb', 'macro_f1', 'cpu_latency_ms_median']].to_string(index=False))

In [None]:
# --- Plot 1: Pareto Plot ---
# The centerpiece: accuracy vs. latency with bubble size = model size

# One colour per benchmarked model. Models of the same compression family share
# a hue (greys = teacher, blues = distillation, greens = quantization,
# oranges = pruning); the legend below lists only the base shade of each family.
color_map = {
    'teacher':               '#888888',
    'student_vanilla_kd':    '#4472C4',
    'student_intermediate_kd': '#2196F3',
    'student_ptq':           '#4CAF50',
    'student_qat':           '#66BB6A',
    'pruned_teacher_30pct':  '#FF9800',
    'pruned_teacher_50pct':  '#FF5722',
}

fig, ax = plt.subplots(figsize=FIGSIZE)

for _, row in df.iterrows():
    name = row['model_name']
    # Models missing from color_map fall back to dark grey rather than failing.
    color = color_map.get(name, '#333333')
    ax.scatter(
        row['cpu_latency_ms_median'],
        row['macro_f1'],
        s=row['size_mb'] * 20,  # marker area scales linearly with size in MB
        color=color,
        alpha=0.8,
        edgecolors='white',
        linewidth=1.5,
        zorder=3,
    )
    # Label each bubble with its model name, one word per line to keep it narrow.
    ax.annotate(
        name.replace('_', '\n'),
        xy=(row['cpu_latency_ms_median'], row['macro_f1']),
        xytext=(8, 4),
        textcoords='offset points',
        fontsize=8,
    )

ax.set_xlabel('CPU Latency — Median (ms)', fontsize=12)
ax.set_ylabel('Test Macro F1', fontsize=12)
ax.set_title('Accuracy vs. Latency Trade-off\n(bubble size = model size in MB)', fontsize=13)
# The reading hint is anchored in the lower-LEFT corner: the legend occupies the
# lower-right corner, and placing both there made them overlap.
ax.text(0.02, 0.02, 'Points toward top-left are better', transform=ax.transAxes,
        ha='left', va='bottom', style='italic', fontsize=9, color='grey')

# Legend
legend_patches = [
    mpatches.Patch(color='#888888', label='Teacher'),
    mpatches.Patch(color='#4472C4', label='Knowledge Distillation'),
    mpatches.Patch(color='#4CAF50', label='INT8 Quantization'),
    mpatches.Patch(color='#FF9800', label='Structured Pruning'),
]
ax.legend(handles=legend_patches, loc='lower right', fontsize=9)
plt.tight_layout()
out = PLOTS_DIR / 'pareto_plot.png'
plt.savefig(out, dpi=DPI, bbox_inches='tight')
plt.show()
print(f'Saved: {out}')

In [None]:
# --- Plot 2: Compression Bar Chart ---
# Normalized relative to teacher

# Every metric is divided by the teacher's value, so the teacher itself sits at 1.0.
teacher_rows = df[df['model_name'] == 'teacher']
if teacher_rows.empty:
    raise ValueError("benchmark results contain no 'teacher' row to normalize against")
teacher_row = teacher_rows.iloc[0]

fig, ax = plt.subplots(figsize=FIGSIZE)

models = df['model_name'].tolist()
x = np.arange(len(models))
width = 0.25  # three bars per model, side by side

size_ratios    = (df['size_mb'] / teacher_row['size_mb']).tolist()
latency_ratios = (df['cpu_latency_ms_median'] / teacher_row['cpu_latency_ms_median']).tolist()
f1_ratios      = (df['macro_f1'] / teacher_row['macro_f1']).tolist()

bars1 = ax.bar(x - width,    size_ratios,    width, label='Size',    color='steelblue',     alpha=0.85)
bars2 = ax.bar(x,            latency_ratios, width, label='Latency', color='coral',         alpha=0.85)
bars3 = ax.bar(x + width,    f1_ratios,      width, label='F1',      color='mediumseagreen', alpha=0.85)

ax.axhline(y=1.0, color='black', linestyle='--', linewidth=1.5, label='Teacher baseline')
ax.set_xticks(x)
ax.set_xticklabels([m.replace('_', '\n') for m in models], fontsize=8)
# The axis carries three metrics with opposite "good" directions, so the labels
# spell out the direction per metric instead of claiming "lower = more compressed"
# for all of them (a lower F1 ratio means lost accuracy, not more compression).
ax.set_ylabel('Ratio relative to teacher')
ax.set_title('Size, Latency and F1 Relative to Teacher\n'
             '(size / latency: lower = more compressed; F1: higher = better)')
ax.legend()
plt.tight_layout()
out = PLOTS_DIR / 'compression_bar_chart.png'
plt.savefig(out, dpi=DPI, bbox_inches='tight')
plt.show()
print(f'Saved: {out}')

In [None]:
# --- Plot 3: Latency Distribution (box plots) ---

# latency_raw.json is read as a mapping whose keys are model names and whose
# values are passed straight to boxplot as one sample set per model
# (presumably per-sample latencies in ms — confirm against the benchmark script).
latency_raw_path = RESULTS_DIR / 'latency_raw.json'
with open(latency_raw_path, encoding='utf-8') as f:
    raw = json.load(f)

fig, ax = plt.subplots(figsize=FIGSIZE)
model_names = list(raw.keys())
data = [raw[m] for m in model_names]
# Tick labels are applied after the call instead of through boxplot's `labels`
# keyword: that keyword is deprecated since Matplotlib 3.9 (renamed to
# `tick_labels`), whereas set_xticklabels works on both old and new releases.
ax.boxplot(data,
           medianprops=dict(color='red', linewidth=2),
           flierprops=dict(marker='.', alpha=0.3),
           patch_artist=True,
           boxprops=dict(facecolor='#AED6F1', alpha=0.7))
ax.set_xticklabels([m.replace('_', '\n') for m in model_names])
ax.set_ylabel('Latency (ms)')
ax.set_xlabel('Model')
ax.set_title('Per-Sample CPU Latency Distribution (ms)\nmedian preferred over mean — see right-skewed outliers')
ax.grid(True, axis='y', alpha=0.3)
plt.tight_layout()
out = PLOTS_DIR / 'latency_distribution.png'
plt.savefig(out, dpi=DPI, bbox_inches='tight')
plt.show()
print(f'Saved: {out}')

In [None]:
# --- Plot 4: F1 Degradation Bar Chart ---

# Reference score: macro F1 of the first row named 'teacher'.
teacher_f1 = df.loc[df['model_name'] == 'teacher', 'macro_f1'].iloc[0]

# Every non-teacher model with its F1 change versus the teacher
# (negative = worse than teacher), ordered from largest change to smallest.
df_others = (
    df.loc[df['model_name'] != 'teacher']
    .assign(f1_drop=lambda frame: frame['macro_f1'] - teacher_f1)
    .sort_values('f1_drop', ascending=False)
)

def bar_color(drop):
    """Map an F1 change relative to the teacher to a traffic-light hex colour.

    Args:
        drop: F1 difference (model minus teacher); negative means worse.

    Returns:
        '#28a745' (green) when drop is above -0.02,
        '#e6a817' (amber) when drop is above -0.05 but not above -0.02,
        '#dc3545' (red) otherwise.
    """
    if drop > -0.02:
        return '#28a745'
    return '#e6a817' if drop > -0.05 else '#dc3545'

# One traffic-light colour per bar, in the same order as the rows of df_others.
colors = list(map(bar_color, df_others['f1_drop']))

fig, ax = plt.subplots(figsize=FIGSIZE)
bars = ax.barh(df_others['model_name'], df_others['f1_drop'], color=colors)
# Dashed reference line at zero change, i.e. parity with the teacher.
ax.axvline(x=0, color='black', linestyle='--', linewidth=1.5)
ax.set_xlabel('Macro F1 change relative to teacher')
ax.set_title('F1 Degradation Relative to Teacher\n(green < 2% drop, red > 5% drop)')

# Write the signed value just past the tip of each bar: to the left of bars
# that extend left of zero, to the right of all others.
for patch, drop in zip(bars, df_others['f1_drop']):
    if drop < 0:
        label_x, align = drop - 0.001, 'right'
    else:
        label_x, align = drop + 0.001, 'left'
    label_y = patch.get_y() + patch.get_height()/2
    ax.text(label_x, label_y, f'{drop:+.4f}', va='center', ha=align, fontsize=9)

plt.tight_layout()
out = PLOTS_DIR / 'f1_degradation_bar.png'
plt.savefig(out, dpi=DPI, bbox_inches='tight')
plt.show()
print(f'Saved: {out}')

In [None]:
# --- Plot 5: Pruning Curve ---

# Validation F1 may fall this far below its peak before a row counts as
# being past the "accuracy cliff".
CLIFF_F1_TOLERANCE = 0.02

curve_path = RESULTS_DIR / 'pruning_curve.csv'
curve_df = pd.read_csv(curve_path)

fig, ax = plt.subplots(figsize=FIGSIZE)
ax.plot(curve_df['heads_pruned_pct'], curve_df['val_f1'], 'o-', color='steelblue', linewidth=2)

# Find cliff
# The cliff is the first row (in file order) whose validation F1 is more than
# CLIFF_F1_TOLERANCE below the best validation F1 anywhere on the curve.
peak_f1 = curve_df['val_f1'].max()
cliff_rows = curve_df[curve_df['val_f1'] < peak_f1 - CLIFF_F1_TOLERANCE]
if not cliff_rows.empty:
    cliff_x = cliff_rows.iloc[0]['heads_pruned_pct']
    ax.axvline(x=cliff_x, color='red', linestyle='--', linewidth=2, label=f'Accuracy cliff ({cliff_x:.0f}%)')
    # Shade everything from the cliff to just past the last measured point.
    ax.axvspan(cliff_x, curve_df['heads_pruned_pct'].max() + 2, alpha=0.15, color='red')
    ax.annotate('Accuracy cliff', xy=(cliff_x, peak_f1 - 0.01), xytext=(cliff_x + 2, peak_f1),
                arrowprops=dict(arrowstyle='->', color='red'), color='red', fontsize=10)

ax.set_xlabel('Cumulative Heads Pruned (%)', fontsize=12)
ax.set_ylabel('Val Macro F1', fontsize=12)
ax.set_title('Validation F1 vs. Attention Head Sparsity\n(red region = accuracy cliff)', fontsize=13)
# The only labelled artist is the cliff line, so a legend is requested only when
# one exists; otherwise ax.legend() emits a "No artists with labels" warning.
if ax.get_legend_handles_labels()[0]:
    ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
out = PLOTS_DIR / 'pruning_curve.png'
plt.savefig(out, dpi=DPI, bbox_inches='tight')
plt.show()
print(f'Saved: {out}')

In [None]:
# --- Plot 6: KD Loss Curves (side-by-side subplots) ---

vanilla_path = Path('fincompress/logs/vanilla_kd_training.csv')
inter_path   = Path('fincompress/logs/intermediate_kd_training.csv')

# (CSV column, matplotlib format string, colour, legend label) per loss component.
BASE_LOSS_SERIES = [
    ('train_total_loss', 'o-', 'steelblue', 'Total Loss'),
    ('train_ce_loss',    's-', 'coral',     'CE Loss'),
    ('train_kl_loss',    '^-', 'green',     'KL Loss'),
]
# Intermediate KD logs the same three components plus two feature-matching terms.
INTERMEDIATE_LOSS_SERIES = BASE_LOSS_SERIES + [
    ('train_hidden_loss', 'D-', 'darkorange', 'Hidden MSE'),
    ('train_attn_loss',   'v-', 'purple',     'Attn MSE'),
]


def _plot_loss_components(ax, log_path, series, title):
    """Draw per-epoch loss curves from a training-log CSV onto ``ax``.

    Args:
        ax: Matplotlib axes to draw on.
        log_path: Path to a CSV with an 'epoch' column and one column per
            entry in ``series``.
        series: Iterable of (column, fmt, color, label) tuples to plot.
        title: Title for the axes.

    Returns:
        The loaded DataFrame, or None when ``log_path`` does not exist. In
        that case a placeholder message is drawn instead of curves, and no
        legend is requested (there would be no labelled artists for it).

    Raises:
        KeyError: If the CSV lacks 'epoch' or one of the requested columns.
    """
    log_df = None
    if log_path.exists():
        log_df = pd.read_csv(log_path)
        for column, fmt, color, label in series:
            ax.plot(log_df['epoch'], log_df[column], fmt, color=color, label=label)
        ax.legend()
    else:
        # Make the gap visible in the saved figure rather than leaving a blank panel.
        ax.text(0.5, 0.5, f'Training log not found:\n{log_path}',
                transform=ax.transAxes, ha='center', va='center',
                style='italic', color='grey')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title(title)
    ax.grid(True, alpha=0.3)
    return log_df


fig, axes = plt.subplots(1, 2, figsize=(16, 6))
fig.suptitle('Knowledge Distillation Training Dynamics', fontsize=14)

# Left: Vanilla KD
v_df = _plot_loss_components(axes[0], vanilla_path, BASE_LOSS_SERIES,
                             'Vanilla KD Loss Components')

# Right: Intermediate KD
i_df = _plot_loss_components(axes[1], inter_path, INTERMEDIATE_LOSS_SERIES,
                             'Intermediate KD Loss Components')

plt.tight_layout()
out = PLOTS_DIR / 'kd_loss_curves.png'
plt.savefig(out, dpi=DPI, bbox_inches='tight')
plt.show()
print(f'Saved: {out}')
print(f'\nAll 6 plots saved to: {PLOTS_DIR}')