# Model Comparison Summary

This notebook compares all trained models and generates summary tables and figures for the manuscript.

**Models Compared**:
- **Multiclass Classification** (18 fault classes):
  - XGBoost
  - LSTM
  - LSTM-FCN
  - CNN-Transformer
  - TransKal (Transformer + Kalman Filter)
  
- **Binary Anomaly Detection** (normal vs fault):
  - LSTM Autoencoder
  - Convolutional Autoencoder

**Outputs**:
- Summary tables: `outputs/metrics/model_comparison_*.csv`
- LaTeX tables: `outputs/metrics/table_*_results.tex`
- Comparison figures: `outputs/figures/model_comparison_*.png`

## Configuration

In [None]:
import os
import json
from pathlib import Path

print("="*60)
print("Model Comparison Summary")
print("="*60)

# Paths (relative to the notebook's working directory)
OUTPUT_DIR = Path('../outputs')
METRICS_DIR = OUTPUT_DIR / 'metrics'   # <model>_metrics.json inputs; CSV / LaTeX outputs
FIGURES_DIR = OUTPUT_DIR / 'figures'   # PNG outputs

# plt.savefig does not create missing parent directories, so make sure the
# figure directory exists before any plotting cell tries to write into it.
# METRICS_DIR is deliberately not created: it holds the input files, and the
# loading cells already report each metrics file that cannot be found.
FIGURES_DIR.mkdir(parents=True, exist_ok=True)

# Model definitions: keys double as the <model>_metrics.json file stems
MULTICLASS_MODELS = ['xgboost', 'lstm', 'lstm_fcn', 'cnn_transformer', 'transkal']
BINARY_MODELS = ['lstm_autoencoder', 'conv_autoencoder']

# Human-readable labels used in tables, figures and LaTeX output
MODEL_DISPLAY_NAMES = {
    'xgboost': 'XGBoost',
    'lstm': 'LSTM',
    'lstm_fcn': 'LSTM-FCN',
    'cnn_transformer': 'CNN-Transformer',
    'transkal': 'TransKal',
    'lstm_autoencoder': 'LSTM-AE',
    'conv_autoencoder': 'Conv-AE'
}

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Keep cell output clean: silence every warning category for this session.
warnings.simplefilter('ignore')

# Plot appearance: matplotlib's seaborn-style white grid first, then seaborn's
# 'husl' palette as the default colour cycle (the order matters, because
# applying a style sheet resets the colour cycle).
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette(palette='husl')

print("✓ Imports successful")

## Load Metrics

In [None]:
print("\nLoading multiclass model metrics...")

# model key -> metrics dict parsed from METRICS_DIR/<model>_metrics.json.
# Models whose metrics file is absent are reported and left out of the dict.
multiclass_metrics = {}
for model in MULTICLASS_MODELS:
    metrics_file = METRICS_DIR / f'{model}_metrics.json'
    if not metrics_file.exists():
        print(f"  ✗ Missing {model} (file: {metrics_file})")
        continue
    with metrics_file.open() as handle:
        multiclass_metrics[model] = json.load(handle)
    print(f"  ✓ Loaded {model}")

print(f"\nLoaded {len(multiclass_metrics)}/{len(MULTICLASS_MODELS)} multiclass models")

In [None]:
print("\nLoading binary model metrics...")

# model key -> metrics dict; only models with a metrics file on disk get an entry.
binary_metrics = {}
for model in BINARY_MODELS:
    metrics_file = METRICS_DIR / f'{model}_metrics.json'
    found = metrics_file.exists()
    if found:
        binary_metrics[model] = json.loads(metrics_file.read_text())
    status = f"  ✓ Loaded {model}" if found else f"  ✗ Missing {model} (file: {metrics_file})"
    print(status)

print(f"\nLoaded {len(binary_metrics)}/{len(BINARY_MODELS)} binary models")

## Multiclass Classification Results

In [None]:
print("\n" + "="*60)
print("MULTICLASS CLASSIFICATION RESULTS")
print("="*60)

# Table column -> key in each model's metrics dict (every key is required).
score_keys = {
    'Accuracy': 'accuracy',
    'Balanced Acc': 'balanced_accuracy',
    'F1 (Weighted)': 'f1_weighted',
    'F1 (Macro)': 'f1_macro',
    'Precision': 'precision_weighted',
    'Recall': 'recall_weighted',
}
time_col = 'Training Time (s)'

# Build comparison table: one row per model whose metrics were loaded
rows = []
for model in MULTICLASS_MODELS:
    if model not in multiclass_metrics:
        continue
    m = multiclass_metrics[model]
    row = {'Model': MODEL_DISPLAY_NAMES[model]}
    row.update({col: m[key] for col, key in score_keys.items()})
    row[time_col] = m.get('training_time_seconds', 0)  # optional; 0 when not recorded
    rows.append(row)

# Passing the columns explicitly keeps the frame well-formed when `rows` is
# empty; without them set_index('Model') raises KeyError on a column-less frame.
multiclass_df = pd.DataFrame(rows, columns=['Model', *score_keys, time_col]).set_index('Model')

if multiclass_df.empty:
    # Same handling as the binary results cell: report it and write nothing.
    print("No multiclass model metrics available")
else:
    # Format for display (the numeric frame is what gets saved)
    display_df = multiclass_df.copy()
    for col in score_keys:
        display_df[col] = display_df[col].map("{:.4f}".format)
    display_df[time_col] = display_df[time_col].map("{:.1f}".format)

    print("\n" + display_df.to_string())

    # Save to CSV
    multiclass_df.to_csv(METRICS_DIR / 'model_comparison_multiclass.csv')
    print(f"\n✓ Saved to {METRICS_DIR / 'model_comparison_multiclass.csv'}")

In [None]:
# Report, for every score column, the model with the highest value
print("\nBest Model by Metric:")
print("-" * 40)
metric_names = ['Accuracy', 'Balanced Acc', 'F1 (Weighted)', 'F1 (Macro)', 'Precision', 'Recall']
for col in metric_names:
    scores = multiclass_df[col]
    print(f"  {col}: {scores.idxmax()} ({scores.max():.4f})")

In [None]:
# Bar chart comparison: two panels, each showing a pair of metrics per model
models = multiclass_df.index.tolist()
x = np.arange(len(models))
width = 0.35

# (panel title, y-axis label, [(column, legend label), ...])
panels = [
    ('Accuracy Comparison (Multiclass)', 'Score',
     [('Accuracy', 'Accuracy'), ('Balanced Acc', 'Balanced Accuracy')]),
    ('F1 Score Comparison (Multiclass)', 'F1 Score',
     [('F1 (Weighted)', 'F1 (Weighted)'), ('F1 (Macro)', 'F1 (Macro)')]),
]
pair_offsets = (-width / 2, width / 2)
pair_colors = ('steelblue', 'coral')

# The y-axis normally starts at 0.8 to emphasise differences between strong
# models, but a fixed floor would clip (or hide) the bar of any model scoring
# below it, so the floor drops just under the lowest plotted score when needed.
plotted_cols = [col for _, _, pairs in panels for col, _ in pairs]
lowest = multiclass_df[plotted_cols].min().min()
y_min = 0.8 if pd.isna(lowest) else min(0.8, max(0.0, float(lowest) - 0.05))

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

for ax, (title, ylabel, pairs) in zip(axes, panels):
    for offset, color, (col, label) in zip(pair_offsets, pair_colors, pairs):
        ax.bar(x + offset, multiclass_df[col], width, label=label, color=color)
    ax.set_xlabel('Model')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.set_xticks(x)
    ax.set_xticklabels(models, rotation=15, ha='right')
    ax.legend()
    ax.set_ylim(y_min, 1.0)
    ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.savefig(FIGURES_DIR / 'model_comparison_multiclass_bars.png', dpi=150, bbox_inches='tight')
plt.show()
print(f"✓ Saved to {FIGURES_DIR / 'model_comparison_multiclass_bars.png'}")

In [None]:
# Radar chart: one closed polygon per model across the six summary metrics
from math import pi

metrics_cols = ['Accuracy', 'Balanced Acc', 'F1 (Weighted)', 'F1 (Macro)', 'Precision', 'Recall']
N = len(metrics_cols)

# One evenly spaced spoke per metric; the first angle is repeated at the end
# so that each model's outline closes on itself.
angles = [n / float(N) * 2 * pi for n in range(N)]
angles += angles[:1]  # Complete the loop

fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(polar=True))

# matplotlib ships no 'husl' colormap, so `plt.cm.husl` raises AttributeError.
# HUSL is a seaborn palette; ask seaborn for one colour per model instead.
colors = sns.color_palette('husl', n_colors=len(multiclass_df))

for idx, (model, row) in enumerate(multiclass_df.iterrows()):
    values = [row[col] for col in metrics_cols]
    values += values[:1]  # Complete the loop
    ax.plot(angles, values, 'o-', linewidth=2, label=model, color=colors[idx])
    ax.fill(angles, values, alpha=0.1, color=colors[idx])

# The radial axis normally starts at 0.85 to spread out strong models; lower
# the floor when any score falls below it so that no polygon vertex is lost.
lowest = multiclass_df[metrics_cols].min().min()
r_min = 0.85 if pd.isna(lowest) else min(0.85, max(0.0, float(lowest) - 0.05))

ax.set_xticks(angles[:-1])
ax.set_xticklabels(metrics_cols)
ax.set_ylim(r_min, 1.0)
ax.set_title('Multiclass Model Comparison', size=14, y=1.08)
ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.0))

plt.tight_layout()
plt.savefig(FIGURES_DIR / 'model_comparison_multiclass_radar.png', dpi=150, bbox_inches='tight')
plt.show()
print(f"✓ Saved to {FIGURES_DIR / 'model_comparison_multiclass_radar.png'}")

## Per-Class F1 Score Comparison

In [None]:
# Gather per-class F1 scores from every loaded model that reports them
print("\nPer-Class F1 Scores:")

perclass_data = {
    MODEL_DISPLAY_NAMES[name]: multiclass_metrics[name]['per_class_f1']
    for name in MULTICLASS_MODELS
    if name in multiclass_metrics and 'per_class_f1' in multiclass_metrics[name]
}

if not perclass_data:
    print("No per-class F1 data available")
else:
    # One column per model, one row per fault class
    perclass_df = pd.DataFrame(perclass_data)
    perclass_df.index.name = 'Fault Class'

    # Heatmap (colour scale fixed to the 0.7-1.0 range)
    heatmap_path = FIGURES_DIR / 'model_comparison_perclass_f1_heatmap.png'
    fig, ax = plt.subplots(figsize=(12, 10))
    sns.heatmap(
        perclass_df,
        annot=True,
        fmt='.3f',
        cmap='RdYlGn',
        vmin=0.7,
        vmax=1.0,
        ax=ax,
        cbar_kws={'label': 'F1 Score'},
    )
    ax.set_xlabel('Model')
    ax.set_ylabel('Fault Class')
    ax.set_title('Per-Class F1 Scores by Model')
    plt.tight_layout()
    plt.savefig(heatmap_path, dpi=150, bbox_inches='tight')
    plt.show()
    print(f"✓ Saved to {heatmap_path}")

    # Persist the same table as CSV
    csv_path = METRICS_DIR / 'model_comparison_perclass_f1.csv'
    perclass_df.to_csv(csv_path)
    print(f"✓ Saved to {csv_path}")

In [None]:
# Rank fault classes by F1 averaged across models (ascending), then show both ends
if perclass_data:
    print("\nMost Difficult Classes (lowest average F1):")
    avg_f1 = perclass_df.mean(axis='columns').sort_values()
    hardest = avg_f1.iloc[:5]
    print(hardest.to_string())

    print("\n\nEasiest Classes (highest average F1):")
    easiest = avg_f1.iloc[-5:]
    print(easiest.to_string())

## Binary Anomaly Detection Results

In [None]:
print("\n" + "="*60)
print("BINARY ANOMALY DETECTION RESULTS")
print("="*60)

# Table column -> key in each model's metrics dict (every key is required)
binary_score_keys = {
    'Accuracy': 'accuracy',
    'Balanced Acc': 'balanced_accuracy',
    'F1 (Weighted)': 'f1_weighted',
    'F1 (Binary)': 'f1_binary',
    'Precision': 'precision',
    'Recall': 'recall',
    'ROC-AUC': 'roc_auc',
    'PR-AUC': 'pr_auc',
}

# One record per binary model whose metrics were loaded
rows = []
for model in BINARY_MODELS:
    if model in binary_metrics:
        m = binary_metrics[model]
        record = {'Model': MODEL_DISPLAY_NAMES[model]}
        for column, key in binary_score_keys.items():
            record[column] = m[key]
        record['Training Time (s)'] = m.get('training_time_seconds', 0)
        rows.append(record)

if not rows:
    print("No binary model metrics available")
else:
    binary_df = pd.DataFrame(rows).set_index('Model')

    # String-formatted copy for printing; the numeric frame is what gets saved
    display_df = binary_df.copy()
    for col in binary_score_keys:
        display_df[col] = display_df[col].apply("{:.4f}".format)
    display_df['Training Time (s)'] = display_df['Training Time (s)'].apply("{:.1f}".format)

    print("\n" + display_df.to_string())

    # Save to CSV
    binary_csv = METRICS_DIR / 'model_comparison_binary.csv'
    binary_df.to_csv(binary_csv)
    print(f"\n✓ Saved to {binary_csv}")

In [None]:
# Guard on the loaded metrics rather than on `rows`: `rows` is a scratch list
# that other table-building cells reassign, so after out-of-order execution it
# no longer says whether binary_df exists. binary_df is built exactly when at
# least one binary model's metrics were loaded.
if binary_metrics:
    models = binary_df.index.tolist()
    x = np.arange(len(models))
    width = 0.2

    # (panel title, y-axis label, [(column, bar offset, colour), ...]);
    # each legend label is the column name itself.
    panels = [
        ('Classification Metrics (Binary)', 'Score',
         [('Accuracy', -width, 'steelblue'),
          ('F1 (Weighted)', 0.0, 'coral'),
          ('Balanced Acc', width, 'green')]),
        ('AUC Metrics (Binary)', 'AUC',
         [('ROC-AUC', -width / 2, 'steelblue'),
          ('PR-AUC', width / 2, 'coral')]),
    ]

    # The y-axis normally starts at 0.8; drop the floor just below the lowest
    # plotted score when a model falls under it, otherwise its bar is clipped.
    plotted_cols = [col for _, _, bars in panels for col, _, _ in bars]
    lowest = binary_df[plotted_cols].min().min()
    y_min = 0.8 if pd.isna(lowest) else min(0.8, max(0.0, float(lowest) - 0.05))

    # Bar chart comparison
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    for ax, (title, ylabel, bars) in zip(axes, panels):
        for col, offset, color in bars:
            ax.bar(x + offset, binary_df[col], width, label=col, color=color)
        ax.set_xlabel('Model')
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.set_xticks(x)
        ax.set_xticklabels(models)
        ax.legend()
        ax.set_ylim(y_min, 1.0)
        ax.grid(axis='y', alpha=0.3)

    plt.tight_layout()
    plt.savefig(FIGURES_DIR / 'model_comparison_binary_bars.png', dpi=150, bbox_inches='tight')
    plt.show()
    print(f"✓ Saved to {FIGURES_DIR / 'model_comparison_binary_bars.png'}")

## Combined Summary

In [None]:
print("\n" + "="*60)
print("COMBINED MODEL SUMMARY")
print("="*60)

summary_columns = ['Model', 'Task', 'Accuracy', 'F1 (Weighted)', 'Precision', 'Recall',
                   'Training Time (s)']

# (task label, model keys, loaded metrics, precision key, recall key).
# The two tasks store precision/recall under different keys: the multiclass
# metrics use the weighted variants, the binary metrics the plain ones.
task_specs = [
    ('Multiclass', MULTICLASS_MODELS, multiclass_metrics, 'precision_weighted', 'recall_weighted'),
    ('Binary', BINARY_MODELS, binary_metrics, 'precision', 'recall'),
]

# Build unified summary: multiclass models first, then binary models
rows = []
for task, model_keys, loaded, precision_key, recall_key in task_specs:
    for model in model_keys:
        if model not in loaded:
            continue
        m = loaded[model]
        rows.append({
            'Model': MODEL_DISPLAY_NAMES[model],
            'Task': task,
            'Accuracy': m['accuracy'],
            'F1 (Weighted)': m['f1_weighted'],
            'Precision': m[precision_key],
            'Recall': m[recall_key],
            'Training Time (s)': m.get('training_time_seconds', 0)  # optional; 0 when not recorded
        })

# Passing the columns explicitly keeps the frame usable when no metrics were
# loaded; a column-less frame would raise KeyError in the formatting loop below.
combined_df = pd.DataFrame(rows, columns=summary_columns)

if combined_df.empty:
    print("\nNo model metrics available")
else:
    # Format for display (the numeric frame is what gets saved)
    display_df = combined_df.copy()
    for col in ['Accuracy', 'F1 (Weighted)', 'Precision', 'Recall']:
        display_df[col] = display_df[col].map("{:.4f}".format)
    display_df['Training Time (s)'] = display_df['Training Time (s)'].map("{:.1f}".format)

    print("\n" + display_df.to_string(index=False))

    # Save to CSV
    combined_df.to_csv(METRICS_DIR / 'model_comparison_combined.csv', index=False)
    print(f"\n✓ Saved to {METRICS_DIR / 'model_comparison_combined.csv'}")

In [None]:
# Combined visualization: weighted F1 of every model, coloured by task
from matplotlib.patches import Patch

fig, ax = plt.subplots(figsize=(12, 6))

all_models = combined_df['Model'].tolist()
x = np.arange(len(all_models))

# Color by task (anything that is not 'Multiclass' is drawn as a binary model)
task_colors = {'Multiclass': 'steelblue', 'Binary': 'coral'}
colors = [task_colors.get(task, 'coral') for task in combined_df['Task']]

f1_scores = combined_df['F1 (Weighted)']
bars = ax.bar(x, f1_scores, color=colors, edgecolor='black')

# Add value labels just above each bar
for bar, val in zip(bars, f1_scores):
    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.005,
            f'{val:.3f}', ha='center', va='bottom', fontsize=9)

# The y-axis normally starts at 0.85; a model scoring below that would lose both
# its bar and its value label, so the floor drops just under the lowest score
# when needed.
lowest = f1_scores.min()
y_min = 0.85 if pd.isna(lowest) else min(0.85, max(0.0, float(lowest) - 0.05))

ax.set_xlabel('Model')
ax.set_ylabel('F1 Score (Weighted)')
ax.set_title('Model Performance Comparison')
ax.set_xticks(x)
ax.set_xticklabels(all_models, rotation=15, ha='right')
ax.set_ylim(y_min, 1.0)
ax.grid(axis='y', alpha=0.3)

# Legend: one patch per task, in the same colours as the bars
legend_elements = [Patch(facecolor=color, edgecolor='black', label=task)
                   for task, color in task_colors.items()]
ax.legend(handles=legend_elements, loc='lower right')

plt.tight_layout()
plt.savefig(FIGURES_DIR / 'model_comparison_combined.png', dpi=150, bbox_inches='tight')
plt.show()
print(f"✓ Saved to {FIGURES_DIR / 'model_comparison_combined.png'}")

## LaTeX Tables for Manuscript

In [None]:
print("\n" + "="*60)
print("LATEX TABLE: MULTICLASS RESULTS")
print("="*60)

if multiclass_metrics:
    # Metric keys in the same order as the header columns of the table
    column_keys = ['accuracy', 'balanced_accuracy', 'f1_weighted', 'f1_macro',
                   'precision_weighted', 'recall_weighted']

    # One "name & v1 & ... & v6 \\" line per loaded model, values to 4 decimals
    latex_rows = []
    for model in MULTICLASS_MODELS:
        if model in multiclass_metrics:
            m = multiclass_metrics[model]
            cells = [MODEL_DISPLAY_NAMES[model]] + [f"{m[key]:.4f}" for key in column_keys]
            latex_rows.append(" & ".join(cells) + " \\\\")

    header_lines = [
        "\\begin{table}[htbp]",
        "\\centering",
        "\\caption{Multiclass Fault Classification Results}",
        "\\label{tab:multiclass-results}",
        "\\begin{tabular}{lcccccc}",
        "\\toprule",
        "Model & Accuracy & Balanced Acc & F1 (Weighted) & F1 (Macro) & Precision & Recall \\\\",
        "\\midrule",
    ]
    footer_lines = ["\\bottomrule", "\\end{tabular}", "\\end{table}"]

    # The data rows are joined first and inserted as a single element, so the
    # surrounding line breaks do not depend on how many rows there are.
    latex_table = "\n".join(header_lines + ["\n".join(latex_rows)] + footer_lines)

    print(latex_table)

    # Save to file
    tex_path = METRICS_DIR / 'table_multiclass_results.tex'
    tex_path.write_text(latex_table)
    print(f"\n✓ Saved to {tex_path}")

In [None]:
print("\n" + "="*60)
print("LATEX TABLE: BINARY RESULTS")
print("="*60)

if binary_metrics:
    # Metric keys in the same order as the header columns of the table
    column_keys = ('accuracy', 'f1_weighted', 'precision', 'recall', 'roc_auc', 'pr_auc')

    def _tex_row(model_key):
        """Format one model's metrics as a LaTeX table row (values to 4 decimals)."""
        scores = binary_metrics[model_key]
        cells = [MODEL_DISPLAY_NAMES[model_key]]
        cells.extend(f"{scores[key]:.4f}" for key in column_keys)
        return " & ".join(cells) + " \\\\"

    latex_rows = [_tex_row(model) for model in BINARY_MODELS if model in binary_metrics]

    table_head = "\n".join([
        "\\begin{table}[htbp]",
        "\\centering",
        "\\caption{Binary Anomaly Detection Results}",
        "\\label{tab:binary-results}",
        "\\begin{tabular}{lcccccc}",
        "\\toprule",
        "Model & Accuracy & F1 (Weighted) & Precision & Recall & ROC-AUC & PR-AUC \\\\",
        "\\midrule",
    ])
    table_tail = "\n".join(["\\bottomrule", "\\end{tabular}", "\\end{table}"])

    latex_table = table_head + "\n" + "\n".join(latex_rows) + "\n" + table_tail

    print(latex_table)

    # Save to file
    tex_path = METRICS_DIR / 'table_binary_results.tex'
    tex_path.write_text(latex_table)
    print(f"\n✓ Saved to {tex_path}")

## Key Findings Summary

In [None]:
print("\n" + "="*60)
print("KEY FINDINGS")
print("="*60)

print("\n### Multiclass Fault Classification ###")
if multiclass_metrics:
    # Keys of the top models; on ties the first model in load order wins
    top_acc = max(multiclass_metrics, key=lambda name: multiclass_metrics[name]['accuracy'])
    top_f1 = max(multiclass_metrics, key=lambda name: multiclass_metrics[name]['f1_weighted'])

    print(f"\nBest Accuracy: {MODEL_DISPLAY_NAMES[top_acc]} ({multiclass_metrics[top_acc]['accuracy']:.4f})")
    print(f"Best F1 (Weighted): {MODEL_DISPLAY_NAMES[top_f1]} ({multiclass_metrics[top_f1]['f1_weighted']:.4f})")

    # Spread of performance across all loaded multiclass models
    accs = [scores['accuracy'] for scores in multiclass_metrics.values()]
    f1s = [scores['f1_weighted'] for scores in multiclass_metrics.values()]
    print(f"\nAccuracy range: {min(accs):.4f} - {max(accs):.4f}")
    print(f"F1 (Weighted) range: {min(f1s):.4f} - {max(f1s):.4f}")

print("\n### Binary Anomaly Detection ###")
if binary_metrics:
    top_f1 = max(binary_metrics, key=lambda name: binary_metrics[name]['f1_weighted'])
    top_auc = max(binary_metrics, key=lambda name: binary_metrics[name]['roc_auc'])

    print(f"\nBest F1 (Weighted): {MODEL_DISPLAY_NAMES[top_f1]} ({binary_metrics[top_f1]['f1_weighted']:.4f})")
    print(f"Best ROC-AUC: {MODEL_DISPLAY_NAMES[top_auc]} ({binary_metrics[top_auc]['roc_auc']:.4f})")

print("\n" + "="*60)
print("✓ Model Comparison Summary Complete!")
print("="*60)