# Check that the label_encoder files are identical across training runs

In [1]:
# The function below will:
# - Find all label_encoder.joblib files recursively under the models/ directory
# - Load each encoder and compare their classes_ attributes
# - Print a clear report showing which encoders match and which differ
# - Return True if all are equivalent (or if no encoder files are found), False otherwise
import numpy as np
import os
from joblib import load
from sklearn.preprocessing import LabelEncoder

In [2]:

def compare_label_encoders(models_dir="models"):
    """Check that every saved label encoder under ``models_dir`` has the same classes.

    Walks ``models_dir`` recursively, loads every ``label_encoder.joblib`` file
    and compares each encoder's ``classes_`` array against the first encoder
    (in sorted path order). One report line is printed per file, identified by
    its path, so a mismatch can be traced back to a specific training run.

    Args:
        models_dir: Directory to search recursively. Defaults to ``"models"``.

    Returns:
        bool: True if every file holds a LabelEncoder whose ``classes_`` equal
        the reference encoder's, or if no encoder files were found. False if
        any file is not a LabelEncoder or any ``classes_`` array differs.
    """
    # Sort the paths so the reference encoder and the report order do not
    # depend on the order in which os.walk happens to visit directories.
    encoder_files = sorted(
        os.path.join(root, name)
        for root, _dirs, names in os.walk(models_dir)
        for name in names
        if name == "label_encoder.joblib"
    )

    if not encoder_files:
        print("No label_encoder.joblib files found.")
        return True

    # NOTE: joblib files are pickles; only load files produced by trusted runs.
    all_equivalent = True
    loaded = []  # (path, encoder) pairs for files that really are LabelEncoders
    for path in encoder_files:
        encoder = load(path)
        if not isinstance(encoder, LabelEncoder):
            # Keep going so every problem is reported in a single pass.
            print(f"File {path} is not a LabelEncoder.")
            all_equivalent = False
            continue
        loaded.append((path, encoder))

    if not loaded:
        return False

    reference_path, reference = loaded[0]
    print(f"Reference encoder: {reference_path} ({len(reference.classes_)} classes)")
    for path, encoder in loaded[1:]:
        if np.array_equal(reference.classes_, encoder.classes_):
            print(f"MATCH:   {path}")
        else:
            print(f"DIFFERS: {path} (classes_ differ from {reference_path})")
            all_equivalent = False

    return all_equivalent

In [3]:
# Run the encoder comparison; as the last expression in the cell, the returned
# boolean is displayed as the cell output.
compare_label_encoders()

True

# Create comprehensive comparison charts for all baseline models

In [8]:
import os, json, time, warnings
from pathlib import Path
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import glob
from tqdm import tqdm
import warnings

# Seeding is currently disabled; kept for reference.
# RANDOM_STATE = 42
# np.random.seed(RANDOM_STATE)

# Silence pandas DtypeWarning and all FutureWarning messages in cell output.
warnings.filterwarnings(action="ignore", category=pd.errors.DtypeWarning)
warnings.filterwarnings(action="ignore", category=FutureWarning)

# Show DataFrames in full: no column/row truncation, wide lines, untruncated cells.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.options.display.width = 1000
pd.options.display.max_colwidth = None

In [9]:
def _save_metric_bar_charts(summary_df, metrics, outdir):
    """Save a 2x2 grid of bar charts (one panel per metric) as metrics_comparison_bars.png."""
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    # axes.flat walks the grid row by row, so panels follow the order of `metrics`.
    for ax, metric in zip(axes.flat, metrics):
        summary_df.plot(x='model', y=metric, kind='bar', ax=ax, legend=False, color='steelblue')
        ax.set_title(f'{metric} Comparison', fontsize=14, fontweight='bold')
        ax.set_xlabel('Model', fontsize=12)
        ax.set_ylabel(metric, fontsize=12)
        ax.set_xticklabels(summary_df['model'], rotation=45, ha='right')
        ax.grid(axis='y', alpha=0.3)

        # Add value labels on bars
        for container in ax.containers:
            ax.bar_label(container, fmt='%.3f', padding=3)

    fig.tight_layout()
    fig.savefig(outdir / "metrics_comparison_bars.png", dpi=300, bbox_inches='tight')
    plt.close(fig)


def _save_metric_radar_chart(summary_df, metrics, outdir):
    """Save a radar chart with one closed outline per model as metrics_radar_chart.png."""
    fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(projection='polar'))

    # One spoke per metric; the first angle is repeated so each outline closes.
    angles = [n / len(metrics) * 2 * np.pi for n in range(len(metrics))]
    angles += angles[:1]

    # Put the first metric at the top and lay the others out clockwise.
    ax.set_theta_offset(np.pi / 2)
    ax.set_theta_direction(-1)
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(metrics)

    for _, row in summary_df.iterrows():
        values = row[metrics].tolist()
        values += values[:1]
        ax.plot(angles, values, 'o-', linewidth=2, label=row['model'])
        ax.fill(angles, values, alpha=0.15)

    ax.set_ylim(0, 1)
    ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
    ax.set_title('Multi-Metric Performance Comparison', size=16, fontweight='bold', pad=20)
    ax.grid(True)

    fig.tight_layout()
    fig.savefig(outdir / "metrics_radar_chart.png", dpi=300, bbox_inches='tight')
    plt.close(fig)


def _save_metric_heatmap(summary_df, metrics, outdir):
    """Save an annotated model-by-metric heatmap as metrics_heatmap.png."""
    fig, ax = plt.subplots(figsize=(10, 6))
    metrics_matrix = summary_df.set_index('model')[metrics]
    sns.heatmap(metrics_matrix, annot=True, fmt='.3f', cmap='YlGnBu',
                cbar_kws={'label': 'Score'}, linewidths=0.5, ax=ax)
    ax.set_title('Performance Metrics Heatmap', fontsize=16, fontweight='bold', pad=15)
    ax.set_xlabel('Metrics', fontsize=12)
    ax.set_ylabel('Models', fontsize=12)
    fig.tight_layout()
    fig.savefig(outdir / "metrics_heatmap.png", dpi=300, bbox_inches='tight')
    plt.close(fig)


def _save_model_ranking(summary_df, metrics, outdir):
    """Save a horizontal bar chart of each model's average rank as model_ranking.png."""
    fig, ax = plt.subplots(figsize=(12, 6))

    # Rank models per metric (rank 1 = highest score), then average across metrics.
    ranks = summary_df[metrics].rank(ascending=False)
    ranks['model'] = summary_df['model']
    ranks['avg_rank'] = ranks[metrics].mean(axis=1)
    ranks = ranks.sort_values('avg_rank')

    y_pos = np.arange(len(ranks))
    ax.barh(y_pos, ranks['avg_rank'], color='coral')
    ax.set_yticks(y_pos)
    ax.set_yticklabels(ranks['model'])
    ax.invert_yaxis()  # best (lowest) average rank at the top
    ax.set_xlabel('Average Rank (lower is better)', fontsize=12)
    ax.set_title('Model Ranking Based on Average Performance', fontsize=16, fontweight='bold')
    ax.grid(axis='x', alpha=0.3)

    # Add value labels just to the right of each bar
    for i, val in enumerate(ranks['avg_rank']):
        ax.text(val + 0.05, i, f'{val:.2f}', va='center')

    fig.tight_layout()
    fig.savefig(outdir / "model_ranking.png", dpi=300, bbox_inches='tight')
    plt.close(fig)


def _save_metric_distribution(summary_df, metrics, outdir):
    """Save per-metric box plots with the individual model scores overlaid as metrics_distribution.png."""
    fig, ax = plt.subplots(figsize=(12, 6))
    melted = summary_df.melt(id_vars='model', value_vars=metrics,
                             var_name='Metric', value_name='Score')
    # NOTE(review): newer seaborn releases may warn about `palette` without `hue`;
    # left unchanged here to keep the figure identical on older versions.
    sns.boxplot(data=melted, x='Metric', y='Score', ax=ax, palette='Set2')
    sns.swarmplot(data=melted, x='Metric', y='Score', color='black', alpha=0.5, ax=ax)
    ax.set_title('Distribution of Metrics Across All Models', fontsize=16, fontweight='bold')
    ax.set_ylabel('Score', fontsize=12)
    ax.grid(axis='y', alpha=0.3)
    fig.tight_layout()
    fig.savefig(outdir / "metrics_distribution.png", dpi=300, bbox_inches='tight')
    plt.close(fig)


def create_comparison_visualizations(summary_df, outdir):
    """
    Create comprehensive comparison charts for all baseline models.

    Writes five PNG files into ``outdir``: per-metric bar charts, a radar
    chart, a heatmap, an average-rank chart and a metric distribution plot.

    Args:
        summary_df: DataFrame with a 'model' column plus the 'Accuracy',
            'F1_macro', 'Precision_macro' and 'Recall_macro' columns.
        outdir: Output directory, as a ``str`` or ``pathlib.Path``. It is
            created if it does not exist yet.

    Raises:
        KeyError: If ``summary_df`` lacks any of the required columns.
        ValueError: If ``summary_df`` has no rows.
    """
    metrics = ['Accuracy', 'F1_macro', 'Precision_macro', 'Recall_macro']

    # Validate up front so a bad input fails before any file is written.
    missing = [col for col in ['model', *metrics] if col not in summary_df.columns]
    if missing:
        raise KeyError(f"summary_df is missing required columns: {missing}")
    if summary_df.empty:
        raise ValueError("summary_df has no rows to plot")

    # Accept plain strings as well as Path objects, and tolerate a missing folder.
    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)

    print("\nGenerating comparison visualizations...")

    # 1. Bar chart comparing all metrics across models
    _save_metric_bar_charts(summary_df, metrics, outdir)
    # 2. Radar/Spider chart for multi-metric comparison
    _save_metric_radar_chart(summary_df, metrics, outdir)
    # 3. Heatmap of all metrics
    _save_metric_heatmap(summary_df, metrics, outdir)
    # 4. Model ranking visualization
    _save_model_ranking(summary_df, metrics, outdir)
    # 5. Box plot showing metric distribution
    _save_metric_distribution(summary_df, metrics, outdir)

    print(f"✓ Generated 5 comparison visualizations in {outdir}")
    print("  - metrics_comparison_bars.png")
    print("  - metrics_radar_chart.png")
    print("  - metrics_heatmap.png")
    print("  - model_ranking.png")
    print("  - metrics_distribution.png")

In [11]:
# Output folder for the comparison charts that exclude the MLP models.
outdir = Path("models/baseline_comparisons_no_MLP")
outdir.mkdir(parents=True, exist_ok=True)

# Keep the table exactly as loaded; the filtered view below is a separate frame,
# so nothing is mutated in place.
holdout_summary_raw = pd.read_csv("models/holdout_summary.csv")

# Exclude every model whose name contains the literal text "MLP". na=False treats
# a missing model name as "not MLP" instead of yielding a NaN mask that cannot be
# used for filtering.
is_mlp = holdout_summary_raw['model'].str.contains("MLP", regex=False, na=False)
summary_df = holdout_summary_raw.loc[~is_mlp].copy()
summary_df

Unnamed: 0,model,Accuracy,F1_macro,F1_weighted,Precision_macro,Precision_weighted,Recall_macro,Recall_weighted,training_time_sec
0,SVC_GPU,0.983935,0.751124,0.983591,0.824535,0.985091,0.734439,0.983935,3190.860011
2,XGBoost_CUDA,0.989746,0.781287,0.990526,0.764588,0.991875,0.808206,0.989746,999.0
3,LogisticRRegression,0.975162,0.711888,0.974237,0.747405,0.974518,0.707329,0.975162,999.0
4,KNN_GPU,0.989518,0.794595,0.989863,0.787207,0.990382,0.802869,0.989518,999.0
5,RandomForest_GPU,0.98667,0.766923,0.988261,0.749062,0.990827,0.80592,0.98667,999.0


In [12]:
# Write the five comparison charts for the MLP-filtered summary table into outdir.
create_comparison_visualizations(summary_df, outdir)



Generating comparison visualizations...
✓ Generated 5 comparison visualizations in models\baseline_comparisons_no_MLP
  - metrics_comparison_bars.png
  - metrics_radar_chart.png
  - metrics_heatmap.png
  - model_ranking.png
  - metrics_distribution.png
