## 1. Setup and Data Loading

We will load the `cv_fold_metrics.csv` and `mda_scores.csv` files generated by the baseline training script for all thematic models.

In [None]:
import logging
import sys
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# --- Setup Project Paths ---
# The project root is taken to be the parent of the current working directory
# (assumes the notebook is launched from a direct child of the root -- confirm).
PROJECT_ROOT_PATH = Path.cwd().parent

# Make <root>/src importable, adding it to sys.path only once.
_src_dir = str(PROJECT_ROOT_PATH / 'src')
if _src_dir not in sys.path:
    sys.path.insert(0, _src_dir)

# Root directory that is searched recursively for the per-run report CSVs.
BASE_REPORT_DIR = PROJECT_ROOT_PATH / "data" / "reports" / "supervised_learning" / "thematic_models"

# --- Plotting Style ---
sns.set_theme(style="whitegrid", palette="viridis")
# Global figure defaults; the plotting functions pass their own figsize.
plt.rcParams.update({
    'figure.figsize': (14, 7),
    'figure.dpi': 100,
})

In [None]:
def load_all_fold_metrics(report_dir: Path) -> pd.DataFrame:
    """Load and combine every ``cv_fold_metrics.csv`` found under *report_dir*.

    Files are discovered recursively at
    ``<run_dir>/cv_results/cv_fold_metrics.csv``. The run directory name is
    split at its last underscore: the trailing piece is stored in the
    ``model_type`` column and the remainder in the ``theme`` column.

    Args:
        report_dir: Root directory that is searched recursively.

    Returns:
        The row-wise concatenation of all readable files, with ``theme`` and
        ``model_type`` columns added. An empty DataFrame is returned when no
        file is found or when none of the files could be read.
    """
    # Sorted so the row order of the result does not depend on filesystem
    # enumeration order.
    metric_files = sorted(report_dir.glob("**/cv_results/cv_fold_metrics.csv"))

    if not metric_files:
        print(f"Warning: No 'cv_fold_metrics.csv' files found in {report_dir}")
        return pd.DataFrame()

    all_metrics = []
    for file_path in metric_files:
        # Best-effort loading: a broken file is reported and skipped so the
        # remaining runs can still be analysed.
        try:
            run_name = file_path.parent.parent.name
            model_type = run_name.split('_')[-1]
            theme_name = run_name.removesuffix(f'_{model_type}')

            df = pd.read_csv(file_path)
            df['theme'] = theme_name
            df['model_type'] = model_type
            all_metrics.append(df)
        except Exception as e:
            print(f"Could not process file {file_path}: {e}")

    # pd.concat raises ValueError on an empty list, which happens when every
    # discovered file failed to load.
    if not all_metrics:
        return pd.DataFrame()

    return pd.concat(all_metrics, ignore_index=True)

fold_metrics_df = load_all_fold_metrics(BASE_REPORT_DIR)

# Summarise and preview the table only when something was actually loaded.
if not fold_metrics_df.empty:
    _n_fold_themes = fold_metrics_df['theme'].nunique()
    print(f"Loaded fold metrics for {_n_fold_themes} themes.")
    display(fold_metrics_df.head())

In [None]:
def plot_walk_forward_performance(metrics_df: pd.DataFrame, metric: str = 'f1_macro'):
    """Draw one line chart per theme showing *metric* across the CV folds.

    Each theme gets its own figure with one line per ``model_type``. The
    x-axis is the ``fold`` column and the y-axis is the requested metric.

    Args:
        metrics_df: Table with ``theme``, ``fold``, ``model_type`` and the
            metric column.
        metric: Name of the column to plot.

    Returns:
        None. When the table is empty or lacks the metric column a message is
        printed and nothing is drawn.
    """
    metric_available = (not metrics_df.empty) and (metric in metrics_df.columns)
    if not metric_available:
        print(f"Metric '{metric}' not found or DataFrame is empty.")
        return

    # Shared across all figures, so built once up front.
    y_label = f'{metric.replace("_", " ").title()}'
    line_kwargs = {
        'x': 'fold',
        'y': metric,
        'hue': 'model_type',
        'style': 'model_type',
        'markers': True,
        'dashes': False,
        'lw': 2.5,
    }

    for theme in sorted(metrics_df['theme'].unique()):
        per_theme = metrics_df[metrics_df['theme'] == theme]
        # One tick per fold that is present for this theme.
        fold_ticks = sorted(per_theme['fold'].unique())

        plt.figure(figsize=(12, 6))
        sns.lineplot(data=per_theme, **line_kwargs)

        plt.title(f'Walk-Forward Performance Stability for Theme: \n{theme}', fontsize=16, pad=20)
        plt.xlabel('Cross-Validation Fold (Chronological)', fontsize=12)
        plt.ylabel(y_label, fontsize=12)
        plt.xticks(fold_ticks)
        plt.legend(title='Model Type')
        plt.grid(True, which='both', linestyle='--')
        plt.show()

# Plot a primary metric (F1-score) first, then a probability-based
# metric (LogLoss), one set of per-theme figures for each.
for _metric_name in ('f1_macro', 'log_loss'):
    plot_walk_forward_performance(fold_metrics_df, metric=_metric_name)

## 3. Feature Importance Stability

This analysis was previously implemented in the `39_analyze_thematic_results.py` notebook. It uses boxplots to show the distribution of Mean Decrease Accuracy (MDA) scores for each feature across all CV folds. A tight distribution indicates that a feature's importance is stable, while a wide distribution suggests its contribution varies significantly depending on the time period. This is a critical check for model reliability.

In [None]:
def load_all_mda_results(report_dir: Path) -> pd.DataFrame:
    """Load and combine every ``mda_scores.csv`` found under *report_dir*.

    Files are discovered recursively at
    ``<run_dir>/mda_results/mda_scores.csv``. The run directory name is split
    at its last underscore: the trailing piece is stored in the
    ``model_type`` column and the remainder in the ``theme`` column.

    Args:
        report_dir: Root directory that is searched recursively.

    Returns:
        The row-wise concatenation of all readable files, with ``theme`` and
        ``model_type`` columns added. An empty DataFrame is returned when no
        file is found or when none of the files could be read.
    """
    # Sorted so the row order of the result does not depend on filesystem
    # enumeration order.
    mda_files = sorted(report_dir.glob("**/mda_results/mda_scores.csv"))

    if not mda_files:
        print(f"Warning: No 'mda_scores.csv' files found in {report_dir}")
        return pd.DataFrame()

    all_mda = []
    for file_path in mda_files:
        # Best-effort loading: a broken file is reported and skipped so the
        # remaining runs can still be analysed.
        try:
            run_name = file_path.parent.parent.name
            model_type = run_name.split('_')[-1]
            theme_name = run_name.removesuffix(f'_{model_type}')

            df = pd.read_csv(file_path)
            df['theme'] = theme_name
            df['model_type'] = model_type
            all_mda.append(df)
        except Exception as e:
            print(f"Could not process file {file_path}: {e}")

    # pd.concat raises ValueError on an empty list, which happens when every
    # discovered file failed to load.
    if not all_mda:
        return pd.DataFrame()

    return pd.concat(all_mda, ignore_index=True)

mda_df = load_all_mda_results(BASE_REPORT_DIR)

# Summarise and preview the table only when something was actually loaded.
if not mda_df.empty:
    _n_mda_themes = mda_df['theme'].nunique()
    print(f"Loaded MDA results for {_n_mda_themes} themes.")
    display(mda_df.head())

In [None]:
def plot_feature_importance_stability(mda_per_fold_df: pd.DataFrame, top_n: int = 20):
    """Draw a horizontal boxplot of the top features for every theme/model pair.

    Within each ``(theme, model_type)`` group, features are ranked by their
    mean ``importance`` and the first *top_n* are kept. One figure is shown
    per group, with one box per kept feature built from that feature's rows.

    Args:
        mda_per_fold_df: Table with ``theme``, ``model_type``, ``feature`` and
            ``importance`` columns.
        top_n: Maximum number of features shown per figure.

    Returns:
        None. Nothing is drawn when the table is empty.
    """
    if mda_per_fold_df.empty:
        return

    runs = mda_per_fold_df.groupby(['theme', 'model_type'])
    for (theme, model_type), run_df in runs:
        # Rank features by mean importance, highest first.
        ranked = (
            run_df.groupby('feature')['importance']
            .mean()
            .sort_values(ascending=False)
        )
        top_features = ranked.head(top_n).index.tolist()
        if len(top_features) == 0:
            continue

        keep_rows = run_df['feature'].isin(top_features)
        df_top = run_df[keep_rows]

        # Grow the figure with the number of features, never below 8 inches.
        fig_height = max(8, len(top_features) * 0.4)
        plt.figure(figsize=(12, fig_height))

        sns.boxplot(
            data=df_top,
            x='importance',
            y='feature',
            order=top_features,
            orient='h',
            palette='mako',
        )

        # Reference line marking an importance of zero.
        plt.axvline(x=0, color='r', linestyle='--', linewidth=1.2, label='Zero Importance')
        plt.title(f'Feature Importance Stability: {theme}\nModel: {model_type}', fontsize=16, pad=20)
        plt.xlabel('MDA Importance (Increase in LogLoss)', fontsize=12)
        plt.ylabel('Feature', fontsize=12)
        plt.legend(loc='lower right')
        plt.tight_layout()
        plt.show()

# Draw the stability boxplots for every theme/model pair in the loaded MDA
# table, using the function's default of the top 20 features per figure.
plot_feature_importance_stability(mda_df)