# Generalized ML Model Analysis

This notebook provides a flexible framework for analyzing trained ML model outputs and performance metrics across different training runs and datasets.

## Configuration

In [None]:
import os
import sys
import json
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px

# Global plot styling: seaborn "whitegrid" theme first, then a wide default
# figure size for every matplotlib figure that does not pass its own figsize.
sns.set_theme(style="whitegrid")
plt.rcParams.update({'figure.figsize': (14, 5)})

### Model Configuration

In [None]:
# ----------------------------------------------------------------------------
# CONFIGURATION - edit the values below to point the notebook at another model
# ----------------------------------------------------------------------------
#
# Both path settings can be overridden from the shell without touching the
# notebook, e.g.:
#   export TRAINING_RESULTS_DIR=/path/to/training/results
#   export MODEL_DIRECTORY=database_3/test_cn

# Root directory that holds all training results. The TRAINING_RESULTS_DIR
# environment variable wins; the literal path is only the fallback.
BASE_PATH = os.getenv("TRAINING_RESULTS_DIR", "./models/test_training")

# Model directory, relative to BASE_PATH. The MODEL_DIRECTORY environment
# variable wins; the literal is only the fallback. Example values:
#   "database_1/ads_height_test_1"
#   "database_2/surface_order_2_cn"
#   "database_3/test_cn"
#   "database_3/segmented_test"
MODEL_DIRECTORY = os.getenv("MODEL_DIRECTORY", "database_3/test_cn")

# Logical dataset name -> CSV file name inside the model directory.
# Add or remove entries to control which files the loading cell looks for.
DATASETS_TO_LOAD = dict(
    training='training.csv',
    train_set='train_set.csv',
    test_set='test_set.csv',
    validation_set='validation_set.csv',
    uncertainty='uq.csv',
)

# Full path of the model output directory
MODEL_PATH = os.path.join(BASE_PATH, MODEL_DIRECTORY)

# Echo the resolved location so a wrong path is noticed before loading
print(f"Loading model from: {MODEL_PATH}")
print(f"Model directory exists: {os.path.exists(MODEL_PATH)}")

### Load Datasets

In [None]:
# Read every configured CSV that exists under MODEL_PATH; report the rest
data = {}
for name, filename in DATASETS_TO_LOAD.items():
    filepath = os.path.join(MODEL_PATH, filename)
    if not os.path.exists(filepath):
        print(f"✗ {filename} not found")
        continue
    data[name] = pd.read_csv(filepath)
    n_rows, n_cols = data[name].shape
    print(f"✓ Loaded {name}: {n_rows} rows, {n_cols} columns")

# Convenience handles for the individual frames. A dataset whose file was
# missing is bound to None, which the later cells check before using it.
df_train, df_train_set, df_test_set, df_val_set, df_uq = (
    data.get(key)
    for key in ('training', 'train_set', 'test_set', 'validation_set', 'uncertainty')
)

## Dataset Information

In [None]:
# Display info about test set (skipped when test_set.csv was not loaded)
if df_test_set is not None:
    print("Test Set Information:")
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so it must not be wrapped in print() - that would append a stray "None".
    df_test_set.info()
    print("\nFirst few rows:")
    print(df_test_set.head())

## Performance Metrics Summary

In [None]:
# Summarise test-set accuracy from whichever columns are available
if df_test_set is not None:
    banner = "="*60
    print("\n" + banner)
    print("TEST SET PERFORMANCE METRICS")
    print(banner)

    test_columns = df_test_set.columns

    # MAE / RMSE are derived from the per-sample absolute error column
    if 'Abs_error_eV' in test_columns:
        abs_error = df_test_set['Abs_error_eV']
        mae = abs_error.mean()
        rmse = np.sqrt(abs_error.pow(2).mean())
        print(f"Mean Absolute Error (MAE): {mae:.4f} eV")
        print(f"Root Mean Squared Error (RMSE): {rmse:.4f} eV")

    # R² needs both the predicted and the reference energies
    if all(col in test_columns for col in ('Predicted_energy_eV', 'True_energy_eV')):
        from sklearn.metrics import r2_score
        r2 = r2_score(df_test_set['True_energy_eV'], df_test_set['Predicted_energy_eV'])
        print(f"R² Score: {r2:.4f}")

    print(f"\nNumber of test samples: {len(df_test_set)}")
    print(f"Columns: {list(test_columns)}")

## Learning Curves

In [None]:
# Plot learning curves
#
# Each panel is described as (title, y-axis label, curves); every curve is
# (df_train column, legend label, marker, color or None for the default cycle).
# Panels are drawn only for the columns actually present in df_train: a panel
# with no available column is hidden instead of being shown as an empty axes
# (which also avoids matplotlib's "no artists with labels" legend warning),
# and no figure is created at all when nothing can be plotted.
curve_panels = [
    ('Learning Curves: MAE', 'MAE (eV)', [
        ('Train_MAE_eV', 'Train MAE', 'o', None),
        ('Val_MAE_eV', 'Validation MAE', 's', None),
        ('Test_MAE_eV', 'Test MAE', '^', None),
    ]),
    ('Learning Curves: Loss', 'Loss', [
        ('Train_Loss', 'Train Loss', 'o', None),
        ('Val_Loss', 'Validation Loss', 's', None),
    ]),
    ('R² Score Over Training', 'R² Score', [
        ('R2_Score', 'R² Score', 'o', 'green'),
    ]),
]

if df_train is not None:
    # Every curve uses 'Epoch' as its x-axis, so nothing is drawable without it
    if 'Epoch' in df_train.columns:
        drawable_panels = [
            (title, ylabel, [curve for curve in curves if curve[0] in df_train.columns])
            for title, ylabel, curves in curve_panels
        ]
    else:
        drawable_panels = []

    if not any(curves for _, _, curves in drawable_panels):
        print("✗ No learning-curve columns found in training data; skipping plot")
    else:
        # Keep the fixed 1x3 layout so each metric stays in its usual position
        fig, axs = plt.subplots(1, 3, figsize=(18, 5))

        for panel_ax, (title, ylabel, curves) in zip(axs, drawable_panels):
            if not curves:
                panel_ax.set_visible(False)
                continue

            for column, label, marker, color in curves:
                # Only pass color when one is requested, so the other curves
                # keep following matplotlib's default color cycle
                style = {'color': color} if color else {}
                panel_ax.plot(df_train['Epoch'], df_train[column],
                              label=label, marker=marker, markersize=3, **style)

            panel_ax.set_xlabel('Epoch')
            panel_ax.set_ylabel(ylabel)
            panel_ax.set_title(title)
            panel_ax.legend()
            panel_ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

## Prediction vs True Values

In [None]:
# Parity plot: predicted vs. true energies on the test set
if df_test_set is not None and all(
    col in df_test_set.columns for col in ('Predicted_energy_eV', 'True_energy_eV')
):
    true_energy = df_test_set['True_energy_eV']
    predicted_energy = df_test_set['Predicted_energy_eV']

    fig, ax = plt.subplots(figsize=(10, 10))
    ax.scatter(true_energy, predicted_energy, alpha=0.6, s=50)

    # Reference diagonal (y = x) spanning the combined range of both columns
    min_val = min(true_energy.min(), predicted_energy.min())
    max_val = max(true_energy.max(), predicted_energy.max())
    diagonal = [min_val, max_val]
    ax.plot(diagonal, diagonal, 'r--', lw=2, label='Perfect Prediction')

    ax.set(
        xlabel='True Energy (eV)',
        ylabel='Predicted Energy (eV)',
        title='Model Predictions vs True Values',
    )
    ax.legend()
    ax.grid(True, alpha=0.3)
    # Equal scaling on both axes so the diagonal sits at 45 degrees
    ax.set_aspect('equal')

    plt.tight_layout()
    plt.show()

## Error Analysis

In [None]:
# Error distribution analysis: histogram, sorted-error curve, summary
# statistics and per-percentile-band box plots of the test-set absolute error.
if df_test_set is not None and 'Abs_error_eV' in df_test_set.columns:
    # Work on a single NaN-free view of the column: the histogram cannot bin
    # NaN values, and all four panels should describe the same set of samples.
    abs_errors = df_test_set['Abs_error_eV'].dropna()
    mean_error = abs_errors.mean()

    fig, axs = plt.subplots(2, 2, figsize=(14, 10))

    # Histogram of absolute errors, with the mean marked
    axs[0, 0].hist(abs_errors, bins=50, edgecolor='black', alpha=0.7)
    axs[0, 0].set_xlabel('Absolute Error (eV)')
    axs[0, 0].set_ylabel('Count')
    axs[0, 0].set_title('Distribution of Absolute Errors')
    axs[0, 0].axvline(mean_error, color='r', linestyle='--', label=f'Mean: {mean_error:.3f}')
    axs[0, 0].legend()

    # Sorted errors
    sorted_errors = np.sort(abs_errors.values)
    axs[0, 1].plot(sorted_errors, linewidth=2)
    axs[0, 1].set_xlabel('Sample Index (sorted)')
    axs[0, 1].set_ylabel('Absolute Error (eV)')
    axs[0, 1].set_title('Sorted Absolute Errors')
    axs[0, 1].grid(True, alpha=0.3)

    # Error statistics, rendered as text in an otherwise empty panel
    stats_text = f"""
    Error Statistics:
    Mean: {mean_error:.4f} eV
    Median: {abs_errors.median():.4f} eV
    Std Dev: {abs_errors.std():.4f} eV
    Min: {abs_errors.min():.4f} eV
    Max: {abs_errors.max():.4f} eV
    95th Percentile: {abs_errors.quantile(0.95):.4f} eV
    """
    axs[1, 0].text(0.1, 0.5, stats_text, fontsize=11, family='monospace')
    axs[1, 0].axis('off')

    # Box plot of errors by percentile band.
    # The bands are built from each sample's percentile *rank*, so every band
    # holds about 20% of the samples. (Cutting the raw values into 5 bins, as
    # pd.cut(values, bins=5) does, yields equal-width value ranges instead,
    # which does not match the percentile labels.) method='first' gives tied
    # errors distinct ranks, so ties cannot collapse two bands into one.
    percentile_labels = ['0-20%', '20-40%', '40-60%', '60-80%', '80-100%']
    percentile_rank = abs_errors.rank(method='first', pct=True)  # values in (0, 1]
    error_ranges = pd.cut(percentile_rank, bins=[0.0, 0.2, 0.4, 0.6, 0.8, 1.0], labels=percentile_labels)
    axs[1, 1].boxplot([abs_errors[error_ranges == label].values for label in percentile_labels])
    # Label the boxes via the axis rather than boxplot's `labels=` argument,
    # which newer Matplotlib releases deprecate in favour of `tick_labels=`;
    # setting the tick labels directly works on old and new versions alike.
    axs[1, 1].set_xticklabels(percentile_labels)
    axs[1, 1].set_ylabel('Absolute Error (eV)')
    axs[1, 1].set_title('Error Distribution by Percentile')

    plt.tight_layout()
    plt.show()

## Uncertainty Analysis

## Material-wise Performance

# Summary
# Closing banner followed by the model that was analysed and where it lives
print(
    "\n" + "="*60,
    "ANALYSIS COMPLETE",
    "="*60,
    f"Model: {MODEL_DIRECTORY}",
    f"Path: {MODEL_PATH}",
    sep="\n",
)