In [None]:
# %% [markdown]
# # Insurance Claim Severity Modeling - EDA & Results Dashboard
# 
# This notebook provides a comprehensive overview of the insurance claim severity modeling pipeline, including data exploration, model results, and insights.
# 

# %% [markdown]
# ## 1. Setup and Configuration

# %%
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import pickle
import warnings
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pathlib import Path
import sys
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import seaborn as sns

# Project directories, all resolved relative to the parent of the current
# working directory (presumably the notebook is launched from the
# "notebooks" folder -- verify if it is run from elsewhere).
BASE_PATH = Path.cwd().parent
DATA_PATH, MODELS_PATH, RESULTS_PATH, NOTEBOOKS_PATH = (
    BASE_PATH / folder for folder in ("data", "models", "Results", "notebooks")
)

# Make the project's "src" directory importable
sys.path.append(str(BASE_PATH.joinpath("src")))

# Plot appearance shared by every figure in the notebook
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
plt.rcParams.update({'figure.figsize': (12, 8), 'font.size': 12})

# Silence all warnings for the rest of the session
warnings.filterwarnings('ignore')

print("Setup complete!", f"Base path: {BASE_PATH}", sep="\n")

# %% [markdown]
# ## 2. Load Data

# %%
def load_data():
    """Load every CSV file found in ``DATA_PATH / "processed"``.

    Files are read in sorted order so repeated runs list the datasets in the
    same order. A file that cannot be read is reported and skipped; the
    remaining files are still loaded.

    Returns:
        dict: Maps each file's stem (file name without ``.csv``) to the
        ``pandas.DataFrame`` read from it. Empty when the directory is
        missing or contains no readable CSV file.
    """
    data_dict = {}
    processed_path = DATA_PATH / "processed"

    # Globbing a missing directory just yields nothing, which would look like
    # "no datasets" with no explanation -- report the real cause instead.
    if not processed_path.is_dir():
        print(f"Error loading processed data: directory not found: {processed_path}")
        return data_dict

    try:
        # sorted() makes the load order independent of the filesystem
        files = sorted(processed_path.glob("*.csv"))
    except OSError as e:
        print(f"Error loading processed data: {e}")
        return data_dict

    for file in files:
        name = file.stem
        print(f"Loading {name}...")
        try:
            data_dict[name] = pd.read_csv(file)
            print(f"  Shape: {data_dict[name].shape}")
        except Exception as e:
            # Best effort: one unreadable file must not block the others
            print(f"  Error loading {file}: {e}")

    return data_dict

# Read all processed datasets into memory
print("Loading datasets...")
data_dict = load_data()

# Summarise what was loaded: one line per dataset with its (rows, cols) shape
print("\nAvailable datasets:")
for name in data_dict:
    df = data_dict[name]
    print(f"  - {name}: {df.shape}")


In [None]:

# %% [markdown]
# ## 3. Data Exploration

# %%
# Explore the 'claim_policies' dataset: shape, dtypes, missing values and the
# distribution of the TotalClaims target. Skipped when the dataset is absent.
if 'claim_policies' in data_dict:
    # Work on a copy so nothing in this cell can mutate the loaded dataset
    df = data_dict['claim_policies'].copy()
    
    print("="*60)
    print("DATA EXPLORATION")
    print("="*60)
    
    # Basic info
    print(f"\nüìä Dataset Shape: {df.shape}")
    print(f"üìã Columns: {len(df.columns)}")
    
    # Display first few rows
    print("\nFirst 5 rows:")
    display(df.head())
    
    # Data types
    print("\nData Types:")
    dtype_counts = df.dtypes.value_counts()
    for dtype, count in dtype_counts.items():
        print(f"  {dtype}: {count} columns")
    
    # Missing values: count and percentage per column, worst first
    print("\nMissing Values:")
    missing = df.isnull().sum()
    missing_pct = (missing / len(df) * 100).round(2)
    missing_df = pd.DataFrame({
        'Missing Count': missing,
        'Missing %': missing_pct
    })
    missing_df = missing_df[missing_df['Missing Count'] > 0].sort_values('Missing %', ascending=False)
    
    if len(missing_df) > 0:
        display(missing_df)
    else:
        print("  No missing values found!")
    
    # Target variable analysis
    if 'TotalClaims' in df.columns:
        print("\nüéØ Target Variable Analysis (TotalClaims):")
        target_stats = df['TotalClaims'].describe()
        display(pd.DataFrame(target_stats).T)
        
        # Check for extreme values
        print(f"\nüîç Extreme Values Check:")
        print(f"  Min: R{df['TotalClaims'].min():,.2f}")
        print(f"  Max: R{df['TotalClaims'].max():,.2f}")
        print(f"  Mean: R{df['TotalClaims'].mean():,.2f}")
        print(f"  Std: R{df['TotalClaims'].std():,.2f}")
        
        # Check for zeros or negative values
        zero_claims = (df['TotalClaims'] == 0).sum()
        negative_claims = (df['TotalClaims'] < 0).sum()
        print(f"  Zero claims: {zero_claims} ({zero_claims/len(df)*100:.2f}%)")
        print(f"  Negative claims: {negative_claims} ({negative_claims/len(df)*100:.2f}%)")
        
        # Missing targets cannot be binned, so plot only the observed values.
        # mean()/median() already skip NaN, so the reference lines are unchanged.
        claims = df['TotalClaims'].dropna()
        
        # Plot target distribution
        fig, axes = plt.subplots(1, 2, figsize=(15, 5))
        
        # Histogram
        axes[0].hist(claims, bins=50, edgecolor='black', alpha=0.7)
        axes[0].axvline(claims.mean(), color='red', linestyle='--', 
                       label=f'Mean: R{claims.mean():,.2f}')
        axes[0].axvline(claims.median(), color='green', linestyle='--',
                       label=f'Median: R{claims.median():,.2f}')
        axes[0].set_xlabel('Claim Amount (R)')
        axes[0].set_ylabel('Frequency')
        axes[0].set_title('Distribution of Claim Amounts')
        axes[0].legend()
        axes[0].grid(True, alpha=0.3)
        
        # Log transformation: log1p(0) == 0, so zero claims are fine; only
        # negative amounts make the transform unusable (matching the message
        # shown in the fallback branch).
        if (claims >= 0).all():
            log_claims = np.log1p(claims)
            axes[1].hist(log_claims, bins=50, edgecolor='black', alpha=0.7)
            axes[1].set_xlabel('Log(1 + Claim Amount)')
            axes[1].set_ylabel('Frequency')
            axes[1].set_title('Log-Transformed Claim Amounts')
            axes[1].grid(True, alpha=0.3)
        else:
            axes[1].text(0.5, 0.5, 'Log transform not possible\n(negative values present)',
                        ha='center', va='center', transform=axes[1].transAxes)
            axes[1].set_title('Log-Transformed Claim Amounts')
        
        plt.tight_layout()
        plt.show()
        
        # Top claims; PolicyID is shown only when the dataset actually has it
        print("\nüí∞ Top 10 Largest Claims:")
        top_cols = [col for col in ('PolicyID', 'TotalClaims') if col in df.columns]
        top_claims = df.nlargest(10, 'TotalClaims')[top_cols].copy()
        top_claims['TotalClaims'] = top_claims['TotalClaims'].apply(lambda x: f'R{x:,.2f}')
        display(top_claims)


In [None]:

# %% [markdown]
# ## 4. Load Model Results

# %%
def load_model_results():
    """Load whichever saved model-evaluation artifacts exist on disk.

    Looks for these files and stores each one that loads under the given key:

    * ``MODELS_PATH / "model_comparison.json"`` -> ``'model_comparison'``
      (plus ``'detailed_metrics'`` / ``'best_model'`` when the file is a dict
      that nests its data under a ``'model_comparison'`` key)
    * ``MODELS_PATH / "cross_validation_results.json"`` -> ``'cv_results'``
    * ``MODELS_PATH / "Lasso_best_params.json"`` -> ``'lasso_params'``
    * ``MODELS_PATH / "LinearRegression_best_params.json"`` -> ``'lr_params'``
    * ``RESULTS_PATH / "Task4_Reports" / "task4_comprehensive_report.json"``
      -> ``'task4_report'``
    * ``RESULTS_PATH / "Task4_Reports" / "task4_final_report.md"``
      -> ``'task4_md'`` (raw text)

    Missing files are skipped silently. A file that exists but cannot be read
    or parsed is reported (message plus traceback) and skipped, so one corrupt
    artifact no longer prevents the others from loading.

    Returns:
        dict: The artifacts that were loaded, keyed as listed above.
    """
    import traceback  # only needed on the error path

    # Sentinel so that a file legitimately containing JSON ``null`` is still
    # distinguishable from "missing or unreadable".
    not_loaded = object()

    def _read(path, label, parser=json.load):
        """Return ``parser(file)`` for *path*, or ``not_loaded`` if absent or unreadable."""
        if not path.exists():
            return not_loaded
        try:
            with open(path, 'r') as f:
                return parser(f)
        except Exception as e:
            print(f"Error loading {label}: {e}")
            traceback.print_exc()
            return not_loaded

    results = {}

    # Model comparison: may be a list, a flat dict, or a dict that nests the
    # comparison next to extra sections.
    model_data = _read(MODELS_PATH / "model_comparison.json", "model comparison")
    if model_data is not not_loaded:
        print(f"Model comparison data type: {type(model_data)}")

        if isinstance(model_data, list):
            print(f"Model comparison is a list with {len(model_data)} items")
            results['model_comparison'] = model_data
        elif isinstance(model_data, dict):
            print(f"Model comparison keys: {list(model_data.keys())}")

            if 'model_comparison' in model_data:
                print("Found nested 'model_comparison' key")
                nested_data = model_data['model_comparison']
                print(f"Nested data type: {type(nested_data)}")
                results['model_comparison'] = nested_data

                # Carry over the sibling sections when present
                for key in ['detailed_metrics', 'best_model']:
                    if key in model_data:
                        results[key] = model_data[key]
            else:
                results['model_comparison'] = model_data

        print(f"‚úì Loaded model comparison results")

    # Remaining artifacts are stored as-is: (result key, path, label, parser)
    task4_dir = RESULTS_PATH / "Task4_Reports"
    artifacts = [
        ('cv_results', MODELS_PATH / "cross_validation_results.json",
         "cross-validation results", json.load),
        ('lasso_params', MODELS_PATH / "Lasso_best_params.json",
         "Lasso parameters", json.load),
        ('lr_params', MODELS_PATH / "LinearRegression_best_params.json",
         "Linear Regression parameters", json.load),
        ('task4_report', task4_dir / "task4_comprehensive_report.json",
         "Task 4 comprehensive report", json.load),
        ('task4_md', task4_dir / "task4_final_report.md",
         "Task 4 markdown report", lambda f: f.read()),
    ]
    for key, path, label, parser in artifacts:
        loaded = _read(path, label, parser)
        if loaded is not not_loaded:
            results[key] = loaded
            print(f"‚úì Loaded {label}")

    return results

print("Loading model results...")
model_results = load_model_results()

# List each loaded artifact with its Python type; the markdown report is
# skipped here because it is a large block of text.
print("\nüìä Loaded model results:")
for key, value in model_results.items():
    if key == 'task4_md':
        continue
    print(f"  {key}: {type(value)}")


In [None]:

# %% [markdown]
# ## 5. Model Performance Analysis

# %%
def _metric_is_problematic(value):
    """Return True when a metric value is missing, NaN, infinite or absurdly large.

    Non-numeric values (strings, nested containers) are never flagged; the
    numeric checks are only applied to ints/floats so that such values cannot
    raise inside ``np.isinf``.
    """
    if value is None:
        return True
    if isinstance(value, (int, float)):
        return bool(pd.isna(value) or np.isinf(value) or abs(value) > 1e100)
    return False


# Inspect the loaded model comparison and, when available, the per-model
# detailed metrics, flagging models whose metrics are unusable.
if 'model_comparison' in model_results:
    print("="*60)
    print("MODEL PERFORMANCE ANALYSIS")
    print("="*60)
    
    model_data = model_results['model_comparison']
    print(f"Model comparison data type: {type(model_data)}")
    
    if isinstance(model_data, list):
        print(f"Model comparison is a list with {len(model_data)} items")
        # An empty list has no first item to describe
        if model_data:
            print("\nFirst item in list:")
            print(f"  Type: {type(model_data[0])}")
            if isinstance(model_data[0], dict):
                print(f"  Keys: {list(model_data[0].keys())}")
    
    # Check for detailed_metrics separately
    if 'detailed_metrics' in model_results:
        print("\n" + "="*60)
        print("DETAILED MODEL METRICS ANALYSIS")
        print("="*60)
        
        detailed_metrics = model_results['detailed_metrics']
        print(f"Detailed metrics type: {type(detailed_metrics)}")
        
        if isinstance(detailed_metrics, dict):
            print(f"Number of models in detailed metrics: {len(detailed_metrics)}")
            print(f"Models: {list(detailed_metrics.keys())}")
            
            # One summary record per model
            model_analysis = []
            
            for model_name, metrics in detailed_metrics.items():
                if isinstance(metrics, dict):
                    print(f"\nüìä {model_name}:")
                    
                    # Names of the metrics that failed the sanity check
                    problem_flags = []
                    
                    for metric_name, value in metrics.items():
                        if _metric_is_problematic(value):
                            problem_flags.append(metric_name)
                            print(f"  ‚ö†Ô∏è  {metric_name}: {value} (PROBLEMATIC)")
                        else:
                            print(f"  ‚úì {metric_name}: {value}")
                    
                    # Metric keys may be lower- or upper-case; NaN when absent
                    model_analysis.append({
                        'Model': model_name,
                        'Has_Problems': len(problem_flags) > 0,
                        'Problem_Metrics': problem_flags,
                        'R2': metrics.get('r2', metrics.get('R2', np.nan)),
                        'MAE': metrics.get('mae', metrics.get('MAE', np.nan)),
                        'RMSE': metrics.get('rmse', metrics.get('RMSE', np.nan))
                    })
            
            # Create summary DataFrame
            if model_analysis:
                analysis_df = pd.DataFrame(model_analysis)
                print("\n" + "="*60)
                print("MODEL PROBLEM SUMMARY")
                print("="*60)
                display(analysis_df)
                
                # Count problematic models
                problematic_models = analysis_df[analysis_df['Has_Problems']]
                print(f"\n‚ö†Ô∏è  {len(problematic_models)} models have problematic metrics")
                print(f"‚úÖ {len(analysis_df) - len(problematic_models)} models have valid metrics")
                
                # Show models without problems
                valid_models = analysis_df[~analysis_df['Has_Problems']]
                if len(valid_models) > 0:
                    print("\n‚úÖ Models with valid metrics:")
                    for _, row in valid_models.iterrows():
                        print(f"  - {row['Model']}: R¬≤={row['R2']:.4f}, MAE={row['MAE']:.2f}, RMSE={row['RMSE']:.2f}")
                
                # Show models with problems
                if len(problematic_models) > 0:
                    print("\n‚ö†Ô∏è  Models with problematic metrics:")
                    for _, row in problematic_models.iterrows():
                        print(f"  - {row['Model']}: Problems in {row['Problem_Metrics']}")


In [None]:

# %% [markdown]
# ## 6. Debug Model Issues

# %%
print("="*60)
print("MODEL ISSUE DEBUGGING")
print("="*60)

# Check if we have train/test data
print("\nüîç Checking for training and test data...")
train_path = DATA_PATH / "processed" / "train_data.csv"
test_path = DATA_PATH / "processed" / "test_data.csv"

# Load each split independently and give every name a default, so the feature
# check below (and later cells) never hit an undefined variable when only one
# of the two files is present.
train_df = pd.read_csv(train_path) if train_path.exists() else None
test_df = pd.read_csv(test_path) if test_path.exists() else None
available_targets = []
target_col = None

if train_df is not None and test_df is not None:
    print(f"Training data shape: {train_df.shape}")
    print(f"Test data shape: {test_df.shape}")
    
    # Candidate targets in order of preference; the first one present in the
    # training data is used.
    target_cols = ['Log_TotalClaims', 'TotalClaims']
    available_targets = [col for col in target_cols if col in train_df.columns]
    
    if available_targets:
        target_col = available_targets[0]
        print(f"\nüéØ Target variable: {target_col}")
        
        # Check target distribution
        y_train = train_df[target_col]
        y_test = test_df[target_col]
        
        for stats_title, y_split in (("Training target stats:", y_train),
                                     ("\nTest target stats:", y_test)):
            print(stats_title)
            print(f"  Min: {y_split.min():.2f}")
            print(f"  Max: {y_split.max():.2f}")
            print(f"  Mean: {y_split.mean():.2f}")
            print(f"  Std: {y_split.std():.2f}")
        
        # Check for extreme values (magnitude above 1e10)
        train_extreme = (y_train.abs() > 1e10).sum()
        test_extreme = (y_test.abs() > 1e10).sum()
        
        if train_extreme > 0 or test_extreme > 0:
            print(f"\n‚ö†Ô∏è  WARNING: Found extreme values in target!")
            print(f"  Training: {train_extreme} extreme values")
            print(f"  Test: {test_extreme} extreme values")
            
            # Show extreme values
            if train_extreme > 0:
                print(f"\n  Training extreme values:")
                extreme_train = y_train[y_train.abs() > 1e10]
                print(f"    {extreme_train.head()}")
            
            if test_extreme > 0:
                print(f"\n  Test extreme values:")
                extreme_test = y_test[y_test.abs() > 1e10]
                print(f"    {extreme_test.head()}")
    else:
        print("‚ùå No target variables found in training data")
else:
    print("‚ùå Training or test data not found")

# Check feature columns
print("\nüîç Checking feature columns...")
if train_df is not None:
    # Get feature columns (exclude target columns)
    feature_cols = [col for col in train_df.columns 
                   if col not in ['Log_TotalClaims', 'TotalClaims', 'HighClaim']]
    
    print(f"Number of features: {len(feature_cols)}")
    print(f"First 10 features: {feature_cols[:10]}")
    
    # Check for NaN/inf in features
    print("\nüîç Checking for NaN/Inf in features...")
    for col in feature_cols[:5]:  # Check first 5 features
        nan_count = train_df[col].isna().sum()
        # Only floating-point columns can hold +/-inf; testing the dtype kind
        # (rather than the literal 'float64') also covers other float widths.
        if pd.api.types.is_float_dtype(train_df[col]):
            inf_count = np.isinf(train_df[col]).sum()
        else:
            inf_count = 0
        if nan_count > 0 or inf_count > 0:
            print(f"  ‚ö†Ô∏è  {col}: {nan_count} NaN, {inf_count} Inf")
        else:
            print(f"  ‚úì {col}: No NaN/Inf")


In [None]:

# %% [markdown]
# ## 7. Try to Load and Test a Single Model

# %%
# Smoke-test one saved model: unpickle it, run the saved preprocessor over the
# train/test features and predict on the first five test rows.
#
# NOTE: this cell relies on names created by the "Debug Model Issues" cell
# (train_path, test_path, train_df, test_df, feature_cols, available_targets,
# target_col); it must be run after that cell.
print("="*60)
print("TESTING INDIVIDUAL MODEL LOADING")
print("="*60)

# Try to load a simple model
test_model_name = "LinearRegression"
model_path = MODELS_PATH / f"{test_model_name}.pkl"

if model_path.exists():
    print(f"Testing {test_model_name} model...")
    # NOTE(review): everything below runs inside this one try block, so a
    # failure while selecting features or loading the preprocessor is also
    # reported as "Error loading model".
    try:
        # SECURITY: unpickling can execute arbitrary code -- only load model
        # files that come from a trusted source.
        with open(model_path, 'rb') as f:
            model = pickle.load(f)
        
        print(f"‚úÖ Successfully loaded {test_model_name} model")
        print(f"  Model type: {type(model)}")
        
        # Check if model has been fitted
        if hasattr(model, 'coef_'):
            # Objects without __len__ are counted as a single coefficient
            print(f"  Model has coefficients: {len(model.coef_) if hasattr(model.coef_, '__len__') else 1}")
        if hasattr(model, 'feature_importances_'):
            print(f"  Model has feature importances")
        
        # Try to make a prediction
        if train_path.exists() and test_path.exists():
            # Prepare features (feature_cols was derived from train_df's columns)
            X_train = train_df[feature_cols]
            X_test = test_df[feature_cols]
            
            # Check if preprocessor exists
            preprocessor_path = MODELS_PATH / "preprocessor.pkl"
            if preprocessor_path.exists():
                print(f"\nüîß Loading preprocessor...")
                # SECURITY: same pickle caveat as for the model file above
                with open(preprocessor_path, 'rb') as f:
                    preprocessor = pickle.load(f)
                
                print(f"‚úÖ Preprocessor loaded")
                
                # Transform features
                try:
                    # The transformed training matrix is only used for the
                    # shape printout below
                    X_train_transformed = preprocessor.transform(X_train)
                    X_test_transformed = preprocessor.transform(X_test)
                    
                    print(f"‚úÖ Features transformed successfully")
                    print(f"  Training features shape: {X_train_transformed.shape}")
                    print(f"  Test features shape: {X_test_transformed.shape}")
                    
                    # Make prediction
                    try:
                        y_pred = model.predict(X_test_transformed[:5])  # Predict on first 5 samples
                        print(f"\n‚úÖ Made predictions on 5 samples:")
                        print(f"  Predictions: {y_pred}")
                        
                        # Compare with actual
                        if available_targets:
                            y_actual = test_df[target_col].values[:5]
                            print(f"  Actual values: {y_actual}")
                            print(f"  Differences: {y_pred - y_actual}")
                        
                    except Exception as e:
                        print(f"‚ùå Error making predictions: {e}")
                        
                except Exception as e:
                    print(f"‚ùå Error transforming features: {e}")
            else:
                print(f"‚ùå Preprocessor not found at {preprocessor_path}")
        
    except Exception as e:
        print(f"‚ùå Error loading model: {e}")
        import traceback
        traceback.print_exc()
else:
    print(f"‚ùå Model {test_model_name} not found at {model_path}")


In [None]:

# %% [markdown]
# ## 8. Check Cross-Validation Results

# %%
def _cv_value_is_problematic(val):
    """Return True when a CV score is missing, NaN, infinite or absurdly large.

    The numeric checks run only on ints/floats, so a non-numeric entry in a
    score list cannot raise inside ``np.isinf``; such entries are not flagged.
    """
    if val is None:
        return True
    if isinstance(val, (int, float)):
        return bool(pd.isna(val) or np.isinf(val) or abs(val) > 1e100)
    return False


# Scan the cross-validation results for unusable scores and summarise, per
# model, which metrics are clean and which contain problematic values.
if 'cv_results' in model_results:
    print("="*60)
    print("CROSS-VALIDATION RESULTS ANALYSIS")
    print("="*60)
    
    cv_data = model_results['cv_results']
    print(f"CV data type: {type(cv_data)}")
    
    if isinstance(cv_data, dict):
        print(f"Number of models in CV: {len(cv_data)}")
        
        # One summary record per model
        cv_summary = []
        
        for model_name, scores in cv_data.items():
            print(f"\nüìä {model_name}:")
            
            if isinstance(scores, dict):
                model_issues = []
                valid_metrics = []
                
                for metric_name, values in scores.items():
                    # Only list-valued metrics are checked; anything else is
                    # left out of both the issue and the valid lists.
                    if isinstance(values, list):
                        problem_values = [val for val in values
                                          if _cv_value_is_problematic(val)]
                        
                        if problem_values:
                            model_issues.append(f"{metric_name} has {len(problem_values)} problematic values")
                        else:
                            valid_metrics.append(metric_name)
                
                if model_issues:
                    print(f"  ‚ö†Ô∏è  Issues: {', '.join(model_issues)}")
                if valid_metrics:
                    print(f"  ‚úÖ Valid metrics: {', '.join(valid_metrics)}")
                
                cv_summary.append({
                    'Model': model_name,
                    'Has_Issues': len(model_issues) > 0,
                    'Issues': model_issues,
                    'Valid_Metrics': valid_metrics
                })
        
        # Create summary
        if cv_summary:
            summary_df = pd.DataFrame(cv_summary)
            print("\n" + "="*60)
            print("CROSS-VALIDATION SUMMARY")
            print("="*60)
            display(summary_df)


In [None]:

# %% [markdown]
# ## 9. Root Cause Analysis

# %%
# Banner for the section
print("="*60, "ROOT CAUSE ANALYSIS", "="*60, sep="\n")

# Static checklist of candidate causes and suggested fixes; the text is fixed
# and is not derived from the results computed in earlier cells.
print(
    "\nüîç Based on the analysis, here are potential issues:",
    "\n1. **Data Issues**:",
    "   ‚Ä¢ Target variable may have extreme values",
    "   ‚Ä¢ Features may contain NaN or infinite values",
    "   ‚Ä¢ Data may not be properly scaled",
    "\n2. **Model Training Issues**:",
    "   ‚Ä¢ Models may not have converged properly",
    "   ‚Ä¢ Hyperparameters may be poorly chosen",
    "   ‚Ä¢ Data leakage between train and test sets",
    "\n3. **Evaluation Issues**:",
    "   ‚Ä¢ Metrics calculation may have errors",
    "   ‚Ä¢ Predictions may be extremely large",
    "   ‚Ä¢ Log transformation issues",
    "\nüöÄ **Recommended Fixes**:",
    "\n1. **Check Data Preparation**:",
    "   - Verify that Log_TotalClaims was calculated correctly",
    "   - Check for and remove extreme outliers",
    "   - Ensure proper feature scaling",
    "\n2. **Retrain Models**:",
    "   - Use simpler models first (LinearRegression)",
    "   - Add regularization (Ridge, Lasso)",
    "   - Limit tree depths for tree-based models",
    "\n3. **Debug Step-by-Step**:",
    "   - Train one model at a time",
    "   - Check predictions after each step",
    "   - Verify metric calculations manually",
    sep="\n",
)


In [None]:

# %% [markdown]
# ## 10. Create Diagnostic Report

# %%
def create_diagnostic_report():
    """Build a diagnostic report, save it as JSON and print a summary.

    Reads the module-level ``data_dict`` and ``model_results``:

    * For the ``'claim_policies'`` dataset (if loaded) it records the shape
      and, when a ``TotalClaims`` column exists, its summary statistics and
      the number of values whose magnitude exceeds 1e10.
    * For ``'detailed_metrics'`` (if loaded as a dict) it records the model
      names and which models have a missing, NaN, infinite or absurdly large
      (> 1e100) metric.

    The report is written to ``RESULTS_PATH / "model_diagnostic_report.json"``;
    the results directory is created when it does not exist yet.

    Returns:
        dict: The report, with keys ``timestamp``, ``issues_found``,
        ``data_checks``, ``model_checks`` and ``recommendations``.
    """
    def _is_problematic(value):
        """True for None/NaN/inf/huge numbers; non-numeric values are never flagged."""
        if value is None:
            return True
        # Restrict the numeric checks to real numbers so strings or nested
        # containers in the metrics cannot raise inside np.isinf.
        if isinstance(value, (int, float)):
            return bool(pd.isna(value) or np.isinf(value) or abs(value) > 1e100)
        return False

    print("="*60)
    print("DIAGNOSTIC REPORT")
    print("="*60)
    
    report = {
        "timestamp": pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S"),
        "issues_found": [],
        "data_checks": {},
        "model_checks": {},
        "recommendations": []
    }
    
    # Data checks
    if 'claim_policies' in data_dict:
        df = data_dict['claim_policies']
        report["data_checks"]["dataset_shape"] = df.shape
        
        if 'TotalClaims' in df.columns:
            target_stats = df['TotalClaims'].describe().to_dict()
            report["data_checks"]["target_statistics"] = target_stats
            
            # Check for extreme values
            extreme_mask = (df['TotalClaims'].abs() > 1e10)
            if extreme_mask.any():
                report["issues_found"].append("Extreme values in TotalClaims")
                report["data_checks"]["extreme_values_count"] = int(extreme_mask.sum())
    
    # Model checks (only meaningful when the metrics are a name -> metrics dict)
    detailed_metrics = model_results.get('detailed_metrics')
    if isinstance(detailed_metrics, dict):
        report["model_checks"]["models_evaluated"] = list(detailed_metrics.keys())
        
        # A model is problematic as soon as any one of its metrics is
        problematic_models = [
            model_name
            for model_name, metrics in detailed_metrics.items()
            if isinstance(metrics, dict)
            and any(_is_problematic(value) for value in metrics.values())
        ]
        
        if problematic_models:
            report["issues_found"].append(f"Problematic metrics in {len(problematic_models)} models")
            report["model_checks"]["problematic_models"] = problematic_models
    
    # Recommendations (fixed list, independent of the checks above)
    report["recommendations"] = [
        "1. Check data preparation pipeline for errors",
        "2. Verify target variable transformation (Log_TotalClaims)",
        "3. Remove or cap extreme outliers",
        "4. Retrain models with proper regularization",
        "5. Start with simple LinearRegression as baseline"
    ]
    
    # Save report; make sure the target directory exists first
    report_path = RESULTS_PATH / "model_diagnostic_report.json"
    report_path.parent.mkdir(parents=True, exist_ok=True)
    with open(report_path, 'w') as f:
        # default=str serialises values json cannot encode natively
        json.dump(report, f, indent=2, default=str)
    
    print(f"\n‚úÖ Diagnostic report saved to: {report_path}")
    
    # Display summary
    print("\nüìã Report Summary:")
    print(f"  Issues found: {len(report['issues_found'])}")
    for issue in report['issues_found']:
        print(f"  ‚Ä¢ {issue}")
    
    print("\nüí° Recommendations:")
    for rec in report['recommendations']:
        print(f"  {rec}")
    
    return report

# Create diagnostic report
diagnostic_report = create_diagnostic_report()


In [None]:

# %% [markdown]
# ## 11. Quick Fix: Recalculate Metrics Manually

# %%
print("="*60)
print("QUICK FIX: MANUAL METRIC CALCULATION")
print("="*60)


def _metric_out_of_range(value, limit=1e10):
    """Return True when a recomputed metric is NaN, infinite or exceeds *limit* in magnitude."""
    return bool(pd.isna(value) or np.isinf(value) or abs(value) > limit)


# Recompute R2 / MAE / RMSE on the test split for up to three saved models.
# NOTE: relies on train_path and test_path from the "Debug Model Issues" cell.
preprocessor_file = MODELS_PATH / "preprocessor.pkl"

if train_path.exists() and test_path.exists() and preprocessor_file.exists():
    print("\nAttempting to recalculate metrics manually...")
    
    # Load data
    train_df = pd.read_csv(train_path)
    test_df = pd.read_csv(test_path)
    
    # Get target and features (derived from the test split here)
    target_col = 'Log_TotalClaims' if 'Log_TotalClaims' in test_df.columns else 'TotalClaims'
    feature_cols = [col for col in test_df.columns 
                   if col not in ['Log_TotalClaims', 'TotalClaims', 'HighClaim']]
    
    if target_col in test_df.columns and feature_cols:
        X_test = test_df[feature_cols]
        y_true = test_df[target_col]
        
        # Load preprocessor
        # SECURITY: unpickling can execute arbitrary code -- only load files
        # from a trusted source.
        with open(preprocessor_file, 'rb') as f:
            preprocessor = pickle.load(f)
        
        # Transform features
        X_test_transformed = preprocessor.transform(X_test)
        
        # Every other pickle in the models folder is treated as a model.
        # Sorting makes "the first 3" the same files on every run.
        model_files = sorted(
            p for p in MODELS_PATH.glob("*.pkl") if p.name != "preprocessor.pkl"
        )
        
        print(f"\nFound {len(model_files)} model files to test")
        
        recalculated_metrics = {}
        
        for model_file in model_files[:3]:  # Test first 3 models
            model_name = model_file.stem
            print(f"\nüß™ Testing {model_name}...")
            
            try:
                # Load model (same pickle caveat as above)
                with open(model_file, 'rb') as f:
                    model = pickle.load(f)
                
                # Make predictions
                y_pred = model.predict(X_test_transformed)
                
                # Calculate metrics
                r2 = r2_score(y_true, y_pred)
                mae = mean_absolute_error(y_true, y_pred)
                rmse = np.sqrt(mean_squared_error(y_true, y_pred))
                
                # Report each metric and remember whether all of them are sane
                all_valid = True
                for metric_label, metric_value in (("R¬≤", r2), ("MAE", mae), ("RMSE", rmse)):
                    if _metric_out_of_range(metric_value):
                        all_valid = False
                        print(f"  ‚ö†Ô∏è  {metric_label} problematic: {metric_value}")
                    else:
                        print(f"  ‚úÖ {metric_label}: {metric_value:.4f}")
                
                # Store only when every metric is reasonable; a NaN/inf MAE or
                # RMSE would otherwise end up in the saved JSON.
                if all_valid:
                    recalculated_metrics[model_name] = {
                        'r2': float(r2),
                        'mae': float(mae),
                        'rmse': float(rmse)
                    }
                    
            except Exception as e:
                # Best effort: a pickle that is not a usable model is skipped
                print(f"  ‚ùå Error testing {model_name}: {e}")
        
        # Display recalculated metrics
        if recalculated_metrics:
            print("\n" + "="*60)
            print("RECALCULATED METRICS (VALID MODELS)")
            print("="*60)
            
            recalc_df = pd.DataFrame(recalculated_metrics).T
            recalc_df = recalc_df.sort_values('r2', ascending=False)
            display(recalc_df)
            
            # Save recalculated metrics
            recalc_path = MODELS_PATH / "recalculated_metrics.json"
            with open(recalc_path, 'w') as f:
                json.dump(recalculated_metrics, f, indent=2)
            
            print(f"\n‚úÖ Recalculated metrics saved to: {recalc_path}")
        else:
            print("\n‚ùå No models produced valid metrics")
    else:
        print("‚ùå Required columns not found")
else:
    print("‚ùå Required files not found for recalculation")


In [None]:

# %% [markdown]
# ## 12. Next Steps

# %%
# Banner for the section
print("="*60, "NEXT STEPS & ACTION PLAN", "="*60, sep="\n")

# Static action plan; the text is fixed and is not derived from the results
# computed in earlier cells.
print(
    "\nüö® **CRITICAL ISSUES IDENTIFIED**:",
    "1. Models are producing infinite/NaN metrics",
    "2. This indicates serious problems in data, training, or evaluation",
    "3. The current results cannot be used for business decisions",
    "\nüîß **IMMEDIATE ACTIONS REQUIRED**:",
    "\n1. **Fix Data Issues**:",
    "   - Check the data_preparation.py script",
    "   - Verify Log_TotalClaims calculation",
    "   - Remove extreme outliers",
    "   - Code to run: python src/data_preparation.py --debug",
    "\n2. **Retrain Models**:",
    "   - Start with LinearRegression as baseline",
    "   - Add proper regularization",
    "   - Use smaller dataset for debugging",
    "   - Code to run: python src/modelling/main.py --simple",
    "\n3. **Debug Step-by-Step**:",
    "   - Train one model at a time",
    "   - Print intermediate predictions",
    "   - Verify each calculation",
    "\nüìä **VALIDATION CHECKLIST**:",
    "‚úÖ Data has no NaN/Inf values",
    "‚úÖ Target variable is properly transformed",
    "‚úÖ Features are properly scaled",
    "‚úÖ Models converge during training",
    "‚úÖ Predictions are reasonable (not extreme)",
    "‚úÖ Metrics are calculated correctly",
    "\nüí° **TROUBLESHOOTING TIPS**:",
    "‚Ä¢ Start with a small sample of data (1000 rows)",
    "‚Ä¢ Use simple LinearRegression first",
    "‚Ä¢ Print shapes and values at each step",
    "‚Ä¢ Compare predictions with actual values",
    "‚Ä¢ Check for data leakage",
    sep="\n",
)

# Closing banner
print("\n" + "="*60, "END OF DIAGNOSTIC ANALYSIS", "="*60, sep="\n")