# 📊 Model Evaluation and Performance Analysis

**Comprehensive Analysis of Neural Network Performance for Appliance Energy Prediction**

This notebook provides in-depth evaluation of our trained neural network model. You'll learn how to assess model performance, identify strengths and weaknesses, and validate the model for real-world deployment.

## 🎯 What You'll Learn
1. **Load and test** the trained neural network model
2. **Comprehensive evaluation** using multiple metrics
3. **Cross-validation** for robust performance assessment
4. **Feature importance** analysis
5. **Model interpretation** and business insights
6. **Deployment readiness** assessment

## 📊 Evaluation Approach
- **Accuracy Metrics**: R², MSE, MAE, MAPE
- **Visual Analysis**: Prediction plots, residual analysis
- **Statistical Tests**: Distribution analysis, bias detection
- **Business Metrics**: Cost implications, practical accuracy

---

In [None]:
# Import essential libraries for neural network evaluation
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import json
from pathlib import Path
from datetime import datetime

# Machine learning evaluation libraries
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import (
    mean_squared_error, mean_absolute_error, r2_score,
    mean_absolute_percentage_error
)
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import load_model
import joblib

# Statistical libraries
from scipy import stats
from scipy.stats import normaltest, shapiro, pearsonr
import warnings
warnings.filterwarnings('ignore')

# Seed NumPy and TensorFlow up front so repeated runs start from the same
# random state
np.random.seed(42)
tf.random.set_seed(42)

# Show every DataFrame column instead of truncating wide tables
pd.set_option('display.max_columns', None)

# Plot appearance: the style is applied first, then the palette and the
# default figure size are layered on top of it
plt.style.use('default')
sns.set_palette('husl')
plt.rcParams.update({'figure.figsize': (12, 8)})

# Environment banner, emitted as one newline-joined block
setup_banner = (
    "? NEURAL NETWORK MODEL EVALUATION SETUP",
    "=" * 45,
    f"üìä TensorFlow: {tf.__version__}",
    f"üî¢ NumPy: {np.__version__}",
    f"üìà Pandas: {pd.__version__}",
    f"? Matplotlib: {plt.matplotlib.__version__}",
    f"üé® Seaborn: {sns.__version__}",
    "üß† Ready for comprehensive neural network evaluation!",
)
print("\n".join(setup_banner))

## 1. 🔄 Loading Trained Model and Data

Let's load our trained neural network and prepare the data for comprehensive evaluation.

In [None]:
# Load trained neural network model and data
print("? LOADING TRAINED MODEL & DATA")
print("=" * 35)

# Define paths
# Both directories are relative, so they resolve against the kernel's current
# working directory.
models_dir = Path('../models')
processed_dir = Path('../data/processed')

# Load the trained neural network model
# NOTE(review): the .h5 suffix suggests a Keras HDF5 checkpoint - confirm the
# training notebook saves in that format.
model_path = models_dir / 'appliance_energy_model.h5'

try:
    # `model` becomes a notebook-level global; the prediction cell below uses it.
    model = load_model(model_path)
    print(f"‚úÖ Neural network model loaded successfully")
    print(f"üìÅ Model path: {model_path}")
    
    # Display model architecture summary
    # These prints run inside the try, so a failure while reading any of the
    # model attributes is also reported as a load error by the handler below.
    print(f"\nüß† Model Architecture:")
    print(f"   üìä Total parameters: {model.count_params():,}")
    print(f"   üèóÔ∏è  Layers: {len(model.layers)}")
    print(f"   üì• Input shape: {model.input_shape}")
    print(f"   üì§ Output shape: {model.output_shape}")
    
except Exception as e:
    # Print guidance for the reader, then re-raise so the cell stops here.
    print(f"‚ùå Error loading model: {e}")
    print("üìù Please run the neural network training notebook first!")
    raise

# Load model metadata
# The metadata file is optional: when it is missing, unreadable, or not a JSON
# object, fall back to an empty dict so the rest of the notebook still runs.
try:
    with open(models_dir / 'model_metadata.json', 'r', encoding='utf-8') as f:
        metadata = json.load(f)
    if not isinstance(metadata, dict):
        raise ValueError(
            f"expected a JSON object, got {type(metadata).__name__}"
        )
except (OSError, ValueError) as e:
    # ValueError also covers json.JSONDecodeError (its subclass).
    print(f"‚ö†Ô∏è  Metadata not found: {e}")
    metadata = {}
else:
    print(f"‚úÖ Model metadata loaded")

    # Use .get() so a file that lacks one of these keys still prints what it
    # has. Previously a missing key raised KeyError inside the try, which was
    # reported as "Metadata not found" and discarded the loaded metadata.
    print(f"\n? Training Information:")
    print(f"   üìÖ Training date: {metadata.get('training_date', 'n/a')}")
    print(f"   ‚è±Ô∏è  Training duration: {metadata.get('training_duration', 'n/a')}")
    print(f"   üîÑ Epochs trained: {metadata.get('epochs_trained', 'n/a')}")
    print(f"   üì¶ Batch size: {metadata.get('batch_size', 'n/a')}")

# Load test data
# Reads the scaled feature matrices and the target vectors for all three
# splits from `processed_dir`. Any failure is reported and re-raised.
try:
    # Features stay as DataFrames; targets are flattened to 1-D arrays.
    # NOTE(review): .values.ravel() flattens every column of the target CSV -
    # presumably each y_*.csv holds a single column; verify against the
    # preprocessing notebook.
    X_test = pd.read_csv(processed_dir / 'X_test_scaled.csv')
    y_test = pd.read_csv(processed_dir / 'y_test.csv').values.ravel()
    
    # Also load train and validation for comprehensive analysis
    X_train = pd.read_csv(processed_dir / 'X_train_scaled.csv')
    y_train = pd.read_csv(processed_dir / 'y_train.csv').values.ravel()
    X_val = pd.read_csv(processed_dir / 'X_val_scaled.csv')
    y_val = pd.read_csv(processed_dir / 'y_val.csv').values.ravel()
    
    # The success message mentions only the test split, although train and
    # validation data were loaded above as well.
    print(f"‚úÖ Test data loaded successfully")
    print(f"üìä Test set shape: {X_test.shape}")
    # NOTE(review): the unit printed here is kWh/month, while later cells
    # describe the target as daily consumption - confirm which is correct.
    print(f"üéØ Test target range: {y_test.min():.1f} - {y_test.max():.1f} kWh/month")
    
except Exception as e:
    # Report the problem, then re-raise so the cell stops here.
    print(f"‚ùå Error loading test data: {e}")
    raise

print(f"\nüéØ Ready for comprehensive model evaluation!")

In [None]:
# Generate predictions for comprehensive evaluation
print("? GENERATING PREDICTIONS FOR EVALUATION")
print("=" * 40)

# Score every split with the loaded network
print("üéØ Making predictions on all datasets...")

# The generator is consumed in train -> val -> test order, i.e. the same
# order as three sequential predict() calls; each result is flattened to 1-D.
y_train_pred, y_val_pred, y_test_pred = (
    model.predict(split_features, verbose=0).flatten()
    for split_features in (X_train, X_val, X_test)
)

# How many predictions each split produced
print(f"‚úÖ Predictions generated:")
print(f"   üèãÔ∏è Training predictions: {len(y_train_pred)}")
print(f"   ‚úÖ Validation predictions: {len(y_val_pred)}")
print(f"   üß™ Test predictions: {len(y_test_pred)}")

# Min-max span of actual vs. predicted values, one line per split
print(f"\nüìä Prediction Statistics:")
for name, y_true, y_pred in zip(
    ('Train', 'Val', 'Test'),
    (y_train, y_val, y_test),
    (y_train_pred, y_val_pred, y_test_pred),
):
    actual_span = f"{y_true.min():.1f}-{y_true.max():.1f}"
    predicted_span = f"{y_pred.min():.1f}-{y_pred.max():.1f}"
    print(f"   {name} - Actual: {actual_span}, Predicted: {predicted_span}")
# Check for any prediction anomalies
def check_prediction_anomalies(y_pred, dataset_name, high_threshold=1000):
    """Report common sanity problems in a vector of predictions.

    Looks for NaN values, infinite values, negative values and values above
    ``high_threshold``, prints a one-line summary for the dataset, and returns
    the findings so callers can act on them programmatically.

    Args:
        y_pred: Predictions as any array-like (list, Series, ndarray).
        dataset_name: Label used in the printed summary.
        high_threshold: Values strictly above this are counted as unusually
            high. Defaults to 1000, the value that used to be hard-coded.

    Returns:
        list[str]: One description per detected issue; empty if none.
    """
    # Coerce once so plain Python lists work with the vectorised checks below.
    y_pred = np.asarray(y_pred, dtype=float)
    issues = []

    # Check for NaN or infinite values
    if np.isnan(y_pred).any():
        issues.append("Contains NaN values")
    if np.isinf(y_pred).any():
        issues.append("Contains infinite values")

    # Check for negative predictions (energy consumption should be positive)
    negative_count = int((y_pred < 0).sum())
    if negative_count > 0:
        issues.append(f"{negative_count} negative predictions")

    # Check for unrealistic high values
    high_count = int((y_pred > high_threshold).sum())
    if high_count > 0:
        issues.append(f"{high_count} unusually high predictions (>{high_threshold} kWh)")

    if issues:
        print(f"‚ö†Ô∏è  {dataset_name} prediction issues: {', '.join(issues)}")
    else:
        print(f"‚úÖ {dataset_name} predictions: No anomalies detected")

    return issues

# Run the sanity check on each split's predictions, in train/val/test order
for split_predictions, split_label in (
    (y_train_pred, "Training"),
    (y_val_pred, "Validation"),
    (y_test_pred, "Test"),
):
    check_prediction_anomalies(split_predictions, split_label)

print(f"\nüéØ Prediction generation completed successfully!")

## 2. 📊 Comprehensive Performance Metrics

Let's calculate and analyze multiple performance metrics to get a complete picture of model performance.

In [None]:
# Comprehensive performance metrics calculation
print("üìä COMPREHENSIVE PERFORMANCE ANALYSIS")
print("=" * 40)

def calculate_comprehensive_metrics(y_true, y_pred, dataset_name):
    """Compute regression metrics comparing predictions with actual values.

    Args:
        y_true: Actual target values (1-D array-like).
        y_pred: Predicted values, same length as ``y_true``.
        dataset_name: Label of the split being scored. Not used in the
            computation; kept so existing call sites continue to work.

    Returns:
        dict: Metric name -> value. Contains MSE, RMSE, MAE, the R-squared
        score, MAPE (percent), the maximum absolute error, the standard
        deviation of the errors, the Pearson correlation with its p-value,
        and the percentage of samples whose relative error is within 5%,
        10% and 20%.
    """
    # Coerce once so lists and Series behave like arrays below.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Basic regression metrics
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    errors = y_true - y_pred
    abs_errors = np.abs(errors)

    # Relative error with zero-division protection. The denominator uses
    # |y_true| for MAPE *and* for the tolerance bands; previously the bands
    # used the signed value, so any non-positive target was divided by 1e-8.
    relative_errors = abs_errors / np.maximum(np.abs(y_true), 1e-8)
    mape = np.mean(relative_errors) * 100

    # Additional metrics
    max_error = np.max(abs_errors)
    std_error = np.std(errors)

    # Correlation coefficient
    correlation, p_value = pearsonr(y_true, y_pred)

    def _share_within(tolerance):
        """Percentage of samples whose relative error is <= tolerance."""
        return np.mean(relative_errors <= tolerance) * 100

    metrics = {
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'R¬≤': r2,
        'MAPE': mape,
        'Max_Error': max_error,
        'Std_Error': std_error,
        'Correlation': correlation,
        'Corr_P_Value': p_value,
        'Within_5%': _share_within(0.05),
        'Within_10%': _share_within(0.10),
        'Within_20%': _share_within(0.20)
    }

    return metrics

# Score each split with the shared metrics helper
train_metrics = calculate_comprehensive_metrics(y_train, y_train_pred, "Training")
val_metrics = calculate_comprehensive_metrics(y_val, y_val_pred, "Validation")
test_metrics = calculate_comprehensive_metrics(y_test, y_test_pred, "Test")

# One column per split, one row per metric, rounded for display
metrics_df = pd.DataFrame(
    {
        'Training': train_metrics,
        'Validation': val_metrics,
        'Testing': test_metrics,
    }
).round(4)

print("üìã COMPREHENSIVE PERFORMANCE METRICS:")
print("=" * 40)
display(metrics_df)

# ---- Narrative analysis of the test-set numbers ----
print(f"\nüéØ PERFORMANCE ANALYSIS:")
print("=" * 25)

# R-squared expressed as a percentage
test_accuracy = test_metrics['R¬≤'] * 100
print(f"üìä Overall Test Accuracy: {test_accuracy:.1f}% (R¬≤ = {test_metrics['R¬≤']:.4f})")

# Absolute and relative error figures
print(f"? Prediction Errors on Test Set:")
print(f"   RMSE: {test_metrics['RMSE']:.2f} kWh/month")
print(f"   MAE: {test_metrics['MAE']:.2f} kWh/month")
print(f"   MAPE: {test_metrics['MAPE']:.1f}%")
print(f"   Max Error: {test_metrics['Max_Error']:.2f} kWh/month")

# Share of predictions inside each relative-error band
print(f"\nüéØ Prediction Tolerance Analysis:")
for band in (5, 10, 20):
    print(f"   Within {band}% error: {test_metrics[f'Within_{band}%']:.1f}% of predictions")

# Train/test gap as an overfitting signal
r2_diff = train_metrics['R¬≤'] - test_metrics['R¬≤']
mae_diff = test_metrics['MAE'] - train_metrics['MAE']

print(f"\nüßê Overfitting Assessment:")
print(f"   R¬≤ difference (Train - Test): {r2_diff:.4f}")
print(f"   MAE difference (Test - Train): {mae_diff:.4f}")

overfit_verdict = (
    "   ‚ö†Ô∏è  Significant overfitting detected" if r2_diff > 0.1
    else "   üü° Minor overfitting detected" if r2_diff > 0.05
    else "   ‚úÖ No significant overfitting"
)
print(overfit_verdict)

# Map the test R-squared onto a coarse quality label: the first cutoff that
# is met wins, and anything below the last cutoff is "Poor"
quality = next(
    (
        label
        for cutoff, label in ((0.9, "Excellent"), (0.8, "Good"), (0.7, "Fair"))
        if test_metrics['R¬≤'] >= cutoff
    ),
    "Poor",
)

print(f"\nüèÜ Overall Model Quality: {quality}")
print(f"üìä Model meets production requirements: {'‚úÖ' if test_metrics['R¬≤'] >= 0.8 else '‚ùå'}")

## 3. 📈 Visual Performance Analysis

Let's create comprehensive visualizations to understand model performance across different dimensions.

In [None]:
# Create comprehensive performance visualization dashboard
#
# Builds a 3x2 Plotly figure: predicted-vs-actual scatter, residual histogram,
# residuals-vs-predicted scatter, MAE per appliance type, MAE per season, and
# a count of predictions per absolute-error range.
#
# NOTE(review): `y_target`, `y_pred_all`, `residuals` and `df` are not assigned
# anywhere in the visible part of this file, so this cell raises NameError
# unless they are defined elsewhere. Confirm where they should come from.
fig = make_subplots(
    rows=3, cols=2,
    subplot_titles=[
        'üéØ Predicted vs Actual', 'üìä Residual Distribution',
        'üìà Residuals vs Predicted', '‚ö° Performance by Appliance',
        'üå°Ô∏è Performance by Season', 'üí∞ Error Distribution'
    ],
    specs=[[{"secondary_y": False}, {"secondary_y": False}],
           [{"secondary_y": False}, {"secondary_y": False}],
           [{"secondary_y": False}, {"secondary_y": False}]]
)

# Plot 1: Predicted vs Actual
fig.add_trace(
    go.Scatter(
        x=y_target, y=y_pred_all,
        mode='markers',
        name='Predictions',
        marker=dict(color='blue', size=4, opacity=0.6)
    ),
    row=1, col=1
)

# Perfect prediction line (y = x across the range of actual values)
min_val, max_val = y_target.min(), y_target.max()
fig.add_trace(
    go.Scatter(
        x=[min_val, max_val], y=[min_val, max_val],
        mode='lines',
        name='Perfect Prediction',
        line=dict(color='red', dash='dash')
    ),
    row=1, col=1
)

# Plot 2: Residual Distribution
fig.add_trace(
    go.Histogram(
        x=residuals,
        nbinsx=30,
        name='Residuals',
        marker_color='lightblue'
    ),
    row=1, col=2
)

# Plot 3: Residuals vs Predicted
fig.add_trace(
    go.Scatter(
        x=y_pred_all, y=residuals,
        mode='markers',
        name='Residual Pattern',
        marker=dict(color='green', size=4, opacity=0.6)
    ),
    row=2, col=1
)

# Zero line
fig.add_trace(
    go.Scatter(
        x=[y_pred_all.min(), y_pred_all.max()], y=[0, 0],
        mode='lines',
        name='Zero Error',
        line=dict(color='red', dash='dash')
    ),
    row=2, col=1
)

# Plot 4: Performance by Appliance
# Sorted ascending, so the first entry has the lowest MAE and the last entry
# the highest.
# NOTE(review): `y_pred_all[x.index]` uses the DataFrame index labels as
# positions into the prediction array - presumably `df` has a default
# 0..n-1 index aligned with the predictions; verify.
appliance_mae = df.groupby('appliance_type').apply(
    lambda x: mean_absolute_error(
        x['daily_consumption_kwh'],
        y_pred_all[x.index]
    )
).sort_values()

fig.add_trace(
    go.Bar(
        x=appliance_mae.index,
        y=appliance_mae.values,
        name='MAE by Appliance',
        marker_color='orange'
    ),
    row=2, col=2
)

# Plot 5: Performance by Season
season_mae = df.groupby('season').apply(
    lambda x: mean_absolute_error(
        x['daily_consumption_kwh'],
        y_pred_all[x.index]
    )
)

fig.add_trace(
    go.Bar(
        x=season_mae.index,
        y=season_mae.values,
        name='MAE by Season',
        marker_color='lightcoral'
    ),
    row=3, col=1
)

# Plot 6: Error Distribution by Range
# Absolute residuals are computed once and bucketed into four ranges with
# inclusive upper bounds.
abs_residuals = np.abs(residuals)
error_ranges = ['0-0.5', '0.5-1.0', '1.0-2.0', '2.0+']
error_counts = [
    np.sum(abs_residuals <= 0.5),
    np.sum((abs_residuals > 0.5) & (abs_residuals <= 1.0)),
    np.sum((abs_residuals > 1.0) & (abs_residuals <= 2.0)),
    np.sum(abs_residuals > 2.0)
]

fig.add_trace(
    go.Bar(
        x=error_ranges,
        y=error_counts,
        name='Error Distribution',
        marker_color='lightgreen'
    ),
    row=3, col=2
)

# Update layout
fig.update_layout(
    height=1200,
    showlegend=False,
    title_text="üìä Comprehensive Model Performance Dashboard",
    title_x=0.5
)

# Update axes labels
fig.update_xaxes(title_text="Actual Consumption (kWh)", row=1, col=1)
fig.update_yaxes(title_text="Predicted Consumption (kWh)", row=1, col=1)
fig.update_xaxes(title_text="Residuals (kWh)", row=1, col=2)
fig.update_yaxes(title_text="Frequency", row=1, col=2)
fig.update_xaxes(title_text="Predicted Consumption (kWh)", row=2, col=1)
fig.update_yaxes(title_text="Residuals (kWh)", row=2, col=1)
fig.update_xaxes(title_text="Appliance Type", row=2, col=2)
fig.update_yaxes(title_text="MAE (kWh)", row=2, col=2)
fig.update_xaxes(title_text="Season", row=3, col=1)
fig.update_yaxes(title_text="MAE (kWh)", row=3, col=1)
fig.update_xaxes(title_text="Error Range (kWh)", row=3, col=2)
fig.update_yaxes(title_text="Count", row=3, col=2)

fig.show()

print("üìä VISUAL ANALYSIS INSIGHTS:")
print("-" * 30)
print(f"üéØ Best performing appliance: {appliance_mae.index[0]} (MAE: {appliance_mae.iloc[0]:.3f} kWh)")
print(f"‚ö†Ô∏è Challenging appliance: {appliance_mae.index[-1]} (MAE: {appliance_mae.iloc[-1]:.3f} kWh)")
print(f"üå°Ô∏è Best season: {season_mae.idxmin()} (MAE: {season_mae.min():.3f} kWh)")
# Report the measured share instead of asserting "most": the first bucket is
# not guaranteed to hold a majority, and its bound is inclusive (<= 0.5).
small_error_share = error_counts[0] / len(residuals) * 100
print(f"üìà {error_counts[0]}/{len(residuals)} predictions ({small_error_share:.1f}%) have <=0.5 kWh error")

## 4. 💼 Business Impact Analysis

Let's analyze the practical business implications of our model's accuracy.

In [None]:
# Business impact analysis
#
# Converts prediction errors into monthly cost errors and prints cost,
# per-appliance and reliability summaries.
#
# NOTE(review): `residuals`, `y_target`, `y_pred_all` and `df` are not assigned
# anywhere in the visible part of this file; confirm where they are defined.
print("üíº BUSINESS IMPACT ANALYSIS")
print("=" * 30)

# Cost implications
electricity_rate = 6.0  # INR per kWh (average Indian rate)
days_per_month = 30

# Calculate monthly cost errors
# |error| * rate * days treats each residual as a per-day kWh error.
monthly_cost_errors = np.abs(residuals) * electricity_rate * days_per_month
avg_monthly_cost_error = np.mean(monthly_cost_errors)
max_monthly_cost_error = np.max(monthly_cost_errors)

# Calculate total consumption and costs
# These totals are computed but not printed in this cell.
total_actual_monthly = y_target.sum() * days_per_month
total_predicted_monthly = y_pred_all.sum() * days_per_month
total_actual_cost = total_actual_monthly * electricity_rate
total_predicted_cost = total_predicted_monthly * electricity_rate

print("üí∞ COST IMPACT ANALYSIS:")
print("-" * 25)
print(f"   üìä Average monthly cost error: ‚Çπ{avg_monthly_cost_error:.2f} per appliance")
print(f"   üìà Maximum monthly cost error: ‚Çπ{max_monthly_cost_error:.2f} per appliance")
print(f"   üìã Median monthly cost error: ‚Çπ{np.median(monthly_cost_errors):.2f} per appliance")

# Error by appliance type
print("\n‚ö° ERROR BY APPLIANCE TYPE:")
print("-" * 30)
for appliance in df['appliance_type'].unique():
    # Boolean mask selecting the rows of this appliance type
    mask = df['appliance_type'] == appliance
    appliance_errors = monthly_cost_errors[mask]
    avg_error = np.mean(appliance_errors)
    print(f"   üìä {appliance}: ‚Çπ{avg_error:.2f}/month average error")

# Model reliability assessment
print("\nüõ°Ô∏è MODEL RELIABILITY ASSESSMENT:")
print("-" * 35)

# Percentage of predictions whose absolute error is within 0.5 / 1.0 kWh
reliable_predictions = np.sum(np.abs(residuals) <= 0.5) / len(residuals) * 100
acceptable_predictions = np.sum(np.abs(residuals) <= 1.0) / len(residuals) * 100

print(f"   ‚úÖ Highly reliable predictions (‚â§0.5 kWh error): {reliable_predictions:.1f}%")
print(f"   üëç Acceptable predictions (‚â§1.0 kWh error): {acceptable_predictions:.1f}%")

# Business recommendations
print("\nüìã BUSINESS RECOMMENDATIONS:")
print("-" * 30)
if reliable_predictions > 70:
    print("   üåü Excellent reliability - ready for production deployment")
    print("   ‚úÖ Can be used for energy planning and cost estimation")
elif reliable_predictions > 50:
    print("   üëç Good reliability - suitable for most applications")
    print("   ‚ö†Ô∏è Consider confidence intervals for critical decisions")
else:
    print("   ‚ùå Limited reliability - needs improvement before deployment")
    print("   üîß Consider collecting more data or feature engineering")

# The figure below is the average prediction *error* expressed in money, not
# a saving, so the message states it as an expected deviation.
print(f"\nüí° For household energy management, expect cost estimates to be off")
print(f"   by about ‚Çπ{avg_monthly_cost_error:.0f}/month per appliance on average.")

## 5. 📝 Model Evaluation Summary and Recommendations

Let's create a comprehensive summary of our model evaluation findings.

In [None]:
# Create comprehensive evaluation summary
#
# Prints an executive summary, a list of strengths, a five-point deployment
# readiness score and final recommendations, then saves the key figures.
print("üìù COMPREHENSIVE MODEL EVALUATION SUMMARY")
print("=" * 50)

# Headline figures used throughout this cell. They were previously referenced
# without ever being assigned; they are now taken from the test-set metrics
# and the quality label computed in the metrics section.
# NOTE(review): `df` and `residuals` are still not assigned anywhere in the
# visible part of this file - confirm where they are defined.
# NOTE(review): the thresholds below (e.g. 0.1 / 0.2 kWh bias) look like they
# assume daily kWh, while the test metrics are labelled kWh/month - confirm.
r2 = test_metrics['R¬≤']
mae = test_metrics['MAE']
rmse = test_metrics['RMSE']
mape = test_metrics['MAPE']
within_20_percent = test_metrics['Within_20%']
performance_level = quality
mean_residual = np.mean(residuals)

# Executive Summary
print("üéØ EXECUTIVE SUMMARY:")
print("-" * 20)
print(f"   üß† Model Type: Neural Network (TensorFlow/Keras)")
print(f"   üìä Dataset Size: {len(df):,} appliances from {df['household_id'].nunique()} households")
print(f"   üéØ Target Variable: Daily Energy Consumption (kWh)")
print(f"   üìà Overall Performance: {performance_level}")
print(f"   üìä Accuracy (R¬≤): {r2:.3f} ({r2*100:.1f}% variance explained)")
print(f"   üí∞ Average Cost Error: ‚Çπ{avg_monthly_cost_error:.2f}/month per appliance")

# Strengths
print("\nüí™ MODEL STRENGTHS:")
print("-" * 20)
strengths = []
if r2 > 0.7:
    strengths.append("High predictive accuracy")
if within_20_percent > 80:
    strengths.append("Most predictions within 20% error")
if abs(mean_residual) < 0.1:
    strengths.append("Low systematic bias")

for i, strength in enumerate(strengths, 1):
    print(f"   {i}. ‚úÖ {strength}")
if not strengths:
    # Avoid an empty section when no criterion is met.
    print("   (none of the strength criteria were met)")

# Deployment Readiness
print("\nüöÄ DEPLOYMENT READINESS:")
print("-" * 25)

# Scoring criteria: one point per satisfied check
deployment_checks = [
    r2 > 0.7,
    within_20_percent > 75,
    abs(mean_residual) < 0.2,
    avg_monthly_cost_error < 100,
    reliable_predictions > 60,
]
deployment_score = sum(bool(check) for check in deployment_checks)
max_score = len(deployment_checks)

deployment_percentage = (deployment_score / max_score) * 100

print(f"   üìä Deployment Score: {deployment_score}/{max_score} ({deployment_percentage:.0f}%)")

if deployment_percentage >= 80:
    readiness = "üü¢ Ready for Production"
    recommendation = "Deploy with confidence"
elif deployment_percentage >= 60:
    readiness = "üü° Ready with Monitoring"
    recommendation = "Deploy with careful monitoring"
else:
    readiness = "üî¥ Needs Improvement"
    recommendation = "Improve before deployment"

print(f"   üéØ Status: {readiness}")
print(f"   üí° Recommendation: {recommendation}")

# Final Recommendations
print("\nüìã FINAL RECOMMENDATIONS:")
print("-" * 30)
print("   1. üì± Integrate model into web application for user predictions")
print("   2. üîÑ Implement model monitoring and performance tracking")
print("   3. üìä Collect user feedback to improve future versions")
print("   4. ‚öñÔ∏è Add confidence intervals for critical business decisions")
print("   5. üîß Consider ensemble methods for improved robustness")

# Save evaluation results
evaluation_results = {
    'r2_score': r2,
    'mae': mae,
    'rmse': rmse,
    'mape': mape,
    'within_20_percent': within_20_percent,
    'avg_monthly_cost_error': avg_monthly_cost_error,
    'deployment_score': deployment_score,
    'deployment_percentage': deployment_percentage,
    'best_appliance': appliance_mae.index[0],
    'worst_appliance': appliance_mae.index[-1]
}

# Reuse the models directory defined in the loading cell rather than
# repeating the literal path.
joblib.dump(evaluation_results, models_dir / 'evaluation_results.pkl')
print("\nüíæ Evaluation results saved for future reference!")

print("\n" + "="*50)
print("üéâ MODEL EVALUATION COMPLETE!")
# The closing line now follows the readiness verdict. Previously it always
# read "ready for <recommendation>", which produced "ready for improve before
# deployment" and still told the reader to deploy.
if deployment_percentage >= 60:
    print(f"‚úÖ Your neural network is ready: {recommendation.lower()}!")
    print("üöÄ Next step: Deploy in the web application!")
else:
    print(f"‚ùå Your neural network is not ready yet: {recommendation.lower()}!")

# 📊 Model Evaluation and Performance Analysis

**Comprehensive Analysis of Neural Network Performance for Appliance Energy Prediction**

This notebook provides in-depth evaluation of our trained neural network model. You'll learn how to assess model performance, identify strengths and weaknesses, and validate the model for real-world deployment.

## 🎯 What You'll Learn
1. **Load and test** the trained neural network model
2. **Comprehensive evaluation** using multiple metrics
3. **Cross-validation** for robust performance assessment
4. **Feature importance** analysis
5. **Model interpretation** and business insights
6. **Deployment readiness** assessment

## 📊 Evaluation Approach
- **Accuracy Metrics**: R², MSE, MAE, MAPE
- **Visual Analysis**: Prediction plots, residual analysis
- **Statistical Tests**: Distribution analysis, bias detection
- **Business Metrics**: Cost implications, practical accuracy

---

In [None]:
# Import all necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Machine learning libraries
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import (
    mean_squared_error, mean_absolute_error, r2_score,
    mean_absolute_percentage_error
)
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import load_model
import joblib

# Statistical libraries
from scipy import stats
from scipy.stats import normaltest, shapiro
import warnings
warnings.filterwarnings('ignore')

# Reproducibility: fix the NumPy and TensorFlow seeds
# NOTE(review): this cell repeats the setup performed by the first code cell
# of the file - confirm whether both are needed.
np.random.seed(42)
tf.random.set_seed(42)

# Plot appearance (style first, then palette and default figure size)
plt.style.use('default')
sns.set_palette('husl')
plt.rcParams.update({'figure.figsize': (12, 8)})

# Show all DataFrame columns
pd.set_option('display.max_columns', None)

# Completion banner, emitted as one newline-joined block
completion_banner = (
    "üìä MODEL EVALUATION SETUP COMPLETE!",
    "=" * 40,
    f"üß† TensorFlow Version: {tf.__version__}",
    f"üìà Ready to evaluate neural network performance!",
)
print("\n".join(completion_banner))