In [1]:
# Model Comparison and Evaluation
# Setup cell: brings in the libraries used by the evaluation cells and prints
# a confirmation line once every import has succeeded.
# Data handling
import pandas as pd
import numpy as np
# Plotting
import matplotlib.pyplot as plt
# NOTE(review): seaborn is not referenced in the cells visible here - confirm it is still needed.
import seaborn as sns
# Regression metrics used to score each model's predictions
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# NOTE(review): joblib, tensorflow and load_model are not referenced in the
# cells visible here (predictions are read from CSV files) - presumably kept
# for loading saved models; confirm before removing.
import joblib
import tensorflow as tf
from tensorflow.keras.models import load_model

# Printed only if all imports above succeeded.
print("Evaluation setup complete!")

Evaluation setup complete!


In [2]:
# Load the per-model prediction files that feed the comparison.
# The Dense model is the baseline and must be present. The LSTM, CNN and GRU
# files are optional: they are picked up once team members export them and
# are represented as None until then.

# Dense Model (required baseline)
try:
    dense_results = pd.read_csv('dense_model_predictions.csv')
except FileNotFoundError as exc:
    # Same exception type as before, but with a message that says what to do.
    raise FileNotFoundError(
        "'dense_model_predictions.csv' was not found in the working directory. "
        "Export the Dense model predictions (with 'actual' and 'predicted' "
        "columns) before running this notebook."
    ) from exc
print("Dense model predictions loaded")

# Optional models. Only a missing file means "not available yet"; any other
# failure (e.g. a malformed CSV) is a real problem and is allowed to surface
# instead of being silently reported as a missing model.
try:
    lstm_results = pd.read_csv('lstm_model_predictions.csv')
    print("LSTM model predictions loaded")
except FileNotFoundError:
    lstm_results = None
    print("LSTM predictions not available yet")

try:
    cnn_results = pd.read_csv('cnn_model_predictions.csv')
    print("CNN model predictions loaded")
except FileNotFoundError:
    cnn_results = None
    print(" CNN predictions not available yet")

try:
    gru_results = pd.read_csv('gru_model_predictions.csv')
    print(" GRU model predictions loaded")
except FileNotFoundError:
    gru_results = None
    print(" GRU predictions not available yet")

print(f"\n Dense model samples: {len(dense_results)}")

FileNotFoundError: [Errno 2] No such file or directory: 'dense_model_predictions.csv'

In [None]:
# Score one model's predictions against the ground truth
def calculate_metrics(actual, predicted, model_name):
    """Return the regression scores for a single model as one table row.

    Args:
        actual: Ground-truth target values.
        predicted: Model predictions, aligned element-wise with ``actual``.
        model_name: Label stored under the ``'Model'`` key.

    Returns:
        dict with keys ``'Model'``, ``'MAE'``, ``'RMSE'`` and ``'R²'``; the
        three scores are rounded to 4 decimal places.
    """
    scores = {
        'MAE': mean_absolute_error(actual, predicted),
        # RMSE is derived as the square root of the mean squared error.
        'RMSE': np.sqrt(mean_squared_error(actual, predicted)),
        'R²': r2_score(actual, predicted),
    }

    row = {'Model': model_name}
    for metric, value in scores.items():
        row[metric] = round(value, 4)
    return row

# Score every model whose predictions were loaded.
# Dense is always scored; the other architectures are scored only when their
# results frame is present (not None). Row order: Dense, LSTM, CNN, GRU.
optional_models = [
    ('LSTM', lstm_results),
    ('CNN', cnn_results),
    ('GRU', gru_results),
]

metrics_data = [
    calculate_metrics(dense_results['actual'], dense_results['predicted'], 'Dense')
]
metrics_data.extend(
    calculate_metrics(frame['actual'], frame['predicted'], label)
    for label, frame in optional_models
    if frame is not None
)

# One row per scored model, with the columns produced by calculate_metrics.
comparison_df = pd.DataFrame(metrics_data)
print("MODEL COMPARISON METRICS:")
print(comparison_df)

In [None]:
# Side-by-side bar charts: one panel per metric, one bar per scored model.
plt.figure(figsize=(15, 5))

# Fixed palette, truncated to the number of models actually present.
bar_colors = ['blue', 'orange', 'green', 'red'][:len(comparison_df)]

# (column in comparison_df, panel title, y-axis label)
panels = [
    ('MAE', 'MAE Comparison (Lower is Better)', 'Mean Absolute Error'),
    ('RMSE', 'RMSE Comparison (Lower is Better)', 'Root Mean Squared Error'),
    ('R²', 'R² Comparison (Higher is Better)', 'R-squared Score'),
]

for position, (metric, heading, y_label) in enumerate(panels, start=1):
    plt.subplot(1, 3, position)
    plt.bar(comparison_df['Model'], comparison_df[metric], color=bar_colors)
    plt.title(heading, fontweight='bold')
    plt.ylabel(y_label)
    plt.xticks(rotation=45)
    plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Plot actual vs predicted values (top panel) and the prediction-error
# distribution (bottom panel) for every model whose predictions were loaded.
N_SAMPLES_SHOWN = 30  # number of leading test samples drawn in the line plot

# (label, results frame, line marker) for each loaded model. Dense is always
# present; the others are included only when their frame is not None. Building
# this once keeps the labels, lines and box plots in the same order.
loaded_models = [('Dense', dense_results, 's')]
for label, frame, marker in (('LSTM', lstm_results, '^'),
                             ('CNN', cnn_results, 'd'),
                             ('GRU', gru_results, 'v')):
    if frame is not None:
        loaded_models.append((label, frame, marker))

plt.figure(figsize=(15, 10))

# Plot 1: All models predictions
plt.subplot(2, 1, 1)

# Plot actual values (taken from the Dense results frame)
actual_head = dense_results['actual'].values[:N_SAMPLES_SHOWN]
sample_range = range(len(actual_head))
plt.plot(sample_range, actual_head,
         label='Actual', color='black', linewidth=3, marker='o')

# Plot model predictions
for label, frame, marker in loaded_models:
    predicted_head = frame['predicted'].values[:N_SAMPLES_SHOWN]
    # Each series is drawn against its own length so that a model with fewer
    # rows than the Dense frame does not raise an x/y shape-mismatch error.
    plt.plot(range(len(predicted_head)), predicted_head,
             label=label, alpha=0.8, marker=marker)

plt.title(f'Model Predictions Comparison (First {N_SAMPLES_SHOWN} Samples)',
          fontsize=14, fontweight='bold')
plt.xlabel('Test Samples')
plt.ylabel('Capacity')
plt.legend()
plt.grid(True, alpha=0.3)

# Plot 2: Error distribution
plt.subplot(2, 1, 2)

# Signed errors (predicted - actual), one series per loaded model
model_names = [label for label, _, _ in loaded_models]
errors_data = [frame['predicted'] - frame['actual'] for _, frame, _ in loaded_models]

plt.boxplot(errors_data)
# Boxes are placed at positions 1..N by default. Labelling them through
# xticks avoids the `labels=` keyword, which is deprecated in newer
# Matplotlib releases, and works on older releases as well.
plt.xticks(range(1, len(model_names) + 1), model_names)
plt.title('Prediction Error Distribution', fontsize=14, fontweight='bold')
plt.ylabel('Prediction Error (Predicted - Actual)')
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Rank models by performance and report the best one.
# Rankings are computed from comparison_df, whose scores are rounded to
# 4 decimal places.
print("MODEL RANKINGS:")
print("="*50)

# Rank by MAE (primary metric)
mae_ranking = comparison_df.sort_values('MAE')
print("1. By MAE (Primary Metric - Lower is Better):")
for i, (_, row) in enumerate(mae_ranking.iterrows(), 1):
    print(f"   {i}. {row['Model']}: MAE = {row['MAE']}")

print("\n2. By R² (Secondary Metric - Higher is Better):")
r2_ranking = comparison_df.sort_values('R²', ascending=False)
for i, (_, row) in enumerate(r2_ranking.iterrows(), 1):
    print(f"   {i}. {row['Model']}: R² = {row['R²']}")

print("\n3. Overall Performance Summary:")
# "Best" is defined by the primary metric only: the row with the lowest MAE.
best_model_mae = mae_ranking.iloc[0]
print(f" BEST MODEL: {best_model_mae['Model']}")
print(f"   - MAE: {best_model_mae['MAE']}")
print(f"   - RMSE: {best_model_mae['RMSE']}")
print(f"   - R²: {best_model_mae['R²']}")

# Calculate improvement over baseline (Dense); only meaningful when at least
# one other model was scored.
if len(comparison_df) > 1:
    dense_mae = comparison_df.loc[comparison_df['Model'] == 'Dense', 'MAE'].iloc[0]
    best_mae = best_model_mae['MAE']
    if dense_mae == 0:
        # A baseline MAE of exactly 0 (possible after rounding) would make the
        # relative improvement a division by zero.
        print(" Improvement over Dense baseline: n/a (Dense MAE is 0)")
    else:
        improvement = ((dense_mae - best_mae) / dense_mae) * 100
        print(f" Improvement over Dense baseline: {improvement:.1f}%")

In [None]:
# Save comparison results
comparison_df.to_csv('model_comparison_results.csv', index=False)

# The best model is selected by MAE only, so it is not guaranteed to also have
# the highest R². Word the conclusion according to what the table shows.
best_name = best_model_mae['Model']
if best_model_mae['R²'] == comparison_df['R²'].max():
    conclusion = (
        f"The {best_name} model demonstrated the best performance for battery "
        "capacity prediction, achieving the lowest MAE and highest R² score "
        "among all tested architectures."
    )
else:
    r2_leader = comparison_df.loc[comparison_df['R²'].idxmax(), 'Model']
    conclusion = (
        f"The {best_name} model achieved the lowest MAE for battery capacity "
        f"prediction, while the highest R² score was achieved by the "
        f"{r2_leader} model."
    )

# Create detailed report
report = f"""
MODEL COMPARISON REPORT
Generated on: {pd.Timestamp.now()}

BEST PERFORMING MODEL: {best_name}
- MAE: {best_model_mae['MAE']}
- RMSE: {best_model_mae['RMSE']}
- R²: {best_model_mae['R²']}

FULL COMPARISON:
{comparison_df.to_string()}

CONCLUSION:
{conclusion}
"""

# The report contains non-ASCII text ("R²"), so the encoding is set explicitly
# rather than left to the platform default.
with open('model_comparison_report.txt', 'w', encoding='utf-8') as f:
    f.write(report)

print("Comparison results saved!")
print("Files created:")
print("   - model_comparison_results.csv")
print("   - model_comparison_report.txt")
print("\n EVALUATION COMPLETE! Ready for report and presentation!")