In [1]:
import datetime
import json
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from joblib import dump as dump_model
from joblib import load as load_model

from src.utils.data_utils import drop_id
from src.utils.evaluate_utils import evaluate_model, load_trained_models, clear_worse_models
from src.utils.training_utils import prepare_data

# Quick setup
from src.utils.notebook_setup import setup_notebook_environment
dbs, logger = await setup_notebook_environment()

logger.info("=== STARTING MODEL EVALUATION ===")


# LOAD DATA (same as 04)
gold_data_from_db = await dbs.get_gold_data()
gold_data_df = pd.DataFrame(gold_data_from_db)
gold_data_df = drop_id(gold_data_df)

# RECREATE THE SAME SPLIT (important!)
X = gold_data_df.drop('target', axis=1)
y = gold_data_df['target']
X_train, X_test, y_train, y_test = prepare_data(X, y, test_size=0.2, random_state=42)

logger.info(f"Loaded test set: {X_test.shape}")


2025-07-25 18:40:31,372 - api.services.database_service - INFO - Connected to MongoDB database: healthcare
2025-07-25 18:40:31,377 - src.utils.notebook_setup - INFO - Database connected: True
2025-07-25 18:40:31,378 - src.utils.notebook_setup - INFO - Database collections: ['heart_disease_silver', 'heart_disease_bronze', 'heart_disease_gold']
2025-07-25 18:40:31,379 - src.utils.notebook_setup - INFO - Database collections count: 3
2025-07-25 18:40:31,380 - src.utils.notebook_setup - INFO - === STARTING MODEL EVALUATION ===
2025-07-25 18:40:31,416 - src.utils.training_utils - INFO - Training set: (1472, 16), Test set: (368, 16)
2025-07-25 18:40:31,417 - src.utils.notebook_setup - INFO - Loaded test set: (368, 16)


In [2]:
# load the trained models
trained_models = load_trained_models()
logger.info(f"Loaded {len(trained_models)} trained models")

# Fail fast when nothing was loaded. The final cell of this notebook removes the
# tuned model files (see its "Removing worse model" log lines), so a re-run can
# end up here with no models; without this check the failure only surfaces later
# as a KeyError on the missing 'roc_auc' column of an empty results frame.
if not trained_models:
    raise RuntimeError(
        "No trained models were loaded - re-run the training notebook to "
        "regenerate the tuned model files before evaluating."
    )

2025-07-25 18:40:31,428 - src.utils.evaluate_utils - INFO - Loaded model: SVM
2025-07-25 18:40:31,499 - src.utils.evaluate_utils - INFO - Loaded model: AdaBoost
2025-07-25 18:40:31,501 - src.utils.evaluate_utils - INFO - Loaded model: DecisionTree
2025-07-25 18:40:31,546 - src.utils.evaluate_utils - INFO - Loaded model: RandomForest
2025-07-25 18:40:31,548 - src.utils.evaluate_utils - INFO - Loaded model: Naive Bayes
2025-07-25 18:40:31,571 - src.utils.evaluate_utils - INFO - Loaded model: Gradient Boosting
2025-07-25 18:40:31,573 - src.utils.evaluate_utils - INFO - Loaded model: LogisticRegression
2025-07-25 18:40:31,583 - src.utils.evaluate_utils - INFO - Loaded model: XGBoost
2025-07-25 18:40:31,585 - src.utils.notebook_setup - INFO - Loaded 8 trained models


In [3]:
# EVALUATE ALL TRAINED MODELS

# Score every loaded model on the held-out split. Each entry keeps the complete
# result mapping returned by evaluate_model, so the 'model' entry stays available
# for the selection step.
all_results = []
for model_name, model in trained_models.items():
    logger.info(f"Evaluating {model_name}...")
    all_results.append(evaluate_model(model, X_test, y_test, model_name))

# Tabulate the results: one row per model, with the 'model' entry left out so the
# frame holds only the name and the metric values.
metric_rows = []
for result in all_results:
    row = dict(result)
    row.pop('model', None)
    metric_rows.append(row)
all_results_df = pd.DataFrame(metric_rows)

logger.info("\n=== MODEL COMPARISON ===")
logger.info(all_results_df.round(4))

all_results_df.head(8)

2025-07-25 18:40:31,597 - src.utils.notebook_setup - INFO - Evaluating SVM...
2025-07-25 18:40:31,692 - src.utils.evaluate_utils - INFO - 
SVM Evaluation Results:
2025-07-25 18:40:31,694 - src.utils.evaluate_utils - INFO - Accuracy: 0.8125
2025-07-25 18:40:31,695 - src.utils.evaluate_utils - INFO - Precision: 0.8054
2025-07-25 18:40:31,696 - src.utils.evaluate_utils - INFO - Recall: 0.8725
2025-07-25 18:40:31,698 - src.utils.evaluate_utils - INFO - F1-Score: 0.8376
2025-07-25 18:40:31,700 - src.utils.evaluate_utils - INFO - ROC-AUC: 0.8946
2025-07-25 18:40:31,702 - src.utils.notebook_setup - INFO - Evaluating AdaBoost...
2025-07-25 18:40:31,843 - src.utils.evaluate_utils - INFO - 
AdaBoost Evaluation Results:
2025-07-25 18:40:31,844 - src.utils.evaluate_utils - INFO - Accuracy: 0.8342
2025-07-25 18:40:31,846 - src.utils.evaluate_utils - INFO - Precision: 0.8705
2025-07-25 18:40:31,847 - src.utils.evaluate_utils - INFO - Recall: 0.8235
2025-07-25 18:40:31,848 - src.utils.evaluate_utils 

Unnamed: 0,model_name,accuracy,precision,recall,f1_score,roc_auc
0,SVM,0.8125,0.80543,0.872549,0.837647,0.894593
1,AdaBoost,0.834239,0.870466,0.823529,0.846348,0.904023
2,DecisionTree,0.956522,0.97,0.95098,0.960396,0.957198
3,RandomForest,0.961957,0.979798,0.95098,0.965174,0.979974
4,Naive Bayes,0.809783,0.845361,0.803922,0.824121,0.894996
5,Gradient Boosting,0.961957,0.961165,0.970588,0.965854,0.977762
6,LogisticRegression,0.809783,0.825243,0.833333,0.829268,0.89398
7,XGBoost,0.945652,0.95098,0.95098,0.95098,0.97818


In [4]:
# FIND BEST ONE
# Rank the models by ROC-AUC and keep the winner's name and fitted estimator.
#
# best_model_idx is a *position*, not an index label: the same number is used
# with .iloc on the results frame and as a subscript into the plain all_results
# list, which was built in the same row order. argmax() returns that position
# directly; idxmax() would return a label, which only coincides with the position
# while the frame still has its default 0..n-1 index.
best_model_idx = int(all_results_df['roc_auc'].argmax())
best_model_name = all_results_df.iloc[best_model_idx]['model_name']
best_model = all_results[best_model_idx]['model']



logger.info(f"\n=== BEST MODEL: {best_model_name} ===")
logger.info(f"ROC-AUC: {all_results_df.iloc[best_model_idx]['roc_auc']:.4f}")

2025-07-25 18:40:32,067 - src.utils.notebook_setup - INFO - 
=== BEST MODEL: RandomForest ===
2025-07-25 18:40:32,070 - src.utils.notebook_setup - INFO - ROC-AUC: 0.9800


In [5]:
# SAVE BEST MODEL TO DISK!
#
# Writes the selected model and a JSON metadata record into ../models.
# json, dump_model, datetime and np come from the import cell at the top of the
# notebook; `datetime` is used as the module, so that name is no longer rebound
# to the class by this cell.
#
# NOTE(review): this cell is destructive. The recorded run log shows
# clear_worse_models() removing every *_tuned.joblib file, including the winner's
# own (RandomForest_tuned.joblib), so load_trained_models() may find nothing on a
# later Restart & Run All - confirm that this is intended.


def _json_default(obj):
    """Fallback for json.dump: unwrap NumPy scalars/arrays into built-in values.

    json only calls this for objects it cannot serialize itself, so values that
    already serialize (str, int, float and float subclasses) are unaffected.

    Raises:
        TypeError: if obj is not a NumPy scalar or array.
    """
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")


# Clear worse models
# NOTE(review): this runs before the winner is written below; the order is kept
# as-is because it is not visible here whether the helper would also delete a
# freshly written heart_disease_classifier.joblib - verify in evaluate_utils.
clear_worse_models(best_model_name)

# Save the winner as the final production model
dump_model(best_model, "../models/heart_disease_classifier.joblib")

# Metric row of the winning model
best_result = all_results_df.iloc[best_model_idx]


# Construct metadata dictionary
model_metadata = {
    "model_name": best_model_name,
    "version": "1.0.0",
    "created_date": datetime.datetime.now().strftime("%Y-%m-%d"),
    "algorithm": type(best_model).__name__,
    "features": list(X.columns),
    "target": "target",
    "metrics": {
        metric: best_result[metric]
        for metric in ("accuracy", "precision", "recall", "f1_score", "roc_auc")
    },
    "training_data_size": len(X_train),
    "test_data_size": len(X_test)
}

# Save to model_metadata.json
logger.info("SAVING MODEL METADATA")   
with open("../models/model_metadata.json", "w", encoding="utf-8") as f:
    json.dump(model_metadata, f, indent=4, default=_json_default)


logger.info(f"Saved final model: {best_model_name} as heart_disease_classifier.joblib")

2025-07-25 18:40:32,155 - src.utils.evaluate_utils - INFO - Removing worse model: SVM_tuned.joblib
2025-07-25 18:40:32,156 - src.utils.evaluate_utils - INFO - Removing worse model: AdaBoost_tuned.joblib
2025-07-25 18:40:32,158 - src.utils.evaluate_utils - INFO - Removing worse model: DecisionTree_tuned.joblib
2025-07-25 18:40:32,160 - src.utils.evaluate_utils - INFO - Removing worse model: RandomForest_tuned.joblib
2025-07-25 18:40:32,162 - src.utils.evaluate_utils - INFO - Removing worse model: Naive Bayes_tuned.joblib
2025-07-25 18:40:32,163 - src.utils.evaluate_utils - INFO - Removing worse model: Gradient Boosting_tuned.joblib
2025-07-25 18:40:32,165 - src.utils.evaluate_utils - INFO - Removing worse model: LogisticRegression_tuned.joblib
2025-07-25 18:40:32,167 - src.utils.evaluate_utils - INFO - Removing worse model: XGBoost_tuned.joblib
2025-07-25 18:40:32,264 - src.utils.notebook_setup - INFO - SAVING MODEL METADATA
2025-07-25 18:40:32,266 - src.utils.notebook_setup - INFO - Sa