In [2]:
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from joblib import load as load_model
import os
from src.utils.evaluate_utils import evaluate_model, load_trained_models, clear_worse_models
from src.utils.training_utils import prepare_data
from src.utils.data_utils import drop_id
import datetime

# Quick setup
from src.utils.notebook_setup import setup_notebook_environment
dbs, logger = await setup_notebook_environment()

logger.info("=== STARTING MODEL EVALUATION ===")


# LOAD DATA (same as 04)
gold_data_from_db = await dbs.get_gold_data()
gold_data_df = pd.DataFrame(gold_data_from_db)
gold_data_df = drop_id(gold_data_df)

# RECREATE THE SAME SPLIT (important!)
X = gold_data_df.drop('target', axis=1)
y = gold_data_df['target']
X_train, X_test, y_train, y_test = prepare_data(X, y, test_size=0.2, random_state=42)

logger.info(f"Loaded test set: {X_test.shape}")


2025-07-25 22:22:34,715 - api.services.database_service - INFO - Connected to MongoDB database: healthcare


2025-07-25 22:22:34,720 - src.utils.notebook_setup - INFO - Database connected: True
2025-07-25 22:22:34,722 - src.utils.notebook_setup - INFO - Database collections: ['heart_disease_silver', 'heart_disease_bronze', 'heart_disease_gold']
2025-07-25 22:22:34,724 - src.utils.notebook_setup - INFO - Database collections count: 3
2025-07-25 22:22:34,726 - src.utils.notebook_setup - INFO - === STARTING MODEL EVALUATION ===
2025-07-25 22:22:34,916 - src.utils.training_utils - INFO - Training set: (5152, 16), Test set: (1288, 16)
2025-07-25 22:22:34,919 - src.utils.notebook_setup - INFO - Loaded test set: (1288, 16)


In [3]:
# Load every persisted candidate model; the result is iterated with .items()
# in the evaluation cell, so it is expected to map model name -> fitted model.
trained_models = load_trained_models()
logger.info(f"Loaded {len(trained_models)} trained models")

# Fail fast with a clear message: with nothing loaded, the results frame built
# later would be empty and best-model selection would fail on a missing column.
if len(trained_models) == 0:
    raise RuntimeError("No trained models were loaded; nothing to evaluate")

2025-07-25 22:22:40,387 - src.utils.evaluate_utils - INFO - Loaded model: SVM
2025-07-25 22:22:40,389 - src.utils.evaluate_utils - INFO - Loaded model: Logistic Regression
2025-07-25 22:22:40,444 - src.utils.evaluate_utils - INFO - Loaded model: Random Forest
2025-07-25 22:22:40,450 - src.utils.evaluate_utils - INFO - Loaded model: XGBoost
2025-07-25 22:22:40,452 - src.utils.notebook_setup - INFO - Loaded 4 trained models


In [4]:
# EVALUATE ALL TRAINED MODELS
# Score each loaded model on the held-out test set and keep the raw result
# dicts (which still carry the fitted model under the 'model' key).

all_results = []

for name, fitted_model in trained_models.items():
    logger.info(f"Evaluating {name}...")
    all_results.append(evaluate_model(fitted_model, X_test, y_test, name))

# Tabulate the metrics only: copy each result and remove the model object so
# the DataFrame holds plain values, one row per model in evaluation order.
metric_rows = []
for entry in all_results:
    row = dict(entry.items())
    row.pop('model', None)
    metric_rows.append(row)
all_results_df = pd.DataFrame(metric_rows)

logger.info("\n=== MODEL COMPARISON ===")
logger.info(all_results_df.round(4))

all_results_df.head(8)

2025-07-25 22:22:55,576 - src.utils.notebook_setup - INFO - Evaluating SVM...
2025-07-25 22:22:55,859 - src.utils.evaluate_utils - INFO - 
SVM Evaluation Results:
2025-07-25 22:22:55,864 - src.utils.evaluate_utils - INFO - Accuracy: 0.8393
2025-07-25 22:22:55,865 - src.utils.evaluate_utils - INFO - Precision: 0.8504
2025-07-25 22:22:55,866 - src.utils.evaluate_utils - INFO - Recall: 0.8612
2025-07-25 22:22:55,867 - src.utils.evaluate_utils - INFO - F1-Score: 0.8557
2025-07-25 22:22:55,868 - src.utils.evaluate_utils - INFO - ROC-AUC: N/A
2025-07-25 22:22:55,869 - src.utils.notebook_setup - INFO - Evaluating Logistic Regression...
2025-07-25 22:22:55,894 - src.utils.evaluate_utils - INFO - 
Logistic Regression Evaluation Results:
2025-07-25 22:22:55,895 - src.utils.evaluate_utils - INFO - Accuracy: 0.8005
2025-07-25 22:22:55,896 - src.utils.evaluate_utils - INFO - Precision: 0.8089
2025-07-25 22:22:55,898 - src.utils.evaluate_utils - INFO - Recall: 0.8373
2025-07-25 22:22:55,899 - src.ut

Unnamed: 0,model_name,accuracy,precision,recall,f1_score,roc_auc
0,SVM,0.839286,0.850416,0.86115,0.855749,
1,Logistic Regression,0.800466,0.808943,0.837307,0.822881,0.874692
2,Random Forest,1.0,1.0,1.0,1.0,1.0
3,XGBoost,0.997671,0.99581,1.0,0.997901,1.0


In [None]:
# FIND BEST ONE
# Models are ranked by ROC-AUC. Rows whose ROC-AUC is NaN (SVM in the recorded
# run) are skipped by idxmax(); on a tie the first row in evaluation order wins.
# NOTE(review): the recorded run shows Random Forest at 1.0 on every test
# metric — confirm the split really matches the training split (possible
# leakage) before promoting that model.
roc_auc_scores = all_results_df['roc_auc']
if roc_auc_scores.isna().all():
    raise ValueError("No model reported a ROC-AUC score; cannot select a best model")

# idxmax() returns an index *label*; convert it to a position because the
# lookups below (.iloc and list indexing into all_results) are positional.
best_model_idx = all_results_df.index.get_loc(roc_auc_scores.idxmax())
best_model_name = all_results_df.iloc[best_model_idx]['model_name']
best_model = all_results[best_model_idx]['model']



logger.info(f"\n=== BEST MODEL: {best_model_name} ===")
logger.info(f"ROC-AUC: {all_results_df.iloc[best_model_idx]['roc_auc']:.4f}")

In [None]:
# SAVE BEST MODEL TO DISK!
# Persists the selected model plus a JSON metadata file describing it.
from joblib import dump as dump_model
# Import `date` rather than `from datetime import datetime`: the first cell
# binds `datetime` to the module, and rebinding it to the class here would
# silently change what that name means for any cell run afterwards.
from datetime import date
import json

# Clear worse models
# NOTE(review): this runs before the winner is written below; if the dump
# fails, the other candidates may already be gone — confirm what
# clear_worse_models removes before changing the order.
clear_worse_models(best_model_name)

# Save the winner as the final production model
dump_model(best_model, "../models/heart_disease_classifier.joblib")

# Metrics row of the winning model (positional lookup into the results frame).
best_result = all_results_df.iloc[best_model_idx]


# Construct metadata dictionary
model_metadata = {
    "model_name": best_model_name,
    "version": "1.0.0",
    # Same YYYY-MM-DD string as datetime.now().strftime("%Y-%m-%d").
    "created_date": date.today().isoformat(),
    "algorithm": type(best_model).__name__,
    "features": list(X.columns),
    "target": "target",
    # Cast to built-in float so JSON serialisation does not depend on the
    # numpy scalar type pandas hands back for each cell.
    "metrics": {
        metric: float(best_result[metric])
        for metric in ("accuracy", "precision", "recall", "f1_score", "roc_auc")
    },
    "training_data_size": len(X_train),
    "test_data_size": len(X_test)
}

# Save to model_metadata.json
logger.info("SAVING MODEL METADATA")
with open("../models/model_metadata.json", "w") as f:
    json.dump(model_metadata, f, indent=4)


logger.info(f"Saved final model: {best_model_name} as heart_disease_classifier.joblib")