In [1]:
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from joblib import load as load_model
import os
from src.utils.evaluate_utils import evaluate_model, load_trained_models, clear_worse_models
from src.utils.training_utils import prepare_data
from src.utils.data_utils import drop_id
import datetime

# Quick setup
from src.utils.notebook_setup import setup_notebook_environment
dbs, logger = await setup_notebook_environment()

logger.info("=== STARTING MODEL EVALUATION ===")


# LOAD DATA (same as 04)
gold_data_from_db = await dbs.get_gold_data()
gold_data_df = pd.DataFrame(gold_data_from_db)
gold_data_df = drop_id(gold_data_df)

# RECREATE THE SAME SPLIT (important!)
X = gold_data_df.drop('target', axis=1)
y = gold_data_df['target']
X_train, X_test, y_train, y_test = prepare_data(X, y, test_size=0.2, random_state=42)

logger.info(f"Loaded test set: {X_test.shape}")


2025-07-25 11:13:54,968 - api.services.database_service - INFO - Connected to MongoDB database: healthcare
2025-07-25 11:13:55,093 - src.utils.notebook_setup - INFO - Database connected: True
2025-07-25 11:13:55,094 - src.utils.notebook_setup - INFO - Database collections: ['heart_disease_gold', 'heart_disease_silver', 'heart_disease_bronze']
2025-07-25 11:13:55,096 - src.utils.notebook_setup - INFO - Database collections count: 3
2025-07-25 11:13:55,098 - src.utils.notebook_setup - INFO - === STARTING MODEL EVALUATION ===
2025-07-25 11:13:55,968 - src.utils.training_utils - INFO - Training set: (15456, 16), Test set: (3864, 16)
2025-07-25 11:13:55,971 - src.utils.notebook_setup - INFO - Loaded test set: (3864, 16)


In [2]:
# load the trained models
# load_trained_models() is imported from src.utils.evaluate_utils. Its result is
# iterated with .items() as (model_name, model) pairs by the evaluation cell,
# so it is presumably a dict keyed by model name - TODO confirm against the helper.
trained_models = load_trained_models()
# Report the count so an empty result is visible before evaluation starts.
logger.info(f"Loaded {len(trained_models)} trained models")

2025-07-25 11:13:55,991 - src.utils.evaluate_utils - INFO - Loaded model: LogisticRegression
2025-07-25 11:13:55,993 - src.utils.notebook_setup - INFO - Loaded 1 trained models


In [3]:
# EVALUATE ALL TRAINED MODELS
# Score every loaded model on the held-out test split. Each result returned by
# evaluate_model() is treated as a dict; its 'model' entry (read back when the
# best model is selected) is kept in all_results but excluded from the table.

all_results = []

for model_name, model in trained_models.items():
    logger.info(f"Evaluating {model_name}...")
    result = evaluate_model(model, X_test, y_test, model_name)
    all_results.append(result)

# Create results DataFrame (everything except the 'model' entry)
all_results_df = pd.DataFrame([
    {k: v for k, v in result.items() if k != 'model'}
    for result in all_results
])

logger.info("\n=== MODEL COMPARISON ===")
# Render the table explicitly and start it on its own line: logging the
# DataFrame object directly puts the header row behind the log prefix, which
# shifts it relative to the data rows and makes the columns unreadable.
logger.info(f"\n{all_results_df.round(4).to_string()}")

2025-07-25 11:13:56,015 - src.utils.notebook_setup - INFO - Evaluating LogisticRegression...
2025-07-25 11:13:56,061 - src.utils.evaluate_utils - INFO - 
LogisticRegression Evaluation Results:
2025-07-25 11:13:56,063 - src.utils.evaluate_utils - INFO - Accuracy: 0.8080
2025-07-25 11:13:56,065 - src.utils.evaluate_utils - INFO - Precision: 0.8105
2025-07-25 11:13:56,069 - src.utils.evaluate_utils - INFO - Recall: 0.8522
2025-07-25 11:13:56,071 - src.utils.evaluate_utils - INFO - F1-Score: 0.8308
2025-07-25 11:13:56,073 - src.utils.evaluate_utils - INFO - ROC-AUC: 0.8877
2025-07-25 11:13:56,078 - src.utils.notebook_setup - INFO - 
=== MODEL COMPARISON ===
2025-07-25 11:13:56,082 - src.utils.notebook_setup - INFO -            model_name  accuracy  precision  recall  f1_score  roc_auc
0  LogisticRegression     0.808     0.8105  0.8522    0.8308   0.8877


In [4]:
# FIND BEST ONE
# Selects the evaluated model with the highest ROC-AUC and exposes it as
# best_model_idx (position), best_model_name and best_model.

# Fail early with a clear message: with no evaluated models the frame has no
# 'roc_auc' column and the lookup below would die with an opaque KeyError.
if all_results_df.empty:
    raise ValueError("No evaluated models available - cannot select a best model")

# idxmax() returns an index *label*, while .iloc and the all_results list need
# a *position*. Resetting to a 0..n-1 index first makes the two coincide
# regardless of how all_results_df is indexed.
best_model_idx = int(all_results_df['roc_auc'].reset_index(drop=True).idxmax())
best_model_name = all_results_df.iloc[best_model_idx]['model_name']
best_model = all_results[best_model_idx]['model']



logger.info(f"\n=== BEST MODEL: {best_model_name} ===")
logger.info(f"ROC-AUC: {all_results_df.iloc[best_model_idx]['roc_auc']:.4f}")

2025-07-25 11:13:56,165 - src.utils.notebook_setup - INFO - 
=== BEST MODEL: LogisticRegression ===
2025-07-25 11:13:56,169 - src.utils.notebook_setup - INFO - ROC-AUC: 0.8877


In [5]:
# SAVE BEST MODEL TO DISK!
from joblib import dump as dump_model
from datetime import datetime
import json

# Clear worse models
# NOTE(review): this runs before the winner is written below. The order is kept
# as-is because it is not visible here which files clear_worse_models() removes.
clear_worse_models(best_model_name)

# Save the winner as the final production model
dump_model(best_model, "../models/heart_disease_classifier.joblib")

best_result = all_results_df.iloc[best_model_idx]


# Construct metadata dictionary
# Metric values are converted to built-in floats so json.dump cannot fail on a
# NumPy scalar type (e.g. float32) that the json module does not serialize;
# such a failure would leave a truncated metadata file behind.
model_metadata = {
    "model_name": best_model_name,
    "version": "1.0.0",
    "created_date": datetime.now().strftime("%Y-%m-%d"),
    "algorithm": type(best_model).__name__,
    "features": list(X.columns),
    "target": "target",
    "metrics": {
        metric: float(best_result[metric])
        for metric in ("accuracy", "precision", "recall", "f1_score", "roc_auc")
    },
    "training_data_size": len(X_train),
    "test_data_size": len(X_test)
}

# Save to model_metadata.json (explicit encoding so the file does not depend on
# the platform default)
logger.info("SAVING MODEL METADATA")
with open("../models/model_metadata.json", "w", encoding="utf-8") as f:
    json.dump(model_metadata, f, indent=4)


logger.info(f"Saved final model: {best_model_name} as heart_disease_classifier.joblib")

2025-07-25 11:13:56,280 - src.utils.evaluate_utils - INFO - Removing worse model: LogisticRegression_tuned.joblib
2025-07-25 11:13:56,288 - src.utils.notebook_setup - INFO - SAVING MODEL METADATA
2025-07-25 11:13:56,293 - src.utils.notebook_setup - INFO - Saved final model: LogisticRegression as heart_disease_classifier.joblib
