In [1]:
from pathlib import Path

import joblib
import pandas as pd
from scipy.stats import randint
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler



# Load the selected-feature dataset and separate the label column from the predictors.
df_selected = pd.read_csv("../data/heart_disease_selected_features.csv")
y = df_selected["target"]
X = df_selected.drop(columns="target")

# Hold out 20% of the rows for evaluation. Stratifying on the label keeps the
# class proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Two-step pipeline: scale the features, then classify with a random forest.
# The step names ("scaler", "rf") are what the search-space keys below refer to.
pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("rf", RandomForestClassifier(random_state=42)),
    ]
)

# Hyperparameter search space for the "rf" step ("<step>__<param>" naming).
# scipy's randint(low, high) draws integers from [low, high), so every upper
# bound here is exclusive.
param_dist = dict(
    rf__n_estimators=randint(50, 300),
    rf__max_depth=randint(3, 20),
    rf__min_samples_split=randint(2, 10),
    rf__min_samples_leaf=randint(1, 10),
    rf__bootstrap=[True, False],
)

# Randomized hyperparameter search over the pipeline: 50 sampled candidates,
# each scored by 5-fold cross-validated accuracy, parallelised with n_jobs=-1.
# NOTE(review): accuracy can favour the majority class when the target is
# imbalanced; consider a macro-averaged metric -- confirm against the class
# distribution of `y_train`.
random_search = RandomizedSearchCV(
    pipeline,
    param_dist,
    n_iter=50,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    random_state=42,
)
random_search.fit(X_train, y_train)

# Evaluate the best configuration found by the search on the held-out test split.
print("Best Parameters:", random_search.best_params_)
y_pred = random_search.predict(X_test)
print("\nClassification Report:")
# zero_division=0 makes the score for never-predicted classes an explicit 0.0
# instead of relying on the default, which reports the same 0.0 but emits an
# UndefinedMetricWarning for every affected metric.
print(classification_report(y_test, y_pred, zero_division=0))

# Persist the best pipeline so it can be reloaded later with joblib.load.
model_path = "../models/heart_disease_pipeline.pkl"
# Create the target directory if needed; joblib.dump does not create missing
# parent directories and would otherwise fail on a fresh checkout.
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(random_search.best_estimator_, model_path)
print(f"\n✅ Model pipeline saved successfully to {model_path}")


Best Parameters: {'rf__bootstrap': False, 'rf__max_depth': 11, 'rf__min_samples_leaf': 4, 'rf__min_samples_split': 2, 'rf__n_estimators': 111}

Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.88      0.81        32
           1       0.27      0.27      0.27        11
           2       0.00      0.00      0.00         7
           3       0.00      0.00      0.00         7
           4       0.00      0.00      0.00         3

    accuracy                           0.52        60
   macro avg       0.21      0.23      0.22        60
weighted avg       0.45      0.52      0.48        60


✅ Model pipeline saved successfully to ../models/heart_disease_pipeline.pkl


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
