In [4]:
from sklearn.model_selection import train_test_split
import pandas as pd

# Read the dataset from the CSV file.
df = pd.read_csv("../data/heart_disease_selected.csv")

# Split the frame into the label column and the remaining predictor columns.
y = df["target"]
X = df.drop(columns=["target"])

# Hold out 20% of the rows for testing; stratifying on y keeps the class
# proportions the same in both partitions, and the fixed seed makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)


## Random Forest Tuning (RandomizedSearchCV)

In [5]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Search space sampled by the randomized search (20 of the 288 combinations).
param_dist_rf = dict(
    n_estimators=[50, 100, 150, 200],
    max_depth=[None, 5, 10, 20],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    bootstrap=[True, False],
)

# Seeded base estimator and seeded search so the run is repeatable.
rf = RandomForestClassifier(random_state=42)
random_search_rf = RandomizedSearchCV(
    estimator=rf,
    param_distributions=param_dist_rf,
    n_iter=20,
    cv=5,
    scoring="f1",
    random_state=42,
    n_jobs=-1,
)

# Run the 5-fold cross-validated search on the training partition only.
random_search_rf.fit(X_train, y_train)

# The best estimator is refit on the full training partition by the search;
# score it once on the held-out test partition.
best_rf = random_search_rf.best_estimator_
y_pred_rf = best_rf.predict(X_test)

print(
    "📈 Random Forest - Best Hyperparameters:",
    random_search_rf.best_params_,
    sep="\n",
)
print(
    "\n🧾 Performance After Tuning:",
    classification_report(y_test, y_pred_rf),
    sep="\n",
)


📈 Random Forest - Best Hyperparameters:
{'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_depth': 10, 'bootstrap': False}

🧾 Performance After Tuning:
              precision    recall  f1-score   support

           0       0.81      0.91      0.85        32
           1       0.88      0.75      0.81        28

    accuracy                           0.83        60
   macro avg       0.84      0.83      0.83        60
weighted avg       0.84      0.83      0.83        60



## SVM Tuning (GridSearchCV)

In [6]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

# Candidate hyperparameters for the exhaustive grid search.
# NOTE(review): `gamma` has no effect on the linear kernel, so the linear
# candidates are fitted twice with identical results; the grid is left as-is
# so that `best_params_` keeps reporting the same keys.
param_grid_svm = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto']
}

# Set up the grid search.
# `probability=True` fits an internal cross-validated calibration that
# shuffles the data; fixing `random_state` makes the probability estimates
# of the fitted (and later exported) model reproducible, consistent with the
# seeded Random Forest workflow in this notebook.
svm = SVC(probability=True, random_state=42)
grid_search_svm = GridSearchCV(estimator=svm, param_grid=param_grid_svm,
                                cv=5, scoring="f1", n_jobs=-1)

# Fit every candidate with 5-fold cross-validation on the training partition.
grid_search_svm.fit(X_train, y_train)

# Best model (refit on the full training partition by the search),
# evaluated once on the held-out test partition.
best_svm = grid_search_svm.best_estimator_
y_pred_svm = best_svm.predict(X_test)

print("📈 SVM - Best Hyperparameters:")
print(grid_search_svm.best_params_)
print("\n🧾 Performance After Tuning:")
print(classification_report(y_test, y_pred_svm))


📈 SVM - Best Hyperparameters:
{'C': 10, 'gamma': 'scale', 'kernel': 'linear'}

🧾 Performance After Tuning:
              precision    recall  f1-score   support

           0       0.85      0.91      0.88        32
           1       0.88      0.82      0.85        28

    accuracy                           0.87        60
   macro avg       0.87      0.86      0.87        60
weighted avg       0.87      0.87      0.87        60



## Compare with baseline

In [15]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score

# Reference model: default hyperparameters, same seed as the tuned forest.
# `fit` returns the estimator itself, so the chained call binds the fitted model.
baseline_rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Score both forests on the same held-out test partition.
baseline_pred = baseline_rf.predict(X_test)
tuned_pred = best_rf.predict(X_test)
baseline_f1 = f1_score(y_test, baseline_pred)
tuned_f1 = f1_score(y_test, tuned_pred)

print(
    "📊 Random Forest F1-Score",
    f"Before Tuning: {baseline_f1:.2f}",
    f"After  Tuning: {tuned_f1:.2f}",
    sep="\n",
)


📊 Random Forest F1-Score
Before Tuning: 0.81
After  Tuning: 0.81


## Export Models

In [17]:
from pathlib import Path

import joblib

# joblib.dump does not create missing parent directories, so make sure the
# output folder exists before writing (no-op when it is already there).
Path("../models").mkdir(parents=True, exist_ok=True)

# Persist the tuned estimators selected by the searches above.
joblib.dump(best_rf, "../models/random_forest_tuned.pkl")
joblib.dump(best_svm, "../models/svm_tuned.pkl")
# The tuned Random Forest is also written under a second file name.
# Kept as the last expression so the cell output (the list of written
# file names returned by joblib.dump) is unchanged.
joblib.dump(best_rf, "../models/best_rf.pkl")

['../models/best_rf.pkl']