In [2]:
import json
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_curve, roc_auc_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler


# Train a random-forest classifier on the heart-disease table, evaluate it on a
# held-out split, pick a decision threshold from the ROC curve, and persist the
# fitted pipeline together with a small JSON metadata file.

# --- Configuration -----------------------------------------------------------
# All paths are relative to the notebook's working directory.
DATA_PATH = "../data/heart_disease.csv"
MODEL_PATH = "../models/final_model.pkl"
# NOTE(review): the metadata file is written to the current working directory,
# not next to the model in ../models/. Kept as-is so existing consumers still
# find it -- confirm that this location is intended.
META_PATH = "model_meta.json"
TARGET_COL = "num"
SEED = 42

# --- Load data ---------------------------------------------------------------
df = pd.read_csv(DATA_PATH)
x = df.drop(TARGET_COL, axis=1)
y = df[TARGET_COL]

# --- Split -------------------------------------------------------------------
# 80/20 hold-out split; the seed makes it reproducible.
# NOTE(review): the split is not stratified on `y`; consider `stratify=y` if
# the class balance of the small test set matters.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=SEED
)

# --- Model -------------------------------------------------------------------
# Scaler and classifier live in one Pipeline so the saved artifact applies the
# same preprocessing at inference time.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', RandomForestClassifier(n_estimators=100, random_state=SEED, class_weight='balanced')),
])
pipeline.fit(x_train, y_train)

# --- Evaluate ----------------------------------------------------------------
# Column 1 of predict_proba is the probability of the second class in
# `pipeline.classes_`; this assumes a binary 0/1 target -- TODO confirm.
y_proba = pipeline.predict_proba(x_test)[:, 1]
auc = roc_auc_score(y_test, y_proba)
fpr, tpr, thresholds = roc_curve(y_test, y_proba)

# Choose the threshold maximising Youden's J (tpr - fpr).
# roc_curve prepends a sentinel threshold (np.inf in recent scikit-learn) for
# the "predict nothing" point. Mask non-finite entries so the sentinel can
# never be selected: it would be useless as a cut-off and would be serialised
# as the invalid JSON token `Infinity`.
j_scores = np.where(np.isfinite(thresholds), tpr - fpr, -np.inf)
ix = int(np.argmax(j_scores))
best_thresh = float(thresholds[ix])

# Threshold the probabilities once and reuse the labels in every report.
y_pred_default = (y_proba >= 0.5).astype(int)
y_pred_best = (y_proba >= best_thresh).astype(int)

# NOTE(review): `best_thresh` is tuned on the same test set it is evaluated on
# below, so the "@best" figures are optimistically biased. Tune on a separate
# validation split (or via cross-validation) for an unbiased estimate.
print("AUC:", auc)
print("Best threshold:", best_thresh)
print("Report @0.5\n", classification_report(y_test, y_pred_default))
print("Report @best\n", classification_report(y_test, y_pred_best))
print("Confusion @best\n", confusion_matrix(y_test, y_pred_best))

# --- Persist -----------------------------------------------------------------
# Create the output directory first so a missing folder does not throw away a
# finished training run.
Path(MODEL_PATH).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(pipeline, MODEL_PATH)

# The metadata records the chosen threshold and the training column order so
# an inference caller can rebuild the input frame consistently.
with open(META_PATH, "w", encoding="utf-8") as f:
    json.dump({"threshold": best_thresh, "features": list(x.columns)}, f)

# Build the message from the real file names so it cannot drift from what was
# written (the previous text said "final_modal.pkl").
print(f"Model saved as {Path(MODEL_PATH).name} & {META_PATH}")

AUC: 0.9398148148148149
Best threshold: 0.51
Report @0.5
               precision    recall  f1-score   support

           0       0.91      0.89      0.90        36
           1       0.84      0.88      0.86        24

    accuracy                           0.88        60
   macro avg       0.88      0.88      0.88        60
weighted avg       0.88      0.88      0.88        60

Report @best
               precision    recall  f1-score   support

           0       0.91      0.89      0.90        36
           1       0.84      0.88      0.86        24

    accuracy                           0.88        60
   macro avg       0.88      0.88      0.88        60
weighted avg       0.88      0.88      0.88        60

Confusion @best
 [[32  4]
 [ 3 21]]
Model saved as final_modal.pkl & model_meta.json
