In [53]:
import numpy as np
import pandas as pd
import sklearn.model_selection as ms
import sklearn.linear_model as lm
import sklearn.preprocessing as pp
from sklearn.metrics import (confusion_matrix, matthews_corrcoef, 
                             brier_score_loss, accuracy_score, roc_auc_score)
import warnings

warnings.filterwarnings("ignore")

In [54]:
# Load the heart-disease table.
# The CSV location used to be a hard-coded, machine-specific absolute path. It can now be
# overridden with the HEART_CSV_PATH environment variable; the original path remains the
# default so existing runs behave exactly as before.
import os

HEART_CSV_PATH = os.environ.get(
    "HEART_CSV_PATH",
    "/Users/Siddhesh/My Files/VIT/PFE/PFECP/Data/heart.csv",
)
df = pd.read_csv(HEART_CSV_PATH)

In [55]:
# cols_with_zero_heart = ["oldpeak", "slope", "ca", "thal"]

# for col in cols_with_zero_heart:
#     df[col] = df[col].replace(0, df[col].median())

In [56]:
# Separate the label column from the predictors.
# Y is the "target" column; X is every other column of df.
Y = df["target"]
X = df.drop(columns=["target"])

In [57]:
# Hold out 20% of the rows for testing. The split is stratified on Y so both parts keep
# the label proportions, and the seed is fixed so the split is repeatable.
X_train, X_test, Y_train, Y_test = ms.train_test_split(
    X,
    Y,
    stratify=Y,
    test_size=0.2,
    random_state=45,
)

In [58]:
# Learn per-column standardisation statistics on the training split only, then apply the
# same transform to both splits (the test split never influences the statistics).
# NOTE(review): in the visible cells the model is fitted on columns selected from the
# unscaled X_train / X_test, so X_train_scaled and X_test_scaled are not consumed
# downstream — confirm whether the scaled data was meant to feed the model.
scaler = pp.StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [59]:
# Feature subset used for modelling.
# NOTE(review): the list is entered by hand; the earlier comment referred to a
# feature-importance plot that is not part of the visible cells — confirm its origin.
top_features = [
    "cp",
    "slope",
    "thalach",
    "restecg",
    "fbs",
    "thal",
    "age",
]

# Restrict both splits to the chosen columns (taken from the raw, unscaled frames).
X_train_selected = X_train.loc[:, top_features]
X_test_selected = X_test.loc[:, top_features]

In [60]:
# L2-penalised logistic regression (liblinear solver) with built-in 5-fold
# cross-validation scored by ROC AUC. Class weights are balanced and the seed is fixed.
model = lm.LogisticRegressionCV(
    penalty="l2",
    solver="liblinear",
    cv=5,
    scoring="roc_auc",
    class_weight="balanced",
    max_iter=10000,
    random_state=42,
)
model.fit(X_train_selected, Y_train)

In [61]:
# Hard class predictions for the held-out split. The original cell ran the identical
# predict() call twice under two names; it is now computed once. Both names are kept
# because later cells read either one (X_test_pred gets its own copy so the two arrays
# stay independent, as before).
y_pred = model.predict(X_test_selected)
X_test_pred = y_pred.copy()

# Probability of the second class listed in model.classes_ (class 1 for 0/1 labels).
y_prob = model.predict_proba(X_test_selected)[:, 1]

# Score on the training split.
# NOTE(review): LogisticRegressionCV.score presumably applies the configured `scoring`
# ("roc_auc" here) rather than accuracy — confirm before reporting this number.
trainingscore = model.score(X_train_selected, Y_train)

In [62]:
# One hand-entered sample; the values must follow the column order of `top_features`.
inputstr = [3, 0, 150, 0, 1, 1, 63]
inputarr = np.array(inputstr, dtype=float).reshape(1, -1)

# The model was fitted on a DataFrame, so predict on a DataFrame carrying the same column
# names instead of a bare ndarray. This keeps scikit-learn's feature-name consistency
# check meaningful, and pandas raises if the number of values does not match the number
# of selected features.
input_frame = pd.DataFrame(inputarr, columns=top_features)
prediction = model.predict(input_frame)
print(prediction)

# Label 0 is reported as "no heart disease"; any other label as "heart disease".
if prediction[0] == 0:
    print("The person does not have a heart disease")
else:
    print("The person has a heart disease")


[1]
The person has a heart disease


In [63]:
# Report the value returned by model.score on the training split.
score_label = "Training Score: "
print(score_label, trainingscore)

Training Score:  0.8850550964187328


In [36]:
import pickle

# Persist the fitted model and immediately read it back as a round-trip check.
filename = 'heart_lr.pkl'

# Context managers guarantee the file handles are flushed and closed; the original bare
# open() calls left them to the garbage collector.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

# Reload from the same `filename` rather than a duplicated string literal.
# SECURITY: pickle.load executes arbitrary code from the file — only load pickles that
# this notebook (or another trusted source) produced.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [37]:
# Per-class precision / recall / F1 summary for the held-out split.
from sklearn.metrics import classification_report

report = classification_report(y_true=Y_test, y_pred=X_test_pred)
print(report)

              precision    recall  f1-score   support

           0       0.75      0.64      0.69        28
           1       0.73      0.82      0.77        33

    accuracy                           0.74        61
   macro avg       0.74      0.73      0.73        61
weighted avg       0.74      0.74      0.74        61



In [38]:
# Unpack the 2x2 confusion matrix row by row: first row is (tn, fp), second is (fn, tp).
(tn, fp), (fn, tp) = confusion_matrix(Y_test, y_pred)

# Rates derived from the confusion-matrix cells.
tpr = tp / (tp + fn)  # sensitivity / recall
tnr = tn / (tn + fp)  # specificity

# Metrics computed directly by scikit-learn.
mcc = matthews_corrcoef(Y_test, y_pred)
brier = brier_score_loss(Y_test, y_prob)  # uses probabilities, not hard labels
accuracy = accuracy_score(Y_test, y_pred)
auc = roc_auc_score(Y_test, y_prob)  # uses probabilities, not hard labels

# Print every metric with four decimals, one per line.
metric_rows = [
    ("TPR (Sensitivity)", tpr),
    ("TNR (Specificity)", tnr),
    ("MCC", mcc),
    ("Brier Score", brier),
    ("Accuracy", accuracy),
    ("AUC-ROC", auc),
]
for metric_name, metric_value in metric_rows:
    print(f"{metric_name}: {metric_value:.4f}")

TPR (Sensitivity): 0.8182
TNR (Specificity): 0.6429
MCC: 0.4703
Brier Score: 0.1983
Accuracy: 0.7377
AUC-ROC: 0.7835
