In [1]:
import os
import warnings

import numpy as np
import pandas as pd
import sklearn.metrics as mt
import sklearn.model_selection as ms
from sklearn import svm
from sklearn.metrics import (confusion_matrix, matthews_corrcoef, 
                             brier_score_loss, accuracy_score, roc_auc_score)

warnings.filterwarnings("ignore")

In [2]:
# Location of the heart-disease CSV. Set the HEART_DATA_PATH environment
# variable to point at your own copy; when it is unset, the original
# author's local path is used so existing behaviour is unchanged.
DATA_PATH = os.environ.get(
    "HEART_DATA_PATH",
    "/Users/Siddhesh/My Files/VIT/PFE/PFECP/Data/heart.csv",
)
df = pd.read_csv(DATA_PATH)

In [3]:
# Separate the label column from the feature columns.
target_col = "target"
Y = df[target_col]                  # labels
X = df.drop(columns=[target_col])   # every remaining column is a feature

In [4]:
# Columns in which a value of 0 is replaced by that column's median.
# NOTE(review): confirm that 0 really marks a missing value in each of these
# columns rather than being a legitimate measurement/category.
cols_with_zero_heart = ["oldpeak", "slope", "ca", "thal"]

# NOTE(review): X and Y were already taken from df in the previous cell
# (X = df.drop(...)), so these replacements appear to change df only and
# never reach the data the model is trained on -- confirm the intended order.
column_medians = {col: df[col].median() for col in cols_with_zero_heart}
for col, median_value in column_medians.items():
    df[col] = df[col].replace(0, median_value)

In [5]:
# Hold out 20% of the rows for testing. stratify=Y asks for the class ratio
# of Y to be kept in both partitions; random_state=2 makes the split repeatable.
X_train, X_test, Y_train, Y_test = ms.train_test_split(
    X,
    Y,
    test_size=0.2,
    stratify=Y,
    random_state=2,
)

In [6]:
# Sanity check: (rows, columns) of the training and test feature sets.
print(f"{X_train.shape} {X_test.shape}")

(242, 13) (61, 13)


In [7]:
# Support vector classifier with a linear kernel, fitted on the training split.
classifier = svm.SVC(kernel="linear")
classifier.fit(X=X_train, y=Y_train)

In [8]:
from sklearn.calibration import CalibratedClassifierCV

# Accuracy of the fitted SVM on the data it was trained on.
# accuracy_score takes (y_true, y_pred) in that order: true labels first.
X_train_prediction = classifier.predict(X_train)
training_data_accuracy = mt.accuracy_score(Y_train, X_train_prediction)

# Sigmoid-calibrated wrapper around the SVM, fitted on the training split.
# It is used below only to obtain probability estimates.
calibrated_svm = CalibratedClassifierCV(classifier, method="sigmoid")
calibrated_svm.fit(X_train, Y_train)

# Predictions on the held-out test split.
# NOTE(review): y_pred comes from `classifier` while y_prob comes from
# `calibrated_svm`, so label-based metrics and probability-based metrics
# describe two different fitted objects -- confirm this is intended.
y_pred = classifier.predict(X_test)
# Column 1 of predict_proba is the second class (class 1 for 0/1 labels);
# these probabilities feed the Brier score and AUC.
y_prob = calibrated_svm.predict_proba(X_test)[:, 1]

In [9]:
# Report the accuracy measured on the training split.
accuracy_label = "Accuracy on training data : "
print(accuracy_label, training_data_accuracy)

Accuracy on training data :  0.8553719008264463


In [10]:
# One patient record.
# NOTE(review): the values are assumed to follow the column order of
# X_train -- confirm against the dataset header.
input_data = (63,1,3,145,233,1,0,150,0,2.3,0,0,1)
input_data_as_numpy_array = np.asarray(input_data)

# Shape the record as a single row and label it with the training column
# names, so the model receives the same named layout it was fitted on
# instead of an anonymous array. A record with the wrong number of values
# fails here with a clear error rather than being silently misaligned.
input_data_reshaped = pd.DataFrame(
    input_data_as_numpy_array.reshape(1, -1),
    columns=X_train.columns,
)

prediction = classifier.predict(input_data_reshaped)
print(prediction)
if prediction[0] == 1:
    print("The person has heart disease")
else:
    print("The person does not have heart disease")

[1]
The person has heart disease


In [11]:
import pickle

filename = 'heart_svm.pkl'

# Persist the fitted classifier. The `with` block closes (and therefore
# flushes) the file before it is read back below.
with open(filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)

# Read the model back from the same file.
# NOTE: pickle.load can execute arbitrary code contained in the file; only
# load pickle files that you created yourself or otherwise trust.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [12]:
from sklearn.metrics import classification_report

# Text summary of precision, recall, F1 and support for the test split.
report = classification_report(y_true=Y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.84      0.75      0.79        28
           1       0.81      0.88      0.84        33

    accuracy                           0.82        61
   macro avg       0.82      0.81      0.82        61
weighted avg       0.82      0.82      0.82        61



In [13]:
from sklearn.metrics import confusion_matrix

# Rows are true classes, columns are predicted classes. labels=[0, 1] pins
# the matrix to 2x2 even if one class is missing from Y_test or y_pred, so
# the four-way unpacking below cannot fail.
cm = confusion_matrix(Y_test, y_pred, labels=[0, 1])
print("Confusion Matrix:\n", cm)

# Flatten the already-computed matrix row by row:
# [[tn, fp], [fn, tp]] -> tn, fp, fn, tp
tn, fp, fn, tp = cm.ravel()


Confusion Matrix:
 [[21  7]
 [ 4 29]]


In [14]:

# Rates derived from the confusion-matrix counts
actual_positives = tp + fn
actual_negatives = tn + fp
tpr = tp / actual_positives  # sensitivity / recall
tnr = tn / actual_negatives  # specificity

# Scores computed from the predicted labels
accuracy = accuracy_score(Y_test, y_pred)
mcc = matthews_corrcoef(Y_test, y_pred)  # Matthews correlation coefficient

# Scores computed from the predicted probabilities
auc = roc_auc_score(Y_test, y_prob)  # area under the ROC curve
brier = brier_score_loss(Y_test, y_prob)

In [15]:

# Print every metric as "<name>: <value>" with four decimal places.
metric_rows = (
    ("TPR (Sensitivity)", tpr),
    ("TNR (Specificity)", tnr),
    ("MCC", mcc),
    ("Brier Score", brier),
    ("Accuracy", accuracy),
    ("AUC-ROC", auc),
)
for metric_name, metric_value in metric_rows:
    print(f"{metric_name}: {metric_value:.4f}")

TPR (Sensitivity): 0.8788
TNR (Specificity): 0.7500
MCC: 0.6371
Brier Score: 0.1358
Accuracy: 0.8197
AUC-ROC: 0.9058
