In [2]:
import pandas as pd
import numpy as np

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split as TTS
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, classification_report

import time
import joblib

df = pd.read_csv('heart_disease_data.csv')
df

Unnamed: 0,age,sex,chest pain type,resting bp s,cholesterol,fasting blood sugar,resting ecg,max heart rate,exercise angina,oldpeak,ST slope,target
0,40,1,2,140,289,0,0,172,0,0.0,1,0
1,49,0,3,160,180,0,0,156,0,1.0,2,1
2,37,1,2,130,283,0,1,98,0,0.0,1,0
3,48,0,4,138,214,0,0,108,1,1.5,2,1
4,54,1,3,150,195,0,0,122,0,0.0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...
1185,45,1,1,110,264,0,0,132,0,1.2,2,1
1186,68,1,4,144,193,1,0,141,0,3.4,2,1
1187,57,1,4,130,131,0,0,115,1,1.2,2,1
1188,57,0,2,130,236,0,2,174,0,0.0,2,1


In [3]:
X = df.drop(['target'], axis = 1)
y = df.target
X_train, X_test, y_train, y_test = TTS(X, y, test_size = (390/1190), random_state = 0)
#Test Size = (390/1190) in Order to Take Training Size as 800 Rows of Data

df_tset = pd.DataFrame(X_train)
df_tset['target'] = y_train
df_tset.to_csv("TrainingSet_df.csv", index = False)

df_testSet = pd.DataFrame(X_test)
df_testSet['target'] = y_test
df_testSet.to_csv("TestingSet_df.csv", index = False)

kfold = KFold(n_splits = 10, shuffle = True, random_state = 0)

In [4]:
print("SVM - Kernel:Linear w/o KFold\n")
str_time = time.time()
svm = SVC(kernel = 'linear')
svm.fit(X_train, y_train)
etr_time = time.time()
print("Training Time: " + str(etr_time - str_time))

stt_time = time.time()
y_pred = svm.predict(X_test)
print('Accuracy:', svm.score(X_test, y_test))
print(classification_report(y_test, y_pred))
ett_time = time.time()
print("Testing Time: " + str(ett_time - stt_time))

SVM - Kernel:Linear w/o KFold

Training Time: 33.48900485038757
Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       176
           1       1.00      1.00      1.00       214

    accuracy                           1.00       390
   macro avg       1.00      1.00      1.00       390
weighted avg       1.00      1.00      1.00       390

Testing Time: 0.02422475814819336


In [5]:
print("SVM - Kernel:Linear\n")
svm = SVC(kernel = 'linear')
scores = []
for i, (train_index, test_index) in enumerate(kfold.split(X)):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    svm.fit(X_train, y_train)
    y_pred = svm.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    scores.append(score)
    print('Accuracy',str(i+1),'=',score)
    print('Classification Report',str(i+1),':\n',classification_report(y_test, y_pred))

print('Average Accuracy:', sum(scores) / len(scores))
joblib.dump(svm,'SVM_LinearKernelModel.joblib')

SVM - Kernel:Linear

Accuracy 1 = 0.7647058823529411
Classification Report 1 :
               precision    recall  f1-score   support

           0       0.75      0.75      0.75        55
           1       0.78      0.78      0.78        64

    accuracy                           0.76       119
   macro avg       0.76      0.76      0.76       119
weighted avg       0.76      0.76      0.76       119

Accuracy 2 = 0.8403361344537815
Classification Report 2 :
               precision    recall  f1-score   support

           0       0.83      0.81      0.82        54
           1       0.85      0.86      0.85        65

    accuracy                           0.84       119
   macro avg       0.84      0.84      0.84       119
weighted avg       0.84      0.84      0.84       119

Accuracy 3 = 0.8067226890756303
Classification Report 3 :
               precision    recall  f1-score   support

           0       0.76      0.80      0.78        51
           1       0.85      0.81      

['SVM_LinearKernelModel.joblib']

In [6]:
print("SVM - Kernel:Poly w/o KFold\n")
str_time = time.time()
svm = SVC(kernel = 'poly')
svm.fit(X_train, y_train)
etr_time = time.time()
print("Training Time: " + str(etr_time - str_time))

stt_time = time.time()
y_pred = svm.predict(X_test)
print('Accuracy:', svm.score(X_test, y_test))
print(classification_report(y_test, y_pred))
ett_time = time.time()
print("Testing Time: " + str(ett_time - stt_time))

SVM - Kernel:Poly w/o KFold

Training Time: 0.08037543296813965
Accuracy: 0.7394957983193278
              precision    recall  f1-score   support

           0       0.70      0.79      0.74        57
           1       0.78      0.69      0.74        62

    accuracy                           0.74       119
   macro avg       0.74      0.74      0.74       119
weighted avg       0.74      0.74      0.74       119

Testing Time: 0.03653430938720703


In [7]:
print("SVM - Kernel:Poly\n")
svm = SVC(kernel = 'poly')
scores = []
for i, (train_index, test_index) in enumerate(kfold.split(X)):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    svm.fit(X_train, y_train)
    y_pred = svm.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    scores.append(score)
    print('Accuracy',str(i+1),'=',score)
    print('Classification Report',str(i+1),':\n',classification_report(y_test, y_pred))
    
print('Average Accuracy:', sum(scores) / len(scores))
joblib.dump(svm,'SVM_PolyKernelModel.joblib')

SVM - Kernel:Poly

Accuracy 1 = 0.6974789915966386
Classification Report 1 :
               precision    recall  f1-score   support

           0       0.69      0.64      0.66        55
           1       0.71      0.75      0.73        64

    accuracy                           0.70       119
   macro avg       0.70      0.69      0.69       119
weighted avg       0.70      0.70      0.70       119

Accuracy 2 = 0.7310924369747899
Classification Report 2 :
               precision    recall  f1-score   support

           0       0.66      0.83      0.74        54
           1       0.82      0.65      0.72        65

    accuracy                           0.73       119
   macro avg       0.74      0.74      0.73       119
weighted avg       0.75      0.73      0.73       119

Accuracy 3 = 0.6890756302521008
Classification Report 3 :
               precision    recall  f1-score   support

           0       0.66      0.57      0.61        51
           1       0.71      0.78      0.

['SVM_PolyKernelModel.joblib']

In [8]:
print("SVM - Kernel:RBF w/o KFold\n")
str_time = time.time()
svm = SVC(kernel = 'rbf')
svm.fit(X_train, y_train)
etr_time = time.time()
print("Training Time: " + str(etr_time - str_time))

stt_time = time.time()
y_pred = svm.predict(X_test)
print('Accuracy:', svm.score(X_test, y_test))
print(classification_report(y_test, y_pred))
ett_time = time.time()
print("Testing Time: " + str(ett_time - stt_time))

SVM - Kernel:RBF w/o KFold

Training Time: 0.10107755661010742
Accuracy: 0.7142857142857143
              precision    recall  f1-score   support

           0       0.69      0.74      0.71        57
           1       0.74      0.69      0.72        62

    accuracy                           0.71       119
   macro avg       0.71      0.72      0.71       119
weighted avg       0.72      0.71      0.71       119

Testing Time: 0.07065224647521973


In [9]:
print("SVM - Kernel:RBF\n")
svm = SVC(kernel = 'rbf')
scores = []
for i, (train_index, test_index) in enumerate(kfold.split(X)):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    svm.fit(X_train, y_train)
    y_pred = svm.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    scores.append(score)
    print('Accuracy',str(i+1),'=',score)
    print('Classification Report',str(i+1),':\n',classification_report(y_test, y_pred))
    
print('Average Accuracy:', sum(scores) / len(scores))
joblib.dump(svm,'SVM_RBFKernelModel.joblib')

SVM - Kernel:RBF

Accuracy 1 = 0.680672268907563
Classification Report 1 :
               precision    recall  f1-score   support

           0       0.66      0.64      0.65        55
           1       0.70      0.72      0.71        64

    accuracy                           0.68       119
   macro avg       0.68      0.68      0.68       119
weighted avg       0.68      0.68      0.68       119

Accuracy 2 = 0.7058823529411765
Classification Report 2 :
               precision    recall  f1-score   support

           0       0.63      0.83      0.72        54
           1       0.81      0.60      0.69        65

    accuracy                           0.71       119
   macro avg       0.72      0.72      0.71       119
weighted avg       0.73      0.71      0.70       119

Accuracy 3 = 0.6890756302521008
Classification Report 3 :
               precision    recall  f1-score   support

           0       0.63      0.65      0.64        51
           1       0.73      0.72      0.73

['SVM_RBFKernelModel.joblib']

In [10]:
print("SVM - Gamma = 100, C = 5 w/o KFold\n")
str_time = time.time()
svm = SVC(gamma = 100, C = 5)
svm.fit(X_train, y_train)
etr_time = time.time()
print("Training Time: " + str(etr_time - str_time))

stt_time = time.time()
y_pred = svm.predict(X_test)
print('Accuracy:', svm.score(X_test, y_test))
print(classification_report(y_test, y_pred))
ett_time = time.time()
print("Testing Time: " + str(ett_time - stt_time))

SVM - Gamma = 100, C = 5 w/o KFold

Training Time: 0.20116329193115234
Accuracy: 0.7815126050420168
              precision    recall  f1-score   support

           0       1.00      0.54      0.70        57
           1       0.70      1.00      0.83        62

    accuracy                           0.78       119
   macro avg       0.85      0.77      0.77       119
weighted avg       0.85      0.78      0.77       119

Testing Time: 0.10112166404724121


In [11]:
print("SVM - Gamma = 100, C = 5\n")
svm = SVC(gamma = 100, C = 5)
scores = []
for i, (train_index, test_index) in enumerate(kfold.split(X)):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    svm.fit(X_train, y_train)
    y_pred = svm.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    scores.append(score)
    print('Accuracy',str(i+1),'=',score)
    print('Classification Report',str(i+1),':\n',classification_report(y_test, y_pred))
    
print('Average Accuracy:', sum(scores) / len(scores))
joblib.dump(svm,'SVM_100G_5C_Model.joblib')

SVM - Gamma = 100, C = 5

Accuracy 1 = 0.7815126050420168
Classification Report 1 :
               precision    recall  f1-score   support

           0       1.00      0.53      0.69        55
           1       0.71      1.00      0.83        64

    accuracy                           0.78       119
   macro avg       0.86      0.76      0.76       119
weighted avg       0.84      0.78      0.77       119

Accuracy 2 = 0.7815126050420168
Classification Report 2 :
               precision    recall  f1-score   support

           0       1.00      0.52      0.68        54
           1       0.71      1.00      0.83        65

    accuracy                           0.78       119
   macro avg       0.86      0.76      0.76       119
weighted avg       0.84      0.78      0.77       119

Accuracy 3 = 0.7647058823529411
Classification Report 3 :
               precision    recall  f1-score   support

           0       1.00      0.45      0.62        51
           1       0.71      1.00 

['SVM_100G_5C_Model.joblib']

In [12]:
t_model = joblib.load('SVM_LinearKernelModel.joblib')
def predictHeartDisease():
    print("Enter the Following Details:")
    age = int(input("Age: "))
    if (age <= 0 or age >= 100):
        print("INVALID INPUT!")
        return
    g = input("Sex (M/F): ")
    if (g == 'M' or g == 'm'):
        sex = 1
    elif (g == 'F' or g == 'f'):
        sex = 0
    else:
        print("INVALID INPUT!")
        return
    print("Chest Pain Type Codes:")
    print("1.Typical Angina\n2.Atypical Angina\n3.Non-Anginal Pain\n4.Asymptomatic")
    cpt = int(input("Chest Pain Type (1/2/3/4): "))
    if (cpt != 1 and cpt != 2 and cpt != 3 and cpt != 3 and cpt != 4):
        print("INVALID INPUT!")
        return
    rbps = int(input("Resting Blood Pressure (in mm Hg): "))
    if (rbps <= 50 or rbps >= 250):
        print("INVALID INPUT!")
        return
    chol = int(input("Serum Cholesterol (in mg/dl): "))
    if (chol < 0 or chol >= 2000):
        print("INVALID INPUT!")
        return
    bsugar = int(input("Fasting Blood Sugar (in mg/dl): "))
    if (bsugar < 0):
        print("INVALID INPUT!")
        return
    elif (bsugar <= 120):
        bsl = 0
    else:
        bsl = 1
    print("Resting Electrocardiogram Result Codes:")
    print("0.Normal\n1.ST-T Wave Abnormality\n2.Left Ventricular")
    rer = int(input("Resting Electrocardiogram Result (0/1/2): "))
    if (rer != 0 and rer != 1 and rer != 2):
        print("INVALID INPUT!")
        return
    maxhr = int(input("Maximum Heart Rate (71-202): "))
    if (maxhr > 205 or maxhr < 65):
        print("INVALID INPUT!")
        return
    g = input("Exercise Induced Angina (Y/N): ")
    if (g == 'Y' or g == 'y'):
        exeg = 1
    elif (g == 'N' or g == 'n'):
        exeg = 0
    else:
        print("INVALID INPUT!")
        return
    op = float(input("Oldpeak - ST: "))
    if (op > 10 or op < -10):
        print("INVALID INPUT!")
        return
    print("Slope of Peak Exercise ST Segment:")
    print("1.Upsloping\n2.Flat\n3.Downsloping")
    slst = int(input("Slope of Peak Exercise ST Segment (1/2/3): "))
    if (slst != 1 and slst != 2 and slst != 3):
        print("INVALID INPUT!")
        return
    test_data = {'1':[age], '2':[sex], '3':[cpt], '4':[rbps], '5':[chol], '6':[bsl], '7':[rer], '8':[maxhr], '9':[exeg], '10':[op], '11':[slst]}
    test = pd.DataFrame(test_data)
    test.columns = X.columns
    classRes = t_model.predict(test)
    if (classRes == 1):
        print("\nPatient has Heart Disease!")
        return
    elif (classRes == 0):
        print("\nPatient is Normal")
        return
    else:
        print("\nSome Anomaly Occured While Processing...")
        return

In [13]:
predictHeartDisease()

Enter the Following Details:
Age: 0
INVALID INPUT!


In [14]:
classification_report(y_test, y_pred)

'              precision    recall  f1-score   support\n\n           0       1.00      0.54      0.70        57\n           1       0.70      1.00      0.83        62\n\n    accuracy                           0.78       119\n   macro avg       0.85      0.77      0.77       119\nweighted avg       0.85      0.78      0.77       119\n'

In [15]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.54      0.70        57
           1       0.70      1.00      0.83        62

    accuracy                           0.78       119
   macro avg       0.85      0.77      0.77       119
weighted avg       0.85      0.78      0.77       119

