Importing the Dependencies

In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
import tkinter as tk
from tkinter import messagebox

Data Collection and Analysis

PIMA Diabetes Dataset

In [3]:
heart_dataset = pd.read_csv('heart_failure_clinical_records_dataset.csv')

In [4]:
for column_name in heart_dataset.columns:
    print(column_name)

age
anaemia
creatinine_phosphokinase
diabetes
ejection_fraction
high_blood_pressure
platelets
serum_creatinine
serum_sodium
sex
smoking
time
DEATH_EVENT


In [5]:
heart_dataset.head(20)

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1
5,90.0,1,47,0,40,1,204000.0,2.1,132,1,1,8,1
6,75.0,1,246,0,15,0,127000.0,1.2,137,1,0,10,1
7,60.0,1,315,1,60,0,454000.0,1.1,131,1,1,10,1
8,65.0,0,157,0,65,0,263358.03,1.5,138,0,0,10,1
9,80.0,1,123,0,35,1,388000.0,9.4,133,1,1,10,1


In [6]:
heart_dataset.shape

(299, 13)

In [7]:
heart_dataset.describe()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
count,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0
mean,60.833893,0.431438,581.839465,0.41806,38.083612,0.351171,263358.029264,1.39388,136.625418,0.648829,0.32107,130.26087,0.32107
std,11.894809,0.496107,970.287881,0.494067,11.834841,0.478136,97804.236869,1.03451,4.412477,0.478136,0.46767,77.614208,0.46767
min,40.0,0.0,23.0,0.0,14.0,0.0,25100.0,0.5,113.0,0.0,0.0,4.0,0.0
25%,51.0,0.0,116.5,0.0,30.0,0.0,212500.0,0.9,134.0,0.0,0.0,73.0,0.0
50%,60.0,0.0,250.0,0.0,38.0,0.0,262000.0,1.1,137.0,1.0,0.0,115.0,0.0
75%,70.0,1.0,582.0,1.0,45.0,1.0,303500.0,1.4,140.0,1.0,1.0,203.0,1.0
max,95.0,1.0,7861.0,1.0,80.0,1.0,850000.0,9.4,148.0,1.0,1.0,285.0,1.0


In [8]:
heart_dataset['DEATH_EVENT'].value_counts()

DEATH_EVENT
0    203
1     96
Name: count, dtype: int64

0 --> Heart Failure

1 --> No Heart Failure

In [9]:
heart_dataset.groupby('DEATH_EVENT').mean()

Unnamed: 0_level_0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time
DEATH_EVENT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,58.761906,0.408867,540.054187,0.418719,40.26601,0.325123,266657.489901,1.184877,137.216749,0.650246,0.325123,158.339901
1,65.215281,0.479167,670.197917,0.416667,33.46875,0.40625,256381.044792,1.835833,135.375,0.645833,0.3125,70.885417


In [10]:
# separating the data and labels
X = heart_dataset.drop(columns = 'DEATH_EVENT', axis=1)
Y = heart_dataset['DEATH_EVENT']

In [11]:
print(X)

      age  anaemia  creatinine_phosphokinase  diabetes  ejection_fraction   
0    75.0        0                       582         0                 20  \
1    55.0        0                      7861         0                 38   
2    65.0        0                       146         0                 20   
3    50.0        1                       111         0                 20   
4    65.0        1                       160         1                 20   
..    ...      ...                       ...       ...                ...   
294  62.0        0                        61         1                 38   
295  55.0        0                      1820         0                 38   
296  45.0        0                      2060         1                 60   
297  45.0        0                      2413         0                 38   
298  50.0        0                       196         0                 45   

     high_blood_pressure  platelets  serum_creatinine  serum_sodium  sex   

In [12]:
print(Y)

0      1
1      1
2      1
3      1
4      1
      ..
294    0
295    0
296    0
297    0
298    0
Name: DEATH_EVENT, Length: 299, dtype: int64


Data Standardization

In [13]:
scaler = StandardScaler()

In [14]:
scaled_features = scaler.fit_transform(X)

In [15]:
feature_names = scaler.get_feature_names_out()

In [16]:
X_scaled = pd.DataFrame(scaled_features, columns=feature_names)

In [17]:
print(scaled_features)

[[ 1.19294523e+00 -8.71104775e-01  1.65728387e-04 ...  7.35688190e-01
  -6.87681906e-01 -1.62950241e+00]
 [-4.91279276e-01 -8.71104775e-01  7.51463953e+00 ...  7.35688190e-01
  -6.87681906e-01 -1.60369074e+00]
 [ 3.50832977e-01 -8.71104775e-01 -4.49938761e-01 ...  7.35688190e-01
   1.45416070e+00 -1.59078490e+00]
 ...
 [-1.33339153e+00 -8.71104775e-01  1.52597865e+00 ... -1.35927151e+00
  -6.87681906e-01  1.90669738e+00]
 [-1.33339153e+00 -8.71104775e-01  1.89039811e+00 ...  7.35688190e-01
   1.45416070e+00  1.93250906e+00]
 [-9.12335403e-01 -8.71104775e-01 -3.98321274e-01 ...  7.35688190e-01
   1.45416070e+00  1.99703825e+00]]


In [18]:
X = scaled_features
Y = heart_dataset['DEATH_EVENT']

In [19]:
print(X)
print(Y)

[[ 1.19294523e+00 -8.71104775e-01  1.65728387e-04 ...  7.35688190e-01
  -6.87681906e-01 -1.62950241e+00]
 [-4.91279276e-01 -8.71104775e-01  7.51463953e+00 ...  7.35688190e-01
  -6.87681906e-01 -1.60369074e+00]
 [ 3.50832977e-01 -8.71104775e-01 -4.49938761e-01 ...  7.35688190e-01
   1.45416070e+00 -1.59078490e+00]
 ...
 [-1.33339153e+00 -8.71104775e-01  1.52597865e+00 ... -1.35927151e+00
  -6.87681906e-01  1.90669738e+00]
 [-1.33339153e+00 -8.71104775e-01  1.89039811e+00 ...  7.35688190e-01
   1.45416070e+00  1.93250906e+00]
 [-9.12335403e-01 -8.71104775e-01 -3.98321274e-01 ...  7.35688190e-01
   1.45416070e+00  1.99703825e+00]]
0      1
1      1
2      1
3      1
4      1
      ..
294    0
295    0
296    0
297    0
298    0
Name: DEATH_EVENT, Length: 299, dtype: int64


Train Test Split

In [20]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.2, stratify=Y, random_state=2)

In [21]:
print(X.shape, X_train.shape, X_test.shape)

(299, 12) (239, 12) (60, 12)


Training the Model

In [22]:
classifier = svm.SVC(kernel='linear')

In [23]:
#training the support vector Machine Classifier
classifier.fit(X_train, Y_train)

Model Evaluation

Accuracy Score

In [24]:
# accuracy score on the training data
X_train_prediction = classifier.predict(X_train)
training_data_accuracy = accuracy_score(X_train_prediction, Y_train)

In [25]:
print('Accuracy score of the training data : ', training_data_accuracy)

Accuracy score of the training data :  0.8451882845188284


In [26]:
# accuracy score on the test data
X_test_prediction = classifier.predict(X_test)
test_data_accuracy = accuracy_score(X_test_prediction, Y_test)

In [27]:
print('Accuracy score of the test data : ', test_data_accuracy)

Accuracy score of the test data :  0.8166666666666667


Making a Predictive System

In [28]:
input_data = (49.0,	1,	80,	0,	30,	1,	427000.00,	1.0,	138,	0,	0,	12)

# changing the input_data to numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the array as we are predicting for one instance
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)

# standardize the input data
std_data = scaler.transform(input_data_reshaped)
print(std_data)

prediction = classifier.predict(std_data)
print(prediction)

if (prediction[0] == 1):
  print('HEART FAILURE')
else:
  print('NO HEART FAILURE')

[[-0.99654663  1.14796753 -0.51807384 -0.84757938 -0.68418021  1.35927151
   1.67596326 -0.38137852  0.31204384 -1.35927151 -0.68768191 -1.5262557 ]]
[1]
HEART FAILURE




In [29]:
input_data = (50.0,	1,	168, 0, 38, 1, 276000.00, 1.1, 137, 1, 0, 11)

# changing the input_data to numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the array as we are predicting for one instance
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)

# standardize the input data
std_data = scaler.transform(input_data_reshaped)
print(std_data)

prediction = classifier.predict(std_data)
print(prediction)

if (prediction[0] == 1):
  print('HEART FAILURE')
else:
  print('NO HEART FAILURE')

[[-0.9123354   1.14796753 -0.42722707 -0.84757938 -0.00707675  1.35927151
   0.1294746  -0.28455235  0.08503384  0.73568819 -0.68768191 -1.53916154]]
[0]
NO HEART FAILURE




Creating GUI

In [31]:
window = tk.Tk()
window.title("Heart Failure Prediction System")

# Create labels and input fields
label_age = tk.Label(window, text="Age:")
label_age.pack()
entry_age = tk.Entry(window)
entry_age.pack()

label_anameia = tk.Label(window, text="Anameia:")
label_anameia.pack()
entry_anameia = tk.Entry(window)
entry_anameia.pack()

label_cp = tk.Label(window, text="Creatinine Phosphokinase")
label_cp.pack()
entry_cp = tk.Entry(window)
entry_cp.pack()

label_diabetes = tk.Label(window, text="Diabetes")
label_diabetes.pack()
entry_diabetes = tk.Entry(window)
entry_diabetes.pack()

label_ef = tk.Label(window, text="Ejection Fraction:")
label_ef.pack()
entry_ef = tk.Entry(window)
entry_ef.pack()

label_bp = tk.Label(window, text="Blood Pressure:")
label_bp.pack()
entry_bp = tk.Entry(window)
entry_bp.pack()

label_p = tk.Label(window, text="Platelets:")
label_p.pack()
entry_p = tk.Entry(window)
entry_p.pack()

label_sc = tk.Label(window, text="Serum Cretinine:")
label_sc.pack()
entry_sc = tk.Entry(window)
entry_sc.pack()

label_ss = tk.Label(window, text="Serum Sodium:")
label_ss.pack()
entry_ss = tk.Entry(window)
entry_ss.pack()

label_gen = tk.Label(window, text="Gender:")
label_gen.pack()
entry_gen = tk.Entry(window)
entry_gen.pack()

label_smoking = tk.Label(window, text="Smoking:")
label_smoking.pack()
entry_smoking = tk.Entry(window)
entry_smoking.pack()

label_time = tk.Label(window, text="Time:")
label_time.pack()
entry_time = tk.Entry(window)
entry_time.pack()

def predict_heart_failure():
   
  
    age = float(entry_age.get())
    anaemia = float(entry_anameia.get())
    creatinine_phosphokinase = float(entry_cp.get())
    diabetes = float(entry_diabetes.get())
    ejection_fraction = float(entry_ef.get())
    high_blood_pressure = float(entry_bp.get())
    platelets = float(entry_p.get())
    serum_creatinine = float(entry_sc.get())
    serum_sodium = float(entry_ss.get())
    sex = float(entry_gen.get())
    smoking = float(entry_smoking.get())
    time = float(entry_time.get())
    
    
    input_data = [[age, anaemia, creatinine_phosphokinase, diabetes, ejection_fraction, high_blood_pressure, platelets, serum_creatinine, serum_sodium, sex, smoking, time]]
    
    input_data_as_numpy_array = np.asarray(input_data)

    # reshape the array as we are predicting for one instance
    input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)
   
    std_data = scaler.transform(input_data_reshaped)
    print(std_data)

    prediction = classifier.predict(std_data)
    
  
    if prediction == 1:
        messagebox.showinfo("Prediction Result", "THE PATIENT IS AT RISK OF HEART FAILURE.")
    else:
        messagebox.showinfo("Prediction Result", "THE PATIENT IS NOT AT RISK OF HEART FAILURE.")


button_predict = tk.Button(window, text="Predict", command=predict_heart_failure)
button_predict.pack()

window.mainloop()