In [28]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE

Load the dataset

In [29]:
data = pd.read_csv('heart_disease_multiclass.csv')
data.sample(5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
211,65,1,2,198,342,0,1,201,0,0.4,1,2,0,0
467,65,0,0,187,244,0,1,188,1,2.3,1,3,1,3
419,65,1,2,95,428,1,1,175,0,2.5,0,3,0,0
322,74,0,1,93,162,1,1,135,1,5.4,1,3,1,0
253,46,0,2,177,247,1,1,127,0,5.0,2,0,1,0


Check how many samples for each target class

In [30]:
data['target'].value_counts()

target
0    229
1    119
2     66
4     45
3     41
Name: count, dtype: int64

In [31]:
data['target'] = data['target'].map({0: 'No Heart Disease',
                                     1: 'Mild Heart Disease',
                                     2: 'Moderate Heart Disease',
                                     3: 'Severe Heart Disease',
                                     4: 'Very Severe Heart Disease'
})

Separate features and labels

In [32]:
X = data.drop(columns=['target'])
X.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,67,1,0,172,387,0,1,130,0,1.2,0,1,0
1,57,1,1,151,341,1,0,119,0,5.5,0,0,1
2,43,1,0,121,272,1,1,133,1,1.8,0,2,0
3,71,1,0,119,461,1,0,180,1,2.3,1,2,1
4,36,0,0,118,293,1,1,179,0,0.4,0,2,1


In [33]:
Y = data['target']
Y.head()

0          No Heart Disease
1        Mild Heart Disease
2      Severe Heart Disease
3          No Heart Disease
4    Moderate Heart Disease
Name: target, dtype: object

Split datasets into training and testing

In [34]:
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42, stratify=Y
)
x_train.shape, y_train.shape

((400, 13), (400,))

Handle class imbalance using SMOTE

In [35]:
smote = SMOTE(random_state=42)
x_train, y_train = smote.fit_resample(X, Y)



Start Training

In [36]:
model = KNeighborsClassifier(n_neighbors=3)
model.fit(x_train, y_train)

In [37]:
predictions = model.predict(x_test)
print("KNN (k=3) Classification Report")
print(classification_report(y_test, predictions))

KNN (k=3) Classification Report
                           precision    recall  f1-score   support

       Mild Heart Disease       0.70      0.88      0.78        24
   Moderate Heart Disease       0.61      0.85      0.71        13
         No Heart Disease       0.94      0.67      0.78        46
     Severe Heart Disease       0.78      0.88      0.82         8
Very Severe Heart Disease       0.70      0.78      0.74         9

                 accuracy                           0.77       100
                macro avg       0.75      0.81      0.77       100
             weighted avg       0.80      0.77      0.77       100



In [38]:
confusion_matrix = confusion_matrix(y_test, predictions)
print(confusion_matrix)

[[21  1  1  1  0]
 [ 0 11  1  0  1]
 [ 7  5 31  1  2]
 [ 0  1  0  7  0]
 [ 2  0  0  0  7]]


Save the better model

In [39]:
joblib.dump(model, 'heart_model.pkl')

['heart_model.pkl']