In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

In [2]:
# Read the dataset into a DataFrame. The path is relative, so it is resolved
# against the directory the notebook kernel is running in.
DATA_PATH = 'heart.csv'
df = pd.read_csv(DATA_PATH)

In [3]:
# Preview the first five rows as a quick sanity check of the loaded columns.
df.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


# Naive Bayes
## GaussianNB model

In [4]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(df.drop(columns='target'),
                                                    df['target'],
                                                    test_size = 0.2,
                                                    random_state = 42)

# Fit Gaussian Naive Bayes on the training rows and predict the held-out rows.
model = GaussianNB()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Headline metrics, computed with each scorer's default arguments.
print(f'Accuracy Score : {accuracy_score(y_test, y_pred)}')
print(f'Precision Score : {precision_score(y_test, y_pred)}')
print(f'Recall Score : {recall_score(y_test, y_pred)}')
print(f'F1 Score : {f1_score(y_test, y_pred)}')
print(f'Classification Report : \n{classification_report(y_test, y_pred)}')

# Pin the class order from the full target column and pass it as `labels=`.
# Without it, confusion_matrix sizes itself from whatever labels appear in
# y_test / y_pred, so a split or prediction set containing a single class
# would yield a matrix that no longer matches hard-coded row/column names.
class_labels = sorted(df['target'].unique())
cm = confusion_matrix(y_test, y_pred, labels = class_labels)
cm_df = pd.DataFrame(cm,
                     index = [f'Actual {label}' for label in class_labels],
                     columns = [f'Predicted {label}' for label in class_labels])
print(cm_df)

Accuracy Score : 0.8688524590163934
Precision Score : 0.9
Recall Score : 0.84375
F1 Score : 0.8709677419354839
Classification Report : 
              precision    recall  f1-score   support

           0       0.84      0.90      0.87        29
           1       0.90      0.84      0.87        32

    accuracy                           0.87        61
   macro avg       0.87      0.87      0.87        61
weighted avg       0.87      0.87      0.87        61

          Predicted 0  Predicted 1
Actual 0           26            3
Actual 1            5           27


## Model using Pipeline

In [5]:
# Re-create the same 80/20 split (same seed) so this cell can run on its own.
X_train, X_test, y_train, y_test = train_test_split(df.drop(columns = 'target'),
                                                    df['target'],
                                                    test_size = 0.2,
                                                    random_state = 42)

# Wrap the classifier in a Pipeline.
# NOTE(review): the pipeline currently has a single step, so it fits the same
# model as a bare GaussianNB. StandardScaler is imported in this notebook but
# not used in this cell -- confirm whether a scaling step was intended here.
pipeline = Pipeline(steps=[
    ('gnb', GaussianNB())
])

pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

# Headline metrics, computed with each scorer's default arguments.
print(f'Accuracy Score : {accuracy_score(y_test, y_pred)}')
print(f'Precision Score : {precision_score(y_test, y_pred)}')
print(f'Recall Score : {recall_score(y_test, y_pred)}')
print(f'F1 Score : {f1_score(y_test, y_pred)}')
print(f'Classification Report : \n{classification_report(y_test, y_pred)}')

# Pin the class order from the full target column and pass it as `labels=`.
# Without it, confusion_matrix sizes itself from whatever labels appear in
# y_test / y_pred, so a split or prediction set containing a single class
# would yield a matrix that no longer matches hard-coded row/column names.
class_labels = sorted(df['target'].unique())
cm = confusion_matrix(y_test, y_pred, labels = class_labels)
cm_df = pd.DataFrame(cm,
                     index = [f'Actual {label}' for label in class_labels],
                     columns = [f'Predicted {label}' for label in class_labels])
print(cm_df)

Accuracy Score : 0.8688524590163934
Precision Score : 0.9
Recall Score : 0.84375
F1 Score : 0.8709677419354839
Classification Report : 
              precision    recall  f1-score   support

           0       0.84      0.90      0.87        29
           1       0.90      0.84      0.87        32

    accuracy                           0.87        61
   macro avg       0.87      0.87      0.87        61
weighted avg       0.87      0.87      0.87        61

          Predicted 0  Predicted 1
Actual 0           26            3
Actual 1            5           27
