In [None]:
#Step 1: Importing the Libraries
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns # for statistical data visualization
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score,confusion_matrix
from sklearn import preprocessing

In [None]:
# Step 2: Read and load the dataset
# header=None makes pandas treat the first line of the file as data and
# assign integer column labels; descriptive names are attached afterwards.
df = pd.read_csv(
    '/kaggle/input/modified2/New_heart.csv',
    header=None,
)
# Quick sanity check: (number of rows, number of columns).
df.shape

(303, 14)

In [None]:
# Descriptive names for the 14 columns, listed in file order.
col_names = [
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
    'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target',
]
# Replace the integer labels assigned at load time.
df.columns = col_names

In [None]:
# Attribute Information
# 1) age
# 2) sex
# 3) cp = chest pain type (4 values)
# 4) trestbps = resting blood pressure
# 5) chol = serum cholesterol in mg/dl
# 6) fbs = fasting blood sugar > 120 mg/dl
# 7) restecg = resting electrocardiographic results (values 0,1,2)
# 8) thalach = maximum heart rate achieved
# 9) exang = exercise induced angina
# 10) oldpeak = ST depression induced by exercise relative to rest
# 11) slope = the slope of the peak exercise ST segment
# 12) ca = number of major vessels (0-3) colored by fluoroscopy
# 13) thal: 0 = normal; 1 = fixed defect; 2 = reversible defect (NOTE: the sample rows also show thal = 3 -- verify this encoding)
# 14) target: 0 = less chance of heart attack; 1 = more chance of heart attack

In [None]:
# Preview the first ten rows of the labelled frame.
df.head(n=10)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1
5,57,1,0,140,192,0,1,148,0,0.4,1,0,1,1
6,56,0,1,140,294,0,0,153,0,1.3,1,0,2,1
7,44,1,1,120,263,0,1,173,0,0.0,2,0,3,1
8,52,1,2,172,199,1,1,162,0,0.5,2,0,3,1
9,57,1,2,150,168,0,1,174,0,1.6,2,0,2,1


In [None]:
# info() prints dtypes and non-null counts directly to stdout, so it can run
# anywhere in the cell.
df.info()
# describe() only *returns* the summary-statistics frame; a notebook renders
# just the value of the last expression in a cell, so it must come last or
# its result is silently discarded.
df.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trestbps  303 non-null    int64  
 4   chol      303 non-null    int64  
 5   fbs       303 non-null    int64  
 6   restecg   303 non-null    int64  
 7   thalach   303 non-null    int64  
 8   exang     303 non-null    int64  
 9   oldpeak   303 non-null    float64
 10  slope     303 non-null    int64  
 11  ca        303 non-null    int64  
 12  thal      303 non-null    int64  
 13  target    303 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 33.3 KB


In [None]:
# Label vector: the 'target' column.
Y = df['target']
# Feature matrix: every column except 'target', standardized column-wise by
# preprocessing.scale.
# NOTE(review): the scaling statistics are computed over all rows, including
# the rows that train_test_split later assigns to the test set -- consider
# fitting the scaler on the training split only.
X = preprocessing.scale(df.drop(columns=['target']))

In [None]:
# Step 3: Split the data into training and test sets
# 30% of the rows are held out for testing; random_state pins the shuffle so
# the split is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=0,
)

In [None]:
# Shapes of the training and test feature matrices.
(X_train.shape, X_test.shape)

((212, 13), (91, 13))

In [None]:
# Build a Gaussian Naive Bayes classifier and fit it on the training split
# (fit returns the estimator itself, so creation and fitting can be chained).
gnb = GaussianNB().fit(X_train, Y_train)

# Step 4: Prediction
# Predicted labels for the training rows and for the held-out test rows.
Y_pred_train = gnb.predict(X_train)
Y_pred = gnb.predict(X_test)

In [None]:
# Accuracy on each split, expressed as a percentage.
# Train_Accuracy is computed for inspection but not printed; only the
# test-set accuracy is reported.
Train_Accuracy = 100 * accuracy_score(Y_train, Y_pred_train)
accuracy = 100 * accuracy_score(Y_test, Y_pred)

print('Accuracy of our model is equal  ' + str(round(accuracy, 2)) + ' %')

Accuracy of our model is equal  80.22 %


In [None]:
# Confusion matrix for the test set: rows are the actual classes,
# columns are the predicted classes.
cm = confusion_matrix(y_true=Y_test, y_pred=Y_pred)
print(cm)

[[32 12]
 [ 6 41]]


In [None]:
# Number of test rows per class, most frequent class first.
Y_test.value_counts(sort=True, ascending=False)

1    47
0    44
Name: target, dtype: int64

In [None]:
# Show every test-set prediction next to its true label.
# - Reuses Y_pred (the predictions already computed for the whole test set)
#   instead of calling gnb.predict once per row; the printed values are the
#   same.
# - Uses dedicated loop names: looping with `X` and `Y` would overwrite the
#   notebook-level feature matrix and label series, so re-running the
#   train/test split cell afterwards would operate on a single row/label.
for predicted, actual in zip(Y_pred, Y_test):
    print(f"Predicted Target:{predicted}, Actual Target: {actual}")

Predicted Target:0, Actual Target: 0
Predicted Target:1, Actual Target: 1
Predicted Target:1, Actual Target: 0
Predicted Target:0, Actual Target: 0
Predicted Target:0, Actual Target: 1
Predicted Target:1, Actual Target: 0
Predicted Target:0, Actual Target: 0
Predicted Target:0, Actual Target: 0
Predicted Target:0, Actual Target: 0
Predicted Target:0, Actual Target: 0
Predicted Target:1, Actual Target: 1
Predicted Target:1, Actual Target: 1
Predicted Target:0, Actual Target: 0
Predicted Target:1, Actual Target: 1
Predicted Target:1, Actual Target: 1
Predicted Target:1, Actual Target: 1
Predicted Target:0, Actual Target: 1
Predicted Target:1, Actual Target: 1
Predicted Target:0, Actual Target: 0
Predicted Target:1, Actual Target: 1
Predicted Target:1, Actual Target: 1
Predicted Target:1, Actual Target: 0
Predicted Target:0, Actual Target: 0
Predicted Target:0, Actual Target: 0
Predicted Target:1, Actual Target: 1
Predicted Target:0, Actual Target: 0
Predicted Target:0, Actual Target: 0
P