# Naive Bayes Classifier in Python using scikit-learn

In [1]:
# load libraries
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn import metrics
import pandas as pd

In [3]:
# Column names assigned to the CSV. Because `names=` is passed without
# `header=`, pandas treats the first line of the file as data, not as a header.
# 'Exang' previously carried a stray leading space (' Exang'), which would make
# df['Exang'] raise KeyError.
COLUMN_NAMES = [
    'Age', 'Sex', 'Chest Pain Location', 'Blood Pressure', 'Cholestoral',
    'Fasting Blood Sugar', 'ResTecg', 'Thalach', 'Exang', 'OldPeak', 'Slope',
    'CA', 'Thal', 'Diagnosis Heart Disease',
]

# NOTE(review): the preview shows many -9 values, which look like a
# missing-value sentinel. They are currently fed to the model as real
# numbers -- confirm against the dataset documentation and consider imputing.
df = pd.read_csv('DATASET.csv', names=COLUMN_NAMES)
df.head()

Unnamed: 0,Age,Sex,Chest Pain Location,Blood Pressure,Cholestoral,Fasting Blood Sugar,ResTecg,Thalach,Exang,OldPeak,Slope,CA,Thal,Diagnosis Heart Disease
0,40,1,2,140,289,0,0,172,0,0.0,-9,-9,-9,0
1,49,0,3,160,180,0,0,156,0,1.0,2,-9,-9,1
2,37,1,2,130,283,0,1,98,0,0.0,-9,-9,-9,0
3,48,0,4,138,214,0,0,108,1,1.5,2,-9,-9,3
4,54,1,3,150,-9,0,0,122,0,0.0,-9,-9,-9,0


In [4]:

# Convert the DataFrame into a plain NumPy array (rows = records,
# columns in the same order as the DataFrame's columns).
sample_data = df.to_numpy()
print(sample_data)

[[40.  1.  2. ... -9. -9.  0.]
 [49.  0.  3. ... -9. -9.  1.]
 [37.  1.  2. ... -9. -9.  0.]
 ...
 [48.  1.  3. ... -9.  6.  0.]
 [47.  0.  2. ... -9. -9.  0.]
 [53.  1.  4. ... -9. -9.  0.]]


In [5]:
# Feature matrix: every column except the last one, cast to float.
features = sample_data[:, :-1].astype(float)
print(features)

[[40.  1.  2. ... -9. -9. -9.]
 [49.  0.  3. ...  2. -9. -9.]
 [37.  1.  2. ... -9. -9. -9.]
 ...
 [48.  1.  3. ... -9. -9.  6.]
 [47.  0.  2. ...  1. -9. -9.]
 [53.  1.  4. ... -9. -9. -9.]]


In [6]:
# Target/Dependent Variable - the last column of the array
# ('Diagnosis Heart Disease' in the column list passed to read_csv).
# The printed values range over 0-4, so this is a multi-class target.
target = sample_data[:, -1]
print(target)

[0. 1. 0. 3. 0. 0. 0. 0. 1. 0. 0. 3. 0. 3. 0. 0. 1. 0. 1. 1. 0. 0. 0. 3.
 0. 0. 0. 0. 0. 0. 3. 0. 1. 3. 0. 0. 1. 0. 0. 0. 0. 4. 0. 0. 3. 0. 0. 0.
 0. 1. 2. 4. 0. 0. 0. 0. 1. 1. 0. 3. 0. 0. 0. 1. 0. 0. 0. 0. 3. 0. 1. 0.
 2. 0. 2. 0. 2. 0. 0. 1. 0. 0. 1. 0. 2. 3. 3. 0. 2. 0. 0. 0. 0. 2. 0. 2.
 0. 0. 0. 0. 4. 0. 2. 1. 1. 0. 0. 0. 0. 0. 0. 2. 0. 0. 0. 1. 1. 1. 0. 1.
 3. 0. 0. 4. 0. 0. 0. 0. 0. 0. 0. 1. 3. 3. 0. 1. 0. 0. 2. 1. 4. 4. 4. 0.
 3. 0. 0. 0. 0. 2. 0. 0. 0. 0. 0. 4. 2. 0. 3. 0. 3. 4. 0. 0. 0. 0. 3. 4.
 0. 0. 0. 0. 0. 0. 0. 4. 4. 3. 0. 0. 0. 1. 0. 3. 0. 0. 2. 0. 1. 0. 3. 0.
 0. 0. 0. 0. 0. 0. 0. 2. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 1. 0. 0. 1.
 0. 2. 0. 0. 0. 2. 1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 4. 3. 2.
 2. 0. 2. 3. 0. 1. 0. 2. 2. 2. 3. 4. 1. 0. 0. 3. 0. 0. 0. 0. 0. 0. 0. 2.
 3. 1. 0. 3. 0. 2. 0. 0. 0. 1. 0. 0. 0. 3. 2. 0. 0. 0. 4. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0.]


In [8]:
# Split dataset into training set and test set.
# TEST_SIZE = 0.2 holds out 20% of the rows for testing (80% training);
# RANDOM_STATE fixes the shuffle so the split is reproducible across runs.
TEST_SIZE = 0.2
RANDOM_STATE = 110
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

In [10]:
# Fit a Gaussian Naive Bayes classifier on the training split.
model = GaussianNB()
model.fit(X_train, y_train)
# fit() returns the estimator itself, so fittedModel is the same object as model.
fittedModel = model

In [11]:
# Predict the diagnosis class for each row of the held-out test set.
y_pred = fittedModel.predict(X_test)
print(y_pred)

[3. 3. 2. 2. 0. 0. 2. 0. 1. 2. 3. 0. 2. 0. 2. 3. 3. 3. 0. 0. 3. 2. 3. 0.
 3. 4. 3. 4. 0. 2. 4. 1. 3. 0. 0. 4. 0. 2. 0. 0. 0. 0. 3. 0. 4. 0. 0. 4.
 2. 2. 1. 2. 2. 2. 2. 0. 4. 0. 2.]


In [12]:
# Share of test rows whose predicted class equals the true class, as a rounded percentage.
accuracy_pct = round(metrics.accuracy_score(y_test, y_pred) * 100)
print("Accuracy:", accuracy_pct, '%')

Accuracy: 42.0 %
