In [1]:
# Importing Libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

In [2]:
# Load the dataset from the CSV file in the working directory
data_set = pd.read_csv('PimaIndiansDiabetes.csv')

# Last column is the target (y); every column before it is a feature (X)
X, y = data_set.iloc[:, :-1], data_set.iloc[:, -1]

In [3]:
# Missing-value check: one boolean per feature column, then a single boolean for the target.
# Both results are emitted by one print call, separated by a newline.
print(X.isnull().any(), y.isnull().any(), sep='\n')

TimesPregnant           False
GlucoseConcentration    False
BloodPrs                False
SkinThickness           False
Serum                   False
BMI                     False
DiabetesFunct           False
Age                     False
dtype: bool
False


In [4]:
# Summary of the feature frame: index range, per-column non-null counts, dtypes, memory usage.
# DataFrame.info() writes the report to stdout itself and returns None, so it must not be
# wrapped in print() -- doing so appends a stray "None" line after the report.
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 8 columns):
TimesPregnant           768 non-null int64
GlucoseConcentration    768 non-null int64
BloodPrs                768 non-null int64
SkinThickness           768 non-null int64
Serum                   768 non-null int64
BMI                     768 non-null float64
DiabetesFunct           768 non-null float64
Age                     768 non-null int64
dtypes: float64(2), int64(6)
memory usage: 48.1 KB
None


In [5]:
# 1. Hold out 20% of the rows as the test set; the remaining 80% is used for training.
#    The fixed random_state makes the shuffle (and therefore the split) repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=0,
)

In [6]:
# Machine: Classifier | NB: Gaussian Naive Bayes
# (`metrics` is imported in the notebook's import cell at the top, together with the
#  other scikit-learn names, so this cell carries no import of its own.)
classifier = GaussianNB()
classifier.fit(X_train, y_train)   # learn from the training split only

# Predictions on the held-out test split
y_pred = classifier.predict(X_test)

# Accuracy = fraction of test rows whose predicted label equals the true label
score = metrics.accuracy_score(y_test, y_pred)
print("Accuracy of our model is: {:.1f}%".format(score*100))

Accuracy of our model is: 79.2%


In [7]:
# Validate the predictions with a confusion matrix.
# scikit-learn convention: rows are the true classes, columns are the predicted classes.
cm = confusion_matrix(y_test, y_pred)
print(cm)

# Per-class precision, recall, F1 and support, returned as a 4-tuple of arrays
# (one entry per class, in label order).
prf = precision_recall_fscore_support(y_test, y_pred)
print('\t\t\t\t ZERO\t\t\tONE')

# The first three entries are fractions, shown as percentages; support is a raw row count.
for row_label, per_class in zip(('Precision\t:', 'Recall\t\t:', 'F1 Measure\t:'), prf[:3]):
    print(row_label, per_class*100)
print('Support\t\t:', prf[3])


[[93 14]
 [18 29]]
				 ZERO			ONE
Precision	: [83.78378378 67.44186047]
Recall		: [86.91588785 61.70212766]
F1 Measure	: [85.32110092 64.44444444]
Support		: [107  47]
