# Naive Bayes classification 
    Naive Bayes is a classification method based on Bayes' theorem, which is named after its inventor, the statistician Thomas Bayes. It is a fast, accurate, robust, easy-to-understand, and interpretable technique, and it remains fast on large datasets. Naive Bayes is effectively deployed in text mining applications such as document classification, predicting the sentiment of customer reviews, and spam filtering.

In [1]:
# Naive Bayes classification: start by loading the diabetes dataset.

import pandas as pd

# Read the CSV (path is relative to the working directory) into a DataFrame.
# NOTE(review): the preview below shows an "Unnamed: 0" column, which suggests
# the file was written with its index; index_col=0 would drop it -- confirm.
diabetes = pd.read_csv(filepath_or_buffer="datasets/diabetes.csv")
# Preview the first five rows.
diabetes.head(n=5)

Unnamed: 0,pregnant,glucose,bp,skin,insulin,bmi,pedigree,age,label
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
# Separate the predictor columns from the target column.
# NOTE(review): the "skin" column shown in the preview is not among the
# predictors -- confirm this is deliberate.
features_set = [
    "pregnant",
    "glucose",
    "bp",
    "insulin",
    "bmi",
    "age",
    "pedigree",
]
features = diabetes[features_set]
# The "label" column is the value the classifier learns to predict.
target = diabetes["label"]

In [5]:
# Partition into training and test sets.
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
features_train, features_test, target_train, target_test = train_test_split(
    features,
    target,
    test_size=0.3,
    random_state=1,
)

In [7]:
# Import the Gaussian Naive Bayes model.
from sklearn.naive_bayes import GaussianNB

# Build the classifier and train it in one step; fit() returns the estimator
# itself, so `model` is the fitted GaussianNB instance.
model = GaussianNB().fit(features_train, target_train)
# Predict the target variable for the held-out test features.
predictions = model.predict(features_test)

In [8]:
# Import the metric functions used for performance evaluation.
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Each entry pairs the printed label with the function that scores the
# predictions against the true test labels.
# NOTE(review): the F1 label has no trailing colon, unlike the other three.
metric_table = (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1-score", f1_score),
)
for metric_label, metric_fn in metric_table:
    print(metric_label, metric_fn(target_test, predictions))

Accuracy: 0.7748917748917749
Precision: 0.7391304347826086
Recall: 0.6
F1-score 0.6623376623376623


# Decision Tree classification


In [12]:
import pandas as pd

# Load the diabetes CSV (path is relative to the working directory) for the
# decision tree experiment.
diabetes = pd.read_csv(filepath_or_buffer="datasets/diabetes.csv")
# Preview the first five rows.
diabetes.head(n=5)

Unnamed: 0,pregnant,glucose,bp,skin,insulin,bmi,pedigree,age,label
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [13]:
# Separate the predictor columns from the target column.
features_set = [
    "pregnant",
    "glucose",
    "bp",
    "insulin",
    "bmi",
    "age",
    "pedigree",
]
features = diabetes[features_set]
# The "label" column is the value the classifier learns to predict.
target = diabetes["label"]

In [14]:
# Partition into training and test sets.
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
features_train, features_test, target_train, target_test = train_test_split(
    features,
    target,
    test_size=0.3,
    random_state=1,
)

In [15]:
# Import the decision tree classifier.
from sklearn.tree import DecisionTreeClassifier

# Seed the tree so repeated runs build the same model: scikit-learn randomly
# permutes the features at each split, so an unseeded tree (and its metrics)
# can differ between runs even on identical training data. The seed matches
# the one used for the train/test split.
# NOTE: metrics recorded from an earlier unseeded run may differ slightly
# from a seeded re-run.
clf = DecisionTreeClassifier(random_state=1)
# fit() returns the fitted estimator, so rebinding clf keeps the same object.
clf = clf.fit(features_train, target_train)
# Predict labels for the held-out test rows.
predictions = clf.predict(features_test)


In [16]:
# Import the metric functions used for performance evaluation.
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Each entry pairs the printed label with the function that scores the
# predictions against the true test labels.
# NOTE(review): the F1 label has no trailing colon, unlike the other three.
metric_table = (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1-score", f1_score),
)
for metric_label, metric_fn in metric_table:
    print(metric_label, metric_fn(target_test, predictions))

Accuracy: 0.7056277056277056
Precision: 0.6075949367088608
Recall: 0.5647058823529412
F1-score 0.5853658536585367


# KNN Classification

In [17]:
import pandas as pd

# Load the diabetes CSV (path is relative to the working directory) for the
# k-nearest-neighbours experiment.
diabetes = pd.read_csv(filepath_or_buffer="datasets/diabetes.csv")
# Preview the first five rows.
diabetes.head(n=5)

Unnamed: 0,pregnant,glucose,bp,skin,insulin,bmi,pedigree,age,label
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [18]:
# Separate the predictor columns from the target column.
features_set = [
    "pregnant",
    "glucose",
    "bp",
    "insulin",
    "bmi",
    "age",
    "pedigree",
]
features = diabetes[features_set]
# The "label" column is the value the classifier learns to predict.
target = diabetes["label"]

In [19]:
# Partition into training and test sets.
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
features_train, features_test, target_train, target_test = train_test_split(
    features,
    target,
    test_size=0.3,
    random_state=1,
)

In [20]:
# Import the k-nearest-neighbours classification model.
from sklearn.neighbors import KNeighborsClassifier

# Build a 3-neighbour classifier and train it in one step; fit() returns the
# estimator itself, so `model` is the fitted instance.
# NOTE(review): KNN is distance-based and the features are passed in unscaled;
# consider standardizing them first -- confirm whether that is wanted here.
model = KNeighborsClassifier(n_neighbors=3).fit(features_train, target_train)
# Predict labels for the held-out test rows.
predictions = model.predict(features_test)

In [21]:
# Import the metric functions used for performance evaluation.
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Each entry pairs the printed label with the function that scores the
# predictions against the true test labels.
# NOTE(review): the F1 label has no trailing colon, unlike the other three.
metric_table = (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1-score", f1_score),
)
for metric_label, metric_fn in metric_table:
    print(metric_label, metric_fn(target_test, predictions))

Accuracy: 0.7532467532467533
Precision: 0.7058823529411765
Recall: 0.5647058823529412
F1-score 0.6274509803921569
