## # Machine Learning - Naive Bayes Algorithm

### Step-01: Import all the libraries required to perform Gaussian Naive Bayes

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

### Step-02: Import load_iris from sklearn.datasets

In [2]:
from sklearn.datasets import load_iris

In [3]:
dataset = load_iris()

In [5]:
print(dataset.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [6]:
dataset.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [7]:
dataset.data

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [8]:
dataset.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [9]:
dataset.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [10]:
dataset.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [16]:
x = pd.DataFrame(data = dataset.data, columns = dataset.feature_names)
y = dataset.target

In [17]:
x.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [21]:
print("{} \n" .format(x.head()))
print("x.shape: {} \n" .format(x.shape))

   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                5.1               3.5                1.4               0.2
1                4.9               3.0                1.4               0.2
2                4.7               3.2                1.3               0.2
3                4.6               3.1                1.5               0.2
4                5.0               3.6                1.4               0.2 

x.shape: (150, 4) 



In [22]:
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [23]:
print("{} \n" .format(y))
print("y.shape {} \n" .format(y.shape))

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2] 

y.shape (150,) 



### Step-03: Import train_test_split from sklearn.model_selection

In [24]:
from sklearn.model_selection import train_test_split

In [25]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.33)

In [26]:
x_train.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
138,6.0,3.0,4.8,1.8
7,5.0,3.4,1.5,0.2
112,6.8,3.0,5.5,2.1
56,6.3,3.3,4.7,1.6
16,5.4,3.9,1.3,0.4


In [27]:
print("{} \n" .format(x_train.head()))
print("x_train.shape: {} \n" .format(x_train.shape))

     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
138                6.0               3.0                4.8               1.8
7                  5.0               3.4                1.5               0.2
112                6.8               3.0                5.5               2.1
56                 6.3               3.3                4.7               1.6
16                 5.4               3.9                1.3               0.4 

x_train.shape: (100, 4) 



In [28]:
x_test.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
38,4.4,3.0,1.3,0.2
136,6.3,3.4,5.6,2.4
132,6.4,2.8,5.6,2.2
133,6.3,2.8,5.1,1.5
109,7.2,3.6,6.1,2.5


In [29]:
print("{} \n" .format(x_test.head()))
print("x_train.shape: {} \n" .format(x_test.shape))

     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
38                 4.4               3.0                1.3               0.2
136                6.3               3.4                5.6               2.4
132                6.4               2.8                5.6               2.2
133                6.3               2.8                5.1               1.5
109                7.2               3.6                6.1               2.5 

x_train.shape: (50, 4) 



In [31]:
y_train

array([2, 0, 2, 1, 0, 0, 2, 0, 0, 1, 1, 1, 0, 0, 0, 1, 2, 1, 2, 1, 2, 1,
       0, 0, 0, 1, 2, 1, 1, 0, 1, 2, 2, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0,
       0, 1, 0, 0, 1, 1, 2, 2, 0, 2, 0, 2, 1, 1, 0, 1, 2, 0, 2, 2, 2, 2,
       2, 2, 2, 2, 1, 2, 0, 1, 2, 1, 0, 2, 1, 0, 0, 2, 0, 2, 1, 1, 2, 2,
       0, 2, 1, 2, 1, 1, 1, 0, 0, 0, 0, 2])

In [32]:
print("{} \n" .format(y_train))
print("x_train.shape: {} \n" .format(y_train.shape))

[2 0 2 1 0 0 2 0 0 1 1 1 0 0 0 1 2 1 2 1 2 1 0 0 0 1 2 1 1 0 1 2 2 1 1 0 1
 0 1 0 1 0 0 0 0 1 0 0 1 1 2 2 0 2 0 2 1 1 0 1 2 0 2 2 2 2 2 2 2 2 1 2 0 1
 2 1 0 2 1 0 0 2 0 2 1 1 2 2 0 2 1 2 1 1 1 0 0 0 0 2] 

x_train.shape: (100,) 



In [33]:
print("{} \n" .format(y_test))
print("x_train.shape: {} \n" .format(y_test.shape))

[0 2 2 2 2 0 2 2 0 1 2 2 0 0 1 2 2 1 1 2 2 1 0 1 2 0 1 1 2 1 1 1 2 1 0 2 0
 1 0 2 0 0 0 1 1 2 1 1 0 0] 

x_train.shape: (50,) 



### Step-04: Import GaussianNB from sklearn.naive_bayes

In [34]:
from sklearn.naive_bayes import GaussianNB

In [35]:
gaussian_nb = GaussianNB()

In [36]:
gaussian_nb.fit(x_train, y_train)

In [37]:
y_pred = gaussian_nb.predict(x_test)

In [38]:
y_pred

array([0, 2, 2, 1, 2, 0, 2, 2, 0, 1, 2, 2, 0, 0, 1, 2, 2, 1, 1, 2, 2, 1,
       0, 1, 2, 0, 1, 1, 2, 1, 2, 1, 2, 1, 0, 2, 0, 1, 0, 2, 0, 0, 0, 1,
       1, 2, 1, 2, 0, 0])

### Step-05: Import confusion_matrix, accuracy_score, classification_report from sklearn.metrics

In [39]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [41]:
print("Confusion Matrix: \n\n {} \n" .format(confusion_matrix(y_test, y_pred)))
print("Accuracy Score: {} \n" .format(accuracy_score(y_test, y_pred)))
print("Classification Report: \n\n {} \n" .format(classification_report(y_test, y_pred)))

Confusion Matrix: 

 [[15  0  0]
 [ 0 15  2]
 [ 0  1 17]] 

Accuracy Score: 0.94 

Classification Report: 

               precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       0.94      0.88      0.91        17
           2       0.89      0.94      0.92        18

    accuracy                           0.94        50
   macro avg       0.94      0.94      0.94        50
weighted avg       0.94      0.94      0.94        50
 

