## # Machine Learning - Naive Bayes Algorithm

### Step-01: Import all the libraries required to perform Gaussian Naive Bayes, Multinomial Naive Bayes

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

### Step-02: Import tips dataset from sns.load_datasets

In [2]:
df = sns.load_dataset("tips")

In [3]:
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [4]:
df["time"].unique()

['Dinner', 'Lunch']
Categories (2, object): ['Lunch', 'Dinner']

In [5]:
df["sex"].unique()

['Female', 'Male']
Categories (2, object): ['Male', 'Female']

In [6]:
df["smoker"].unique()

['No', 'Yes']
Categories (2, object): ['Yes', 'No']

### Step-03: Import LabelEncoder from sklearn.preprocessing

In [7]:
from sklearn.preprocessing import OneHotEncoder

In [8]:
nominal_encoder = OneHotEncoder()

In [9]:
nominal_encoder.fit_transform(df[["sex", "smoker"]])

<244x4 sparse matrix of type '<class 'numpy.float64'>'
	with 488 stored elements in Compressed Sparse Row format>

In [10]:
nominal_encoded = nominal_encoder.fit_transform(df[["sex", "smoker"]]).toarray()

In [11]:
nominal_encoded

array([[1., 0., 1., 0.],
       [0., 1., 1., 0.],
       [0., 1., 1., 0.],
       [0., 1., 1., 0.],
       [1., 0., 1., 0.],
       [0., 1., 1., 0.],
       [0., 1., 1., 0.],
       [0., 1., 1., 0.],
       [0., 1., 1., 0.],
       [0., 1., 1., 0.],
       [0., 1., 1., 0.],
       [1., 0., 1., 0.],
       [0., 1., 1., 0.],
       [0., 1., 1., 0.],
       [1., 0., 1., 0.],
       [0., 1., 1., 0.],
       [1., 0., 1., 0.],
       [0., 1., 1., 0.],
       [1., 0., 1., 0.],
       [0., 1., 1., 0.],
       [0., 1., 1., 0.],
       [1., 0., 1., 0.],
       [1., 0., 1., 0.],
       [0., 1., 1., 0.],
       [0., 1., 1., 0.],
       [0., 1., 1., 0.],
       [0., 1., 1., 0.],
       [0., 1., 1., 0.],
       [0., 1., 1., 0.],
       [1., 0., 1., 0.],
       [0., 1., 1., 0.],
       [0., 1., 1., 0.],
       [1., 0., 1., 0.],
       [1., 0., 1., 0.],
       [0., 1., 1., 0.],
       [0., 1., 1., 0.],
       [0., 1., 1., 0.],
       [1., 0., 1., 0.],
       [0., 1., 1., 0.],
       [0., 1., 1., 0.],


In [12]:
df_encoded = pd.DataFrame(data = nominal_encoded, columns = nominal_encoder.get_feature_names_out())

In [13]:
df_encoded

Unnamed: 0,sex_Female,sex_Male,smoker_No,smoker_Yes
0,1.0,0.0,1.0,0.0
1,0.0,1.0,1.0,0.0
2,0.0,1.0,1.0,0.0
3,0.0,1.0,1.0,0.0
4,1.0,0.0,1.0,0.0
...,...,...,...,...
239,0.0,1.0,1.0,0.0
240,1.0,0.0,0.0,1.0
241,0.0,1.0,0.0,1.0
242,0.0,1.0,1.0,0.0


In [14]:
df.drop(["sex", "smoker"], axis = 1, inplace = True)

In [15]:
df = pd.concat([df, df_encoded], axis = 1)

In [16]:
df

Unnamed: 0,total_bill,tip,day,time,size,sex_Female,sex_Male,smoker_No,smoker_Yes
0,16.99,1.01,Sun,Dinner,2,1.0,0.0,1.0,0.0
1,10.34,1.66,Sun,Dinner,3,0.0,1.0,1.0,0.0
2,21.01,3.50,Sun,Dinner,3,0.0,1.0,1.0,0.0
3,23.68,3.31,Sun,Dinner,2,0.0,1.0,1.0,0.0
4,24.59,3.61,Sun,Dinner,4,1.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Sat,Dinner,3,0.0,1.0,1.0,0.0
240,27.18,2.00,Sat,Dinner,2,1.0,0.0,0.0,1.0
241,22.67,2.00,Sat,Dinner,2,0.0,1.0,0.0,1.0
242,17.82,1.75,Sat,Dinner,2,0.0,1.0,1.0,0.0


In [17]:
from sklearn.preprocessing import LabelEncoder

In [18]:
label_encoding = LabelEncoder()

In [21]:
label_encoded = label_encoding.fit_transform(df["time"])

In [22]:
label_encoded

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0])

In [23]:
df_label_encoded = pd.DataFrame(data = label_encoded, columns = ["time"])

In [24]:
df_label_encoded

Unnamed: 0,time
0,0
1,0
2,0
3,0
4,0
...,...
239,0
240,0
241,0
242,0


In [25]:
df.drop("time", axis = 1, inplace = True)

In [26]:
df = pd.concat([df, df_label_encoded], axis = 1)

In [27]:
df

Unnamed: 0,total_bill,tip,day,size,sex_Female,sex_Male,smoker_No,smoker_Yes,time
0,16.99,1.01,Sun,2,1.0,0.0,1.0,0.0,0
1,10.34,1.66,Sun,3,0.0,1.0,1.0,0.0,0
2,21.01,3.50,Sun,3,0.0,1.0,1.0,0.0,0
3,23.68,3.31,Sun,2,0.0,1.0,1.0,0.0,0
4,24.59,3.61,Sun,4,1.0,0.0,1.0,0.0,0
...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Sat,3,0.0,1.0,1.0,0.0,0
240,27.18,2.00,Sat,2,1.0,0.0,0.0,1.0,0
241,22.67,2.00,Sat,2,0.0,1.0,0.0,1.0,0
242,17.82,1.75,Sat,2,0.0,1.0,1.0,0.0,0


In [28]:
df.drop(["day"], axis = 1, inplace = True)

In [29]:
df

Unnamed: 0,total_bill,tip,size,sex_Female,sex_Male,smoker_No,smoker_Yes,time
0,16.99,1.01,2,1.0,0.0,1.0,0.0,0
1,10.34,1.66,3,0.0,1.0,1.0,0.0,0
2,21.01,3.50,3,0.0,1.0,1.0,0.0,0
3,23.68,3.31,2,0.0,1.0,1.0,0.0,0
4,24.59,3.61,4,1.0,0.0,1.0,0.0,0
...,...,...,...,...,...,...,...,...
239,29.03,5.92,3,0.0,1.0,1.0,0.0,0
240,27.18,2.00,2,1.0,0.0,0.0,1.0,0
241,22.67,2.00,2,0.0,1.0,0.0,1.0,0
242,17.82,1.75,2,0.0,1.0,1.0,0.0,0


In [37]:
x_gaussian = df[["total_bill", "tip", "size"]]
y_gaussian = np.array(list(df["time"]))

In [38]:
x_gaussian

Unnamed: 0,total_bill,tip,size
0,16.99,1.01,2
1,10.34,1.66,3
2,21.01,3.50,3
3,23.68,3.31,2
4,24.59,3.61,4
...,...,...,...
239,29.03,5.92,3
240,27.18,2.00,2
241,22.67,2.00,2
242,17.82,1.75,2


In [39]:
print("x_gaussian.head(): \n\n {} \n" .format(x_gaussian.head()))
print("x_gaussian.shape: {}" .format(x_gaussian.shape))

x_gaussian.head(): 

    total_bill   tip  size
0       16.99  1.01     2
1       10.34  1.66     3
2       21.01  3.50     3
3       23.68  3.31     2
4       24.59  3.61     4 

x_gaussian.shape: (244, 3)


In [40]:
y_gaussian

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0])

In [41]:
print("y_gaussian: \n\n {} \n" .format(y_gaussian))
print("y_gaussian.shape: {}" .format(y_gaussian.shape))

y_gaussian: 

 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1
 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] 

y_gaussian.shape: (244,)


In [43]:
x_multinomial = df[["sex_Female", "sex_Male", "smoker_No", "smoker_Yes"]]
y_multinomial = np.array(list(df["time"]))

In [44]:
x_multinomial

Unnamed: 0,sex_Female,sex_Male,smoker_No,smoker_Yes
0,1.0,0.0,1.0,0.0
1,0.0,1.0,1.0,0.0
2,0.0,1.0,1.0,0.0
3,0.0,1.0,1.0,0.0
4,1.0,0.0,1.0,0.0
...,...,...,...,...
239,0.0,1.0,1.0,0.0
240,1.0,0.0,0.0,1.0
241,0.0,1.0,0.0,1.0
242,0.0,1.0,1.0,0.0


In [45]:
print("x_multinomial.head(): \n\n {} \n" .format(x_multinomial.head()))
print("x_multinomial.shape: {}" .format(x_multinomial.shape))

x_multinomial.head(): 

    sex_Female  sex_Male  smoker_No  smoker_Yes
0         1.0       0.0        1.0         0.0
1         0.0       1.0        1.0         0.0
2         0.0       1.0        1.0         0.0
3         0.0       1.0        1.0         0.0
4         1.0       0.0        1.0         0.0 

x_multinomial.shape: (244, 4)


In [46]:
y_multinomial

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0])

In [48]:
print("y_multinomial: \n\n {} \n" .format(y_multinomial))
print("y_multinomial.shape: {}" .format(y_multinomial.shape))

y_multinomial: 

 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1
 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] 

y_multinomial.shape: (244,)


### Step-04: Import train_test_split from sklearn.model_selection

In [49]:
from sklearn.model_selection import train_test_split

In [50]:
x_gaussian_train, x_gaussian_test, y_gaussian_train, y_gaussian_test = train_test_split(x_gaussian, y_gaussian, test_size = 0.25)

In [51]:
x_multinomial_train, x_multinomial_test, y_multinomial_train, y_multinomial_test = train_test_split(x_multinomial, y_multinomial, test_size = 0.25)

In [52]:
x_gaussian_train.head()

Unnamed: 0,total_bill,tip,size
204,20.53,4.0,4
220,12.16,2.2,2
57,26.41,1.5,2
48,28.55,2.05,3
126,8.52,1.48,2


In [53]:
print("x_gaussian_train.head(): \n\n {} \n" .format(x_gaussian_train.head()))
print("x_gaussian_train.shape: {}" .format(x_gaussian_train.shape))

x_gaussian_train.head(): 

      total_bill   tip  size
204       20.53  4.00     4
220       12.16  2.20     2
57        26.41  1.50     2
48        28.55  2.05     3
126        8.52  1.48     2 

x_gaussian_train.shape: (183, 3)


In [54]:
x_gaussian_test.head()

Unnamed: 0,total_bill,tip,size
171,15.81,3.16,2
43,9.68,1.32,2
60,20.29,3.21,2
7,26.88,3.12,4
80,19.44,3.0,2


In [55]:
print("x_gaussian_test.head(): \n\n {} \n" .format(x_gaussian_test.head()))
print("x_gaussian_test.shape: {}" .format(x_gaussian_test.shape))

x_gaussian_test.head(): 

      total_bill   tip  size
171       15.81  3.16     2
43         9.68  1.32     2
60        20.29  3.21     2
7         26.88  3.12     4
80        19.44  3.00     2 

x_gaussian_test.shape: (61, 3)


In [56]:
y_gaussian_train

array([1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0,
       0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1,
       0, 0, 0, 0, 1, 1, 0])

In [57]:
print("y_gaussian_train: \n\n {} \n" .format(y_gaussian_train))
print("y_gaussian_train.shape: {}" .format(y_gaussian_train.shape))

y_gaussian_train: 

 [1 1 0 0 1 0 1 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 1 0 1 1 0 0 1 1 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 1 0 0 0
 1 1 0 1 1 0 0 0 0 0 0 0 1 0 0 1 0 1 1 1 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 1 0 0 1 0 0 1 0 0 0 0 1 1 0] 

y_gaussian_train.shape: (183,)


In [58]:
y_gaussian_test

array([0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1])

In [59]:
print("y_gaussian_test: \n\n {} \n" .format(y_gaussian_test))
print("y_gaussian_test.shape: {}" .format(y_gaussian_test.shape))

y_gaussian_test: 

 [0 0 0 0 1 0 0 0 1 0 0 0 1 1 0 1 1 0 0 1 0 0 1 0 0 0 0 1 1 0 0 1 0 0 1 1 0
 1 1 0 0 0 0 0 0 0 1 1 0 0 1 1 1 0 0 0 1 0 0 1 1] 

y_gaussian_test.shape: (61,)


In [61]:
x_multinomial_train.head()

Unnamed: 0,sex_Female,sex_Male,smoker_No,smoker_Yes
29,1.0,0.0,1.0,0.0
215,1.0,0.0,0.0,1.0
218,0.0,1.0,0.0,1.0
60,0.0,1.0,0.0,1.0
191,1.0,0.0,0.0,1.0


In [62]:
print("x_multinomial_train.head(): \n\n {} \n" .format(x_multinomial_train.head()))
print("x_multinomial_train.shape: {}" .format(x_multinomial_train.shape))

x_multinomial_train.head(): 

      sex_Female  sex_Male  smoker_No  smoker_Yes
29          1.0       0.0        1.0         0.0
215         1.0       0.0        0.0         1.0
218         0.0       1.0        0.0         1.0
60          0.0       1.0        0.0         1.0
191         1.0       0.0        0.0         1.0 

x_multinomial_train.shape: (183, 4)


In [63]:
x_multinomial_test.head()

Unnamed: 0,sex_Female,sex_Male,smoker_No,smoker_Yes
87,0.0,1.0,1.0,0.0
211,0.0,1.0,0.0,1.0
64,0.0,1.0,1.0,0.0
154,0.0,1.0,1.0,0.0
132,1.0,0.0,1.0,0.0


In [64]:
print("x_multinomial_test.head(): \n\n {} \n" .format(x_multinomial_test.head()))
print("x_multinomial_test.shape: {}" .format(x_multinomial_test.shape))

x_multinomial_test.head(): 

      sex_Female  sex_Male  smoker_No  smoker_Yes
87          0.0       1.0        1.0         0.0
211         0.0       1.0        0.0         1.0
64          0.0       1.0        1.0         0.0
154         0.0       1.0        1.0         0.0
132         1.0       0.0        1.0         0.0 

x_multinomial_test.shape: (61, 4)


In [67]:
y_multinomial_train

array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1,
       1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0])

In [65]:
print("y_multinomial_train: \n\n {} \n" .format(y_multinomial_train))
print("y_multinomial_train.shape: {}" .format(y_multinomial_train.shape))

y_multinomial_train: 

 [0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 1 1 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 1 1 1 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 1 0 0
 1 1 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0
 0 0 0 0 1 0 0 0 1 0 1 1 1 0 1 0 0 0 1 0 0 0 1 1 0 0 1 1 1 0 0 0 0 1 0 1 1
 0 1 0 1 0 1 1 0 1 0 1 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0] 

y_multinomial_train.shape: (183,)


In [66]:
y_multinomial_test

array([1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1,
       1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0])

In [68]:
print("y_multinomial_test: \n\n {} \n" .format(y_multinomial_test))
print("y_multinomial_test.shape: {}" .format(y_multinomial_test.shape))

y_multinomial_test: 

 [1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 1 1 1 1 0 0 0 1 1 0 1 0 1 1 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0] 

y_multinomial_test.shape: (61,)


### Step-05: Import GaussianNB from sklearn.naive_bayes

In [69]:
from sklearn.naive_bayes import GaussianNB

In [70]:
gaussian_nb = GaussianNB()

In [71]:
gaussian_nb.fit(x_gaussian_train, y_gaussian_train)

In [72]:
y_gaussian_pred = gaussian_nb.predict(x_gaussian_test)

In [73]:
y_gaussian_pred

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [74]:
from sklearn.naive_bayes import MultinomialNB

In [75]:
multinomial_nb = MultinomialNB()

In [76]:
multinomial_nb.fit(x_multinomial_train, y_multinomial_train)

In [77]:
y_multinomial_pred = multinomial_nb.predict(x_multinomial_test)

In [78]:
y_multinomial_pred

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### Step-06: Import confusion_matrix, accuracy_score, classification_report from sklearn.metrics

In [79]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [80]:
print("Confusion Matrix: \n\n {} \n" .format(confusion_matrix(y_gaussian_test, y_gaussian_pred)))
print("Accuracy Score: {} \n" .format(accuracy_score(y_gaussian_test, y_gaussian_pred)))
print("Classification Report: \n\n {} \n" .format(classification_report(y_gaussian_test, y_gaussian_pred)))

Confusion Matrix: 

 [[38  0]
 [23  0]] 

Accuracy Score: 0.6229508196721312 

Classification Report: 

               precision    recall  f1-score   support

           0       0.62      1.00      0.77        38
           1       0.00      0.00      0.00        23

    accuracy                           0.62        61
   macro avg       0.31      0.50      0.38        61
weighted avg       0.39      0.62      0.48        61
 



In [81]:
print("Confusion Matrix: \n\n {} \n" .format(confusion_matrix(y_multinomial_test, y_multinomial_pred)))
print("Accuracy Score: {} \n" .format(accuracy_score(y_multinomial_test, y_multinomial_pred)))
print("Classification Report: \n\n {} \n" .format(classification_report(y_multinomial_test, y_multinomial_pred)))

Confusion Matrix: 

 [[46  0]
 [15  0]] 

Accuracy Score: 0.7540983606557377 

Classification Report: 

               precision    recall  f1-score   support

           0       0.75      1.00      0.86        46
           1       0.00      0.00      0.00        15

    accuracy                           0.75        61
   macro avg       0.38      0.50      0.43        61
weighted avg       0.57      0.75      0.65        61
 

