In [1]:
# Enable code formatting using external plugin: nb_black.
%reload_ext nb_black

<IPython.core.display.Javascript object>

# Naive Bayes Classifier - CategoricalNB

# [1] Setup

### Import and configure required libraries

In [2]:
# Data manipulation libraries
import pandas as pd

# Data modeling libraries
import sklearn
from sklearn.naive_bayes import CategoricalNB
from sklearn.preprocessing import LabelEncoder

# Library versions used in below EDA.
print("Pandas version:", pd.__version__)
print("Sklearn version:", sklearn.__version__)

# Configure Pandas.
# Set display width to maximum 130 characters in the output, post which it will continue in next line.
pd.options.display.width = 130

Pandas version: 1.4.2
Sklearn version: 1.0.2


<IPython.core.display.Javascript object>

# [2] Load Dataset

In [3]:
wthr_df = pd.read_csv("./Input/weather_forecast.csv")
wthr_df.head(14)

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes
5,Rain,Cool,Normal,Strong,No
6,Overcast,Cool,Normal,Strong,Yes
7,Sunny,Mild,High,Weak,No
8,Sunny,Cool,Normal,Weak,Yes
9,Rain,Mild,Normal,Weak,Yes


<IPython.core.display.Javascript object>

# [3] Transform Data

In [4]:
wthr_dft = wthr_df.copy()
wthr_dft = wthr_dft.apply(LabelEncoder().fit_transform)
wthr_dft

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
0,2,1,0,1,0
1,2,1,0,0,0
2,0,1,0,1,1
3,1,2,0,1,1
4,1,0,1,1,1
5,1,0,1,0,0
6,0,0,1,0,1
7,2,2,0,1,0
8,2,0,1,1,1
9,1,2,1,1,1


<IPython.core.display.Javascript object>

In [5]:
X_train = wthr_dft[["Outlook", "Temperature", "Humidity", "Windy"]]
y_train = wthr_dft["Play"]

<IPython.core.display.Javascript object>

# [4] Train `CategoricalNB` Model

In [6]:
nb_clfr = CategoricalNB(min_categories=[3, 3, 3, 3])
model = nb_clfr.fit(X_train.values, y_train.values)

<IPython.core.display.Javascript object>

# [5] Test Model

In [7]:
def label_decode(label):
    op = "No" if label == 0 else "Yes"
    print("Predicted output:", op)

<IPython.core.display.Javascript object>

In [8]:
# Features: Outlook, Temperature, Humidity, Windy.
# query point: 'Sunny', 'Cool', 'High', 'Strong'.
x_q = [2, 0, 0, 0]

label_decode(model.predict([x_q]))

Predicted output: No


<IPython.core.display.Javascript object>

# [6] Testing Additive Smoothing

In [9]:
# Features: Outlook, Temperature, Humidity, Windy.
# query point: 'Sunny', 'Cool', 'Low', 'Strong'.
x_q = [2, 0, 2, 0]

label_decode(model.predict([x_q]))

Predicted output: No


<IPython.core.display.Javascript object>

In [10]:
# Features: Outlook, Temperature, Humidity, Windy.
# query point: 'Sunny', 'Cool', 'Low', 'Weak'.
x_q = [2, 0, 2, 1]

label_decode(model.predict([x_q]))

Predicted output: Yes


<IPython.core.display.Javascript object>