# **Naive Bayes**

In [48]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [49]:
# Read the play-tennis records from a CSV file in the working directory.
data = pd.read_csv("play_tennis.csv")

# Render the loaded frame so the raw columns can be inspected.
display(data)

Unnamed: 0,day,outlook,temperature,humidity,wind,play
0,D1,Sunny,Hot,High,Weak,No
1,D2,Sunny,Hot,High,Strong,No
2,D3,Overcast,Hot,High,Weak,Yes
3,D4,Rain,Mild,High,Weak,Yes
4,D5,Rain,Cool,Normal,Weak,Yes
5,D6,Rain,Cool,Normal,Strong,No
6,D7,Overcast,Cool,Normal,Strong,Yes
7,D8,Sunny,Mild,High,Weak,No
8,D9,Sunny,Cool,Normal,Weak,Yes
9,D10,Rain,Mild,Normal,Weak,Yes


In [50]:
# Remove the 'day' column by rebinding `data` to a new frame instead of
# mutating it in place. errors='ignore' makes re-running this cell a no-op
# rather than a KeyError once the column is already gone.
data = data.drop(columns=['day'], errors='ignore')

display(data)

Unnamed: 0,outlook,temperature,humidity,wind,play
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes
5,Rain,Cool,Normal,Strong,No
6,Overcast,Cool,Normal,Strong,Yes
7,Sunny,Mild,High,Weak,No
8,Sunny,Cool,Normal,Weak,Yes
9,Rain,Mild,Normal,Weak,Yes


In [51]:
# Features are the first four columns; the label is the last column.
X, Y = data.iloc[:, :4], data.iloc[:, -1]

# Split without shuffling so the result is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, shuffle=False)

In [52]:
# Class priors: share of training labels equal to "Yes" and to "No".
PosProb, NegProb = (
    (Y_train == label).sum() / Y_train.count() for label in ("Yes", "No")
)

print(PosProb, NegProb, sep="\n")

0.6
0.4


In [53]:
# One single-key dict per training column: {column name: distinct values seen}.
features = [
    {name: list(values.unique())}
    for name, values in X_train.items()
]

display(features)

[{'outlook': ['Sunny', 'Overcast', 'Rain']},
 {'temperature': ['Hot', 'Mild', 'Cool']},
 {'humidity': ['High', 'Normal']},
 {'wind': ['Weak', 'Strong']}]

In [54]:
# Conditional likelihood table estimated from the training split:
#   featureProbs[column][value] == [P(value | "Yes"), P(value | "No")]
yes_rows = Y_train == "Yes"
no_rows = Y_train == "No"

# Per-class totals do not depend on the feature, so compute them once.
yes_total = Y_train[yes_rows].count()
no_total = Y_train[no_rows].count()

featureProbs = {}
for feature in features:
    for name, values in feature.items():
        featureProbs[name] = {
            value: [
                X_train.loc[(X_train[name] == value) & yes_rows, name].count() / yes_total,
                X_train.loc[(X_train[name] == value) & no_rows, name].count() / no_total,
            ]
            for value in values
        }

In [57]:
# Classify every held-out row with Naive Bayes:
#   score(class) = P(class) * product over columns of P(value | class)
# and predict whichever class has the larger score.
for row in range(X_test.shape[0]):
    sample = X_test.iloc[row]

    # Each score must start from its OWN class prior; seeding the negative
    # score with PosProb would bias the comparison whenever the priors differ.
    classifyP = PosProb
    classifyN = NegProb

    display(sample)
    for col in X_test.columns:
        # featureProbs[col][value] holds [P(value | "Yes"), P(value | "No")].
        # A value that never occurred in the training split raises KeyError.
        likelihoods = featureProbs[col][sample[col]]
        classifyP *= likelihoods[0]
        classifyN *= likelihoods[1]

    # Ties fall through to 'N' because the comparison is strict.
    print("Predicted: ", 'P' if classifyP > classifyN else 'N')
    print("Actual test: ", Y_test.iloc[row])

outlook         Sunny
temperature      Mild
humidity       Normal
wind           Strong
Name: 10, dtype: object

Predicted:  N
Actual test:  Yes


outlook        Overcast
temperature        Mild
humidity           High
wind             Strong
Name: 11, dtype: object

Predicted:  P
Actual test:  Yes


outlook        Overcast
temperature         Hot
humidity         Normal
wind               Weak
Name: 12, dtype: object

Predicted:  P
Actual test:  Yes


outlook          Rain
temperature      Mild
humidity         High
wind           Strong
Name: 13, dtype: object

Predicted:  N
Actual test:  No
