<a href="https://colab.research.google.com/github/AkshayKohad/ML-Tutorial/blob/main/Naive_Bayes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [53]:
import numpy as np
import pandas as pd

In [54]:
golf = pd.read_csv("golf.csv")

In [55]:
golf

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
0,sunny,hot,high,False,no
1,sunny,hot,high,True,no
2,overcast,hot,high,False,yes
3,rainy,mild,high,False,yes
4,rainy,cool,normal,False,yes
5,rainy,cool,normal,True,no
6,overcast,cool,normal,True,yes
7,sunny,mild,high,False,no
8,sunny,cool,normal,False,yes
9,rainy,mild,normal,False,yes


In [56]:
def prior_prob(golf,label):
  total_examples = golf.shape[0]
  class_examples = (golf['Play']==label).sum()


  return class_examples/total_examples

In [57]:
PRIOR = {
    'yes':prior_prob(golf,"yes"),
    'no' :prior_prob(golf,"no")
}

print(PRIOR)

{'yes': 0.6428571428571429, 'no': 0.35714285714285715}


In [58]:
golf[golf['Play']=='yes']

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
2,overcast,hot,high,False,yes
3,rainy,mild,high,False,yes
4,rainy,cool,normal,False,yes
6,overcast,cool,normal,True,yes
8,sunny,cool,normal,False,yes
9,rainy,mild,normal,False,yes
10,sunny,mild,normal,True,yes
11,overcast,mild,high,True,yes
12,overcast,hot,normal,False,yes


In [59]:
golf[golf['Play']=='no']

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
0,sunny,hot,high,False,no
1,sunny,hot,high,True,no
5,rainy,cool,normal,True,no
7,sunny,mild,high,False,no
13,rainy,mild,high,True,no


#### Conditional Probablity

In [60]:
def cond_prob(golf,feature,feature_value,label):
  filtered_data = golf[golf['Play']==label]
  numerator = np.sum(filtered_data[feature] == feature_value)
  denominator = filtered_data.shape[0]

  return numerator/denominator

In [61]:
# find conditional probablity when wind is false provided he is not playing

cond_prob(golf,'Windy',False,'no')

0.4

In [62]:
# find conditional probablity when humidity is Normal provided he is not playing

cond_prob(golf,'Humidity','normal','no')

0.2

#### Likelihood

In [63]:
features = list(golf.columns)[:-1]
features

['Outlook', 'Temperature', 'Humidity', 'Windy']

In [64]:
COND_PROB = {}

In [65]:
golf['Play'].unique()

array(['no', 'yes'], dtype=object)

In [66]:
for label in golf['Play'].unique():
  COND_PROB[label] = {}

  for feature in features:
    COND_PROB[label][feature] = {}

    feature_values = golf[feature].unique()

    for fea_value in feature_values:
      prob = round(cond_prob(golf,feature,fea_value,label),2)
      COND_PROB[label][feature][fea_value] = prob
      print(label,feature,fea_value,prob)

  print()



no Outlook sunny 0.6
no Outlook overcast 0.0
no Outlook rainy 0.4
no Temperature hot 0.4
no Temperature mild 0.4
no Temperature cool 0.2
no Humidity high 0.8
no Humidity normal 0.2
no Windy False 0.4
no Windy True 0.6

yes Outlook sunny 0.22
yes Outlook overcast 0.44
yes Outlook rainy 0.33
yes Temperature hot 0.22
yes Temperature mild 0.44
yes Temperature cool 0.33
yes Humidity high 0.33
yes Humidity normal 0.67
yes Windy False 0.67
yes Windy True 0.33



In [67]:
COND_PROB

{'no': {'Outlook': {'sunny': 0.6, 'overcast': 0.0, 'rainy': 0.4},
  'Temperature': {'hot': 0.4, 'mild': 0.4, 'cool': 0.2},
  'Humidity': {'high': 0.8, 'normal': 0.2},
  'Windy': {False: 0.4, True: 0.6}},
 'yes': {'Outlook': {'sunny': 0.22, 'overcast': 0.44, 'rainy': 0.33},
  'Temperature': {'hot': 0.22, 'mild': 0.44, 'cool': 0.33},
  'Humidity': {'high': 0.33, 'normal': 0.67},
  'Windy': {False: 0.67, True: 0.33}}}

#### Prediction

In [68]:
X_test = ["sunny","hot","normal",False]

In [69]:
for label in golf['Play'].unique():

  prior = PRIOR[label]
  liklihood = 1.0

  for i in range(len(features)):
    feature = features[i]
    fea_value = X_test[i]

    liklihood *= COND_PROB[label][feature][fea_value]

  post = liklihood*prior

  print(label,post)



no 0.006857142857142858
yes 0.013967202857142858


#### Naive Bayes using Sk learn

In [70]:
from sklearn.preprocessing import LabelEncoder

In [71]:
le1 = LabelEncoder()
golf['Outlook'] = le1.fit_transform(golf['Outlook'])

In [72]:
le2 = LabelEncoder()
golf['Temperature'] = le2.fit_transform(golf['Temperature'])

In [73]:
le3 = LabelEncoder()
golf['Humidity'] = le3.fit_transform(golf['Humidity'])

In [74]:
le4 = LabelEncoder()
golf['Windy'] = le4.fit_transform(golf['Windy'])

In [75]:
le5 = LabelEncoder()
golf['Play'] = le5.fit_transform(golf['Play'])

In [76]:
golf

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
0,2,1,0,0,0
1,2,1,0,1,0
2,0,1,0,0,1
3,1,2,0,0,1
4,1,0,1,0,1
5,1,0,1,1,0
6,0,0,1,1,1
7,2,2,0,0,0
8,2,0,1,0,1
9,1,2,1,0,1


In [77]:
X = golf.iloc[:,:-1]
y = golf.iloc[:,-1]

In [78]:
from sklearn.naive_bayes import CategoricalNB

In [79]:
model = CategoricalNB()

In [80]:
model.fit(X,y)

In [81]:
X_test = ["sunny","hot","normal",False]

In [82]:
le1.transform(['sunny'])

array([2])

In [83]:
le2.transform(['hot'])

array([1])

In [84]:
le3.transform(['normal'])

array([1])

In [85]:
le4.transform([False])

array([0])

In [86]:
X_test = np.array([[2,1,1,0]])

In [87]:
model.predict(X_test)



array([1])

In [88]:
model.predict_proba(X_test)



array([[0.33508723, 0.66491277]])