 # Naive Bayes Classifier using Laplace Smoothing

## Loading the CSV file into a DataFrame:

In [1]:
import pandas as pd
import numpy as np
# Read the tennis data set from the CSV file that sits next to the notebook,
# then echo the frame as the cell's output.
df = pd.read_csv(filepath_or_buffer='Q2-tennis.csv')
df

Unnamed: 0,Outlook,Temp.,Humidity,Windy,Play
0,sunny,hot,high,False,no
1,sunny,hot,high,True,no
2,overcast,hot,high,False,yes
3,rainy,mild,high,False,yes
4,rainy,cool,normal,False,yes
5,rainy,cool,normal,True,no
6,overcast,cool,normal,True,yes
7,sunny,mild,high,False,no
8,sunny,cool,normal,False,yes
9,rainy,mild,normal,False,yes


## Forming a DataFrame of counts: how often each feature value in 'df' occurs with each 'Play' outcome

In [6]:
# 'Temp.' is renamed to 'Temp' so the column can be reached as df.Temp.
df.rename(columns={'Temp.': 'Temp'}, inplace=True)

# The distinct class labels found in the 'Play' column.
play = df.Play.unique()

# One array of distinct labels per feature column, in column order.
features = [df[name].unique() for name in ('Outlook', 'Temp', 'Humidity', 'Windy')]

# values: label -> number of distinct labels in that label's feature.
# NOTE: the mapping is keyed by the label alone, so a label that appeared in
# two different features would overwrite the earlier entry.
values = {label: len(labels) for labels in features for label in labels}

# df2: one row per feature label, one column per class; each cell is the
# number of rows of 'df' where that label and that class occur together.
df2 = pd.DataFrame(index = values, columns = ['yes','no'])
for name in ('Outlook', 'Temp', 'Humidity', 'Windy'):
    for label in df[name].unique():
        for outcome in play:
            both = (df[name] == label) & (df.Play == outcome)
            df2.at[label, outcome] = df.loc[both, 'Play'].count()
df2

Unnamed: 0,yes,no
mild,4,2
normal,6,1
rainy,3,2
high,3,4
sunny,2,3
overcast,4,0
cool,3,1
false,6,2
true,3,3
hot,2,2


## Forming the Probability table:

In [7]:

# Class totals: the number of rows whose 'Play' value is 'yes' / 'no'.
# Despite the p_ prefix these are counts, not probabilities.
p_yes = df.loc[df.Play == 'yes', 'Play'].count()
p_no = df.loc[df.Play == 'no', 'Play'].count()

# Conditional probability table with Laplace (add-one) smoothing:
#   P(label | class) = (count(label, class) + 1) / (count(class) + k)
# where k = values[label] is the number of distinct labels in the label's feature.
prob = pd.DataFrame(index = values, columns = ['yes','no'])

for label in prob.index:
    for outcome, class_total in (('no', p_no), ('yes', p_yes)):
        smoothed_count = df2.at[label, outcome] + 1
        prob.at[label, outcome] = smoothed_count / (class_total + values[label])
prob

Unnamed: 0,yes,no
mild,0.416667,0.375
normal,0.636364,0.285714
rainy,0.333333,0.375
high,0.363636,0.714286
sunny,0.25,0.5
overcast,0.416667,0.125
cool,0.333333,0.25
false,0.636364,0.428571
true,0.363636,0.571429
hot,0.25,0.375


## Testing the results based on User Input:

In [8]:
#Testing on random sample inputs

def _lookup_label(raw, table):
    """Return the row label of `table` that matches the user's text `raw`.

    The comparison ignores case and surrounding whitespace on both sides and
    compares the string form of each label, so the lookup does not depend on
    how the labels happen to be stored (e.g. with a trailing space).

    Raises:
        ValueError: if no row label of `table` matches `raw`.
    """
    wanted = raw.strip().lower()
    for label in table.index:
        if str(label).strip().lower() == wanted:
            return label
    known = ', '.join(sorted(str(label).strip() for label in table.index))
    raise ValueError("Unknown value %r; expected one of: %s" % (raw, known))


print("Choose the scenerio:\n")
outlook_input = input("OUTLOOK (sunny,overcast,rainy):")
temp_input = input("TEMPERATURE (hot,mild,cool):")
humidity_input = input("HUMIDITY (normal,high):")
windy_input = input("WINDY (true,false):")

# Resolve every answer to the exact row label used by the probability table.
chosen_labels = [
    _lookup_label(answer, prob)
    for answer in (outlook_input, temp_input, humidity_input, windy_input)
]

# Calculating the probabilities based on user input.
# Naive Bayes score for a class = P(class) * product of P(label | class).
# p_yes and p_no are the class row counts, so dividing by their sum gives the
# class priors; without the prior the comparison can pick the wrong class.
class_total = p_yes + p_no
prob_yes = p_yes / class_total
prob_no = p_no / class_total
for label in chosen_labels:
    prob_yes = prob_yes * prob.at[label, 'yes']
    prob_no = prob_no * prob.at[label, 'no']
print("\n-------------------------------------------\n")

# Comparing the calculated probabilities (a tie is reported as "No")
if prob_yes > prob_no:
    print("Yes")           # Printing the final Decision
else:
    print("No")

Choose the scenerio:

OUTLOOK (sunny,overcast,rainy):overcast
TEMPERATURE (hot,mild,cool):cool
HUMIDITY (normal,high):normal
WINDY (true,false):true

-------------------------------------------

Yes
