In [1]:
# Import necessary packages

import pandas as pd
import numpy as np 
from collections import Counter

In [2]:
# Load the tennis dataset from a CSV file (path is relative to the working directory)

df = pd.read_csv('Q2-tennis.csv')
df.head()  # preview the first rows

Unnamed: 0,Outlook,Temp.,Humidity,Windy,Play
0,sunny,hot,high,False,no
1,sunny,hot,high,True,no
2,overcast,hot,high,False,yes
3,rainy,mild,high,False,yes
4,rainy,cool,normal,False,yes


In [3]:
# Shape of the data as (number of rows, number of columns)

df.shape

(14, 5)

In [4]:
# Summary statistics of data: count, unique, top and freq for each column

df.describe()

Unnamed: 0,Outlook,Temp.,Humidity,Windy,Play
count,14,14,14,14,14
unique,3,3,2,2,2
top,rainy,mild,high,false,yes
freq,5,6,7,8,9


In [5]:
# Function to calculate prior for Naive Bayes

def calculate_priors(target):
    ''' Estimate the prior of each class from its relative frequency.

        Input: target column (needs a `.values` attribute and a length)
        Output: Dictionary whose keys are the distinct class values, in
                order of first appearance, and whose values are the
                fraction of entries of the target column in that class'''
    class_counts = {}
    for label in target.values:
        class_counts[label] = class_counts.get(label, 0) + 1
    total = len(target)
    return {label: count / total for label, count in class_counts.items()}

In [6]:
# Function to calculate likelihood with Laplace smoothing

def calculate_likelihood(data, column):
    ''' Estimate P(feature value | class) with Laplace (add-one) smoothing.

        Input: a) data i.e. our dataset; its last column is taken as the target
               b) column i.e. our feature of interest to calculate likelihood
        Output: Dictionary with keys of the form '<feature value>/<class>' and
                values as the smoothed likelihood of that feature value given
                that class. Every combination of an observed feature value and
                an observed class gets an entry, including combinations that
                never occur together in the data, and the likelihoods of one
                class sum to 1.'''
    # Read from the `data` argument, not from a notebook-level dataframe, so
    # the function works on whatever dataset the caller passes in.
    target = data.iloc[:, -1].values
    feature = data[column].values
    # Distinct feature values, in order of first appearance.
    feature_values = list(dict.fromkeys(feature))
    n_values = len(feature_values)
    likelihoods = {}
    for label in np.unique(target):
        # Count how often each feature value occurs within this class.
        counts = Counter(value for value, cls in zip(feature, target) if cls == label)
        class_total = sum(counts.values())
        for value in feature_values:
            # Laplace smoothing: (count + 1) / (class total + number of distinct
            # feature values). Counter returns 0 for pairs never seen together.
            key = str(value) + '/' + str(label)
            likelihoods[key] = (counts[value] + 1) / (class_total + n_values)
    return likelihoods

In [7]:
# Function to predict the class with the higher posterior probability

def _likelihood_for(table, column, value, label):
    ''' Look up the likelihood of `value` given `label` in one column's table,
        raising a descriptive KeyError when the pair has no entry.'''
    key = value + '/' + label
    if key not in table:
        raise KeyError('no likelihood for {!r} in column {!r}'.format(key, column))
    return table[key]


def calculate_posterior(data, outlook, temp, humidity, windy, string_1, string_2):
    ''' Predict which of two target classes is more probable for one
        observation, treating the features as independent given the class.

        Input: data i.e. our dataset; its first four columns are read, in
                    order, as the outlook, temperature, humidity and windy
                    features and its last column as the target
               outlook i.e. our outlook class
               temp i.e. our temperature class
               humidity i.e. our humidity class
               windy i.e. our windy class
               string_1 i.e. target class 1
               string_2 i.e. target class 2
        Output: string_1 if its posterior is strictly larger than that of
                string_2, otherwise string_2 (the class label is returned,
                not the probability)
        Raises: KeyError if a target class is missing from the priors or a
                feature value / class pair has no likelihood entry;
                IndexError if data has fewer than four columns'''
    priors = calculate_priors(data.iloc[:, -1])
    score_1 = priors[string_1]
    score_2 = priors[string_2]
    observed = (outlook, temp, humidity, windy)
    # Only the first four columns are consulted, so no likelihood table is
    # built for the target column or for any further columns.
    feature_columns = data.columns[:len(observed)]
    if len(feature_columns) < len(observed):
        raise IndexError('data must have at least four feature columns')
    for col, value in zip(feature_columns, observed):
        col_likelihood = calculate_likelihood(data, col)
        score_1 *= _likelihood_for(col_likelihood, col, value, string_1)
        score_2 *= _likelihood_for(col_likelihood, col, value, string_2)
    # Both posteriors share the normaliser (score_1 + score_2), so comparing
    # the unnormalised scores gives the same ordering without the division.
    if score_1 > score_2:
        return string_1
    return string_2

In [8]:
# Example: classify a sunny, mild, high-humidity, windy day between 'no' and 'yes'
# NOTE(review): 'true ' carries a trailing space — presumably it matches how the Windy values are stored in the CSV; confirm.
calculate_posterior(df, 'sunny', 'mild', 'high', 'true ', 'no', 'yes')

'no'