In [1]:
import pandas as pd

In [2]:
# Machine-specific location of the dataset; adjust this when running elsewhere.
DATA_PATH = '/Users/kirtan/Downloads/play_tennis.csv'
df = pd.read_csv(DATA_PATH)

In [3]:
df.head()

Unnamed: 0,day,outlook,temp,humidity,wind,play
0,D1,Sunny,Hot,High,Weak,No
1,D2,Sunny,Hot,High,Strong,No
2,D3,Overcast,Hot,High,Weak,Yes
3,D4,Rain,Mild,High,Weak,Yes
4,D5,Rain,Cool,Normal,Weak,Yes


In [4]:
df.shape

(14, 6)

In [5]:
df.isnull().sum()

day         0
outlook     0
temp        0
humidity    0
wind        0
play        0
dtype: int64

In [6]:
# Remove the 'day' column before modelling.
# errors='ignore' keeps the cell re-runnable: once 'day' is gone, running it
# again is a no-op instead of raising KeyError.
df = df.drop(columns=['day'], errors='ignore')

In [7]:
df.head()

Unnamed: 0,outlook,temp,humidity,wind,play
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes


## Initial (Prior) Probabilities

In [8]:
# Number of rows in the frame. Read it directly rather than unpacking into `_`,
# which would rebind the name IPython uses for the last cell output.
total = df.shape[0]

In [9]:
# Look the class counts up by label. value_counts() orders its result by
# frequency, so positional unpacking would silently swap the two totals if
# 'No' were the more common label, and would fail unless exactly two labels exist.
play_counts = df['play'].value_counts()
total_Yes = int(play_counts.get('Yes', 0))
total_No = int(play_counts.get('No', 0))

In [10]:
total_Yes

9

In [11]:
# Class priors: the share of all rows carrying each label.
prob_Yes, prob_No = total_Yes / total, total_No / total

In [12]:
prob_Yes

0.6428571428571429

In [13]:
prob_No

0.35714285714285715

In [69]:
class Naive_Bayes:
    """Naive Bayes classifier over categorical features with a 'play' target.

    The training frame must contain a ``play`` column whose labels are
    ``'Yes'`` and ``'No'``; every other column is treated as a feature.
    All probabilities are plain relative frequencies (no smoothing).
    """

    def __init__(self, df):
        """Store the training frame and compute class counts and priors.

        Parameters
        ----------
        df : pandas.DataFrame
            Training data including the ``play`` column.
        """
        self.df = df
        self.total_yes = df[df['play'] == 'Yes'].shape[0]
        self.total_no = df[df['play'] == 'No'].shape[0]
        self.total = df.shape[0]
        self.prob_No = self.total_no / self.total
        self.prob_Yes = self.total_yes / self.total

    def _feature_columns(self):
        """Return every column name except ``play``, in frame order."""
        return [col for col in self.df.columns if col != 'play']

    def get_total(self, col):
        """Count the rows holding each distinct value of ``col``.

        Returns
        -------
        pandas.DataFrame
            Columns ``[col, 'count']``, one row per distinct value, in the
            order ``Series.unique()`` yields them.
        """
        column = self.df[col]
        counts = [(val, int((column == val).sum())) for val in column.unique()]
        return pd.DataFrame(counts, columns=[col, 'count'])

    def calculate_conditional_prob(self, col):
        """Estimate P(value | Yes) and P(value | No) for each value of ``col``.

        Returns
        -------
        pandas.DataFrame
            Columns ``[col, 'yes_prob', 'no_prob']``, one row per distinct
            value. A class with no training rows contributes probability 0.
        """
        column = self.df[col]
        is_yes = self.df['play'] == 'Yes'
        is_no = self.df['play'] == 'No'

        rows = []
        for val in column.unique():
            matches = column == val
            col_yes = int((matches & is_yes).sum())
            col_no = int((matches & is_no).sum())

            yes_prob = col_yes / self.total_yes if self.total_yes > 0 else 0
            no_prob = col_no / self.total_no if self.total_no > 0 else 0
            rows.append((val, yes_prob, no_prob))

        return pd.DataFrame(rows, columns=[col, 'yes_prob', 'no_prob'])

    def get_conditional_prob(self):
        """Return one conditional-probability table per feature column.

        The list follows the frame's column order with ``play`` left out.
        """
        return [self.calculate_conditional_prob(col) for col in self._feature_columns()]

    def predict(self, param):
        """Predict ``'Yes'`` or ``'No'`` for one observation.

        Parameters
        ----------
        param : mapping
            Feature name -> observed value. ``param[col]`` is read for every
            feature column, so a missing feature raises the mapping's lookup
            error (``KeyError`` for a dict).

        Returns
        -------
        str
            ``'Yes'`` when the Yes score is strictly larger, otherwise ``'No'``
            (ties therefore resolve to ``'No'``).
        """
        prob_yes = self.prob_Yes
        prob_no = self.prob_No

        # Pair each feature with its own table. The table list has no entry for
        # 'play', so indexing it by position among *all* columns only lines up
        # when 'play' happens to be the last column.
        feature_cols = self._feature_columns()
        for col, col_probs in zip(feature_cols, self.get_conditional_prob()):
            feature_value = param[col]
            feature_probs = col_probs[col_probs[col] == feature_value]

            # A value never seen in training has no row; that feature is
            # skipped and leaves both scores unchanged.
            if not feature_probs.empty:
                prob_yes *= feature_probs['yes_prob'].values[0]
                prob_no *= feature_probs['no_prob'].values[0]

        if prob_yes > prob_no:
            return 'Yes'
        return 'No'
        

In [70]:
# Fit the classifier on the prepared frame.
NV = Naive_Bayes(df=df)

## Predict play for the following
outlook = Sunny, temp = Cool, humidity = High, wind = Strong (values are case-sensitive and must match the dataset's spelling)

In [77]:
# Observation to classify: one value per feature column.
param = dict(
    outlook='Sunny',
    temp='Cool',
    humidity='High',
    wind='Strong',
)

In [78]:
NV.predict(param)

'No'