In [1]:
import pandas as pd

In [34]:
class NaiveBayes:
    
    def fit(self, data, target):
        
        self.features = data.drop(target, axis=1)
        self.target = target
        self.target_labels = data[target].unique()
        
        self.class_counts = data[target].value_counts()
        self.class_probs = {}
        
        for label in self.target_labels:
            
            self.class_probs[label] = round(self.class_counts[label]/data.shape[0],2)
        
        self.freq_mats = {}
        
        for feature in self.features:
            
            values = data[feature].unique()
            freq_mat = pd.DataFrame(columns = self.target_labels, index = values)
            
            for value in values:
                
                filtered_data = data[data[feature]==value]
                class_counts = filtered_data[self.target].value_counts()
                probs = {}
                
                for label in self.target_labels:
                    
                    if label in filtered_data[self.target].unique():
                        probs[label] = round(class_counts[label]/self.class_counts[label],2)
                    else:
                        probs[label] = 0                        
                
                freq_mat.loc[value] = probs
            
            print(freq_mat,'\n')
            
            self.freq_mats[feature] = freq_mat  
        
    
    def predict(self, data):
        
        for i in range(data.shape[0]):
            
            class_probs = {}
            
            for feature in self.features:
                
                for label in self.target_labels:
                    
                    if label in class_probs.keys():
                        class_probs[label] *= self.freq_mats[feature].loc[data.iloc[i][feature]][label]
                    else:
                        class_probs[label] = self.freq_mats[feature].loc[data.iloc[i][feature]][label]
            
            for label in self.target_labels:
                
                class_probs[label] *= self.class_probs[label]
            
        
            print(class_probs)
            max_prob = -1000
            belongs_to = ''
            
            for label in self.target_labels:
                
                if class_probs[label] > max_prob:
                    max_prob = class_probs[label]
                    belongs_to = label
                    
            print(f"Sample belongs to : {belongs_to} class\n")
            
                
            

In [35]:
data = pd.read_csv("./Datasets/data 2.csv")

In [36]:
data.head()

Unnamed: 0,Outlook,Temperature,Humidity,Wind,Play Tennis
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes


In [37]:
model = NaiveBayes()

In [38]:
model.fit(data,"Play Tennis")

           No   Yes
Sunny     0.6  0.22
Overcast    0  0.44
Rain      0.4  0.33 

       No   Yes
Hot   0.4  0.22
Mild  0.4  0.44
Cool  0.2  0.33 

         No   Yes
High    0.8  0.33
Normal  0.2  0.67 

         No   Yes
Weak    0.4  0.67
Strong  0.6  0.33 



In [39]:
test = data[1:3]

In [40]:
test

Unnamed: 0,Outlook,Temperature,Humidity,Wind,Play Tennis
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes


In [41]:
model.predict(test)

{'No': 0.041471999999999995, 'Yes': 0.0033732864}
Sample belongs to : No class

{'No': 0.0, 'Yes': 0.013697587200000002}
Sample belongs to : Yes class

