# Naive Bayes Classifier Implementation from scratch

In [1]:
#Importing Libraries
import numpy as np
import pandas as pd

In [2]:
#Creating a DataFrame
#Toy stolen-car dataset written row by row: each tuple is one example in the
#column order named on the first line. zip(...) over the rows transposes them
#into per-column sequences, which become the lists stored in `data`.
data = dict(zip(
    ('Example No.', 'Color', 'Type', 'Orgin', 'Stolen'),
    map(list, zip(
        (1, 'Red', 'Sports', 'Domestic', 'Yes'),
        (2, 'Red', 'Sports', 'Domestic', 'No'),
        (3, 'Red', 'Sports', 'Domestic', 'Yes'),
        (4, 'Yellow', 'Sports', 'Domestic', 'No'),
        (5, 'Yellow', 'Sports', 'Imported', 'Yes'),
        (6, 'Yellow', 'SUV', 'Imported', 'No'),
        (7, 'Yellow', 'SUV', 'Imported', 'Yes'),
        (8, 'Yellow', 'SUV', 'Domestic', 'No'),
        (9, 'Red', 'SUV', 'Imported', 'No'),
        (10, 'Red', 'Sports', 'Imported', 'Yes'),
    )),
))
df = pd.DataFrame(data)
df

Unnamed: 0,Example No.,Color,Type,Orgin,Stolen
0,1,Red,Sports,Domestic,Yes
1,2,Red,Sports,Domestic,No
2,3,Red,Sports,Domestic,Yes
3,4,Yellow,Sports,Domestic,No
4,5,Yellow,Sports,Imported,Yes
5,6,Yellow,SUV,Imported,No
6,7,Yellow,SUV,Imported,Yes
7,8,Yellow,SUV,Domestic,No
8,9,Red,SUV,Imported,No
9,10,Red,Sports,Imported,Yes


In [3]:
#Splitting into target and features
# Drop the row-id column by reassignment rather than inplace=True, and ignore
# the column if it is already gone, so re-running this cell does not raise
# KeyError. After the cell runs, df holds only the features plus 'Stolen'.
df = df.drop(columns='Example No.', errors='ignore')
y = df['Stolen']                 # target labels as a Series
x = df.drop(columns='Stolen')    # remaining columns are the features

In [4]:
# Display the feature frame: every column of df except the 'Stolen' target
x

Unnamed: 0,Color,Type,Orgin
0,Red,Sports,Domestic
1,Red,Sports,Domestic
2,Red,Sports,Domestic
3,Yellow,Sports,Domestic
4,Yellow,Sports,Imported
5,Yellow,SUV,Imported
6,Yellow,SUV,Imported
7,Yellow,SUV,Domestic
8,Red,SUV,Imported
9,Red,Sports,Imported


In [5]:
# Display the target Series (the 'Stolen' column of df)
y

0    Yes
1     No
2    Yes
3     No
4    Yes
5     No
6    Yes
7     No
8     No
9    Yes
Name: Stolen, dtype: object

In [6]:
#Building Naive Bayes Classifier
class NB:
    """Naive Bayes classifier for categorical features held in pandas objects.

    fit() estimates, by plain relative frequency (no smoothing), the prior of
    every target label and the likelihood of every observed feature value
    given each label. predict() multiplies the prior by the likelihoods of a
    row's values and returns the label with the largest product.

    Everything learned is stored in the ``dummy`` dictionary:
        dummy[feature][value][label] -> P(feature == value | label)
        dummy['target'][label]       -> P(label)

    Limitation: the key 'target' is reserved for the priors, so a feature
    column literally named 'target' would be overwritten by them.
    """

    def __init__(self):
        self.dummy = dict()     # likelihood table, filled in by fit()

    def fit(self, x, y):
        """Estimate priors and per-feature likelihoods from training data.

        Parameters
        ----------
        x : pandas.DataFrame
            Feature columns; each distinct value is treated as a category.
        y : pandas.Series
            Target labels, index-aligned with ``x``.

        Returns
        -------
        None. The fitted table is stored in ``self.dummy``.
        """
        # Start from an empty table so refitting on other data leaves no stale entries.
        self.dummy.clear()

        labels = y.unique()
        # The row mask and row count of each label do not depend on the feature,
        # so compute them once instead of once per (feature, value, label).
        label_masks = {label: (y == label) for label in labels}
        label_counts = {label: int(mask.sum()) for label, mask in label_masks.items()}

        for column in x.columns:
            value_table = dict()
            for value in x[column].unique():
                has_value = (x[column] == value)
                # P(column == value | label) = rows with both / rows with label
                value_table[value] = {
                    label: int((has_value & label_masks[label]).sum()) / label_counts[label]
                    for label in labels
                }
            self.dummy[column] = value_table

        # Prior of each label: its share of all training rows.
        self.dummy['target'] = {
            label: label_counts[label] / y.shape[0] for label in labels
        }

    def predict(self, x):
        """Predict a label for every row of ``x``.

        Parameters
        ----------
        x : pandas.DataFrame
            Rows to classify; its columns must have been present in fit().

        Returns
        -------
        pandas.DataFrame
            One column, 'Predicted', with one row per input row and a fresh
            0..n-1 index. When several labels tie, the one seen first in the
            training target wins.

        Notes
        -----
        A value that never occurred in the training data for its feature
        carries no evidence for any label, so that feature is skipped for the
        row (treated as missing) rather than raising KeyError.
        """
        priors = self.dummy['target']
        predictions = []        # collected in a list; the frame is built once at the end
        for _, row in x.iterrows():
            scores = dict()
            for label, prior in priors.items():
                score = prior
                for column in x.columns:
                    likelihoods = self.dummy[column].get(row[column])
                    if likelihoods is None:     # value unseen during fit: skip this feature
                        continue
                    score = score * likelihoods[label]
                scores[label] = score
            # max() keeps the first label among equal scores (training order).
            predictions.append(max(scores, key=scores.get))
        # DataFrame.append was removed in pandas 2.0, and growing a frame row
        # by row is quadratic, so construct the result in one call.
        return pd.DataFrame({'Predicted': predictions})

In [7]:
# Create an untrained NB model; its likelihood table is empty until fit() is called
classifier=NB()

In [8]:
# Training: fill the model's likelihood table from the features x and labels y
classifier.fit(x,y)

In [9]:
# Prediction on the same rows the model was trained on, so agreement with y
# reflects fit to the training data only, not performance on unseen examples
classifier.predict(x)

Unnamed: 0,Predicted
0,Yes
1,Yes
2,Yes
3,No
4,Yes
5,No
6,No
7,No
8,No
9,Yes


In [10]:
# Actual target, shown as a one-column DataFrame for comparison with the predictions
pd.DataFrame(y)

Unnamed: 0,Stolen
0,Yes
1,No
2,Yes
3,No
4,Yes
5,No
6,Yes
7,No
8,No
9,Yes


## Using a different Dataset

In [11]:
#Creating a DataFrame
#Toy play/no-play weather dataset written row by row: each tuple is one day in
#the column order named on the first line. zip(...) over the rows transposes
#them into per-column sequences, which become the lists stored in `data`.
data = dict(zip(
    ('Day', 'Outlook', 'Temp', 'Humidity', 'Wind', 'Play'),
    map(list, zip(
        (1, 'Rainy', 'Mid', 'Low', 'Weak', 'Yes'),
        (2, 'Sunny', 'Hot', 'Low', 'Normal', 'Yes'),
        (3, 'Rainy', 'Hot', 'Low', 'Normal', 'No'),
        (4, 'Rainy', 'Cold', 'High', 'Weak', 'Yes'),
        (5, 'Rainy', 'Hot', 'Low', 'Weak', 'No'),
        (6, 'Rainy', 'Mid', 'Low', 'Normal', 'Yes'),
        (7, 'Rainy', 'Mid', 'High', 'Normal', 'No'),
        (8, 'Sunny', 'Hot', 'High', 'Weak', 'No'),
        (9, 'Sunny', 'Cold', 'High', 'Normal', 'No'),
        (10, 'Rainy', 'Cold', 'Low', 'Normal', 'Yes'),
        (11, 'Sunny', 'Hot', 'Low', 'Weak', 'No'),
        (12, 'Sunny', 'Cold', 'High', 'Normal', 'No'),
        (13, 'Sunny', 'Cold', 'Low', 'Weak', 'Yes'),
        (14, 'Rainy', 'Cold', 'High', 'Weak', 'No'),
    )),
))
df = pd.DataFrame(data)
df

Unnamed: 0,Day,Outlook,Temp,Humidity,Wind,Play
0,1,Rainy,Mid,Low,Weak,Yes
1,2,Sunny,Hot,Low,Normal,Yes
2,3,Rainy,Hot,Low,Normal,No
3,4,Rainy,Cold,High,Weak,Yes
4,5,Rainy,Hot,Low,Weak,No
5,6,Rainy,Mid,Low,Normal,Yes
6,7,Rainy,Mid,High,Normal,No
7,8,Sunny,Hot,High,Weak,No
8,9,Sunny,Cold,High,Normal,No
9,10,Rainy,Cold,Low,Normal,Yes


In [12]:
#Splitting into target and features
# Drop the row-id column by reassignment rather than inplace=True, and ignore
# the column if it is already gone, so re-running this cell does not raise
# KeyError. After the cell runs, df holds only the features plus 'Play'.
df = df.drop(columns='Day', errors='ignore')
y = df['Play']                 # target labels as a Series
x = df.drop(columns='Play')    # remaining columns are the features

In [13]:
# Display the feature frame: every column of df except the 'Play' target
x

Unnamed: 0,Outlook,Temp,Humidity,Wind
0,Rainy,Mid,Low,Weak
1,Sunny,Hot,Low,Normal
2,Rainy,Hot,Low,Normal
3,Rainy,Cold,High,Weak
4,Rainy,Hot,Low,Weak
5,Rainy,Mid,Low,Normal
6,Rainy,Mid,High,Normal
7,Sunny,Hot,High,Weak
8,Sunny,Cold,High,Normal
9,Rainy,Cold,Low,Normal


In [14]:
# Display the target Series (the 'Play' column of df)
y

0     Yes
1     Yes
2      No
3     Yes
4      No
5     Yes
6      No
7      No
8      No
9     Yes
10     No
11     No
12    Yes
13     No
Name: Play, dtype: object

In [15]:
# Create a fresh, untrained NB model for this dataset; a new instance starts
# with an empty likelihood table
classifier=NB()

In [16]:
# Training: fill the model's likelihood table from the features x and labels y
classifier.fit(x,y)

In [17]:
# Prediction on the same rows the model was trained on, so agreement with y
# reflects fit to the training data only, not performance on unseen examples
classifier.predict(x)

Unnamed: 0,Predicted
0,Yes
1,No
2,No
3,No
4,No
5,Yes
6,No
7,No
8,No
9,Yes


In [18]:
# Actual target, shown as a one-column DataFrame for comparison with the predictions
pd.DataFrame(y)

Unnamed: 0,Play
0,Yes
1,Yes
2,No
3,Yes
4,No
5,Yes
6,No
7,No
8,No
9,Yes


Thank You!!