## Naive Bayes Classifier

Naive Bayes methods are a set of supervised learning algorithms based on applying Bayes’ theorem with the “naive” assumption of conditional independence between every pair of features given the value of the class variable

There are different types of NB like Gaussian, Multinomial, Bernoulli, Catergorical and Compliment. These models differ in way they make assumptions regarding distribution of $P(x_i|y)$

In [402]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [403]:
dataset = pd.read_csv('titanic.csv')
dataset

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [404]:
data = dataset.drop(['PassengerId','Name','SibSp','Parch','Ticket','Cabin','Embarked'],axis=1)
data

Unnamed: 0,Survived,Pclass,Sex,Age,Fare
0,0,3,male,22.0,7.2500
1,1,1,female,38.0,71.2833
2,1,3,female,26.0,7.9250
3,1,1,female,35.0,53.1000
4,0,3,male,35.0,8.0500
...,...,...,...,...,...
886,0,2,male,27.0,13.0000
887,1,1,female,19.0,30.0000
888,0,3,female,,23.4500
889,1,1,male,26.0,30.0000


In [405]:
dummies = pd.get_dummies(data.Sex)
data = pd.concat([data,dummies],axis=1)
data

Unnamed: 0,Survived,Pclass,Sex,Age,Fare,female,male
0,0,3,male,22.0,7.2500,0,1
1,1,1,female,38.0,71.2833,1,0
2,1,3,female,26.0,7.9250,1,0
3,1,1,female,35.0,53.1000,1,0
4,0,3,male,35.0,8.0500,0,1
...,...,...,...,...,...,...,...
886,0,2,male,27.0,13.0000,0,1
887,1,1,female,19.0,30.0000,1,0
888,0,3,female,,23.4500,1,0
889,1,1,male,26.0,30.0000,0,1


In [406]:
# from sklearn.preprocessing import LabelEncoder
# le = LabelEncoder()
# data['Sex'] = le.fit_transform(data['Sex'])
# data.head(20)

In [407]:
data.Age.fillna(np.mean(data['Age']),inplace=True)

In [408]:
X = data.drop(['Survived','Sex'],axis=1)
Y = data['Survived']
X

Unnamed: 0,Pclass,Age,Fare,female,male
0,3,22.000000,7.2500,0,1
1,1,38.000000,71.2833,1,0
2,3,26.000000,7.9250,1,0
3,1,35.000000,53.1000,1,0
4,3,35.000000,8.0500,0,1
...,...,...,...,...,...
886,2,27.000000,13.0000,0,1
887,1,19.000000,30.0000,1,0
888,3,29.699118,23.4500,1,0
889,1,26.000000,30.0000,0,1


In [409]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(np.array(X.Age).reshape(-1,1))
X['Age'] = scaler.transform(np.array(X.Age).reshape(-1,1))

In [410]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,train_size=0.8,shuffle=True)

In [411]:
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(X_train,Y_train)
gnb.score(X_test,Y_test)

0.8379888268156425

In [412]:
from sklearn.naive_bayes import MultinomialNB
mnb = MultinomialNB()
mnb.fit(X_train,Y_train)
mnb.score(X_test,Y_test)

0.6927374301675978

In [413]:
gnb.predict(X_test[:10])

array([0, 1, 0, 0, 0, 0, 0, 1, 0, 1])

In [414]:
gnb.predict_proba(X_test[:10])

array([[0.98891187, 0.01108813],
       [0.007706  , 0.992294  ],
       [0.98821129, 0.01178871],
       [0.98852941, 0.01147059],
       [0.98750646, 0.01249354],
       [0.98878926, 0.01121074],
       [0.91703696, 0.08296304],
       [0.09946998, 0.90053002],
       [0.98735663, 0.01264337],
       [0.10347473, 0.89652527]])

In [415]:
Y_test[:10]

771    0
781    1
42     0
363    0
624    0
818    0
694    0
649    1
212    0
610    0
Name: Survived, dtype: int64