In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
df = pd.read_csv('Social_Network_Ads.csv')

In [4]:
df.sample(5)

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
97,15582492,Male,28,123000,1
294,15566689,Female,35,57000,0
347,15768151,Female,54,108000,1
177,15594577,Male,25,22000,0
111,15660200,Female,37,71000,0


In [5]:
X = df.iloc[:,2:4]

In [6]:
y = df.iloc[:,-1]

In [7]:
X

Unnamed: 0,Age,EstimatedSalary
0,19,19000
1,35,20000
2,26,43000
3,27,57000
4,19,76000
...,...,...
395,46,41000
396,51,23000
397,50,20000
398,36,33000


In [8]:
y

0      0
1      0
2      0
3      0
4      0
      ..
395    1
396    1
397    1
398    0
399    1
Name: Purchased, Length: 400, dtype: int64

## Spliting Data

In [9]:
from sklearn.model_selection import train_test_split

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X,y, train_size = 0.80, test_size = 0.20, random_state = 40)

In [11]:
X_train.shape

(320, 2)

In [12]:
X_test.shape

(80, 2)

In [13]:
y_train.shape

(320,)

In [14]:
y_test.shape

(80,)

# Scalling

In [15]:
sna = ['Age', 'EstimatedSalary']

In [16]:
from sklearn.preprocessing import MinMaxScaler

In [17]:
scalar = MinMaxScaler()

In [18]:
X_train[sna] = scalar.fit_transform(X_train[sna])  ## scaling X_train

In [19]:
X_test[sna] = scalar.transform(X_test[sna]) ## scaling test

In [20]:
y_train = scalar.fit_transform(y_train.values.reshape(-1, 1))

In [21]:
y_test = scalar.transform(y_test.values.reshape(-1, 1))

In [22]:
X_train

Unnamed: 0,Age,EstimatedSalary
85,0.309524,0.762963
69,0.309524,0.392593
203,0.547619,0.414815
379,0.952381,0.059259
261,0.428571,0.955556
...,...,...
306,0.785714,0.881481
165,0.000000,0.525926
7,0.333333,1.000000
219,0.976190,0.948148


In [23]:
y_train

array([[1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],

# Gaussian

### Filling the model

In [24]:
from sklearn.naive_bayes import GaussianNB

In [25]:
gaussian = GaussianNB()

### Train the model

In [26]:
gaussian.fit(X_train, y_train)

GaussianNB()

### Model evaluate

In [27]:
y_pred = gaussian.predict(X_test)

In [28]:
y_pred

array([0., 1., 0., 0., 0., 0., 1., 0., 1., 1., 0., 0., 0., 1., 0., 1., 0.,
       0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 1., 1., 1., 0., 0.,
       1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
       1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0.,
       1., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0.])

In [29]:
y_test

array([[0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],

### Confusion Matrix

In [30]:
from sklearn.metrics import confusion_matrix

In [31]:
cm = confusion_matrix(y_test, y_pred)

In [32]:
cm

array([[54,  2],
       [ 5, 19]], dtype=int64)

In [33]:
TN = cm[0][0]
TP = cm[1][1]
FN = cm[1][0]
FP = cm[0][1]

In [34]:
accuracy = (TN+TP)/(TN+TP+FN+FP)

In [35]:
accuracy*100

91.25

# Multinomial

### Filling the model

In [36]:
from sklearn.naive_bayes import MultinomialNB

In [37]:
mnb = MultinomialNB()

### Train the model

In [38]:
mnb.fit(X_train, y_train)

MultinomialNB()

### Model evaluate

In [39]:
y_pred = mnb.predict(X_test)

In [40]:
y_pred

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [41]:
y_test

array([[0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],

### Confusion Matrix

In [42]:
cm = confusion_matrix(y_test, y_pred)

In [43]:
cm

array([[56,  0],
       [24,  0]], dtype=int64)

In [44]:
TN = cm[0][0]
TP = cm[1][1]
FN = cm[1][0]
FP = cm[0][1]

In [45]:
accuracy = (TN+TP)/(TN+TP+FN+FP)

In [46]:
accuracy*100

70.0

# Bernoulli 

### Filling the model

In [47]:
from sklearn.naive_bayes import BernoulliNB

In [48]:
bnb = BernoulliNB()

### Train the model

In [49]:
bnb.fit(X_train, y_train)

BernoulliNB()

### Model evaluate

In [50]:
y_pred = bnb.predict(X_test)

In [51]:
y_pred

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [52]:
y_test

array([[0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],

### Confusion Matrix

In [53]:
cm = confusion_matrix(y_test, y_pred)

In [54]:
cm

array([[56,  0],
       [24,  0]], dtype=int64)

In [55]:
TN = cm[0][0]
TP = cm[1][1]
FN = cm[1][0]
FP = cm[0][1]

In [56]:
accuracy = (TN+TP)/(TN+TP+FN+FP)

In [57]:
accuracy*100

70.0

## Conclusion

- Gaussian Naive Bayes algorithm accuracy is 91.25%
- Multinomal Naive Bayes algorithm accuracy is 70%
- Bernoulli Naive Bayes algorithm accuracy is 70% 
- We choosing Gaussian algorithm for this dataset because it has more accuracy.