## Importing libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
dataset = pd.read_csv("Social_Network_Ads.csv")
dataset.head()

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0


In [3]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype
---  ------           --------------  -----
 0   Age              400 non-null    int64
 1   EstimatedSalary  400 non-null    int64
 2   Purchased        400 non-null    int64
dtypes: int64(3)
memory usage: 9.5 KB


In [4]:
dataset.shape

(400, 3)

In [5]:
dataset.isna().sum()

Age                0
EstimatedSalary    0
Purchased          0
dtype: int64

In [6]:
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [7]:
x, y

(array([[    19,  19000],
        [    35,  20000],
        [    26,  43000],
        [    27,  57000],
        [    19,  76000],
        [    27,  58000],
        [    27,  84000],
        [    32, 150000],
        [    25,  33000],
        [    35,  65000],
        [    26,  80000],
        [    26,  52000],
        [    20,  86000],
        [    32,  18000],
        [    18,  82000],
        [    29,  80000],
        [    47,  25000],
        [    45,  26000],
        [    46,  28000],
        [    48,  29000],
        [    45,  22000],
        [    47,  49000],
        [    48,  41000],
        [    45,  22000],
        [    46,  23000],
        [    47,  20000],
        [    49,  28000],
        [    47,  30000],
        [    29,  43000],
        [    31,  18000],
        [    31,  74000],
        [    27, 137000],
        [    21,  16000],
        [    28,  44000],
        [    27,  90000],
        [    35,  27000],
        [    33,  28000],
        [    30,  49000],
        [   

In [8]:
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size = 0.2,
                                                    random_state = 100)

In [9]:
x_train.shape, x_test.shape

((320, 2), (80, 2))

## Apply z-score standardization to train set

In [10]:
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

In [11]:
x_train, x_test

(array([[-1.27832135,  0.38895733],
        [-1.18265651,  0.24543064],
        [-1.27832135, -1.10372024],
        [ 0.44364567, -0.50090815],
        [-0.0346785 ,  0.04449327],
        [-0.321673  , -0.78796153],
        [-1.56531585, -0.07032808],
        [-1.75664552, -1.27595227],
        [-1.66098068, -0.98889889],
        [ 0.92196984, -1.16113092],
        [ 0.06098633, -0.32867612],
        [ 0.15665117,  0.18801996],
        [ 1.20896434, -0.98889889],
        [-0.13034334, -0.47220281],
        [ 0.92196984,  0.99176942],
        [ 0.44364567, -0.18514943],
        [ 0.15665117,  0.73342138],
        [ 0.15665117,  0.01578794],
        [-1.46965102,  0.30284131],
        [-0.99132684,  0.38895733],
        [-0.22600817, -0.93148821],
        [ 0.44364567,  0.27413598],
        [-0.22600817, -0.35738146],
        [-0.22600817,  0.04449327],
        [ 1.11329951, -0.90278288],
        [-0.99132684, -0.38608679],
        [ 0.73064017, -1.39077362],
        [ 1.01763467, -1.189

## Run Gaussian Naive Bayes Classifier (Continious Variables)

In [12]:
from sklearn.naive_bayes import GaussianNB

In [13]:
classifier = GaussianNB()
classifier.fit(x_train, y_train)

GaussianNB()

In [14]:
x_train, y_train

(array([[-1.27832135,  0.38895733],
        [-1.18265651,  0.24543064],
        [-1.27832135, -1.10372024],
        [ 0.44364567, -0.50090815],
        [-0.0346785 ,  0.04449327],
        [-0.321673  , -0.78796153],
        [-1.56531585, -0.07032808],
        [-1.75664552, -1.27595227],
        [-1.66098068, -0.98889889],
        [ 0.92196984, -1.16113092],
        [ 0.06098633, -0.32867612],
        [ 0.15665117,  0.18801996],
        [ 1.20896434, -0.98889889],
        [-0.13034334, -0.47220281],
        [ 0.92196984,  0.99176942],
        [ 0.44364567, -0.18514943],
        [ 0.15665117,  0.73342138],
        [ 0.15665117,  0.01578794],
        [-1.46965102,  0.30284131],
        [-0.99132684,  0.38895733],
        [-0.22600817, -0.93148821],
        [ 0.44364567,  0.27413598],
        [-0.22600817, -0.35738146],
        [-0.22600817,  0.04449327],
        [ 1.11329951, -0.90278288],
        [-0.99132684, -0.38608679],
        [ 0.73064017, -1.39077362],
        [ 1.01763467, -1.189

In [15]:
sample_data = [[30, 8700]]
scaled_data = sc.transform(sample_data)
prediction = classifier.predict(scaled_data)
prediction

array([0])

In [18]:
y_prediction = classifier.predict(x_test)
y_pred_column = y_prediction.reshape(len(y_prediction), 1)
y_test_column = y_test.reshape(len(y_test), 1)
combined = np.concatenate((y_pred_column, y_test_column), axis=1)
df = pd.DataFrame(combined, columns = ['Predicted', "Actual"])

In [20]:
df.head(10)

Unnamed: 0,Predicted,Actual
0,0,0
1,0,1
2,0,0
3,0,1
4,0,0
5,1,1
6,0,0
7,0,1
8,0,0
9,0,0


In [22]:
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, f1_score
cm = confusion_matrix(y_test, y_prediction)
cm

array([[46,  3],
       [ 9, 22]])

In [23]:
cm_df = pd.DataFrame(cm, 
                     index = ['Actual Negative', "Actual Positive"], 
                     columns = ["Predicted Negative", "Predicted Negative"])

In [24]:
cm_df

Unnamed: 0,Predicted Negative,Predicted Negative.1
Actual Negative,46,3
Actual Positive,9,22


## ---

In [25]:
accuracy = accuracy_score(y_test, y_prediction)
recall = recall_score(y_test, y_prediction)
precision = precision_score(y_test, y_prediction)
f1 = f1_score(y_test, y_prediction)
tn_rate = cm[0,0] / (cm[0,0] + cm[0,1])

In [27]:
print(f"Accuracy: {accuracy}")
print(f"Recall: {recall}")
print(f"Precision: {precision}")
print(f"F1 Score: {f1}")
print(f"Tn Rate: {tn_rate}")

Accuracy: 0.85
Recall: 0.7096774193548387
Precision: 0.88
F1 Score: 0.7857142857142856
Tn Rate: 0.9387755102040817
