In [27]:
import numpy as np
import pandas as pd
from pandas.core.frame import DataFrame
from pandas.core.series import Series
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB,BernoulliNB,CategoricalNB,ComplementNB
from sklearn.metrics import confusion_matrix, classification_report
import warnings

# Install a blanket "ignore" filter: from here on, no Python warnings are shown
# for the rest of the kernel session.
# NOTE(review): this also silences warnings emitted by library calls in later
# cells (presumably including metric-related ones) — confirm that is intended,
# or narrow the filter to a specific warning category.
warnings.filterwarnings('ignore')

# Visible confirmation that the import cell ran to completion.
print("Imports successful")

Imports successful


In [2]:
# Read the raw ads table; the relative path is resolved against the kernel's
# current working directory.
dataset = pd.read_csv(filepath_or_buffer="Social_Network_Ads.csv")

# Preview the first five rows as this cell's output.
dataset.head(n=5)

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [3]:
# One-hot encode the non-numeric columns. drop_first=True drops the first level
# of each encoded column, so "Gender" becomes the single indicator "Gender_Male".
dataset = pd.get_dummies(data=dataset, drop_first=True)

# Preview the encoded frame.
dataset.head(n=5)

Unnamed: 0,User ID,Age,EstimatedSalary,Purchased,Gender_Male
0,15624510,19,19000,0,True
1,15810944,35,20000,0,True
2,15668575,26,43000,0,False
3,15603246,27,57000,0,False
4,15804002,19,76000,0,True


In [4]:
# Remove the "User ID" identifier column before modelling.
# This cell overwrites `dataset`, so running it a second time used to raise
# KeyError because the column was already gone. errors="ignore" makes the cell
# safe to re-run: the first run drops the column, later runs are a no-op.
dataset = dataset.drop(columns=["User ID"], errors="ignore")

# Preview the remaining columns.
dataset.head()

Unnamed: 0,Age,EstimatedSalary,Purchased,Gender_Male
0,19,19000,0,True
1,35,20000,0,True
2,26,43000,0,False
3,27,57000,0,False
4,19,76000,0,True


In [5]:
# Count how many rows fall into each value of the target column "Purchased".
dataset.loc[:, "Purchased"].value_counts()

Purchased
0    257
1    143
Name: count, dtype: int64

In [6]:
# Independent variables (model inputs): the three predictor columns.
indep = dataset[
  [
    "Age",
    "EstimatedSalary",
    "Gender_Male",
  ]
]

# Dependent variable (model target).
dep = dataset["Purchased"]

# Report both shapes, one per line, to confirm the row counts line up.
print(
  f"indep.shape: {indep.shape} ",
  f"dep.shape: {dep.shape} ",
  sep="\n",
)

indep.shape: (400, 3) 
dep.shape: (400,) 


In [30]:
def evaluate_model(
  classifier : MultinomialNB|BernoulliNB|CategoricalNB|ComplementNB,
  X_train : DataFrame,
  X_test : DataFrame,
  y_train : Series,
  y_test : Series
) -> None: 
  """Fit a classifier on the training split and print its test-set metrics.

  The classifier is fitted in place on ``X_train``/``y_train`` and then used to
  predict labels for ``X_test``. The predictions are compared with ``y_test``
  and a confusion matrix plus a classification report are printed to stdout.

  Args:
    classifier: Unfitted estimator exposing ``fit`` and ``predict``; annotated
      as one of the four naive Bayes variants imported by this notebook.
    X_train: Feature rows used for fitting.
    X_test: Feature rows used for prediction.
    y_train: Target labels matching ``X_train``.
    y_test: True labels matching ``X_test``.

  Returns:
    None. All results are printed.
  """
  classifier.fit(X_train, y_train)
  print("model creation successful")

  # Run prediction on the held-out rows.
  y_pred = classifier.predict(X_test)

  # Evaluate the model from its predictions: this is a classification task, so
  # a confusion matrix and per-class precision/recall/F1 are used (R-squared
  # would be the regression counterpart).
  cm = confusion_matrix(y_test, y_pred)

  # A classifier that never predicts some class has undefined precision for it.
  # zero_division=0 states explicitly that such scores are reported as 0.0,
  # instead of relying on a warning being suppressed elsewhere.
  clf_report = classification_report(y_test, y_pred, zero_division=0)

  # NOTE(review): "Matix" is a typo for "Matrix"; the literal is left unchanged
  # so the printed text keeps matching the outputs already recorded below.
  print("Confusion Matix:\n", cm)
  print("Classification Report:\n",clf_report)



In [31]:
# Hold out one third of the rows as the test set; the fixed random_state keeps
# the split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
  indep,
  dep,
  test_size=1/3,
  random_state=0,
)

In [32]:
# Multinomial naive Bayes, evaluated on the shared train/test split.
evaluate_model(
  classifier=MultinomialNB(),
  X_train=X_train, X_test=X_test,
  y_train=y_train, y_test=y_test,
)

model creation successful
Confusion Matix:
 [[85  0]
 [49  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.63      1.00      0.78        85
           1       0.00      0.00      0.00        49

    accuracy                           0.63       134
   macro avg       0.32      0.50      0.39       134
weighted avg       0.40      0.63      0.49       134



In [33]:
# Bernoulli naive Bayes, evaluated on the shared train/test split.
evaluate_model(
  classifier=BernoulliNB(),
  X_train=X_train, X_test=X_test,
  y_train=y_train, y_test=y_test,
)

model creation successful
Confusion Matix:
 [[85  0]
 [49  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.63      1.00      0.78        85
           1       0.00      0.00      0.00        49

    accuracy                           0.63       134
   macro avg       0.32      0.50      0.39       134
weighted avg       0.40      0.63      0.49       134



In [34]:
# Categorical naive Bayes, evaluated on the shared train/test split.
evaluate_model(
  classifier=CategoricalNB(),
  X_train=X_train, X_test=X_test,
  y_train=y_train, y_test=y_test,
)

model creation successful
Confusion Matix:
 [[82  3]
 [18 31]]
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.96      0.89        85
           1       0.91      0.63      0.75        49

    accuracy                           0.84       134
   macro avg       0.87      0.80      0.82       134
weighted avg       0.85      0.84      0.84       134



In [35]:
# Complement naive Bayes, evaluated on the shared train/test split.
evaluate_model(
  classifier=ComplementNB(),
  X_train=X_train, X_test=X_test,
  y_train=y_train, y_test=y_test,
)

model creation successful
Confusion Matix:
 [[40 45]
 [20 29]]
Classification Report:
               precision    recall  f1-score   support

           0       0.67      0.47      0.55        85
           1       0.39      0.59      0.47        49

    accuracy                           0.51       134
   macro avg       0.53      0.53      0.51       134
weighted avg       0.57      0.51      0.52       134

