In [126]:
import numpy as np
import pandas as pd
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
def read_data(file_path):
    """Load a CSV file into a pandas DataFrame.

    Parameters
    ----------
    file_path
        Passed unchanged to ``pd.read_csv`` (a path or a file-like object).

    Returns
    -------
    pandas.DataFrame
        The parsed table, using ``pd.read_csv``'s default options.
    """
    frame = pd.read_csv(file_path)
    return frame

In [127]:
class NaiveBayes():
    """Naive Bayes classifier over discrete feature values, using raw counts.

    Every distinct value of every feature column is treated as its own
    category. No smoothing is applied: a feature value that was never seen
    during ``fit`` (or never seen together with a given class) contributes a
    factor of zero to that class's score.
    """

    def __init__(self):
        # class label -> prior probability P(class)
        self.class_prob={}
        # class label -> feature name -> feature value -> P(value | class)
        self.features_prob={}

    def fit(self,X_train,Y_train):
        """Estimate class priors and per-class feature-value frequencies.

        Parameters
        ----------
        X_train : pandas.DataFrame
            One column per feature; each distinct value is a category.
        Y_train : pandas.Series
            Class labels. The boolean masks built from ``X_train`` and
            ``Y_train`` are combined with ``&``, so the two are expected to
            share an index.

        Returns
        -------
        None
            The fitted tables are stored on ``self.class_prob`` and
            ``self.features_prob``.
        """
        classes,counts=np.unique(Y_train,return_counts=True)
        total_samples=len(Y_train)
        # Rebuild both tables from scratch so a refit on different labels does
        # not leave stale priors behind (predict iterates over class_prob).
        self.class_prob={}
        self.features_prob={}
        # Pair each label with its own count. Indexing ``counts`` by the label
        # itself only works when labels happen to be exactly 0..k-1.
        for c,class_count in zip(classes,counts):
            self.class_prob[c]=class_count/total_samples
            in_class=(Y_train == c)
            self.features_prob[c]={}
            for feature in X_train.columns:
                column=X_train[feature]
                self.features_prob[c][feature]={
                    value: np.sum((column == value) & in_class) / class_count
                    for value in column.unique()
                }

    def predict(self,X_test,final_only=False):
        """Score every row of ``X_test`` against every fitted class.

        Parameters
        ----------
        X_test : pandas.DataFrame
            Rows to classify; column names must match those seen in ``fit``.
        final_only : bool, default False
            ``False`` keeps the historical output: one entry is appended after
            *each* class is scored, holding the best class found so far for
            that row, so the list has ``len(X_test) * n_classes`` entries and
            only the last entry of each group is the row's actual prediction.
            ``True`` returns exactly one label per row (that last entry).

        Returns
        -------
        list
            Predicted class labels, laid out as described above.
        """
        predictions=[]
        for _,row in X_test.iterrows():
            # Scores are products of probabilities (>= 0), so -1 guarantees
            # the first class scored always becomes the initial best.
            max_prob=-1
            predicted_class=None
            for c in self.class_prob:
                prob = self.class_prob[c]
                for feature, value in row.items():
                    # Unseen values contribute a zero factor (no smoothing).
                    prob *= self.features_prob[c][feature].get(value, 0)
                if prob > max_prob:
                    max_prob = prob
                    predicted_class = c
                if not final_only:
                    # Backward-compatible running-best entry per class.
                    predictions.append(predicted_class)
            if final_only:
                predictions.append(predicted_class)
        return predictions


In [128]:
# Load the raw table and encode Gender as 1 for "Male", 0 for anything else.
data = read_data("Social_Network_Ads.csv")
data['Gender'] = (data['Gender'] == "Male").astype("int64")

# Features are the columns at positions 1..3; the target is 'Purchased'.
X = data.iloc[:, 1:4]
y = data['Purchased']

# Hold out 20% of the rows for testing, with a fixed seed for repeatability.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

In [129]:
# Fit the count-based classifier on the training split, then score the
# held-out rows. Note that predict() appends one entry after each class it
# scores, so Y_pred is longer than X_test.
model = NaiveBayes()
model.fit(X_train=X_train, Y_train=Y_train)
Y_pred = model.predict(X_test=X_test)

In [130]:
# model.predict() emits several entries per input row (one after each class it
# scores); only the last entry of each group is that row's final decision.
# Derive the group size from the lengths rather than hard-coding two classes.
group_size = max(len(Y_pred) // max(len(X_test), 1), 1)
y_pred = list(Y_pred[group_size - 1::group_size])

# np.asarray(...).tolist() accepts both a Series and a plain list, so this
# cell can be re-run without raising AttributeError on the second pass.
Y_test = np.asarray(Y_test).tolist()

In [131]:
# scikit-learn metric functions take (y_true, y_pred) in that order. Passing
# them the other way round swaps precision and recall, so the ground truth
# (Y_test) goes first. The metric functions come from the notebook's top
# import cell.
accuracy = accuracy_score(Y_test, y_pred)
precision = precision_score(Y_test, y_pred)
recall = recall_score(Y_test, y_pred)
f1 = f1_score(Y_test, y_pred)

# NOTE: the header text is kept as-is; the rows scored here are the
# train_test_split hold-out (X_test / Y_test).
print("Validation Set Metrics:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

Validation Set Metrics:
Accuracy: 0.85
Precision: 0.73
Recall: 0.73
F1 Score: 0.73


In [132]:
# confusion_matrix(y_true, y_pred) puts true labels on rows; transposing puts
# the *predicted* label on rows, which is the layout the captions below use.
confusion = confusion_matrix(Y_test, y_pred).T
print(confusion)

# (caption, predicted label, true label)
cells = (
    ("Class 0 predicted and true : ", 0, 0),
    ("Class 0 predicted and false : ", 0, 1),
    ("Class 1 predicted and false : ", 1, 0),
    ("Class 1 predicted and true : ", 1, 1),
)
for caption, predicted, actual in cells:
    print(caption)
    print(confusion[predicted][actual])

[[52  6]
 [ 6 16]]
Class 0 predicted and true : 
52
Class 0 predicted and false : 
6
Class 1 predicted and false : 
6
Class 1 predicted and true : 
16


In [133]:
# Fixed seed so the sampled rows, and every metric computed from them, are
# reproducible on Restart & Run All.
# NOTE(review): the sample is drawn from the full `data` frame that X_train
# was split from, so it can contain rows the model was trained on.
valid = data.sample(n=20, random_state=0)
X_valid = valid.iloc[:,1:4]
y_valid = valid['Purchased']

In [134]:
y_val = model.predict(X_valid)

# model.predict() emits several entries per input row (one after each class it
# scores); only the last entry of each group is that row's final decision.
# Derive the group size from the lengths rather than hard-coding two classes.
group_size = max(len(y_val) // max(len(X_valid), 1), 1)
y_valpred = list(y_val[group_size - 1::group_size])

# np.asarray(...).tolist() accepts both a Series and a plain list, so this
# cell can be re-run without raising AttributeError on the second pass.
y_valid = np.asarray(y_valid).tolist()

In [135]:
# scikit-learn metric functions take (y_true, y_pred) in that order. Passing
# them the other way round swaps precision and recall, so the ground truth
# (y_valid) goes first. The metric functions come from the notebook's top
# import cell.
accuracy = accuracy_score(y_valid, y_valpred)
precision = precision_score(y_valid, y_valpred)
recall = recall_score(y_valid, y_valpred)
f1 = f1_score(y_valid, y_valpred)

print("Validation Set Metrics:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

Validation Set Metrics:
Accuracy: 0.90
Precision: 0.88
Recall: 0.88
F1 Score: 0.88


In [136]:
# confusion_matrix(y_true, y_pred) puts true labels on rows; transposing puts
# the *predicted* label on rows, which is the layout the captions below use.
confusion = confusion_matrix(y_valid, y_valpred).T
print(confusion)

# (caption, predicted label, true label)
cells = (
    ("Class 0 predicted and true : ", 0, 0),
    ("Class 0 predicted and false : ", 0, 1),
    ("Class 1 predicted and false : ", 1, 0),
    ("Class 1 predicted and true : ", 1, 1),
)
for caption, predicted, actual in cells:
    print(caption)
    print(confusion[predicted][actual])

[[11  1]
 [ 1  7]]
Class 0 predicted and true : 
11
Class 0 predicted and false : 
1
Class 1 predicted and false : 
1
Class 1 predicted and true : 
7
