In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, f1_score, confusion_matrix

class NaiveBayesClassifier:
    def __init__(self, X, y):
        self.X, self.y = X, y
        self.N = len(self.X)
        self.dim = len(self.X[0])
        self.attrs = [[] for _ in range(self.dim)]
        self.output_dom = {}
        self.data = []

        for i in range(len(self.X)):
            for j in range(self.dim):
                if not self.X[i][j] in self.attrs[j]:
                    self.attrs[j].append(self.X[i][j])

            if not self.y[i] in self.output_dom.keys():
                self.output_dom[self.y[i]] = 1
            else:
                self.output_dom[self.y[i]] += 1
            self.data.append([self.X[i], self.y[i]])

    def classify(self, entry):
        solve = None
        max_arg = -1
        for y in self.output_dom.keys():
            prob = self.output_dom[y] / self.N
            for i in range(self.dim):
                cases = [x for x in self.data if x[0][i] == entry[i] and x[1] == y]
                n = len(cases)
                prob *= n / self.N

            if prob > max_arg:
                max_arg = prob
                solve = y
        return solve

data = pd.read_csv("Social_Network_Ads.csv")
X = data.iloc[:,2:4].values
y = data.iloc[:,-1].values
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25, random_state=42)
nbc = NaiveBayesClassifier(X_train, y_train)
y_pred = [nbc.classify(entry) for entry in X_test]

print(f"Accuracy score {accuracy_score(y_test,y_pred)}")
print(f"Precision score: {precision_score(y_test, y_pred)}")
print(f"F1 score: {f1_score(y_test, y_pred)}")
print(f"Confusion Matrix: {confusion_matrix(y_test, y_pred)}")


Accuracy score 0.8
Precision score: 0.7741935483870968
F1 score: 0.7058823529411764
Confusion Matrix: [[56  7]
 [13 24]]
