In [11]:
import joblib
import numpy as np
from sklearn import metrics

In [16]:
(X_train, y_train) = joblib.load('../data/training_crime_data.joblib')
(X_test, y_test) = joblib.load('../data/training_crime_data2.joblib')

In [15]:
class GaussianNaiveBayes:
    def fit(self, X, y):
        self.classes = np.unique(y)
        self.mean = {}
        self.var = {}
        self.prior = {}

        for cls in self.classes:
            X_cls = X[y == cls]
            self.mean[cls] = np.mean(X_cls, axis=0)
            self.var[cls] = np.var(X_cls, axis=0)
            self.prior[cls] = len(X_cls) / len(X)

    def _pdf(self, x, mean, var):
        exponent = np.exp(-(x - mean)**2 / (2 * var))
        return (1 / np.sqrt(2 * np.pi * var)) * exponent

    def predict_proba(self, X):
        likelihood = np.zeros((X.shape[0], len(self.classes)))

        for i, cls in enumerate(self.classes):
            prior = np.log(self.prior[cls])
            pdf = np.sum(np.log(self._pdf(X, self.mean[cls], self.var[cls])), axis=1)
            likelihood[:, i] = prior + pdf

        # Normalize to get probabilities
        exp_likelihood = np.exp(likelihood)
        probabilities = exp_likelihood / np.sum(exp_likelihood, axis=1, keepdims=True)

        return probabilities

    def predict(self, X):
        probabilities = self.predict_proba(X)
        return np.argmax(probabilities, axis=1)

In [17]:
model = GaussianNaiveBayes()
model.fit(X_train, y_train)

# Assuming X_test is your test data
predictions = model.predict(X_test)

  pdf = np.sum(np.log(self._pdf(X, self.mean[cls], self.var[cls])), axis=1)
  probabilities = exp_likelihood / np.sum(exp_likelihood, axis=1, keepdims=True)


In [18]:
accuracy = metrics.accuracy_score(y_true=y_test, y_pred=predictions)
precision = metrics.precision_score(y_true=y_test, y_pred=predictions, average='weighted')
recall = metrics.recall_score(y_true=y_test, y_pred=predictions, average='weighted')
f1 = metrics.f1_score(y_true=y_test, y_pred=predictions, average='weighted')

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 0.44
Precision: 0.79
Recall: 0.44
F1 Score: 0.50
