In [None]:
# Mount Google Drive at /content/drive so the notebook can reach files stored there.
# NOTE(review): google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount("/content/drive")

In [None]:
# Make the lab folder the working directory; the .npy files are loaded below
# through relative paths, so they are resolved against this folder.
%cd "/content/drive/MyDrive/lab2"
# List the folder contents.
%ls

In [None]:
import numpy as np
# Load the four arrays from the current working directory in one unpacking
# assignment; the files are read in the order they are listed.
train_dataset, test_dataset, train_labels, test_labels = (
    np.load(npy_path)
    for npy_path in (
        "train_dataset.npy",
        "test_dataset.npy",
        "train_labels.npy",
        "test_labels.npy",
    )
)

In [None]:
class NaiveBayesClassifier:
  """Naive Bayes classifier for categorical features with add-one smoothing.

  Labels and feature values are matched against ``np.arange(K)``, so they are
  expected to be encoded as integer codes ``0..K-1``. The number of classes and
  the number of categories per feature are inferred in ``fit`` as the largest
  value seen in the training data plus one.
  """

  def __init__(self):
    """Create an unfitted model; every attribute is populated by ``fit``."""
    self.train_dataset = None
    self.train_labels = None
    self.train_size = 0
    self.num_features = 0
    self.num_classes = 0
    self.num_feature_categories = 0

  @staticmethod
  def _one_hot(values, num_categories):
    """Return a (len(values), num_categories) boolean indicator matrix.

    Row ``i`` is True in column ``k`` exactly when ``values[i] == k``; a value
    outside ``0..num_categories-1`` produces an all-False row.
    """
    # Broadcasting a (K,) row against an (n, 1) column yields the (n, K) matrix.
    return np.arange(num_categories) == np.reshape(values, (-1, 1))

  def _class_indicators(self):
    """Return the (train_size, num_classes) one-hot matrix of the training labels."""
    return self._one_hot(self.train_labels, self.num_classes)

  def fit(self, train_dataset, train_labels):
    """Store the training data and infer the problem dimensions from it.

    Args:
      train_dataset: 2-D array, one row per sample and one column per feature.
      train_labels: array of class labels, one per training sample.
    """
    self.train_dataset = train_dataset
    self.train_labels = train_labels
    self.train_size = train_dataset.shape[0]
    self.num_features = train_dataset.shape[1]
    # Codes start at 0, so the count of distinct codes is the maximum plus one.
    self.num_classes = np.amax(train_labels) + 1
    self.num_feature_categories = np.amax(train_dataset, axis=0) + 1

  def estimate_class_prior(self):
    """Return the add-one smoothed prior P(class) as an array of length num_classes."""
    class_count = np.sum(self._class_indicators(), axis=0)
    return (class_count + 1) / (np.sum(class_count) + self.num_classes)

  def estimate_likelihoods(self):
    """Return the add-one smoothed likelihoods P(feature = k | class = c).

    Returns:
      A list with one array per feature; entry ``f`` has shape
      (num_feature_categories[f], num_classes) and each of its columns sums to 1.
    """
    class_indicators = self._class_indicators().astype(int)
    # Samples per class; identical for every feature, so computed once.
    class_count = np.sum(class_indicators, axis=0)
    likelihoods = []
    for feature in range(self.num_features):
      num_categories = self.num_feature_categories[feature]
      feature_indicators = self._one_hot(self.train_dataset[:, feature], num_categories)
      # (K, n) @ (n, C) counts the co-occurrences of each category with each class.
      feature_count = np.dot(feature_indicators.transpose(), class_indicators)
      likelihoods.append((feature_count + 1) / (class_count + num_categories))
    return likelihoods

  def predict(self, test_dataset):
    """Predict the most probable class for every row of ``test_dataset``.

    A feature value that was never observed during training has no estimated
    likelihood. Such a feature is treated as carrying no evidence for that
    sample (it adds 0 to every class log-score) instead of driving all class
    scores to ``-inf``, which would make the prediction collapse to class 0.

    Args:
      test_dataset: 2-D array with the same column layout as the training data.

    Returns:
      1-D array holding the predicted class index of each row.
    """
    test_size = test_dataset.shape[0]
    class_prior = self.estimate_class_prior()
    likelihoods = self.estimate_likelihoods()
    # Work in log space: start from the log prior and add per-feature log likelihoods.
    class_log_prob = np.tile(np.log(class_prior), (test_size, 1))
    for feature, feature_likelihood in enumerate(likelihoods):
      indicators = self._one_hot(test_dataset[:, feature], self.num_feature_categories[feature])
      # Selects, for every sample, the likelihood row of its category.
      category_prob = np.matmul(indicators, feature_likelihood)
      # All-False indicator rows mark categories unseen in training; log(1) = 0.
      unseen = ~indicators.any(axis=1)
      category_prob[unseen] = 1.0
      class_log_prob += np.log(category_prob)
    return np.argmax(class_log_prob, axis=1)

In [None]:
# Scratch demo of the broadcasting comparison that NaiveBayesClassifier relies on:
# a row of indices (shape (10,)) compared against a column of values (shape (4, 1))
# yields a (4, 10) boolean matrix with a single True per row, at the column whose
# index equals that row's value -- i.e. a one-hot encoding of [1, 0, 6, 4].
# NOTE(review): numpy is already imported in an earlier cell; this re-import is redundant.
import numpy as np
np.arange(10) ==[[1],[0],[6],[4]]

In [None]:
if __name__ == '__main__':
  # End-to-end run: load the data, fit the classifier, show the estimated
  # parameters, predict on the test set and report the accuracy.
  # The arrays are (re)loaded here so this cell does not depend on earlier cells.
  train_dataset = np.load("train_dataset.npy")
  test_dataset = np.load("test_dataset.npy")
  train_labels = np.load("train_labels.npy")
  test_labels = np.load("test_labels.npy")

  nb_model = NaiveBayesClassifier()
  nb_model.fit(train_dataset, train_labels)
  # Adjacent f-strings are concatenated by the parser; unlike a backslash
  # continuation inside one literal, this keeps the source indentation out of
  # the printed message.
  print(
      f"After fitting the training data, the train size is "
      f"{nb_model.train_size}, the number of features is {nb_model.num_features}, "
      f"the number of class labels is {nb_model.num_classes}."
  )

  class_prior = nb_model.estimate_class_prior()
  print(f"The class priors are {class_prior}.")
  likelihoods = nb_model.estimate_likelihoods()
  print(f"The likelihoods of the first feature (Age) are \n {likelihoods[0]}.")

  test_predict = nb_model.predict(test_dataset)
  print(f"The predictions for test data are:\n {test_predict}")

  # Flatten the labels before comparing: predictions are 1-D, and a column-vector
  # label array would otherwise broadcast to an (n, n) matrix and corrupt the score.
  accuracy_score = np.mean(test_predict == np.ravel(test_labels))

  print(accuracy_score)