In [None]:
# Implementing the Gaussian Bayes classifier

class gaussianBayesClassifier:
  """Gaussian Bayes classifier with three covariance models.

  The ``variant`` passed to the constructor selects how the class-conditional
  covariance is modelled:

  * ``1`` -- one isotropic covariance ``sigma^2 * I`` shared by all classes,
    where ``sigma`` is the average of the per-class, per-feature standard
    deviations (linear discriminant).
  * ``2`` -- one full covariance matrix shared by all classes, taken as the
    unweighted mean of the per-class sample covariances (linear discriminant).
  * ``3`` -- a separate full covariance matrix per class (quadratic
    discriminant).

  Attributes set by ``fit``:
    classes: sorted unique labels found in ``y_train``.
    mean:    per-class feature means, shape (n_classes, n_features).
    std:     per-class feature standard deviations (``np.std`` default,
             i.e. population std), same shape as ``mean``.
    prior:   per-class relative frequencies in ``y_train``.
    priors:  the same array as ``prior``.
    cov_mat: (n_features, n_features) for variants 1 and 2,
             (n_classes, n_features, n_features) for variant 3.
  """

  _VARIANTS = (1, 2, 3)

  def __init__(self, variant):
    self.variant = variant
    self.mean = None
    self.std = None
    self.classes = None
    self.priors = None
    # ``prior`` is the name fit() and predict() have always used; ``priors``
    # is kept in sync so that both spellings refer to the fitted values.
    self.prior = None
    self.cov_mat = None

  def _check_variant(self):
    """Raise ValueError when ``self.variant`` is not one of 1, 2 or 3."""
    if self.variant not in self._VARIANTS:
      raise ValueError(
          "variant must be one of %r, got %r" % (self._VARIANTS, self.variant))

  def _pooled_variance(self):
    """Return the variant-1 variance: (mean of all per-class stds) squared."""
    return (np.sum(self.std) / self.std.size) ** 2

  def fit(self, x_train, y_train):
    """Estimate class means, stds, priors and the covariance model.

    Args:
      x_train: 2-D array-like of shape (n_samples, n_features).
      y_train: array-like of n_samples class labels.

    Raises:
      ValueError: if ``self.variant`` is not 1, 2 or 3.
    """
    self._check_variant()
    x_train = np.asarray(x_train)
    y_train = np.asarray(y_train).ravel()

    # Use the labels that were passed in, not a global named ``y``.
    self.classes = np.unique(y_train)
    per_class = [x_train[y_train == label] for label in self.classes]

    self.mean = np.array([np.mean(rows, axis=0) for rows in per_class])
    self.std = np.array([np.std(rows, axis=0) for rows in per_class])
    self.prior = np.array([len(rows) / len(y_train) for rows in per_class])
    self.priors = self.prior

    n_features = x_train.shape[1]
    if self.variant == 1:
      self.cov_mat = self._pooled_variance() * np.eye(n_features)
      return

    # np.cov with rowvar=False treats columns as variables, which yields the
    # full sample covariance matrix (ddof=1) of each class in one call.
    # atleast_2d keeps a (1, 1) matrix when there is a single feature.
    class_covs = np.array(
        [np.atleast_2d(np.cov(rows, rowvar=False)) for rows in per_class])
    if self.variant == 2:
      self.cov_mat = np.mean(class_covs, axis=0)
    else:
      self.cov_mat = class_covs

  def _discriminants(self, x):
    """Return the discriminant value g_k(x) of every class for one sample."""
    log_prior = np.log(self.prior)

    if self.variant == 1:
      variance = self._pooled_variance()
      weights = self.mean / variance
      bias = -np.sum(self.mean * self.mean, axis=1) / (2 * variance) + log_prior
      return weights @ x + bias

    if self.variant == 2:
      cov_inv = np.linalg.inv(self.cov_mat)
      # Row k of ``weights`` is cov_inv @ mean_k.
      weights = self.mean @ cov_inv.T
      bias = -0.5 * np.sum(weights * self.mean, axis=1) + log_prior
      return weights @ x + bias

    scores = []
    for mean_k, cov_k, log_prior_k in zip(self.mean, self.cov_mat, log_prior):
      cov_inv = np.linalg.inv(cov_k)
      weight = cov_inv @ mean_k
      # math.log raises ValueError for a non-positive determinant instead of
      # silently producing nan.
      bias = (-0.5 * (mean_k @ weight)
              - 0.5 * math.log(np.linalg.det(cov_k))
              + log_prior_k)
      scores.append(-0.5 * (x @ cov_inv @ x) + weight @ x + bias)
    return np.array(scores)

  def predict(self, x):
    """Classify a single sample.

    Args:
      x: 1-D array-like holding one feature vector.

    Returns:
      The index (into ``self.classes``) of the class with the largest
      discriminant value. The index is also printed, followed by a space.

    Raises:
      ValueError: if ``self.variant`` is not 1, 2 or 3.
    """
    self._check_variant()
    best = np.argmax(self._discriminants(np.asarray(x)))
    print(best, end=' ')
    return best

  def test(self, x_test, y_test):
    """Predict every row of ``x_test`` and report the accuracy.

    Args:
      x_test: iterable of feature vectors.
      y_test: array-like of true labels, one per row of ``x_test``.

    Returns:
      The accuracy as a percentage (0-100). It is printed as well.
    """
    print('Predictions: ')
    y_pred = np.array([self.predict(sample) for sample in x_test])

    # Positional comparison: asarray drops any pandas index on y_test.
    y_true = np.asarray(y_test).ravel()
    hits = np.count_nonzero(y_pred == y_true)
    accuracy = hits / len(y_true)

    print("\nAccuracy =", accuracy*100, '%')
    print('\n')
    return accuracy*100

  def plotDecisionBoundary(self, x_train, y_train):
    """Plot decision regions over features 2 and 3 with the training points.

    A 100 x 100 grid spanning the range of feature columns 2 and 3 is
    classified with every other feature fixed at its training mean; the
    predicted regions are drawn faintly and the training samples on top.

    Args:
      x_train: 2-D array-like with at least four feature columns.
      y_train: array-like of labels; samples labelled 0, 1 and 2 are drawn.
    """
    x_train = np.asarray(x_train)
    y_train = np.asarray(y_train).ravel()
    feat_x = x_train[:, 2]
    feat_y = x_train[:, 3]

    xx, yy = np.meshgrid(np.linspace(feat_x.min(), feat_x.max(), 100),
                         np.linspace(feat_y.min(), feat_y.max(), 100))
    grid_x = xx.reshape(-1)
    grid_y = yy.reshape(-1)

    # Every grid point starts as the mean sample; only the two plotted
    # features vary.
    grid = np.tile(np.mean(x_train, axis=0), (grid_x.size, 1))
    grid[:, 2] = grid_x
    grid[:, 3] = grid_y

    print('Output class for 10000 random inputs (these 10000 inputs are used in forming decision boundaries) :')
    categories = np.array([self.predict(point) for point in grid])

    colors = ('r', 'g', 'b')
    names = ('Setosa', 'Versicolor', 'Virginica')

    for index, color in enumerate(colors):
      region = categories == index
      plt.scatter(grid_x[region], grid_y[region], color=color, alpha=0.05)

    for index, (color, name) in enumerate(zip(colors, names)):
      members = y_train == index
      plt.scatter(feat_x[members], feat_y[members], color=color, label=name, alpha=1)

    plt.legend(loc='lower right')
    plt.show()