In [None]:
import numpy as np
import sklearn as sk
from sklearn.datasets import load_iris

def covmat(data):
  """Return the sample covariance matrix of ``data``.

  Rows of ``data`` are treated as samples and columns as features.  The
  unbiased estimator is used: the summed products of deviations from the
  per-column mean are divided by ``num_sample - 1``.

  Args:
    data: 2-D array-like indexed as ``data[sample, feature]``.

  Returns:
    A ``(num_features, num_features)`` NumPy array whose ``[i, j]`` entry
    is the covariance between feature ``i`` and feature ``j``.  With a
    single sample the divisor is zero and the entries are NaN.
  """
  data = np.asarray(data, dtype=float)
  num_sample = data.shape[0]  # number of samples (rows)
  # Subtract each column's mean so every feature is centred on zero.
  centered = data - np.sum(data, axis=0) / num_sample
  # (features x samples) @ (samples x features) sums the products of
  # deviations over all samples for every feature pair in one step,
  # replacing the explicit i/j/k Python loops.
  return centered.T @ centered / (num_sample - 1)


def bayesian(x_train, y_train, x_test, y_test):
  """Classify ``x_test`` with a Gaussian Bayes classifier fitted on ``x_train``.

  Every class found in ``y_train`` is modelled as a multivariate normal
  with its own mean and its own covariance matrix (computed by ``covmat``).
  The class prior is the relative frequency of the class in ``y_train``.
  Each test sample is assigned the class with the largest posterior.

  Scores are compared in log space, which picks the same class as comparing
  ``prior * density`` but does not underflow to zero for samples that lie
  far from every class mean.

  Args:
    x_train: 2-D array-like of training samples, one row per sample.
    y_train: class label of every row of ``x_train``.  Labels may be any
      values ``np.unique`` can sort; they need not be ``0..K-1``.
    x_test: 2-D array-like of samples to classify, one row per sample.
    y_test: true label of every row of ``x_test``; used only for accuracy.

  Returns:
    A tuple ``(predictions, accuracy)``: ``predictions`` is a list with one
    predicted label per row of ``x_test`` and ``accuracy`` is the fraction
    of predictions equal to ``y_test`` (NaN when ``x_test`` is empty).

  Raises:
    numpy.linalg.LinAlgError: if the covariance matrix of a class is
      singular and therefore cannot be inverted.
  """
  x_train = np.asarray(x_train)
  y_train = np.asarray(y_train)
  x_test = np.asarray(x_test)
  y_test = np.asarray(y_test)

  # Class priors: relative frequency of each class in the training labels.
  unique_classes, class_counts = np.unique(y_train, return_counts=True)
  log_priors = np.log(class_counts / len(y_train))

  # Per-class Gaussian parameters, stored in the same order as
  # unique_classes so priors and parameters are matched by position and
  # never by label value.
  class_params = []
  for c in unique_classes:
    class_indices = np.where(y_train == c)[0]  # rows belonging to class c
    class_data = x_train[class_indices]
    mean = np.mean(class_data, axis=0)
    cov_matrix = covmat(class_data)
    inverse_covmat = np.linalg.inv(cov_matrix)
    # The density normaliser needs |cov|, not |cov^-1|.  slogdet returns
    # log|det| directly, which is also safer than det() for tiny values.
    _, log_det_covmat = np.linalg.slogdet(cov_matrix)
    class_params.append((mean, inverse_covmat, log_det_covmat))

  predictions = []
  for sample in x_test:
    d = sample.shape[0]  # dimensionality of the sample
    log_posteriors = []
    for log_prior, (mean, inverse_covmat, log_det_covmat) in zip(log_priors, class_params):
      diff = sample - mean
      # Squared Mahalanobis distance: diff^T * cov^-1 * diff.
      mahalanobis_sq = np.dot(np.dot(diff, inverse_covmat), diff)
      # Log of the multivariate normal density at ``sample``.
      log_density = -0.5 * (d * np.log(2 * np.pi) + log_det_covmat + mahalanobis_sq)
      log_posteriors.append(log_prior + log_density)
    # Pick the class whose (unnormalised) log posterior is largest.
    predictions.append(unique_classes[np.argmax(log_posteriors)])

  # Compare as arrays so the result is element-wise even when y_test was
  # passed as a plain list.
  if predictions:
    accuracy = np.mean(np.asarray(predictions) == y_test)
  else:
    accuracy = np.nan
  return predictions, accuracy

def test_train_split(data,target,split_perc):
  perm=np.random.permutation(len(data)) #to get random permutation of 0 to len-1
  x_shuff=data[perm]    #obtain shuffled data
  y_shuff=target[perm]    #obtain shuffled targets
  #get the index till which data is in train set
  n=round((((100-split_perc)*0.01))*len(data)) #round is used to round off to the nearest integer
  x_train=x_shuff[:n] #take starting n elements of shuffled data
  y_train=y_shuff[:n]
  x_test=x_shuff[n:] #take remaining elements of shuffled data
  y_test=y_shuff[n:]
  return x_train, y_train, x_test, y_test

# Demo on the Iris dataset: keep 20% of the samples for testing, classify
# them with the Bayesian classifier and print the outcome.
iris = load_iris()
x_train, y_train, x_test, y_test = test_train_split(
  data=iris.data,
  target=iris.target,
  split_perc=20,
)
predictions, accuracy = bayesian(
  x_train=x_train,
  y_train=y_train,
  x_test=x_test,
  y_test=y_test,
)
print("Predictions:", predictions)
print("Accuracy:", accuracy)

Predictions: [0, 0, 1, 0, 0, 2, 0, 2, 1, 1, 2, 2, 0, 1, 1, 2, 0, 0, 0, 0, 2, 1, 2, 1, 0, 2, 0, 0, 2, 0]
Accuracy: 1.0
