In [1]:
import numpy as np
import sklearn as sk
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split  #inbuilt data split tool

def covmat(data):
  """Return the unbiased sample covariance matrix of ``data``.

  Args:
    data: 2-D array-like of shape (num_samples, num_features); each row is
      one sample and each column is one feature.

  Returns:
    A (num_features, num_features) float array whose entry [i, j] is
    sum_k (data[k, i] - mean[i]) * (data[k, j] - mean[j]) / (num_samples - 1).

  Raises:
    ValueError: if ``data`` is not 2-D or holds fewer than 2 samples (the
      unbiased estimator divides by num_samples - 1, so it is undefined there).
  """
  data = np.asarray(data, dtype=float)
  if data.ndim != 2:
    raise ValueError("covmat expects a 2-D array of shape (num_samples, num_features)")
  num_sample = data.shape[0]  # number of samples (rows)
  if num_sample < 2:
    raise ValueError("covmat needs at least 2 samples to estimate a covariance matrix")
  centered = data - np.mean(data, axis=0)  # subtract each column's mean
  # centered^T . centered sums the products of deviations over all samples for
  # every feature pair at once, replacing the explicit i/j/k loops.
  return np.dot(centered.T, centered) / (num_sample - 1)

def mahalanobis(x_test,x_train,y_train):
  """Classify test samples by minimum Mahalanobis distance to each class.

  For every distinct label in ``y_train`` the class mean and the inverse of
  the class covariance matrix (computed with ``covmat``) are estimated from
  the training rows of that class. Each test sample is then assigned the
  label of the class with the smallest distance
  sqrt((x - mean) . cov^-1 . (x - mean)).

  Args:
    x_test: 2-D array of samples to classify, one sample per row.
    x_train: 2-D array of training samples, one sample per row.
    y_train: class label of every row of ``x_train``.

  Returns:
    A list with one predicted class label (taken from ``y_train``) per row
    of ``x_test``.

  Raises:
    numpy.linalg.LinAlgError: if a class covariance matrix is singular and
      cannot be inverted.
  """
  x_train = np.asarray(x_train)
  y_train = np.asarray(y_train).ravel()  # one label per training row

  unique_classes = np.unique(y_train)  # sorted distinct class labels
  class_stats = []  # (mean, inverse covariance) per class, aligned with unique_classes
  for c in unique_classes:
    class_data = x_train[y_train == c]  # all training rows of class c
    mean = np.mean(class_data, axis=0)
    inverse_covmat = np.linalg.inv(covmat(class_data))
    class_stats.append((mean, inverse_covmat))

  predicted = []  # predicted label for every test sample
  for test_sample in x_test:
    distances = []  # distance of this sample to every class, in unique_classes order
    for mean, inverse_covmat in class_stats:
      diff = test_sample - mean  # (x - mean_i)
      distances.append(np.sqrt(np.dot(diff, np.dot(inverse_covmat, diff))))
    # argmin gives a position within unique_classes; map it back to the actual
    # label so labels that are not 0..k-1 are returned correctly.
    predicted.append(unique_classes[np.argmin(distances)])
  return predicted

def test_train_split(data,target,split_perc):
  perm=np.random.permutation(len(data)) #to get random permutation of 0 to len-1
  x_shuff=data[perm]    #obtain shuffled data
  y_shuff=target[perm]    #obtain shuffled targets
  #get the index till which data is in train set
  n=round((((100-split_perc)*0.01))*len(data)) #round is used to round off to the nearest integer
  x_train=x_shuff[:n] #take starting n elements of shuffled data
  y_train=y_shuff[:n]
  x_test=x_shuff[n:] #take remaining elements of shuffled data
  y_test=y_shuff[n:]
  return x_train, y_train, x_test, y_test

# Load the Iris dataset, hold out 20% of the samples for testing and
# classify the held-out samples with the Mahalanobis-distance classifier.
iris=load_iris()
x_train,y_train,x_test,y_test=test_train_split(data=iris.data,target=iris.target,split_perc=20)
predicted=mahalanobis(x_test=x_test,x_train=x_train,y_train=y_train)
print(predicted)

# Evaluate the predictions with a 3-class confusion matrix
print("Analysing our model.")

def calculate_confusion_matrix(y_true, y_pred, num_classes):
  """Count (true class, predicted class) pairs in a square matrix.

  Entry [t, p] of the returned (num_classes x num_classes) float matrix is
  the number of samples whose true class is t and whose predicted class is
  p: rows index the true class, columns the predicted class. Labels are
  converted with ``int()`` before being used as indices, and the two
  sequences are paired with ``zip``.
  """
  tally = np.zeros((num_classes, num_classes))
  pairs = np.array(
      [(int(actual), int(guess)) for actual, guess in zip(y_true, y_pred)],
      dtype=np.intp,
  ).reshape(-1, 2)  # keeps a (0, 2) shape when there are no samples
  # Unbuffered add: every occurrence of a (row, column) pair adds 1, so
  # repeated pairs are all counted.
  np.add.at(tally, (pairs[:, 0], pairs[:, 1]), 1)
  return tally
def _safe_divide(numerator, denominator):
  # Element-wise division that yields 0.0 wherever the denominator is 0.
  numerator = np.asarray(numerator, dtype=float)
  denominator = np.asarray(denominator, dtype=float)
  result = np.zeros(np.broadcast(numerator, denominator).shape, dtype=float)
  np.divide(numerator, denominator, out=result, where=denominator != 0)
  return result

def calculate_metrics(conf_matrix):
  """Compute one-vs-rest accuracy, precision, recall and F1 for every class.

  Args:
    conf_matrix: square confusion matrix with the true class on the rows and
      the predicted class on the columns.

  Returns:
    The tuple (accuracy, precision, recall, f1_score); each element is a
    1-D float array with one value per class. A ratio whose denominator is 0
    (for example the precision of a class that was never predicted) is
    reported as 0.0 instead of NaN.
  """
  conf_matrix = np.asarray(conf_matrix, dtype=float)
  total = np.sum(conf_matrix)  # total number of samples

  # Per-class counts, each a 1-D array with one entry per class
  TP = np.diag(conf_matrix)               # correctly predicted samples of the class
  FP = np.sum(conf_matrix, axis=0) - TP   # column sum - TP
  FN = np.sum(conf_matrix, axis=1) - TP   # row sum - TP
  TN = total - (TP + FP + FN)             # everything else

  accuracy = _safe_divide(TP + TN, total)
  precision = _safe_divide(TP, TP + FP)
  recall = _safe_divide(TP, TP + FN)
  # F1 is the harmonic mean of precision and recall; 0.0 when both are 0
  f1_score = _safe_divide(2 * (precision * recall), precision + recall)

  return accuracy, precision, recall, f1_score


# Build the 3-class confusion matrix for the held-out samples, derive the
# per-class metrics from it and print everything.
conf_matrix=calculate_confusion_matrix(y_true=y_test,y_pred=predicted,num_classes=3)
accuracy, precision, recall, f1_score = calculate_metrics(conf_matrix=conf_matrix)
print(conf_matrix)
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1_score)

[0, 1, 1, 1, 2, 1, 0, 0, 0, 2, 1, 1, 0, 1, 2, 2, 1, 1, 0, 0, 0, 0, 2, 1, 2, 0, 2, 0, 2, 2]
Analysing our model.
[[11.  0.  0.]
 [ 0. 10.  2.]
 [ 0.  0.  7.]]
Accuracy: [1.         0.93333333 0.93333333]
Precision: [1.         1.         0.77777778]
Recall: [1.         0.83333333 1.        ]
F1 Score: [1.         0.90909091 0.875     ]
