### Import Libraries

In [431]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random

### Load

In [432]:
data = pd.read_csv('iris.csv').drop('Id', axis=1)

## Train-Test Split

### Separate Classes

In [433]:
def separate_species(species):
    class_data = train[train['Species'] == species].iloc[:, :]
    return class_data

In [434]:
train_class, train_class_X = [], []
for c in classes:
    train_class.append(separate_species(c))
test = pd.DataFrame()
for df in train_class:
    sample = df.sample(n = 1)
    test = pd.concat([test, sample], ignore_index=True)
    df.drop(sample.index)
    train_class_X.append(df.drop(['Species'], axis=1))
test

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,4.6,3.4,1.4,0.3,Iris-setosa
1,5.5,2.5,4.0,1.3,Iris-versicolor
2,7.6,3.0,6.6,2.1,Iris-virginica


In [435]:
test_X = test.iloc[:, :-1]
test_y = test.iloc[:, -1]
test_X

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
0,4.6,3.4,1.4,0.3
1,5.5,2.5,4.0,1.3
2,7.6,3.0,6.6,2.1


In [436]:
test_y

0        Iris-setosa
1    Iris-versicolor
2     Iris-virginica
Name: Species, dtype: object

## Mahalanobis Distance

### Mean Vector

In [437]:
def mean_vector(df):
    return np.array(df.mean(axis=0).round(4))

In [438]:
mean_vectors = []
for i in range(len(classes)):
    mean_vectors.append(mean_vector(train_class_X[i]))

### Covariance Matrix

In [439]:
inv_covs = []
for i in range(len(classes)):
    inv_covs.append(np.linalg.inv(np.cov(train_class_X[i].T)))

### Distance

In [440]:
def mahalanobis_distance(x, y, cov):
    res = np.dot(cov, x - y)
    res =  np.dot((x-y).T, res)
    return np.sqrt(res)

### Prediction

In [441]:
def predict_class(x, original_class):
    dists = []
    for i in range(len(classes)):
        dists.append((mahalanobis_distance(x, mean_vectors[i], inv_covs[i]), classes[i], original_class))
    min_dist = min(dists)
    df = pd.DataFrame({
        'MahalanobisDistance': [min_dist[0]],
        'PredictedClass': [min_dist[1]],
        'OriginalClass': [min_dist[2]]
    })
    return df

In [442]:
preds = pd.DataFrame(columns = ['PredictedClass', 'OriginalClass', 'MahalanobisDistance'])
for i in range(len(test_X)):
    x = np.array(test_X.iloc[i])
    original_class = test_y.iloc[i]
    preds = pd.concat([preds, predict_class(x, original_class)], ignore_index=True)

In [443]:
preds

Unnamed: 0,PredictedClass,OriginalClass,MahalanobisDistance
0,Iris-setosa,Iris-setosa,1.8286
1,Iris-versicolor,Iris-versicolor,1.283398
2,Iris-virginica,Iris-virginica,2.033908
