# import libraries

In [44]:
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np
from scipy.spatial.distance import mahalanobis

# load iris

In [3]:
iris_dataset = load_iris()
type(iris_dataset)

sklearn.utils._bunch.Bunch

# extract data

In [17]:
X = iris_dataset.data
Y = iris_dataset.target
columns = iris_dataset.feature_names
columns.append('class')

print(f"X.shape: {X.shape}")
print(f"Y.shape: {Y.shape}")
print(f"len(columns): {len(columns)}")

X.shape: (150, 4)
Y.shape: (150,)
len(columns): 5


In [12]:
# concate y to x

data = np.c_[X, Y]
print(f"data.shape: {data.shape}")

data.shape: (150, 5)


In [18]:
columns

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)',
 'class']

In [19]:
df = pd.DataFrame(data=data, columns=columns)
df.head(1)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
0,5.1,3.5,1.4,0.2,0.0


In [23]:
mean_setosa = df[df['class'] == 0].iloc[:, :-1].mean()
mean_versicolor = df[df['class'] == 1].iloc[:, :-1].mean()
mean_virginica = df[df['class'] == 2].iloc[:, :-1].mean()

print(f"mean_setosa:\n{mean_setosa}\n\n")
print(f"mean_versicolor:\n{mean_versicolor}\n\n")
print(f"mean_virginica:\n{mean_virginica}\n\n")

mean_setosa:
sepal length (cm)    5.006
sepal width (cm)     3.428
petal length (cm)    1.462
petal width (cm)     0.246
dtype: float64


mean_versicolor:
sepal length (cm)    5.936
sepal width (cm)     2.770
petal length (cm)    4.260
petal width (cm)     1.326
dtype: float64


mean_virginica:
sepal length (cm)    6.588
sepal width (cm)     2.974
petal length (cm)    5.552
petal width (cm)     2.026
dtype: float64




# Euclidean

In [27]:
euc_set_ver_dist = np.linalg.norm(mean_setosa - mean_versicolor)
euc_set_vir_dist = np.linalg.norm(mean_setosa - mean_virginica)
euc_vir_ver_dist = np.linalg.norm(mean_virginica - mean_versicolor)

print(f"euc_set_ver_dist: {euc_set_ver_dist}")
print(f"euc_set_vir_dist: {euc_set_vir_dist}")
print(f"euc_vir_ver_dist: {euc_vir_ver_dist}")

euc_set_ver_dist: 3.2082811597489393
euc_set_vir_dist: 4.754507335150509
euc_vir_ver_dist: 1.6204888151418995


# Manhattan

In [28]:
man_set_ver_dist = np.sum(np.abs(mean_setosa - mean_versicolor))
man_set_vir_dist = np.sum(np.abs(mean_setosa - mean_virginica))
man_vir_ver_dist = np.sum(np.abs(mean_virginica - mean_versicolor))

print(f"man_set_ver_dist: {man_set_ver_dist}")
print(f"man_set_vir_dist: {man_set_vir_dist}")
print(f"man_vir_ver_dist: {man_vir_ver_dist}")

man_set_ver_dist: 5.466000000000001
man_set_vir_dist: 7.905999999999999
man_vir_ver_dist: 2.847999999999997


# Chebyshev

The Chebyshev distance, also known as the maximum metric or chessboard distance, measures the greatest difference between the coordinates of two points along any single dimension. It is particularly useful in grid-based environments, such as chess or robotics, where diagonal movement is as easy as horizontal or vertical movement.

In [39]:
def find_chebyshev(d1, d2):
    return np.max([np.abs((d1.iloc[i] - d2.iloc[i])) for i in np.arange(d1.shape[0])])

In [56]:
[np.abs((mean_setosa.iloc[i] - mean_setosa.iloc[i])) for i in np.arange(mean_setosa.shape[0])]

[np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(0.0)]

In [40]:
cheb_set_ver_dist = find_chebyshev(mean_setosa, mean_versicolor)
cheb_set_vir_dist = find_chebyshev(mean_setosa, mean_virginica)
cheb_vir_ver_dist = find_chebyshev(mean_versicolor, mean_virginica)

print(f"cheb_set_ver_dist: {cheb_set_ver_dist}")
print(f"cheb_set_vir_dist: {cheb_set_vir_dist}")
print(f"cheb_vir_ver_dist: {cheb_vir_ver_dist}")

cheb_set_ver_dist: 2.7979999999999996
cheb_set_vir_dist: 4.09
cheb_vir_ver_dist: 1.2919999999999998


# Cosine distance

In [41]:
def cosine_distance(u, v):
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))

In [43]:
cos_set_ver_dist = cosine_distance(mean_setosa, mean_versicolor)
cos_set_vir_dist = cosine_distance(mean_setosa, mean_virginica)
cos_vir_ver_dist = cosine_distance(mean_versicolor, mean_virginica)

print(f"cos_set_ver_dist: {cos_set_ver_dist}")
print(f"cos_set_vir_dist: {cos_set_vir_dist}")
print(f"cos_vir_ver_dist: {cos_vir_ver_dist}")

cos_set_ver_dist: 0.9245352736964948
cos_set_vir_dist: 0.8881023519747967
cos_vir_ver_dist: 0.9957131046752432


# Mahalanobis

In [55]:
df_cov = df.iloc[:, :-1].cov()
df_mean = df.iloc[:, :-1].cov()

print(f"df_cov:\n{df_cov}")
print(f"df_mean:\n{df_mean}")

df_cov:
                   sepal length (cm)  sepal width (cm)  petal length (cm)  \
sepal length (cm)           0.685694         -0.042434           1.274315   
sepal width (cm)           -0.042434          0.189979          -0.329656   
petal length (cm)           1.274315         -0.329656           3.116278   
petal width (cm)            0.516271         -0.121639           1.295609   

                   petal width (cm)  
sepal length (cm)          0.516271  
sepal width (cm)          -0.121639  
petal length (cm)          1.295609  
petal width (cm)           0.581006  
df_mean:
                   sepal length (cm)  sepal width (cm)  petal length (cm)  \
sepal length (cm)           0.685694         -0.042434           1.274315   
sepal width (cm)           -0.042434          0.189979          -0.329656   
petal length (cm)           1.274315         -0.329656           3.116278   
petal width (cm)            0.516271         -0.121639           1.295609   

                   pe

$$
D_M(x)=\sqrt{(x-\mu)^TS^{-1}(x-\mu)}
$$

In [54]:
def mahalanobis(x, mean, cov):
    x = np.asarray(x)
    mean = np.asarray(mean)
    cov = np.asarray(cov)

    diff = x - mean
    inv_cov = np.linalg.inv(cov)
    return np.sqrt(diff.T @ inv_cov @ diff)

In [58]:
mah_set_vir_dist = mahalanobis(mean_setosa, mean_virginica, df_cov.to_numpy())
mah_set_ver_dist = mahalanobis(mean_setosa, mean_versicolor, df_cov.to_numpy())
mah_vir_ver_dist = mahalanobis(mean_versicolor, mean_virginica, df_cov.to_numpy())

print(f"mah_set_ver_dist: {mah_set_ver_dist}")
print(f"mah_set_vir_dist: {mah_set_vir_dist}")
print(f"mah_vir_ver_dist: {mah_vir_ver_dist}")

mah_set_ver_dist: 1.8488812142294817
mah_set_vir_dist: 2.354813745130505
mah_vir_ver_dist: 1.3007943639501551


In [60]:
print(f"euc_set_ver_dist: {euc_set_ver_dist}")
print(f"euc_set_vir_dist: {euc_set_vir_dist}")
print(f"euc_vir_ver_dist: {euc_vir_ver_dist}")
print("\n########################\n")
print(f"man_set_ver_dist: {man_set_ver_dist}")
print(f"man_set_vir_dist: {man_set_vir_dist}")
print(f"man_vir_ver_dist: {man_vir_ver_dist}")
print("\n########################\n")
print(f"cheb_set_ver_dist: {cheb_set_ver_dist}")
print(f"cheb_set_vir_dist: {cheb_set_vir_dist}")
print(f"cheb_vir_ver_dist: {cheb_vir_ver_dist}")
print("\n########################\n")
print(f"cos_set_ver_dist: {cos_set_ver_dist}")
print(f"cos_set_vir_dist: {cos_set_vir_dist}")
print(f"cos_vir_ver_dist: {cos_vir_ver_dist}")
print("\n########################\n")
print(f"mah_set_ver_dist: {mah_set_ver_dist}")
print(f"mah_set_vir_dist: {mah_set_vir_dist}")
print(f"mah_vir_ver_dist: {mah_vir_ver_dist}")

euc_set_ver_dist: 3.2082811597489393
euc_set_vir_dist: 4.754507335150509
euc_vir_ver_dist: 1.6204888151418995

########################

man_set_ver_dist: 5.466000000000001
man_set_vir_dist: 7.905999999999999
man_vir_ver_dist: 2.847999999999997

########################

cheb_set_ver_dist: 2.7979999999999996
cheb_set_vir_dist: 4.09
cheb_vir_ver_dist: 1.2919999999999998

########################

cos_set_ver_dist: 0.9245352736964948
cos_set_vir_dist: 0.8881023519747967
cos_vir_ver_dist: 0.9957131046752432

########################

mah_set_ver_dist: 1.8488812142294817
mah_set_vir_dist: 2.354813745130505
mah_vir_ver_dist: 1.3007943639501551
