In [40]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


In [27]:
iris_df = sns.load_dataset('iris') # load the 'iris' dataset through seaborn's load_dataset helper
iris_df  # bare last expression: the notebook renders the DataFrame as the cell output

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [5]:
iris_df.head() # show the first 5 rows of the DataFrame (head() called with its default)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [6]:
iris_df.describe() # summary statistics (count, mean, std, min, quartiles, max) for the four numeric columns

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [7]:
iris_df.shape # (number of rows, number of columns) of the DataFrame

(150, 5)

In [8]:
# Build the feature matrix from the numeric columns only.
# Selecting by dtype instead of the positional slice 0:4 expresses the intent
# directly and stays correct if the column order changes or the non-numeric
# label column ('species') is not the last one.
X_var = iris_df.select_dtypes(include='number')
X_var  # display the resulting feature DataFrame

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


### Taking two rows of the dataset as `x1` and `x2` (positions 1 and 2, i.e. the second and third rows)

In [33]:
x1= X_var.iloc[1]  # iloc is 0-based: position 1 selects the SECOND row of X_var (index label 1)
x1  # display the selected row as a Series

sepal_length    4.9
sepal_width     3.0
petal_length    1.4
petal_width     0.2
Name: 1, dtype: float64

In [32]:
x2= X_var.iloc[2]  # iloc is 0-based: position 2 selects the THIRD row of X_var (index label 2)
x2  # display the selected row as a Series

sepal_length    4.7
sepal_width     3.2
petal_length    1.3
petal_width     0.2
Name: 2, dtype: float64

### Applying Distance Metrics on iris

#### Euclidean Distance

In [13]:
# Example: pairwise Euclidean distances between two small sets of 2-D points.
from sklearn.metrics import DistanceMetric

X = [[1, 2], [3, 4], [5, 6]]  # three points
Y = [[7, 8], [9, 10]]         # two points

dist = DistanceMetric.get_metric('euclidean')
# Entry [i][j] of the result is the distance from X[i] to Y[j], giving a 3 x 2 array.
dist.pairwise(X, Y)

array([[ 8.48528137, 11.3137085 ],
       [ 5.65685425,  8.48528137],
       [ 2.82842712,  5.65685425]])

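* SciPy's distance functions accept the 1-D pandas Series directly.
* scikit-learn's `DistanceMetric.pairwise` expects 2-D arrays of shape (n_samples, n_features), so each Series must first be reshaped into a single-row 2-D array.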

In [34]:
# SciPy route: the two pandas Series are passed in unchanged as 1-D vectors.
from scipy.spatial import distance

distance.euclidean(x1, x2)

0.30000000000000016

In [35]:
# scikit-learn route: pairwise() needs 2-D input, so each Series becomes a
# single-row array before the metric is applied.
from sklearn.metrics import DistanceMetric

x = x1.to_numpy()[np.newaxis, :]
y = x2.to_numpy()[np.newaxis, :]

dist = DistanceMetric.get_metric('euclidean')
dist.pairwise(x, y)  # 1 x 1 array holding the distance between the two rows

array([[0.3]])

#### Manhattan Distance

In [36]:
# Example: pairwise Manhattan distances (sum of absolute coordinate
# differences) between two small sets of 2-D points.
from sklearn.metrics import DistanceMetric

X = [[1, 2], [3, 4], [5, 6]]  # three points
Y = [[7, 8], [9, 10]]         # two points

dist = DistanceMetric.get_metric('manhattan')
# Entry [i][j] of the result is the distance from X[i] to Y[j].
dist.pairwise(X, Y)

array([[12., 16.],
       [ 8., 12.],
       [ 4.,  8.]])

In [38]:
# scikit-learn route: convert each Series into a single-row 2-D array, then
# evaluate the Manhattan metric between the two rows.
from sklearn.metrics import DistanceMetric

x = x1.to_numpy()[np.newaxis, :]
y = x2.to_numpy()[np.newaxis, :]

dist = DistanceMetric.get_metric('manhattan')
dist.pairwise(x, y)  # 1 x 1 array holding the distance between the two rows

array([[0.5]])

In [39]:
# SciPy route: cityblock is SciPy's name for the Manhattan distance; the two
# Series are passed in unchanged as 1-D vectors.
from scipy.spatial import distance

distance.cityblock(x1, x2)

0.5000000000000002

#### Chebyshev Distance

In [41]:
# Example: pairwise Chebyshev distances (largest absolute coordinate
# difference) between two small sets of 2-D points.
from sklearn.metrics import DistanceMetric

X = [[1, 2], [3, 4], [5, 6]]  # three points
Y = [[7, 8], [9, 10]]         # two points

dist = DistanceMetric.get_metric('chebyshev')
# Entry [i][j] of the result is the distance from X[i] to Y[j].
dist.pairwise(X, Y)

array([[6., 8.],
       [4., 6.],
       [2., 4.]])

In [42]:
# SciPy route: Chebyshev distance computed directly on the two 1-D Series.
from scipy.spatial import distance

distance.chebyshev(x1, x2)

0.20000000000000018

In [43]:
# scikit-learn route: convert each Series into a single-row 2-D array, then
# evaluate the Chebyshev metric between the two rows.
from sklearn.metrics import DistanceMetric

x = x1.to_numpy()[np.newaxis, :]
y = x2.to_numpy()[np.newaxis, :]

dist = DistanceMetric.get_metric('chebyshev')
dist.pairwise(x, y)  # 1 x 1 array holding the distance between the two rows

array([[0.2]])

#### Minkowski Distance

In [44]:
# Example: pairwise Minkowski distances between two small sets of 2-D points.
# NOTE: no order `p` is passed here, and the displayed result is identical to
# the Euclidean example's output, i.e. the metric is used with its default
# order. Pass p explicitly (as the cells below do with p=3) to get a different
# Minkowski distance.
from sklearn.metrics import DistanceMetric

X = [[1, 2], [3, 4], [5, 6]]  # three points
Y = [[7, 8], [9, 10]]         # two points

dist = DistanceMetric.get_metric('minkowski')
# Entry [i][j] of the result is the distance from X[i] to Y[j].
dist.pairwise(X, Y)

array([[ 8.48528137, 11.3137085 ],
       [ 5.65685425,  8.48528137],
       [ 2.82842712,  5.65685425]])

In [47]:
# SciPy route: Minkowski distance of order 3 computed directly on the two
# 1-D Series.
from scipy.spatial import distance

distance.minkowski(x1, x2, p=3)

0.25712815906582376

In [51]:
# scikit-learn route: convert each Series into a single-row 2-D array, then
# evaluate the Minkowski metric of order 3 between the two rows.
from sklearn.metrics import DistanceMetric

x = x1.to_numpy()[np.newaxis, :]
y = x2.to_numpy()[np.newaxis, :]

dist = DistanceMetric.get_metric('minkowski', p=3)
dist.pairwise(x, y)  # 1 x 1 array holding the distance between the two rows

array([[0.25712816]])