In [28]:
from scipy.spatial.distance import euclidean, cityblock, chebyshev, cosine, mahalanobis
from sklearn.datasets import load_breast_cancer
import pandas as pd
import numpy as np

In [29]:
# Load the Wisconsin breast-cancer dataset and assemble a single frame that
# holds the feature columns followed by the class label.
cancer = load_breast_cancer()
df = pd.DataFrame(data=cancer.data, columns=cancer.feature_names).assign(
    target=cancer.target  # 0 is malignant and 1 is benign
)
print(cancer.DESCR)
df

.. _breast_cancer_dataset:

Breast cancer Wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

:Number of Instances: 569

:Number of Attributes: 30 numeric, predictive attributes and the class

:Attribute Information:
    - radius (mean of distances from center to points on the perimeter)
    - texture (standard deviation of gray-scale values)
    - perimeter
    - area
    - smoothness (local variation in radius lengths)
    - compactness (perimeter^2 / area - 1.0)
    - concavity (severity of concave portions of the contour)
    - concave points (number of concave portions of the contour)
    - symmetry
    - fractal dimension ("coastline approximation" - 1)

    The mean, standard error, and "worst" or largest (mean of the three
    worst/largest values) of these features were computed for each image,
    resulting in 30 features.  For instance, field 0 is Mean Radius, field
    10 is Radius SE, field 20 is Worst Radius.

    - 

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890,0
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902,0
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300,0
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115,0
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637,0
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820,0
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400,0


In [30]:
# Draw three rows from each class; the fixed seed keeps the selection
# repeatable across runs.
is_benign = df["target"] == 1
is_malignant = df["target"] == 0
benign_samples = df.loc[is_benign].sample(n=3, random_state=42)
malignant_samples = df.loc[is_malignant].sample(n=3, random_state=42)

In [31]:
# Columns used for the distance comparison.
features = ["mean radius", "mean texture", "mean perimeter", "mean area"]

# Plain NumPy matrices (one row per sampled case) for the SciPy distance
# functions.
benign_X = benign_samples.loc[:, features].to_numpy()
malignant_X = malignant_samples.loc[:, features].to_numpy()

In [32]:
B = benign_X
M = malignant_X

# The inverse covariance matrix depends only on the full dataset, not on the
# pair being compared, so it is computed once here instead of being rebuilt
# (covariance + matrix inversion) on every iteration of the nested loop.
# NOTE(review): the selected features look strongly correlated (radius,
# perimeter, area), so the covariance matrix may be ill-conditioned -- confirm
# that a plain inverse is numerically acceptable here.
VI = np.linalg.inv(np.cov(df[features].values, rowvar=False))

results = []

# Compare every benign sample with every malignant sample. The "Benign" and
# "Malignant" columns are 1-based positions within the sampled arrays, not
# row labels of `df`.
for i, b in enumerate(B):
    for j, m in enumerate(M):
        results.append({
            "Benign": i+1,
            "Malignant": j+1,
            "Euclidean": euclidean(b, m),
            "Manhattan": cityblock(b, m),
            "Chebyshev": chebyshev(b, m),
            "Cosine": cosine(b, m),
            "Mahalanobis": mahalanobis(b, m, VI)
        })

# One row per (benign, malignant) pair, one column per distance metric.
dist_df = pd.DataFrame(results)
dist_df


Unnamed: 0,Benign,Malignant,Euclidean,Manhattan,Chebyshev,Cosine,Mahalanobis
0,1,1,554.31695,605.55,552.9,0.000705,2.691264
1,1,2,386.494761,427.24,384.9,0.000273,5.272914
2,1,3,390.717301,426.42,389.8,0.000514,2.018278
3,2,1,874.352553,956.173,871.8,0.005965,3.255041
4,2,2,706.575146,777.863,703.8,0.004594,4.929422
5,2,3,710.742957,777.043,708.7,0.00537,3.694854
6,3,1,560.54278,610.87,559.1,0.000725,2.377794
7,3,2,392.747766,432.56,391.1,0.000301,5.275123
8,3,3,396.938735,431.74,396.0,0.000528,1.705095
