# Statistical Distances with `Python` and `R`

## Index

* [Statistical Distances ](#1)
* * [Distance Definition](#2)
* * [Distance Matrix](#3)

* [ Distances with quantitative variables](#1)
* * [Euclidean Distance](#2)
* * * [Disadvantages](#3)
* * * [Euclidean Distance in `R`](#4)
* * * [Euclidean Distance in `Python`](#5)

## Data-set in  `Python` <a class="anchor" id="1"></a>


In [1]:
import numpy as np

In [2]:
# Seed NumPy's global random state so the simulated data-set is reproducible.
# Every draw below consumes that same random stream in order, so adding,
# removing or reordering draws changes all the values generated after it.
np.random.seed(123)

# Quantitative variables: 50 draws each from a normal distribution with
# mean 10 and standard deviation 15.

X1 = np.random.normal(loc=10, scale=15, size=50)
X2 = np.random.normal(loc=10, scale=15, size=50)
X3 = np.random.normal(loc=10, scale=15, size=50)
X4 = np.random.normal(loc=10, scale=15, size=50)

# Binary categorical variables / dummies (categories: 0, 1): uniform draws
# between 0 and 1, rounded to the nearest integer.

X5 = np.random.uniform(low=0.0, high=1.0, size=50).round()
X6 = np.random.uniform(low=0.0, high=1.0, size=50).round() 
X7 = np.random.uniform(low=0.0, high=1.0, size=50).round() 


# Multi-category variables: uniform draws between `low` and `high`, rounded
# to the nearest integer. Because of the rounding, the two end categories
# (0 and `high`) each cover an interval half as wide as the interior ones,
# so they are drawn about half as often.

X8 = np.random.uniform(low=0, high=4, size=50).round()   # categories: 0,1,2,3,4
X9 = np.random.uniform(low=0, high=3, size=50).round()   # categories: 0,1,2,3
X10 = np.random.uniform(low=0, high=5, size=50).round()  # categories: 0,1,2,3,4,5

In [3]:
import pandas as pd

In [4]:
# Gather the ten simulated variables into a single data frame; the keyword
# names become the column labels X1, ..., X10, in this order.
Data_set_Python = pd.DataFrame(
    dict(
        X1=X1, X2=X2, X3=X3, X4=X4,
        X5=X5, X6=X6, X7=X7,
        X8=X8, X9=X9, X10=X10,
    )
)

## Data-set in  `R` <a class="anchor" id="1"></a>


In [5]:
import rpy2

%load_ext rpy2.ipython

import rpy2.robjects as robjects



In [6]:
%%R

# Quantitative

X1 <- c(-6.28445905,  24.9601817 ,  14.24467747, -12.59442071,
         1.32099622,  34.77154806, -26.40018865,   3.56631057,
        28.98904388,  -3.00110603,  -0.18329227,   8.57936547,
        32.37084439,   0.41647005,   3.34027061,   3.48473087,
        43.08895124,  42.80179133,  25.06080847,  15.79279599,
        21.06052864,  32.36098042,  -4.03750803,  27.63743567,
        -8.80821002,   0.43372746,  23.60657794, -11.4302105 ,
         7.8989692 ,  -2.92632344,   6.16570944, -31.97883658,
       -16.57299657,  -0.49815852,  23.91193648,   7.39546476,
        10.04268874,  20.32334067,  -3.19304515,  14.25440986,
        -2.08049777, -15.91504241,   4.13650309,  18.60708794,
        15.07883576,   9.82254258,  45.88547899,  16.1936824 ,
        24.68104009,  43.57215008)

X2 <- c(-9.41127985,  -5.58182315,  36.15568338,  -1.97094103,
        10.44524845,  26.03973954,  23.36059587,  36.32329273,
        32.43466206,  26.04089005,  -1.59063071,  21.92294002,
        14.71407992,  -9.8939819 ,  31.2594857 ,  22.10854802,
        10.68235121,   6.50361909,  -7.97451717,  12.9928611 ,
        17.02658679,  -2.46732476,  27.43306074,  -6.4580457 ,
       -21.84650525,  25.59590636,   3.94950943,   8.10955622,
        -2.56275084, -14.08944141,  28.82856062,  -0.33303476,
        34.91428732,  22.10962279,   5.2786278 ,  -6.28853602,
        -0.9869298 ,  -8.18784697,  41.30670039,  12.46661845,
        27.25308314,  -9.01028074,  12.71552694,  27.66792908,
         4.97483857,  25.46671688,  -6.26851868, -10.45207317,
        15.69100918,   4.31235348)

X3 <- c(19.63082034, -19.66831897,  20.68396953,  48.97455891,
         9.63061028,  10.51213193,  12.69324227, -17.92963566,
        16.3921996 , -14.08114616,   3.58480603,  28.64304324,
        -1.02825434,  17.51873484,  25.19108581,  14.18111284,
       -10.56422705,   5.01287087,  39.39117014, -20.37568644,
         5.86320979,   1.71837893,  11.81121045,  21.22323426,
        34.13036452,   5.94651412,  22.18511995,  17.49610217,
        17.11520947,   1.54114103,  -4.95982203,  -6.50064669,
        -1.34655814,  14.82529864,  21.4142409 ,  14.85203272,
         1.76567356,  37.08955165,  32.78298435,   4.68999831,
        -2.35147109,  11.95322431,  29.00947968,  14.99147466,
        18.34823057,   6.81879817,  16.84406343,  33.16816677,
         6.40496828,  12.14961599)

X4 <- c(13.80724715,  14.25588034, -11.17833314, -18.15302984,
        -5.29482606,  12.51913443,  18.30784249,   2.0398816 ,
        30.65886224,   7.85236039,  10.30473997,   7.09054194,
        12.01040189,  20.56711111,  19.98480157,  -3.47634411,
        32.85495665,  -6.42539686,  11.18840521,   5.8840514 ,
        -5.73487516,   8.87319118,  -1.1122066 ,  11.09360865,
        16.04628942,  32.07894053,  14.61076328,   0.8316199 ,
         4.12570284,  12.09967159,  11.40191244,  31.89383902,
        30.93029395,   4.61596111,   1.77036808, -28.35581906,
         1.7661938 ,  -4.67086559,   4.67763313,  15.87376364,
        12.65788494,   9.55047989,  12.99373167,   8.1082334 ,
        12.95528399, -38.46582512,   5.96059765,   8.33723918,
         4.88107426,   6.73080607)



# Binary Categorical / Dummies ( categories: 0,1)

X5 <- c(0., 1., 1., 0., 0., 1., 1., 1., 0., 0., 1., 0., 0., 0., 1., 0., 1.,
       1., 0., 0., 1., 0., 0., 1., 1., 1., 0., 0., 1., 0., 1., 0., 0., 0.,
       0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 1., 0., 1., 0.)

X6 <- c(0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 0., 1., 0., 1., 1., 1., 0.,
       1., 0., 1., 0., 0., 0., 1., 0., 1., 1., 0., 1., 1., 0., 0., 0., 1.,
       1., 1., 1., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 1., 1., 1.)    

X7 <- c(1., 1., 0., 0., 0., 1., 0., 0., 1., 1., 0., 1., 0., 0., 0., 0., 1.,
       1., 1., 1., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0., 1.,
       1., 0., 1., 1., 1., 1., 0., 1., 1., 0., 1., 0., 1., 0., 0., 1.) 



# Multiple categorical

X8 <- c(3., 3., 3., 1., 2., 3., 3., 0., 0., 1., 4., 2., 1., 1., 1., 2., 1.,
       3., 2., 1., 4., 1., 4., 0., 3., 2., 2., 0., 0., 1., 4., 2., 1., 2.,
       1., 1., 1., 3., 2., 1., 4., 3., 1., 4., 4., 2., 3., 2., 1., 0.) 

X9 <- c(1., 1., 1., 1., 3., 0., 2., 1., 1., 1., 3., 1., 0., 3., 3., 1., 2.,
       0., 0., 3., 1., 2., 3., 1., 0., 0., 2., 3., 1., 1., 2., 3., 2., 1.,
       2., 0., 0., 2., 1., 2., 1., 1., 3., 0., 2., 0., 2., 1., 0., 2.)       
        
X10 <- c(4., 3., 0., 1., 2., 1., 4., 1., 3., 5., 5., 1., 0., 5., 4., 5., 2.,
       4., 1., 2., 4., 1., 4., 2., 2., 2., 5., 3., 4., 4., 5., 2., 2., 3.,
       2., 5., 2., 2., 3., 2., 5., 0., 1., 2., 1., 1., 4., 3., 0., 5.)        

In [7]:
%%R

# Column-bind the ten vectors into one matrix with one row per individual;
# the argument names become the column names X1, ..., X10.
Data_set_R <- cbind(
  X1 = X1, X2 = X2, X3 = X3, X4 = X4,
  X5 = X5, X6 = X6, X7 = X7,
  X8 = X8, X9 = X9, X10 = X10
)

## Statistical Distances <a class="anchor" id="1"></a>



The concept of distance between elements of a set $\Omega$ allows us to interpret geometrically many classical techniques of multivariate analysis.

This interpretation is possible both with quantitative and categorical variables, or even when no variables are available, as long as it makes sense to obtain a measure of proximity between the elements of $\Omega$.



###  Distance Definition <a class="anchor" id="1"></a>



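Given a set of elements $\Omega$, the following notions are defined for mappings that assign a real number to each pair of its elements.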

#### Quasi-metric <a class="anchor" id="1"></a>


A **quasi-metric** or **dissimilarity** is any mapping $\delta : \Omega \times \Omega \rightarrow \mathbb{R}$ that satisfies the following properties:



1) $\hspace{0.15cm}\delta (i,j) \geq 0 \hspace{0.25cm}, \forall i,j \in \Omega$

2) $\hspace{0.15cm}\delta (i,i) = 0 \hspace{0.25cm}, \forall i \in  \Omega$

3) $\hspace{0.15cm}\delta (i,j) = \delta (j, i) \hspace{0.25cm}, \forall i,j \in \Omega $



#### Semi-metric <a class="anchor" id="1"></a>


A **semi-metric** is any dissimilarity (quasi-metric) that satisfies the triangle inequality:



4) $\hspace{0.15cm} \delta (i,j) \hspace{0.1 cm}\leq \hspace{0.1 cm} \delta (i,k) + \delta (k,j) \hspace{0.25cm}, \forall i,j,k \in \Omega$



#### Metric <a class="anchor" id="1"></a>


A **metric** is any semi-metric that also satisfies:

5) $\hspace{0.15cm} \delta (i,j)=0 \hspace{0.15cm}\Leftrightarrow\hspace{0.15cm} i=j$




#### Distance <a class="anchor" id="1"></a>

A **distance** is a metric or a semi-metric.
 

### Distance Matrix <a class="anchor" id="1"></a>



When $\Omega$ is a finite set with $n$ elements, the distances between all pairs of elements can be arranged in an $n \times n$ distance matrix:



$$
D= \begin{pmatrix}
0 & \delta_{12}&...&\delta_{1n}\\
\delta_{21} & 0&...&\delta_{2n}\\
...&...&...&...\\
\delta_{n1}& \delta_{n2}&...& 0\\
\end{pmatrix}
$$
with $\delta_{ij}=\delta_{ji}$



We will also use the matrix of squares of distances:



$$
D^{(2)}= 
\begin{pmatrix}
0 & \delta^2_{12}&...&\delta^2_{1n}\\
\delta^2_{21} & 0&...&\delta^2_{2n}\\
...&...&...&...\\
\delta^2_{n1}& \delta^2_{n2}&...& 0\\
\end{pmatrix}
$$





This matrix should not be confused with the matrix product $D^2=D\cdot D$.



## Distances with quantitative variables <a class="anchor" id="1"></a>

Let $X_1,...,X_p$ be quantitative variables,

Let $\hspace{0.1cm} x_i=(x_{i1},...,x_{ip})^t \hspace{0.1cm}$ and $\hspace{0.1cm} x_j=(x_{j1},...,x_{jp})^t \hspace{0.1cm}$ be the values (observations) of the variables $X_1,...,X_p$ for the elements or individuals $i$ and $j$ of the sample $\Omega$.


## Euclidean Distance <a class="anchor" id="1"></a>


 
The Euclidean distance between the elements / individuals $i$ and $j$ of $\Omega$ with respect to the quantitative variables $X_1,...,X_p$ is defined as:



 $$
\delta^2(i,j)_{Euclidea} = \sum_{k=1}^{p} (x_{ik} - x_{jk})\hspace{0.05cm}^2 = (x_i - x_j)\hspace{0.05cm}^t\cdot (x_i - x_j) = sum \left( \hspace{0.05cm} (x_i - x_j)^2 \hspace{0.05cm} \right)
$$



$$
\delta(i,j)_{Euclidea} =\sqrt{\sum_{k=1}^{p} (x_{ik} - x_{jk})\hspace{0.05cm}^2  }  = \sqrt{(x_i - x_j)\hspace{0.05cm}^t\cdot (x_i - x_j)} = \sqrt{ sum \left( \hspace{0.05cm} (x_i - x_j)^2  \hspace{0.05cm} \right) }
 $$

 


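Where $\hspace{0.05cm} sum \left( \hspace{0.05cm} (x_i - x_j)^2 \hspace{0.05cm} \right) \hspace{0.05cm}$ is a vectorized operation: the difference $x_i - x_j$ is squared element by element and the resulting components are added up.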

### Disadvantages <a class="anchor" id="1"></a>


 
Although it is one of the most popular distances, it is not suitable in many cases for the following reasons:

1) It assumes that the variables are uncorrelated and with unit variance (although this last problem can be solved by standardizing the variables to unit variance by dividing them by their respective standard deviations).

2) It is not invariant against changes in scale (changes in measurement units) of the variables.


 
Let's see what this means in more detail:

If a change of scale $a\cdot X_k + b$, with $a\neq 1$ and $b\neq 0$, is applied to every variable $X_k$,

then the observations for elements $i$ and $j$ become $a\cdot x_i + b$ and $a\cdot x_j + b$,

and the squared Euclidean distance between the elements $i$ and $j$ with respect to the scaled variables $a\cdot X_k + b$ is:

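$$
\left( (a\cdot x_i + b) - (a\cdot x_j + b) \right)^t\cdot \left( (a\cdot x_i + b) - (a\cdot x_j + b) \right) = a^2 \cdot (x_i - x_j)^t\cdot (x_i - x_j) = a^2 \cdot \delta^2(i,j)_{Euclidea}
$$

The shift $b$ cancels out, but the distance itself is multiplied by $\mid a \mid$, so it changes whenever the measurement units of the variables change.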


### Euclidean Distance in `R` <a class="anchor" id="1"></a>

In [8]:
%%R 

# Euclidean distance between two individuals of a quantitative data set.
#
# Args:
#   i, j: Row indices of the two individuals to compare.
#   Quantitative_Data_set: Matrix or data frame of quantitative variables,
#     one row per individual (converted with as.matrix()).
#
# Returns:
#   The square root of the sum of squared differences between rows i and j.
Dist_Euclidea_R <- function(i, j, Quantitative_Data_set) {
  data_matrix <- as.matrix(Quantitative_Data_set)
  row_difference <- data_matrix[i, ] - data_matrix[j, ]
  sqrt(sum(row_difference^2))
}

In [9]:
%%R

library(tidyverse)

# Turn the data matrix into a data frame so that dplyr verbs can be used on it.
Data_set_R <- as.data.frame(Data_set_R)

# Keep the first four columns (X1, ..., X4), i.e. the quantitative variables.
Quantitative_Data_R <- select(Data_set_R, 1:4)

R[write to console]: -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

R[write to console]: v ggplot2 3.3.6     v purrr   0.3.4
v tibble  3.1.7     v dplyr   1.0.9
v tidyr   1.2.0     v stringr 1.4.0
v readr   2.1.2     v forcats 0.5.1

R[write to console]: -- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()



In [10]:
%%R 

Dist_Euclidea_R(1,2, Quantitative_Data_R)

[1] 50.35391


### Euclidean Distance in `Python` <a class="anchor" id="1"></a>


In [11]:
def Dist_Euclidea_Python(i, j, Quantitative_Data_set):
    """Euclidean distance between two rows of a quantitative data frame.

    Parameters
    ----------
    i, j : int
        Row positions of the two individuals, counted from 1; they are
        shifted by one before the positional lookup with ``.iloc``.
    Quantitative_Data_set : pandas.DataFrame
        Quantitative variables, one row per individual.

    Returns
    -------
    The square root of the sum of squared differences between the two rows.
    """
    row_i = Quantitative_Data_set.iloc[i - 1]
    row_j = Quantitative_Data_set.iloc[j - 1]

    squared_differences = (row_i - row_j) ** 2

    return np.sqrt(squared_differences.sum())

In [12]:
# The first four columns (X1, ..., X4) hold the quantitative variables.
Quantitative_Data_Python = Data_set_Python.iloc[:, list(range(4))]

In [13]:
Dist_Euclidea_Python(1, 2, Quantitative_Data_Python)

50.35390686386084

### Euclidean Distances Matrix in `R` <a class="anchor" id="1"></a>

In [14]:
%%R

# Matrix of pairwise Euclidean distances between all individuals.
#
# Args:
#   Quantitative_Data_set: Matrix or data frame of quantitative variables,
#     one row per individual.
#
# Returns:
#   An n x n numeric matrix whose entry [i, j] is Dist_Euclidea_R(i, j, ...),
#   where n is the number of rows of the data set (0 x 0 for an empty one).
Dist_Euclidea_Matrix_R <- function(Quantitative_Data_set) {
  Quantitative_Data_set <- as.matrix(Quantitative_Data_set)

  n <- nrow(Quantitative_Data_set)
  M <- matrix(NA_real_, nrow = n, ncol = n)

  # seq_len() gives an empty sequence when n == 0, whereas 1:n would iterate
  # over c(1, 0) and try to read rows that do not exist.
  for (i in seq_len(n)) {
    for (j in seq_len(n)) {
      M[i, j] <- Dist_Euclidea_R(i, j, Quantitative_Data_set)
    }
  }

  M
}

In [15]:
%%R

Dist_Euclidea_Matrix_R(Quantitative_Data_R)[1:10,1:10]

          [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]     [,8]
 [1,]  0.00000 50.35391 55.88544 44.47121 30.28231 55.01982 39.33222 61.13876
 [2,] 50.35391  0.00000 64.28101 84.76773 45.34684 44.83372 67.37388 48.64144
 [3,] 55.88544 64.28101  0.00000 54.98164 31.38217 34.47783 52.43096 42.18753
 [4,] 44.47121 84.76773 54.98164  0.00000 45.39912 73.81242 58.97470 81.31139
 [5,] 30.28231 45.34684 31.38217 45.39912  0.00000 40.99075 38.75224 38.57564
 [6,] 55.01982 44.83372 34.47783 73.81242 40.99075  0.00000 61.54206 44.70198
 [7,] 39.33222 67.37388 52.43096 58.97470 38.75224 61.54206  0.00000 47.62804
 [8,] 61.13876 48.64144 42.18753 81.31139 38.57564 44.70198 47.62804  0.00000
 [9,] 57.35650 55.05364 44.72150 79.73168 50.86690 20.92743 57.58937 51.56037
[10,] 49.39225 43.05869 44.39039 74.35722 31.57531 45.31425 37.16027 14.05185
          [,9]    [,10]
 [1,] 57.35650 49.39225
 [2,] 55.05364 43.05869
 [3,] 44.72150 44.39039
 [4,] 79.73168 74.35722
 [5,] 50.86690 31.5753

In [16]:
%%R

Dist_Euclidea_Matrix_R(Quantitative_Data_R)[1,2]

[1] 50.35391


In [17]:
%%R

Dist_Euclidea_Matrix_R(Quantitative_Data_R)[5,3]

[1] 31.38217


### Euclidean Distances Matrix in `Python` <a class="anchor" id="1"></a>

In [18]:
def Dist_Euclidea_Matrix_Python( Quantitative_Data_set ):
    """Matrix of pairwise Euclidean distances between all rows.

    Entry ``[r, c]`` of the returned NumPy array (0-based) is the distance
    between the individuals in rows ``r + 1`` and ``c + 1`` (1-based), as
    computed by ``Dist_Euclidea_Python``.
    """
    n_rows = Quantitative_Data_set.shape[0]
    distances = np.zeros((n_rows, n_rows))

    for row in range(n_rows):
        for col in range(n_rows):
            # The pairwise helper expects 1-based row positions.
            distances[row, col] = Dist_Euclidea_Python(row + 1, col + 1, Quantitative_Data_set)

    return distances

In [19]:
np.set_printoptions(threshold=np.inf)

In [20]:
Dist_Euclidea_Matrix_Python(Quantitative_Data_Python)

array([[ 0.        , 50.35390686, 55.88543575, 44.47121042, 30.28230619,
        55.0198165 , 39.33222296, 61.13875643, 57.35650319, 49.39224756,
        19.18668603, 36.45683618, 50.06287534,  9.76176818, 42.61250194,
        37.64782144, 64.1564009 , 57.32179616, 37.17418244, 51.50386721,
        44.92383319, 43.43833526, 40.57450703, 34.19526501, 19.39727407,
        42.32909321, 32.85049264, 22.50294156, 18.65831141, 19.06081274,
        47.19933709, 41.86399342, 52.95147825, 33.684141  , 35.71670108,
        44.692872  , 28.3142343 , 36.82009972, 53.27475603, 33.58550615,
        42.97081537, 13.03742629, 26.20711906, 45.25990645, 25.80158224,
        66.12512498, 52.92367433, 26.82407183, 42.9370094 , 52.72631243],
       [50.35390686,  0.        , 64.28100719, 84.76773428, 45.34683551,
        44.83371544, 67.37388444, 48.6414443 , 55.05364378, 43.05869447,
        34.70508944, 58.39668596, 28.6240421 , 45.20726822, 62.20843455,
        51.84754056, 31.96851235, 38.74606183, 59.

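Note on the Python output:

Each inner array represents a row of the distance matrix, and each element within that array represents a column. Python indexing is 0-based, so `M[r, c]` is the element in row `r+1` and column `c+1` of the distance matrix. The two calls below therefore return the entries that `R` addresses as `[2,3]` and `[6,4]`, not `[1,2]` and `[5,3]`.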

In [21]:
Dist_Euclidea_Matrix_Python(Quantitative_Data_Python)[1,2]

64.28100718719624

In [22]:
Dist_Euclidea_Matrix_Python(Quantitative_Data_Python)[5,3]

73.81241882537256

## Minkowski Distance <a class="anchor" id="1"></a>



The Minkowski distance with parameter $\hspace{0.1cm} q=1,2,3,... \hspace{0.1cm}$ between the individuals $i$ and $j$ with respect to the quantitative variables $X_1,...,X_p$ is:


$$
\delta_q(i,j)_{Minkowski } = \left( \sum_{k=1}^{p}  \mid x_{ik} - x_{jk} \mid  ^q  \right)^{(1/q)} =  sum \left( \hspace{0.1cm} \mid x_i - x_j \mid  ^q \hspace{0.1cm}\right)^{(1/q)}    
$$

### Disadvantages <a class="anchor" id="1"></a>


1) **Assumes** that the **variables** are **uncorrelated** and with **unit variance**.

2) It is **not invariant** against changes in scale (changes in measurement units) of the variables.

3) It is **hardly Euclideanizable** (we will see later what this means).


### Particular cases of the Minkowski distance <a class="anchor" id="1"></a>



**Euclidean Distance:**



\begin{gather*}
 \delta_2(i,j)_{Minkowski }=\delta (i,j)_{Euclidea }   \hspace{1cm} (q=2)
 \end{gather*}
 


 **Manhattan Distance:**  




\begin{gather*}
 \delta_1(i,j)_{Minkowski }= \sum_{k=1}^{p}  \mid x_{ik} - x_{jk} \mid \hspace{1cm} (q=1)
 \end{gather*}



 **Dominant Distance:** 




\begin{gather*}
 \delta_{\infty}(i,j)_{Minkowski }= max \lbrace  \hspace{0.1cm} \mid x_{i1} - x_{j1} \mid \hspace{0.1cm},...,\hspace{0.1cm} \mid x_{ip} - x_{jp} \mid \hspace{0.1cm}  \rbrace \hspace{1cm} (q\rightarrow \infty)
 \end{gather*}


### Minkowski Distance in `R` <a class="anchor" id="1"></a>

In [23]:
%%R

# Minkowski distance of order q between two individuals of a quantitative
# data set.
#
# Args:
#   i, j: Row indices of the two individuals to compare.
#   q: Order of the distance (1 = Manhattan, 2 = Euclidean). Inf gives the
#     dominant distance, i.e. the limit of the formula as q grows.
#   Quantitative_Data_set: Matrix or data frame of quantitative variables,
#     one row per individual (converted with as.matrix()).
#
# Returns:
#   (sum(|x_i - x_j|^q))^(1/q), or max(|x_i - x_j|) when q is Inf.
Dist_Minkowski_R <- function(i, j, q, Quantitative_Data_set) {
  Quantitative_Data_set <- as.matrix(Quantitative_Data_set)

  abs_diff <- abs(Quantitative_Data_set[i, ] - Quantitative_Data_set[j, ])

  # The general formula degenerates for q = Inf (it evaluates to Inf^0 = 1),
  # so the limit case is computed directly as the largest absolute difference.
  if (q == Inf) {
    return(max(abs_diff))
  }

  sum(abs_diff^q)^(1 / q)
}

Particular cases:

**Euclidean Distance** $\hspace{0.1cm} (q=2)$

In [28]:
%%R

Dist_Minkowski_R(1,2, q=2, Quantitative_Data_R)

[1] 50.35391


  **Manhattan Distance** $\hspace{0.1cm} (q=1)$

In [29]:
%%R

Dist_Minkowski_R(1,2, q=1, Quantitative_Data_R)

[1] 74.82187


### Minkowski Distance in `Python` <a class="anchor" id="1"></a>

In [35]:
def Dist_Minkowski_Python(i,j, q , Quantitative_Data_set):
    """Minkowski distance of order ``q`` between two rows of a data frame.

    Parameters
    ----------
    i, j : int
        Row positions of the two individuals, counted from 1; they are
        shifted by one before the positional lookup with ``.iloc``.
    q : int or float
        Order of the distance (1 = Manhattan, 2 = Euclidean).
        ``float("inf")`` gives the dominant distance, i.e. the limit of the
        formula as ``q`` grows.
    Quantitative_Data_set : pandas.DataFrame
        Quantitative variables, one row per individual.

    Returns
    -------
    ``(sum(|x_i - x_j|**q))**(1/q)``, or ``max(|x_i - x_j|)`` when ``q`` is
    infinite.
    """
    abs_diff = ( Quantitative_Data_set.iloc[i - 1] - Quantitative_Data_set.iloc[j - 1] ).abs()

    # The general formula degenerates for q = inf (it evaluates to inf**0 = 1),
    # so the limit case is computed directly as the largest absolute difference.
    if q == float("inf"):
        return abs_diff.max()

    return ( abs_diff**q ).sum()**(1/q)

Particular cases:

**Euclidean Distance** $\hspace{0.1cm} (q=2)$

In [52]:
Dist_Minkowski_Python(1,2, 2 , Quantitative_Data_Python)

50.35390686386084

  **Manhattan Distance** $\hspace{0.1cm} (q=1)$

In [37]:
Dist_Minkowski_Python(1,2, 1 , Quantitative_Data_Python)

74.821869942812

### Minkowski Distances Matrix in `R` <a class="anchor" id="1"></a>

In [41]:
%%R

# Matrix of pairwise Minkowski distances of order q between all individuals.
#
# Args:
#   q: Order of the Minkowski distance, passed on to Dist_Minkowski_R().
#   Quantitative_Data_set: Matrix or data frame of quantitative variables,
#     one row per individual.
#
# Returns:
#   An n x n numeric matrix whose entry [i, j] is
#   Dist_Minkowski_R(i, j, q, ...), where n is the number of rows of the
#   data set (0 x 0 for an empty one).
Dist_Minkowski_Matrix_R <- function(q, Quantitative_Data_set) {
  Quantitative_Data_set <- as.matrix(Quantitative_Data_set)

  n <- nrow(Quantitative_Data_set)
  M <- matrix(NA_real_, nrow = n, ncol = n)

  # seq_len() gives an empty sequence when n == 0, whereas 1:n would iterate
  # over c(1, 0) and try to read rows that do not exist.
  for (i in seq_len(n)) {
    for (j in seq_len(n)) {
      M[i, j] <- Dist_Minkowski_R(i, j, q, Quantitative_Data_set)
    }
  }

  M
}

In [43]:
%%R

Dist_Minkowski_Matrix_R(q=2 , Quantitative_Data_R)[1:10,1:10]

          [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]     [,8]
 [1,]  0.00000 50.35391 55.88544 44.47121 30.28231 55.01982 39.33222 61.13876
 [2,] 50.35391  0.00000 64.28101 84.76773 45.34684 44.83372 67.37388 48.64144
 [3,] 55.88544 64.28101  0.00000 54.98164 31.38217 34.47783 52.43096 42.18753
 [4,] 44.47121 84.76773 54.98164  0.00000 45.39912 73.81242 58.97470 81.31139
 [5,] 30.28231 45.34684 31.38217 45.39912  0.00000 40.99075 38.75224 38.57564
 [6,] 55.01982 44.83372 34.47783 73.81242 40.99075  0.00000 61.54206 44.70198
 [7,] 39.33222 67.37388 52.43096 58.97470 38.75224 61.54206  0.00000 47.62804
 [8,] 61.13876 48.64144 42.18753 81.31139 38.57564 44.70198 47.62804  0.00000
 [9,] 57.35650 55.05364 44.72150 79.73168 50.86690 20.92743 57.58937 51.56037
[10,] 49.39225 43.05869 44.39039 74.35722 31.57531 45.31425 37.16027 14.05185
          [,9]    [,10]
 [1,] 57.35650 49.39225
 [2,] 55.05364 43.05869
 [3,] 44.72150 44.39039
 [4,] 79.73168 74.35722
 [5,] 50.86690 31.5753

### Minkowski Distances Matrix in `Python` <a class="anchor" id="1"></a>

In [56]:
def Dist_Minkowski_Matrix_Python(q , Quantitative_Data_set):
    """Matrix of pairwise Minkowski distances of order ``q`` between all rows.

    Entry ``[r, c]`` of the returned NumPy array (0-based) is the distance
    between the individuals in rows ``r + 1`` and ``c + 1`` (1-based), as
    computed by ``Dist_Minkowski_Python``.
    """
    n_rows = Quantitative_Data_set.shape[0]
    distances = np.zeros((n_rows, n_rows))

    for row in range(n_rows):
        for col in range(n_rows):
            # The pairwise helper expects 1-based row positions.
            distances[row, col] = Dist_Minkowski_Python(row + 1, col + 1, q, Quantitative_Data_set)

    return distances

In [57]:
Dist_Minkowski_Matrix_Python(2 , Quantitative_Data_Python)

array([[ 0.        , 50.35390686, 55.88543575, 44.47121042, 30.28230619,
        55.0198165 , 39.33222296, 61.13875643, 57.35650319, 49.39224756,
        19.18668603, 36.45683618, 50.06287534,  9.76176818, 42.61250194,
        37.64782144, 64.1564009 , 57.32179616, 37.17418244, 51.50386721,
        44.92383319, 43.43833526, 40.57450703, 34.19526501, 19.39727407,
        42.32909321, 32.85049264, 22.50294156, 18.65831141, 19.06081274,
        47.19933709, 41.86399342, 52.95147825, 33.684141  , 35.71670108,
        44.692872  , 28.3142343 , 36.82009972, 53.27475603, 33.58550615,
        42.97081537, 13.03742629, 26.20711906, 45.25990645, 25.80158224,
        66.12512498, 52.92367433, 26.82407183, 42.9370094 , 52.72631243],
       [50.35390686,  0.        , 64.28100719, 84.76773428, 45.34683551,
        44.83371544, 67.37388444, 48.6414443 , 55.05364378, 43.05869447,
        34.70508944, 58.39668596, 28.6240421 , 45.20726822, 62.20843455,
        51.84754056, 31.96851235, 38.74606183, 59.

### Dominant Distance in `R` <a class="anchor" id="1"></a>

In [None]:
# Dominant distance between two individuals of a quantitative data set.
#
# Args:
#   i, j: Row indices of the two individuals to compare.
#   Matriz_Datos_Cuantitativos: Matrix or data frame of quantitative
#     variables, one row per individual (converted with as.matrix()).
#
# Returns:
#   The largest absolute difference between the values of rows i and j.
Dist_Dominante <- function(i, j, Matriz_Datos_Cuantitativos) {
  data_matrix <- as.matrix(Matriz_Datos_Cuantitativos)
  max(abs(data_matrix[i, ] - data_matrix[j, ]))
}

In [None]:
# Dominant distance between individuals 1 and 2, computed on the
# quantitative data frame built earlier in this notebook.
Dist_Dominante(1, 2, Quantitative_Data_R)

### Dominant Distance in `Python` <a class="anchor" id="1"></a>

We now program the matrix of dominant distances (in `R`):

# Matrix of pairwise dominant distances between all individuals.
#
# Args:
#   Matriz_Datos_Cuantitativos: Matrix or data frame of quantitative
#     variables, one row per individual.
#
# Returns:
#   An n x n numeric matrix whose entry [i, j] is Dist_Dominante(i, j, ...),
#   where n is the number of rows of the data set (0 x 0 for an empty one).
Matriz_Dist_Dominante <- function(Matriz_Datos_Cuantitativos) {
  Matriz_Datos_Cuantitativos <- as.matrix(Matriz_Datos_Cuantitativos)

  n <- nrow(Matriz_Datos_Cuantitativos)
  M <- matrix(NA_real_, nrow = n, ncol = n)

  # seq_len() gives an empty sequence when n == 0, whereas 1:n would iterate
  # over c(1, 0) and try to read rows that do not exist.
  for (i in seq_len(n)) {
    for (j in seq_len(n)) {
      M[i, j] <- Dist_Dominante(i, j, Matriz_Datos_Cuantitativos)
    }
  }

  M
}

# Full matrix of dominant distances for the quantitative data frame built
# earlier in this notebook.
Matriz_Dist_Dominante(Quantitative_Data_R)

## Bibliography <a class="anchor" id="1"></a>

https://numpy.org/doc/stable/reference/random/legacy.html