In [1]:
import numpy as np
# Two random 10-element vectors with entries drawn uniformly from [0, 1).
# NOTE: no seed is set, so the concrete values (and every number derived
# from them below) change each time the kernel is restarted.
x, y = np.random.random(10), np.random.random(10)

In [2]:
# Inspect the first random vector.
x

array([ 0.2027935 ,  0.31394456,  0.1960384 ,  0.27455305,  0.73423524,
        0.49304154,  0.39459916,  0.93357666,  0.25406378,  0.31207493])

In [3]:
# Inspect the second random vector.
y

array([ 0.58752716,  0.41383598,  0.30174012,  0.74574908,  0.57339749,
        0.32131536,  0.47729764,  0.81684854,  0.24995744,  0.47294776])

## 1. 欧氏距离 Euclidean Distance

### 按公式计算

In [5]:
# Euclidean distance straight from the definition: sqrt(sum((x_i - y_i)^2)).
d1 = np.sqrt(((x - y) ** 2).sum())
d1

0.70208042137425797

### 使用scipy

In [8]:
from scipy.spatial.distance import euclidean
# Library equivalent of the formula above; the two results agree.
euclidean(x, y)

0.702080421374258

## 2. 曼哈顿距离
### 按公式计算

In [9]:
# Manhattan (city-block) distance: sum of the absolute coordinate differences.
d1 = np.abs(x - y).sum()
d1

1.7584925174357311

In [10]:
from scipy.spatial.distance import cityblock
# SciPy's city-block metric; matches the manual Manhattan distance above.
cityblock(x, y)

1.7584925174357311

## 3. 切比雪夫距离
### 按公式计算

In [11]:
# Chebyshev distance: the largest absolute coordinate difference.
d1 = np.abs(x - y).max()
d1

0.47119602850106401

In [12]:
from scipy.spatial.distance import chebyshev
# SciPy's Chebyshev metric; matches the manual value above.
chebyshev(x, y)

0.47119602850106401

## 4. 闵可夫斯基距离 Minkowski Distance

In [15]:
from scipy.spatial.distance import minkowski
# Minkowski distance of order p=2, which reproduces the Euclidean result above.
minkowski(x, y, 2)

0.702080421374258

## 5. 标准化欧氏距离 Standardized Euclidean Distance

In [16]:
# Standardized Euclidean distance: each squared coordinate difference is
# divided by that coordinate's sample variance (ddof=1) before summing.
# The variance is estimated from the only two samples available, x and y.
X = np.vstack((x, y))
sk = X.var(axis=0, ddof=1)
d1 = np.sqrt(np.sum(np.square(x - y) / sk))
d1

4.4721359549995796

In [18]:
from scipy.spatial.distance import pdist
# Same quantity via pdist on the stacked samples; the single returned entry
# matches the manual value above.
d2=pdist(X,'seuclidean')
d2

array([ 4.47213595])

In [46]:
from scipy.spatial.distance import seuclidean
# seuclidean takes the per-component variances explicitly. Here every
# component uses the same hard-coded variance 0.02459, so the result differs
# slightly from the sample-variance version above.
# NOTE(review): the origin of 0.02459 is not shown in this notebook — confirm.
z = np.ones_like(x)*0.02459
seuclidean(x, y, z)

4.4772113684686046

## 6. 马氏距离 Mahalanobis Distance

In [47]:
# Mahalanobis distance needs more samples than dimensions; otherwise the
# covariance matrix cannot be obtained in a usable form.
# Transposing turns the data into 10 samples with 2 features each.
X = np.vstack((x, y))
XT = X.transpose()

In [48]:
# Method 1: evaluate the formula directly.
# Covariance matrix between the two features, and its inverse.
S = np.cov(X)
SI = np.linalg.inv(S)
# Mahalanobis distance for every unordered pair of the 10 samples (45 values),
# visited in the order (0,1), (0,2), ..., (8,9).
n = XT.shape[0]
pair_diffs = (XT[i] - XT[j] for i in range(n) for j in range(i + 1, n))
d1 = [np.sqrt(diff.dot(SI).dot(diff)) for diff in pair_diffs]

In [51]:
# 10 samples give 10 * 9 / 2 = 45 unordered pairs.
len(d1)

45

In [52]:
# Method 2: let SciPy compute the pairwise Mahalanobis distances.
from scipy.spatial.distance import pdist
d2=pdist(XT,'mahalanobis')

In [54]:
# pdist likewise yields one distance per pair of samples.
len(d2)

45

## 7. 余弦距离或余弦相似度 Cosine Distance / Similarity

In [56]:
# Cosine similarity: dot product divided by the product of the vector norms.
norm_product = np.linalg.norm(x) * np.linalg.norm(y)
d1 = x.dot(y) / norm_product
d1

0.90699149561206016

In [58]:
# Method 2: SciPy's 'cosine' metric is a distance, so subtract it from 1 to
# recover the similarity.
from scipy.spatial.distance import pdist
X = np.vstack((x, y))
d2 = 1 - pdist(X, metric='cosine')
d2

array([ 0.9069915])

In [59]:
from scipy.spatial.distance import cosine
# cosine() returns the cosine distance; 1 minus it is the similarity, which
# agrees with the two values computed above.
d3 = 1 - cosine(x, y)
d3

0.90699149561206038

## 8. 皮尔逊相关系数 Pearson Correlation

In [61]:
# Pearson correlation computed as the cosine similarity of the mean-centred
# vectors.
x_ = x - x.mean()
y_ = y - y.mean()
d1 = x_.dot(y_) / (np.linalg.norm(x_) * np.linalg.norm(y_))
d1

0.52484179810211873

In [62]:
# np.corrcoef returns the correlation matrix of the rows of X; the
# off-diagonal entry is the correlation between x and y.
X = np.vstack((x, y))
d2 = np.corrcoef(X)[0, 1]
d2

0.52484179810211884

In [64]:
from scipy.spatial.distance import correlation
# SciPy's correlation() is a distance, so 1 minus it gives the coefficient,
# which agrees with the two values computed above.
1 - correlation(x, y)

0.52484179810211873

## 9. 汉明距离 Hamming Distance

In [66]:
import numpy as np
from scipy.spatial.distance import pdist
# Two random boolean vectors (an entry is True when its uniform draw exceeds 0.5).
# NOTE: this rebinds x and y, replacing the float vectors used above.
x = np.random.random(10) > 0.5
y = np.random.random(10) > 0.5

# 0/1 integer copies of the boolean vectors.
x_ = x.astype(np.int32)
y_ = y.astype(np.int32)

# Method 1: Hamming distance as the fraction of positions that differ.
d1 = (x_ != y_).mean()

# Method 2: the same value from SciPy's pdist.
X = np.vstack((x_, y_))
d2 = pdist(X, metric='hamming')

In [67]:
# Show the manual and the pdist result side by side; they agree.
d1,d2

(0.59999999999999998, array([ 0.6]))

In [68]:
from scipy.spatial.distance import hamming
# Direct two-vector form of the same metric; same value as above.
hamming(x_, y_)

0.59999999999999998

## 10. 杰卡德距离 Jaccard Distance（即 1 − 杰卡德相似系数 Jaccard similarity coefficient）

In [69]:
import numpy as np
from scipy.spatial.distance import pdist
# Two random boolean vectors (an entry is True when its uniform draw exceeds 0.5).
# NOTE: this rebinds x and y, replacing the vectors used in earlier cells.
x = np.random.random(10) > 0.5
y = np.random.random(10) > 0.5

# 0/1 integer copies of the boolean vectors.
x_ = np.asarray(x, np.int32)
y_ = np.asarray(y, np.int32)

# Method 1: Jaccard distance from the definition.
# Among the positions where at least one vector is non-zero, count the
# fraction where the two vectors disagree.
either_nonzero = (x_ != 0) | (y_ != 0)
up = np.double(((x_ != y_) & either_nonzero).sum())
down = np.double(either_nonzero.sum())
# Guard the 0/0 case (both vectors entirely zero): the plain division would
# produce nan plus a RuntimeWarning. Report 0 instead, which is what SciPy's
# jaccard documents for this case since version 1.2.
d1 = (up / down) if down else np.double(0)

# Method 2: the same value from SciPy's pdist.
X = np.vstack([x_, y_])
d2 = pdist(X, 'jaccard')

In [70]:
# Show the manual and the pdist result side by side; they agree.
d1,d2

(0.69999999999999996, array([ 0.7]))

In [71]:
from scipy.spatial.distance import jaccard
# Direct two-vector form of the same metric; same value as above.
jaccard(x_, y_)

0.69999999999999996

## 11. 布雷柯蒂斯距离 Bray-Curtis Distance

In [72]:
import numpy as np
from scipy.spatial.distance import pdist
# Two small non-negative count vectors.
# NOTE: this rebinds x and y, replacing the vectors used in earlier cells.
x = np.array([11, 0, 7, 8, 0])
y = np.array([24, 37, 5, 18, 1])

# Method 1: Bray-Curtis distance from the formula
#   sum(|y_i - x_i|) / (sum(x_i) + sum(y_i)).
up = np.abs(y - x).sum()
down = x.sum() + y.sum()
d1 = up / down

# Method 2: the same value from SciPy's pdist.
X = np.vstack((x, y))
d2 = pdist(X, metric='braycurtis')

In [74]:
# Show the manual and the pdist result side by side; they agree.
d1,d2

(0.56756756756756754, array([ 0.56756757]))

In [75]:
from scipy.spatial.distance import braycurtis
# Direct two-vector form of the same metric; same value as above.
braycurtis(x,y)

0.56756756756756754

## 12. 编辑距离 Levenshtein Distance

In [78]:
import Levenshtein
# Edit (Levenshtein) distance between two short strings.
# NOTE: x and y are rebound to strings here, replacing the arrays used above.
x = 'eeba'
y = 'abac'
Levenshtein.distance(x,y)

3