## 矩阵欧式距离

In [1]:
import numpy as np

In [2]:
def euclidean_distances(x, y, squared=True):
    """Compute pairwise (squared) Euclidean distances between rows of x and y.

    Uses the expansion ||a - b||^2 = ||a||^2 - 2 * a.b + ||b||^2 so that the
    whole distance matrix is obtained from a single matrix product.

    Parameters
    ----------
    x : np.ndarray, shape (m, k)
    y : np.ndarray, shape (n, k)
    squared : bool, default True
        If True return squared distances, otherwise take the square root.

    Returns
    -------
    np.ndarray, shape (m, n)
        Entry (i, j) is the (squared) distance between x[i] and y[j].
        Integer inputs give an integer result when ``squared`` is True
        (widened to at least int64); floating inputs keep their result dtype.

    Raises
    ------
    AssertionError
        If x or y is not a 2-D ndarray, or their second dimensions differ.
    """
    assert isinstance(x, np.ndarray) and x.ndim == 2, "x must be a 2-D ndarray"
    assert isinstance(y, np.ndarray) and y.ndim == 2, "y must be a 2-D ndarray"
    # Both inputs must share the feature dimension k (axis 1).
    assert x.shape[1] == y.shape[1], "x and y must have the same number of columns"

    # Remember identity before any dtype conversion creates new objects.
    same_input = x is y

    # Narrow or unsigned integer dtypes (uint8, int8, bool, ...) would overflow
    # in x*x and in the matrix product, or fail on the in-place multiply by -2.
    # Widen integer inputs to at least int64; float dtypes are left untouched.
    work_dtype = np.result_type(x.dtype, y.dtype)
    if work_dtype.kind in "biu":
        work_dtype = np.promote_types(work_dtype, np.int64)
    x = x.astype(work_dtype, copy=False)
    y = x if same_input else y.astype(work_dtype, copy=False)

    # Row-wise squared norms, kept as a column. shape = (m, 1)
    x_square = np.sum(x * x, axis=1, keepdims=True)
    if same_input:
        # Same matrix on both sides: reuse the norms as a row. shape = (1, m)
        y_square = x_square.T
    else:
        # Row-wise squared norms of y as a row. shape = (1, n)
        y_square = np.sum(y * y, axis=1, keepdims=True).T

    distances = np.dot(x, y.T)  # x @ y^T, shape = (m, n)
    distances *= -2
    distances += x_square  # broadcast over columns
    distances += y_square  # broadcast over rows

    # Floating-point rounding can leave tiny negative values; clamp in place.
    np.maximum(distances, 0, out=distances)
    if same_input:
        # A vector's distance to itself must be exactly 0, which rounding
        # errors do not guarantee.
        np.fill_diagonal(distances, 0)
    if not squared:
        distances = np.sqrt(distances)
    return distances





In [3]:
# Sample inputs: three 5-dimensional vectors in x, four in y.
x = np.array([
    [11, 12, 4, 6, 8],
    [2, 3, 7, 5, 9],
    [14, 8, 9, 11, 10],
])
y = np.array([
    [12, 10, 8, 9, 5],
    [5, 4, 7, 11, 10],
    [13, 15, 14, 7, 9],
    [18, 5, 9, 5, 7],
])
print(x.shape, y.shape)

(3, 5) (4, 5)


In [4]:
# Squared pairwise distances between the rows of x and y.
euclidean_distances(x, y, squared=True)

array([[ 39, 138, 115, 125],
       [182,  47, 318, 268],
       [ 38, 101,  92,  70]])

In [5]:
# Plain (square-rooted) pairwise distances between the rows of x and y.
euclidean_distances(x, y, squared=False)

array([[ 6.244998  , 11.74734012, 10.72380529, 11.18033989],
       [13.49073756,  6.8556546 , 17.8325545 , 16.37070554],
       [ 6.164414  , 10.04987562,  9.59166305,  8.36660027]])

## 余弦相似度与余弦距离

In [6]:
def cosine_Similarity(a, b):
    """Compute the pairwise cosine similarity between rows of a and b.

    Parameters
    ----------
    a : array_like
        N x M matrix.
    b : array_like
        L x M matrix.

    Returns
    -------
    np.ndarray
        N x L matrix whose (i, j) entry is the cosine similarity of a[i]
        and b[j]. A row with zero norm is divided by zero and yields NaN
        entries.
    """
    # L2 norm of every row, kept as a column so it broadcasts over the row.
    a_norms = np.linalg.norm(a, axis=1, keepdims=True)
    b_norms = np.linalg.norm(b, axis=1, keepdims=True)

    # Scale each row to unit length.
    a_unit = np.asarray(a) / a_norms
    b_unit = np.asarray(b) / b_norms

    # Dot products of unit vectors are the cosine similarities.
    return a_unit.dot(b_unit.T)
    

In [24]:
# Two small sample matrices; each row is one vector to compare.
a = np.array([
    [2, 1, 1, 0, 1],
    [1, 0, 2, 1, 4],
])
b = np.array([
    [2, 0, 1, 0, 1],
    [1, 0, 2, 1, 4],
])
cosine_Similarity(a, b)  # cosine similarity: the more alike, the closer to 1

array([[0.9258201 , 0.64465837],
       [0.69631062, 1.        ]])

In [9]:
def cosine_distance(a, b):
    """Compute the pairwise cosine distance between rows of a and b.

    Parameters
    ----------
    a : array_like
        N x M matrix.
    b : array_like
        L x M matrix.

    Returns
    -------
    The cosine distance matrix, defined as 1 minus the cosine similarity.
    """
    similarity = cosine_Similarity(a, b)
    return 1 - similarity

In [10]:
# Pairwise cosine distance between the rows of a and b.
# NOTE(review): the recorded output below is 2x3, but a and b as defined in
# cell In [24] both have two rows, which gives a 2x2 result. The output looks
# like it came from an earlier run with a three-row b -- re-run to confirm.
cosine_distance(a, b)

array([[1.27128439e-01, 7.41799002e-02, 3.55341629e-01],
       [1.38359563e-01, 3.03689376e-01, 1.11022302e-16]])

In [11]:
# NOTE(review): the recorded output below is 2x3, which does not match the
# two-row a and b defined in cell In [24] -- presumably stale; re-run to confirm.
np.around(cosine_distance(a, b),decimals=6)  # keep 6 decimals for easier inspection

array([[0.127128, 0.07418 , 0.355342],
       [0.13836 , 0.303689, 0.      ]])

## torch中距离公式

`TripletMarginWithDistanceLoss(distance_function)`  
Math:
$$
\ell(a, p, n) = L = \{l_1,\dots,l_N\}^\top, \quad
        l_i = \max \{d(a_i, p_i) - d(a_i, n_i) + {\rm margin}, 0\} 
$$

$$
\ell(a, p, n) =
    \begin{cases}
        \operatorname{mean}(L), &  \text{if reduction} = \text{`mean';}\\
        \operatorname{sum}(L),  &  \text{if reduction} = \text{`sum'.}
    \end{cases}
$$