## Eval Cosine Similairy

参考链接：https://github.com/xiangking/PyTorch_CoSENT/blob/main/CoSENT_ATEC.ipynb

In [1]:
import torch
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

a1 = np.random.random((5, 10))
b1 = np.random.random((5, 10))

a2 = torch.from_numpy(a1)
b2 = torch.from_numpy(b1)

#### 定义正则化的函数

In [9]:
def l2_normalize(x):
    # 这一步相当于求： ||x||
    norm = (x ** 2).sum(axis=1, keepdims=True) ** 0.5
    # return x / norm
    return x / np.clip(norm, 1e-8, np.inf) # np.clip(a, a_min, a_max)

### numpy

In [18]:
cosine_similarity(a1, b1)

array([[0.72170833, 0.66135354, 0.65483169, 0.7802302 , 0.72604197],
       [0.69296034, 0.83372952, 0.63956145, 0.88773847, 0.82563329],
       [0.74454191, 0.78951296, 0.7076026 , 0.78417534, 0.78279068],
       [0.77675723, 0.72564313, 0.82322539, 0.70335139, 0.81429267],
       [0.74744944, 0.81791773, 0.71088846, 0.68847126, 0.80378599]])

In [21]:
aa1 = l2_normalize(a1)
bb1 = l2_normalize(b1)
np.inner(aa1, bb1)

array([[0.72170833, 0.66135354, 0.65483169, 0.7802302 , 0.72604197],
       [0.69296034, 0.83372952, 0.63956145, 0.88773847, 0.82563329],
       [0.74454191, 0.78951296, 0.7076026 , 0.78417534, 0.78279068],
       [0.77675723, 0.72564313, 0.82322539, 0.70335139, 0.81429267],
       [0.74744944, 0.81791773, 0.71088846, 0.68847126, 0.80378599]])

### Pytorch

In [24]:
import torch.nn.functional as F

F.cosine_similarity(a2, b2)

tensor([0.7217, 0.8337, 0.7076, 0.7034, 0.8038], dtype=torch.float64)

In [40]:
λ = 20

aa2 = F.normalize(a2, p=2, dim=1, eps=1e-8)
bb2 = F.normalize(b2, p=2, dim=1, eps=1e-8)

cosine  = torch.sum(aa2 * bb2, dim=1)
cosine_scale = λ * cosine
print(cosine_scale.shape)
cosine_scale

torch.Size([5])


tensor([14.4342, 16.6746, 14.1521, 14.0670, 16.0757], dtype=torch.float64)

In [42]:
cosine_scale[:, None]

tensor([[14.4342],
        [16.6746],
        [14.1521],
        [14.0670],
        [16.0757]], dtype=torch.float64)

In [43]:
cosine_scale[None, :]

tensor([[14.4342, 16.6746, 14.1521, 14.0670, 16.0757]], dtype=torch.float64)

In [44]:
cosine_scale[:, None] - cosine_scale[None, :]

tensor([[ 0.0000, -2.2404,  0.2821,  0.3671, -1.6416],
        [ 2.2404,  0.0000,  2.5225,  2.6076,  0.5989],
        [-0.2821, -2.5225,  0.0000,  0.0850, -1.9237],
        [-0.3671, -2.6076, -0.0850,  0.0000, -2.0087],
        [ 1.6416, -0.5989,  1.9237,  2.0087,  0.0000]], dtype=torch.float64)

In [49]:
label = np.array([1, 0, 1, 0, 1], dtype=np.long)
print('label', label)
print('label[:, None]', label[:, None])
print('labels[None, :]', label[None, :])

label [1 0 1 0 1]
label[:, None] [[1]
 [0]
 [1]
 [0]
 [1]]
labels[None, :] [[1 0 1 0 1]]


In [70]:
labels = torch.from_numpy(label[:, None] < label[None, :])
labels = labels.long()

cos_sim = cosine_scale -  (1 - labels) * 1e12

# 还要加上一个 1
cosine_sim  = torch.cat((torch.zeros(1),cos_sim.view(-1)), dim= 0)
cosine_sim

tensor([ 0.0000e+00, -1.0000e+12, -1.0000e+12, -1.0000e+12, -1.0000e+12,
        -1.0000e+12,  1.4434e+01, -1.0000e+12,  1.4152e+01, -1.0000e+12,
         1.6076e+01, -1.0000e+12, -1.0000e+12, -1.0000e+12, -1.0000e+12,
        -1.0000e+12,  1.4434e+01, -1.0000e+12,  1.4152e+01, -1.0000e+12,
         1.6076e+01, -1.0000e+12, -1.0000e+12, -1.0000e+12, -1.0000e+12,
        -1.0000e+12], dtype=torch.float64)

### 验证

tensor(-1.9000e+13, dtype=torch.float64)

In [71]:
torch.logsumexp(cosine_sim, dim=0)

tensor(17.0613, dtype=torch.float64)

## 全部流程代码

In [76]:
import torch
import torch.nn.functional as F

sent_a = torch.randn(5, 10)
sent_b = torch.randn(5, 10)
print(sent_a.shape)
label_ids = torch.LongTensor([1, 0, 1, 0, 1])
λ = 20

torch.Size([5, 10])


In [96]:
a_norm = F.normalize(sent_a, p=2, dim=1, eps=1e-8)
b_norm = F.normalize(sent_b, p=2, dim=1, eps=1e-8)
# 正则化之后，通过内积求余弦相似度
ab_cosine = torch.sum(a_norm * b_norm, dim=1) * λ # (batch_size)
# log(1 + ∑e^λ(si - sj))
# 实现 ∂(si -sj),其中 λ 取20
ab_cosine_diff = ab_cosine[:, None] - ab_cosine[None, :]
# 实现的结果 负样本的cosine 值减去 正样本的值，那么 正样本的值应该都是 -np.inf
# 通过label_id 进行筛选，并构造和 ab_cosine_diff 一致的数据结构
# 如果 是正样本则不应该进行计算，只有negative - positive 对应的位置才是 正值
labels = label_ids[:, None] < label_ids[None, :]
labels = labels.long()
# 将ab_cosine_diff 中不应该计算的值(即labels 中值为0 的位置)映射为 1e-12
ab_exp_diff = ab_cosine_diff - (1 - labels) * 1e12
# log 里面还有个1， 对应 exp 就是在最前面加一个0
ab_exp_diff = torch.cat((torch.zeros(1), ab_exp_diff.view(-1)), dim=0)
loss = torch.logsumexp(ab_exp_diff, dim=0)
loss


tensor(15.9048)