## Regression Loss

### 1. MSE

In [198]:
import numpy as np

y_true_np = np.array([3.0, 5.0, 2.5])
y_pred_np = np.array([2.5, 5.5, 2.0])

def mse_loss(pred, target):
    return np.mean((pred-target)**2)
mse = mse_loss(y_pred_np, y_true_np)
print("MSE:", mse)

MSE: 0.25


In [199]:
import torch

y_true_t = torch.tensor([3.0, 5.0, 2.5])
y_pred_t = torch.tensor([2.5, 5.5, 2.0], requires_grad=True)
# requires_grad: 这个tensor参与梯度计算，需要在反向传播的时候计算他的梯度

def mse_loss(pred, target):
    return torch.mean((pred-target)**2)
mse = mse_loss(y_pred_t, y_true_t)
print("MSE:", mse)

MSE: tensor(0.2500, grad_fn=<MeanBackward0>)


### 2. MAE 

In [200]:
def mae_loss(pred, target):
    return np.mean(np.abs(pred-target))

mae = mae_loss(y_pred_np, y_true_np)
print("MAE:", mae)

MAE: 0.5


In [201]:
def mae_loss(pred, target):
    return torch.mean(torch.abs(pred-target))

mae = mae_loss(y_pred_t, y_true_t)
print("MAE:", mae)

MAE: tensor(0.5000, grad_fn=<MeanBackward0>)


### 3. Huber Loss

In [202]:
def huber_loss(pred, target, delta=1.0):
    diff = np.abs(pred-target)
    loss = np.where(
        diff<=delta, 
        0.5*diff**2, 
        delta*(diff-0.5*delta)
    )
    return np.mean(loss)
huber_loss = huber_loss(y_pred_np, y_true_np)
print("huber loss:", huber_loss)

huber loss: 0.125


In [203]:
def huber_loss(pred, target, delta=1.0):
    diff = torch.abs(pred-target)
    loss = torch.where(
        diff<=delta, 
        0.5*diff**2, 
        delta*(diff-0.5*delta)
    )
    return loss.mean()
huber_loss = huber_loss(y_pred_t, y_true_t)
print("huber loss:", huber_loss)

huber loss: tensor(0.1250, grad_fn=<MeanBackward0>)


### 4. RMSE

In [204]:
def rmse_loss(pred, target):
    return np.sqrt(np.mean((pred-target)**2))
rmse = rmse_loss(y_pred_np, y_true_np)
print("RMSE:", rmse)

RMSE: 0.5


In [205]:
def rmse_loss(pred, target):
    return torch.sqrt(torch.mean((pred-target)**2))
rmse = rmse_loss(y_pred_t, y_true_t)
print("RMSE:", rmse)

RMSE: tensor(0.5000, grad_fn=<SqrtBackward0>)


## Classification Loss

### 1. Binary Cross Entropy

In [206]:
# labels
y_true_np = np.array([1, 0, 1])
y_true_t = torch.tensor([1., 0., 1.])

# logits
logits_np = np.array([0.2, 0.6, 0.2])
logits_t = torch.tensor([0.2, 0.6, 0.2], requires_grad=True)

In [207]:
def bce_loss(pred, target, eps=1e-4):
    return -np.mean(target*np.log(pred+eps)+(1-target)*np.log(1-pred+eps))

bce_loss = bce_loss(logits_np, y_true_np)
print("BCE Loss:", bce_loss)

BCE Loss: 1.3779722793012823


In [208]:
def bce_loss(pred, target, eps=1e-4):
    return torch.mean(target*torch.log(pred+eps)+(1-target)*torch.log(1-pred+eps))
bce_loss = bce_loss(logits_t, y_true_t)
print("BCE Loss:", bce_loss)

BCE Loss: tensor(-1.3780, grad_fn=<MeanBackward0>)


### 2. Categorical Cross-Entropy (multiclass, but have only one label)

In [209]:
# batch=3, classes=4
logits_np = np.array([
    [2.0, 1.0, 0.1, -1.0],  # batch 1
    [0.5, 2.5, 0.3, 0.1], # batch 2
    [1.2, 0.2, 2.0, 0.5] # batch 3
])

labels_np = np.array([0, 1, 2])

logits_t = torch.tensor([
    [2.0, 1.0, 0.1, -1.0],
    [0.5, 2.5, 0.3, 0.1],
    [1.2, 0.2, 2.0, 0.5]
])

labels_t = torch.tensor([0, 1, 2])

In [210]:
def softmax(logits):
    exp = np.exp(logits)
    return exp/np.sum(exp, axis=1, keepdims=True)

def categorical_cross_entropy(logits, labels, eps=1e-12):
    probs = softmax(logits)
    N = logits.shape[0] 
    loss = -np.log(probs[np.arange(N), labels]+eps)
    return np.mean(loss)

ce = categorical_cross_entropy(logits_np, labels_np)
print("CE Loss:", ce)

CE Loss: 0.44939341449096365


In [211]:
def softmax(logits):
    exp = torch.exp(logits)
    return exp/torch.sum(exp, dim=1, keepdims=True)

def categorical_cross_entropy(logits, labels, eps=1e-12):
    probs = softmax(logits)
    N = logits.shape[0]
    loss = -torch.log(probs[torch.arange(N), labels]+eps)
    return loss.mean()

ce = categorical_cross_entropy(logits_t, labels_t)
print("CE Loss:", ce)

CE Loss: tensor(0.4494)


## Ranking Loss

### 1. Pointwise 

#### 1.1. MSE (same as the regression loss)

#### 1.2. MAE (same as the regression loss)

#### 1.3 Binary Cross Entropy (same as the classifaction loss)

### 2. Pairwise

#### 2.1 Hinge Loss 

In [212]:
pos_np = np.array([3.0, 2.5, 1.2])
neg_np = np.array([1.0, 2.0, 1.0])

pos_t = torch.tensor([3.0, 2.5, 1.2])
neg_t = torch.tensor([1.0, 2.0, 1.0])

In [213]:
def hinge_loss(pos, neg, margin=1.0):
    return np.mean(np.maximum(0, margin-(pos-neg)))

hinge_loss = hinge_loss(pos_np, neg_np)
print("Hinge Loss:", hinge_loss)

Hinge Loss: 0.43333333333333335


In [214]:
def hinge_loss(pos, neg, margin=1.0):
    return torch.mean(torch.clamp(margin-(pos-neg), min=0))

hinge_loss = hinge_loss(pos_t, neg_t)
print("Hinge Loss:", hinge_loss)

Hinge Loss: tensor(0.4333)


#### 2.2 Logistic Loss

In [215]:
def logistic_loss(pos, neg):
    return np.mean(np.log(1+np.exp(-(pos-neg))))

logistic_loss = logistic_loss(pos_np, neg_np)
print("Logistic Loss:", logistic_loss)

Logistic Loss: 0.3997146215348904


In [216]:
def logistic_loss(pos, neg):
    return torch.mean(torch.log(1+torch.exp(-(pos-neg))))
logistic_loss = logistic_loss(pos_t, neg_t)
print("Logistic Loss:", logistic_loss)

Logistic Loss: tensor(0.3997)


#### 2.3 BPR Loss

In [217]:
def bpr_loss(pos, neg):
    return -np.mean(np.log(1/(1+np.exp(-(pos-neg)))))
bpr_loss = bpr_loss(pos_np, neg_np)
print("BPR Loss:", bpr_loss)

BPR Loss: 0.3997146215348904


In [218]:
def bpr_loss(pos, neg):
    return -torch.mean(torch.log(1/(1+torch.exp(-(pos-neg)))))
bpr_loss = bpr_loss(pos_t, neg_t)
print("BPR Loss:", bpr_loss)

BPR Loss: tensor(0.3997)


#### 2.4 Margin Ranking Loss (SVM Rank)

In [219]:
def margin_ranking_loss(pos, neg, y, margin=1):
    return np.mean(np.maximum(0, margin-y*(pos-neg)))

margin_ranking_loss = margin_ranking_loss(pos_np, neg_np, y=1, margin=1)
print("Margin Ranking Loss:", margin_ranking_loss)

Margin Ranking Loss: 0.43333333333333335


In [220]:
def margin_ranking_loss(pos, neg, y, margin=1):
    return torch.mean(torch.clamp(margin-y*(pos-neg), min=0))

margin_ranking_loss = margin_ranking_loss(pos_t, neg_t, y=1, margin=1)
print("Margin Ranking Loss:", margin_ranking_loss)

Margin Ranking Loss: tensor(0.4333)


### 3. Listwise

#### 3.1 ListNet Loss

In [221]:
pred_np = np.array([2.1, 1.0, 0.5, -0.2, 1.5])
true_np = np.array([3.0, 2.0, 1.0, 0.0, 2.0])

pred_t = torch.tensor([2.1, 1.0, 0.5, -0.2, 1.5])
true_t = torch.tensor([3.0, 2.0, 1.0, 0.0, 2.0])

In [222]:
def softmax(x):
    exp = np.exp(x)
    return exp/np.sum(exp)

def listnet_loss(pred_scores, true_scores):
    P_true = softmax(true_scores)
    P_pred = softmax(pred_scores)
    loss = -np.sum(P_true*np.log(P_pred))
    return loss

listnet_loss = listnet_loss(pred_np, true_np)
print("Listnet Loss:", listnet_loss)

Listnet Loss: 1.2790021343290645


In [223]:
def softmax(x):
    exp = torch.exp(x)
    return exp/torch.sum(exp)

def listnet_loss(pred_scores, true_scores):
    P_true = softmax(true_scores)
    P_pred = softmax(pred_scores)
    loss = -torch.sum(P_true*np.log(P_pred))
    return loss

listnet_loss = listnet_loss(pred_t, true_t)
print("Listnet Loss:", listnet_loss)

Listnet Loss: tensor(1.2790)


  loss = -torch.sum(P_true*np.log(P_pred))


#### 3.2 InfoNCE Loss

In [224]:
q_np = np.random.randn(4, 8)
k_np = np.random.randn(4, 8)

print("Query:", q_np)
print("Key:", k_np)


Query: [[-0.70834323 -1.43688441 -1.95069156  1.69155427  0.83022497  0.41467171
   0.90003164  0.71840541]
 [ 0.07494945 -0.00951139 -0.85909234 -1.54578104 -0.13860638 -1.40494792
  -1.08105348 -1.11175955]
 [-1.1548614   0.852863   -0.67854815  0.01383249 -1.29700878  1.84277265
  -0.98883151 -0.3330663 ]
 [-0.25418583 -1.26277186  1.26739175 -0.88663619 -0.04162381  1.66623129
   0.13419099 -1.53298745]]
Key: [[-1.1658711  -1.59747827  0.13002691  0.16656563 -0.29080775  0.68623307
   0.55509394 -1.01824037]
 [ 1.14979626  0.32240163 -1.32089382 -1.4061373  -0.66141214 -0.43399513
  -0.06035458  0.16194982]
 [-0.7389324   2.15157486 -1.66363523 -0.08159468 -0.62202395  0.02009376
  -0.14919493 -0.1887624 ]
 [ 0.27590294 -0.66227243 -1.36478619 -0.13411698  0.09396432 -0.33804415
   1.43749694 -0.91166615]]


In [225]:
q_t = torch.randn(4, 8)
k_t = torch.randn(4, 8)

print("Query:", q_t)
print("Key:", k_t)

Query: tensor([[-1.4569,  1.6060,  0.7825, -0.8799,  1.1309,  2.0260, -0.5535, -0.2143],
        [-0.3234,  0.9806,  0.0604, -1.1901,  1.6995,  1.1238, -0.8171,  0.6541],
        [ 1.2541, -0.1434,  0.9729, -1.0916,  0.5634,  0.6972, -0.0422, -1.4592],
        [-1.3636, -1.8282,  1.1501, -1.0705,  1.2855,  0.1473,  0.6627,  0.3934]])
Key: tensor([[-0.1116,  0.0834, -0.2129, -0.3214, -1.2527,  0.3528, -0.7192, -0.8067],
        [-0.2126, -0.2815,  1.0260, -0.7254, -0.9999, -0.1493,  0.4120,  0.1061],
        [-0.1742,  1.4896,  0.7534, -0.8012, -1.2617, -0.3526, -0.0418,  0.6707],
        [-0.9145, -1.3882,  0.2915, -1.5326, -1.3399,  0.5254, -1.4797,  0.7023]])


In [226]:
def info_nce_loss(query, key, temperature=0.07):
    """
    query: (N, D)
    key: (N, D)
    """
    logits = query@key.T/temperature
    exp = np.exp(logits)
    probs = exp/np.sum(exp, axis=1, keepdims=True)

    loss = -np.log(np.diag(probs))
    return np.mean(loss)

info_nce_loss = info_nce_loss(q_np, k_np)
print("InfoNCE:", info_nce_loss)

InfoNCE: 20.538288848477517


In [227]:
def info_nce_loss(query, key, temperature=0.07):
    logits = query@key.T/temperature

    exp = torch.exp(logits)
    probs = exp/torch.sum(exp, dim=1, keepdim=True)
    loss = -torch.log(torch.diag(probs))

    return loss.mean()

info_nce_loss = info_nce_loss(q_t, k_t)
print("InfoNCE:", info_nce_loss)

InfoNCE: tensor(18.2527)
