# [Pytorch Loss Functions in Plain Python](https://medium.com/@zhang_yang/pytorch-loss-funtions-in-plain-python-b79c05f8b53f)

In [1]:
import numpy as np
import torch
import torch.nn as nn

torch.__version__

'1.3.1'

# L1Loss

In [2]:
x = torch.randn(2,3)
y = torch.randn(2,3)

In [3]:
x

tensor([[-1.0239, -1.2644,  0.3209],
        [-1.2047, -0.0058,  0.2522]])

In [4]:
y

tensor([[ 2.4410,  0.1568, -0.3652],
        [-0.5740,  0.6904, -1.0142]])

In [5]:
nn.L1Loss()(x, y)

tensor(1.3610)

In [6]:
nn.L1Loss(reduction='none')(x, y)

tensor([[3.4649, 1.4212, 0.6861],
        [0.6308, 0.6963, 1.2665]])

In [7]:
abs(x.numpy() - y.numpy())

array([[3.4649143, 1.4212184, 0.6861099],
       [0.630767 , 0.6962625, 1.2664555]], dtype=float32)

In [8]:
abs(x.numpy() - y.numpy()).mean()

1.3609546

# MSELoss

In [9]:
x = torch.randn(2, 3)
y = torch.randn(2, 3)

print(x)
print(y)

tensor([[-0.0128,  0.8704,  0.0537],
        [ 0.7203, -0.2734,  1.8637]])
tensor([[ 1.4851,  0.0346,  1.6662],
        [-0.5716, -1.4263,  0.6127]])


In [10]:
nn.MSELoss(reduction='none')(x,y)

tensor([[2.2436, 0.6987, 2.6002],
        [1.6690, 1.3290, 1.5652]])

In [11]:
nn.MSELoss()(x,y)

tensor(1.6843)

In [12]:
(x.numpy() - y.numpy()) ** 2

array([[2.2435777, 0.6987036, 2.6002054],
       [1.6689583, 1.3290129, 1.565206 ]], dtype=float32)

In [13]:
((x.numpy() - y.numpy()) ** 2).mean()

1.6842772

# CrossEntropyLoss

In [14]:
x = torch.randn(2, 4)
y = torch.LongTensor(2).random_(2)

print(x)
print(y)

tensor([[ 1.3506,  0.1631,  0.7784, -0.1374],
        [ 0.9350, -1.7690, -0.7312,  0.4905]])
tensor([1, 1])


In [15]:
nn.CrossEntropyLoss(reduction='none')(x, y)

tensor([1.9271, 3.3444])

In [16]:
nn.CrossEntropyLoss()(x, y)

tensor(2.6357)

In [17]:
x = x.numpy()
y = y.numpy()

lst = []

for k in range(len(x)):
    lst.append(-np.log(np.exp(x[k][y[k]]) / np.exp(x[k]).sum()))
    
lst, np.mean(lst)

([1.9270953, 3.3443615], 2.6357284)

# NLLLoss
### LogSoftmax

In [18]:
x = torch.randn(2, 4)
x

tensor([[-1.3730, -0.7259,  0.1820,  1.5294],
        [-1.0940,  0.1268, -1.7371, -0.9017]])

In [19]:
y = nn.LogSoftmax(dim=1)(x)
y

tensor([[-3.2528, -2.6056, -1.6978, -0.3504],
        [-1.8128, -0.5920, -2.4559, -1.6205]])

In [20]:
x = x.numpy()
lst = []

for k in range(len(x)):
    lst.append(np.log(np.exp(x[k]) / np.exp(x[k]).sum()))
    
lst

[array([-3.2528155, -2.605648 , -1.6977761, -0.3504241], dtype=float32),
 array([-1.812798  , -0.59198576, -2.4559085 , -1.6205477 ], dtype=float32)]

### NLLLoss

In [21]:
x0 = torch.randn(3, 4)
x = nn.LogSoftmax(dim=1)(x0)
x0, x

(tensor([[-0.9414, -1.1975,  1.9285,  0.4562],
         [ 0.1583, -0.6890,  1.2086, -0.6242],
         [ 1.0559,  1.0176,  1.4333,  0.2493]]),
 tensor([[-3.1551, -3.4112, -0.2852, -1.7575],
         [-1.5570, -2.4042, -0.5066, -2.3395],
         [-1.3525, -1.3909, -0.9751, -2.1592]]))

In [22]:
y = torch.LongTensor(3).random_(4)
y

tensor([1, 2, 0])

In [23]:
nn.NLLLoss()(x, y)

tensor(1.7568)

In [24]:
nn.NLLLoss(reduction='none')(x, y)

tensor([3.4112, 0.5066, 1.3525])

In [25]:
x = x.numpy()
y = y.numpy()

lst = []
for k in range(len(x)):
    lst.append(-x[k][y[k]])
    
lst, np.mean(lst)

([3.411236, 0.50663644, 1.3525444], 1.7568055)

# PoissonNLLLoss

In [44]:
x = torch.randn(2, 4)
x

tensor([[ 1.0233, -1.0185,  0.3346, -0.6253],
        [-0.6767,  0.1118, -0.5326,  0.7291]])

In [45]:
y = torch.randn(2, 4)
y

tensor([[ 0.6070,  0.1216, -0.9366,  0.5629],
        [ 1.0292,  0.6052,  0.1757,  0.4074]])

In [46]:
nn.PoissonNLLLoss()(x, y)

tensor(1.2445)

In [47]:
nn.PoissonNLLLoss(reduction='none')(x, y)

tensor([[2.1611, 0.4850, 1.7107, 0.8871],
        [1.2048, 1.0506, 0.6806, 1.7762]])

In [48]:
x = x.numpy()
y = y.numpy()

In [49]:
# target∗log(target)−target+0.5∗log(2πtarget)
def sterling_approx(y):
    return y*np.log(y) - y + 0.5*np.log(np.pi*y)

In [50]:
lst = []
for k in range(len(x)):
    lsti = []
    for i in range(len(x[k])):
        lss = np.exp(x[k,i])-y[k,i]*x[k,i] + (sterling_approx(y[k,i]) if y[k,i]>1 else 0)
        lsti.append(lss)
    lst.append(lsti)

In [51]:
np.array(lst)

array([[2.1611166 , 0.48501673, 1.71070874, 0.88708782],
       [0.7919314 , 1.0506202 , 0.68064564, 1.77618265]])

In [52]:
np.mean(lst)

1.192913723779477

# KLDivLoss

In [64]:
x = torch.rand(2,3)
y = torch.rand(2,3)
x

tensor([[0.3531, 0.5185, 0.8857],
        [0.7641, 0.4680, 0.3894]])

In [65]:
xlog = torch.log(x)
xlog

tensor([[-1.0409, -0.6569, -0.1214],
        [-0.2691, -0.7593, -0.9431]])

In [66]:
y

tensor([[0.0242, 0.4290, 0.0314],
        [0.5383, 0.3223, 0.9766]])

In [67]:
nn.KLDivLoss()(xlog, y)



tensor(0.0563)

In [68]:
nn.KLDivLoss(reduction='none')(xlog, y)

tensor([[-0.0649, -0.0813, -0.1049],
        [-0.1886, -0.1202,  0.8978]])

In [69]:
x = x.numpy()
xlog = np.log(x)
y = y.numpy()

In [75]:
lst = []
for i in range(len(x)):
    lsti = []
    for j in range(len(x[i])):
        lsti.append(y[i][j] * (np.log(y[i][j]) - xlog[i][j]))
    lst.append(lsti)
    
np.array(lst)

array([[-0.06494526, -0.08127436, -0.10485492],
       [-0.188559  , -0.12019934,  0.8978182 ]], dtype=float32)

In [76]:
np.mean(lst)

0.05633089

# BCELoss

### Sigmoid

In [77]:
x = torch.randn(2, 4)
y = nn.Sigmoid()(x)
x

tensor([[ 1.7421, -0.1650, -1.8803, -0.7209],
        [-0.7866, -0.5148, -0.9993, -1.8408]])

In [78]:
y

tensor([[0.8510, 0.4588, 0.1324, 0.3272],
        [0.3129, 0.3741, 0.2691, 0.1370]])

In [79]:
x = x.numpy()

In [80]:
1 / (1 + np.exp(-x))

array([[0.8509585 , 0.4588451 , 0.13235117, 0.32719254],
       [0.31290498, 0.37406158, 0.26907104, 0.13695356]], dtype=float32)

### Single label

In [81]:
x0 = torch.randn(3)
x = nn.Sigmoid()(x0)
x

tensor([0.5824, 0.6062, 0.6175])

In [82]:
y = torch.FloatTensor(3).random_(2)
y

tensor([1., 1., 1.])

In [83]:
nn.BCELoss()(x, y)

tensor(0.5078)

In [86]:
nn.BCELoss(reduction='none')(x, y)

tensor([0.5407, 0.5005, 0.4821])

In [87]:
loss = nn.BCELoss(reduction='sum')
lss = loss(x, y)
lss

tensor(1.5234)

In [88]:
x = x.numpy()
y = y.numpy()

In [89]:
lst = []
for i in range(len(x)):
    lst.append(-np.log(x[i]) if y[i]==1 else -np.log(1-x[i]))
lst, np.mean(lst)

([0.5406827, 0.5005495, 0.48212603], 0.50778604)

#### Equivalently

In [90]:
lst = []
for i in range(len(x)):
    lst.append(-np.log(x[i])*y[i] + -np.log(1-x[i])*(1-y[i]))
lst, np.mean(lst)

([0.5406826734542847, 0.5005494952201843, 0.48212602734565735],
 0.5077860653400421)

### multilabel

In [91]:
x0 = torch.randn(3, 2)
x = nn.Sigmoid()(x0)
x

tensor([[0.8808, 0.2046],
        [0.6959, 0.2456],
        [0.5516, 0.6824]])

In [92]:
y = torch.FloatTensor(3, 2).random_(2)
y

tensor([[1., 0.],
        [1., 0.],
        [1., 0.]])

In [93]:
nn.BCELoss()(x, y)

tensor(0.4570)

In [95]:
nn.BCELoss(reduction='none')(x, y)

tensor([[0.1269, 0.2289],
        [0.3625, 0.2819],
        [0.5949, 1.1468]])

In [96]:
x = x.numpy()
y = y.numpy()

In [97]:
lst = []
for i in range(len(x)):
    lsti = []
    for j in range(len(x[i])):
        lsti.append(-np.log(x[i][j]) if y[i][j]==1 else -np.log(1-x[i][j]))
    lst.append(lsti)
np.array(lst), np.mean(lst)

(array([[0.1268706 , 0.22887151],
        [0.36250266, 0.28185339],
        [0.5949406 , 1.14682008]]), 0.45697647364412486)

#### Equivalently

In [98]:
lst = []
for i in range(len(x)):
    lst.append(-np.log(x[i])*y[i] + -np.log(1-x[i])*(1-y[i]))
np.array(lst), np.mean(lst)

(array([[0.1268706 , 0.22887152],
        [0.36250266, 0.28185338],
        [0.5949406 , 1.1468201 ]], dtype=float32), 0.4569765)

# BCEWithLogitsLoss
This is just simply adding a sigmoid in front of BCELoss above.

### single label

In [100]:
x = torch.randn(3)
x

tensor([-0.3009,  1.4924, -0.6867])

In [101]:
xs = nn.Sigmoid()(x)
xs

tensor([0.4253, 0.8164, 0.3348])

In [102]:
y = torch.FloatTensor(3).random_(2)
y

tensor([0., 0., 1.])

In [103]:
nn.BCELoss()(xs, y)

tensor(1.1145)

In [104]:
nn.BCEWithLogitsLoss()(x, y)

tensor(1.1145)

### multilabel

In [105]:
x = torch.randn(3, 2)
x

tensor([[-0.2136, -0.0021],
        [ 0.0182, -0.1834],
        [-1.9684,  0.5768]])

In [106]:
xs = nn.Sigmoid()(x)
xs

tensor([[0.4468, 0.4995],
        [0.5045, 0.4543],
        [0.1226, 0.6403]])

In [107]:
y = torch.FloatTensor(3, 2).random_(2)
y

tensor([[1., 1.],
        [0., 0.],
        [0., 1.]])

In [108]:
nn.BCELoss()(xs, y)

tensor(0.5640)

In [109]:
nn.BCEWithLogitsLoss()(x, y)

tensor(0.5640)

# MarginRankingLoss

In [110]:
x1 = torch.randn(3)
x2 = torch.randn(3)
y = torch.FloatTensor(np.random.choice([1, -1], 3))

x1, x2, y

(tensor([ 1.0091,  1.0829, -0.3210]),
 tensor([ 0.1536,  0.2888, -0.3647]),
 tensor([-1., -1.,  1.]))

In [111]:
nn.MarginRankingLoss(margin=0.1)(x1, x2, y)

tensor(0.6353)

In [112]:
x1 = x1.numpy()
x2 = x2.numpy()
y = y.numpy()
margin=0.1

In [113]:
lst = []
for i in range(len(x1)):
    lst.append(max(0, -y[i]*(x1[i]-x2[i]) + margin))

lst, np.mean(lst)

([0.9554818034172058, 0.8940297126770019, 0.05629009604454041],
 0.6352672040462494)

# HingeEmbeddingLoss

In [115]:
x = torch.randn(2, 3)
y = torch.FloatTensor(np.random.choice([-1, 1], (2, 3)))

x

tensor([[-0.1261, -0.3685,  1.4463],
        [ 1.6838, -0.6071, -0.7318]])

In [116]:
y

tensor([[ 1., -1.,  1.],
        [-1., -1.,  1.]])

In [117]:
nn.HingeEmbeddingLoss(margin=1)(x, y)

tensor(0.5940)

In [118]:
x = x.numpy()
y = y.numpy()
margin=1

In [119]:
lst=[]

for i in range(len(x)):
    lsti = []
    for j in range(len(x[i])):
        if y[i][j]==1:
            lsti.append(x[i][j])
        else:
            lsti.append(max(0, margin-x[i][j]))
    lst.append(lsti)
np.array(lst)

array([[-0.12608038,  1.36852798,  1.44625235],
       [ 0.        ,  1.6071291 , -0.73180032]])

In [120]:
np.mean(lst)

0.594004787504673

# MultiLabelMarginLoss
This is a very confusing class. Great reference here: https://blog.csdn.net/zhangxb35/article/details/72464152

### one-sample example

In [121]:
x = torch.randn(1, 4)
y = torch.LongTensor(1, 4).random_(-1, 4)
x

tensor([[-0.9266, -0.0905, -0.5449,  1.8377]])

In [122]:
y

tensor([[ 1,  0, -1,  2]])

In [123]:
nn.MultiLabelMarginLoss()(x, y)

tensor(2.1549)

In [124]:
x = x.numpy()
y = y.numpy()

In [125]:
lst = []
for k in range(len(x)):
    sm = 0
    js = []
    for j in range(len(y[k])):
        if y[k][j]<0: break 
        js.append(y[k][j])
    for i in range(len(x[k])):
        for j in js:
            if (i not in js) and (i!=j):
                print(i, j)
                sm += max(0, 1-(x[k][j] - x[k][i]))
    lst.append(sm/len(x[k]))

2 1
2 0
3 1
3 0


In [126]:
lst, np.mean(lst)

([2.1549257710576057], 2.1549257710576057)

### multi-sample example

In [127]:
x = torch.randn(3, 4)
y = torch.LongTensor(3, 4).random_(-1, 4)
x

tensor([[-1.6282, -0.9012, -0.6515, -0.9866],
        [ 0.3204,  1.5910, -1.4458, -0.3663],
        [ 0.0577,  0.1988,  0.0710,  0.7591]])

In [128]:
y

tensor([[ 1,  0, -1,  1],
        [ 3,  1,  1,  0],
        [-1,  3,  0, -1]])

In [129]:
nn.MultiLabelMarginLoss()(x, y)

tensor(0.4819)

In [130]:
x = x.numpy()
y = y.numpy()

lst = []
for k in range(len(x)):
    sm = 0
    js = []
    for j in range(len(y[k])):
        if y[k][j]<0: break 
        js.append(y[k][j])
    for i in range(len(x[k])):
        for j in js:
            if (i not in js) and (i!=j):
                sm += max(0, 1-(x[k][j] - x[k][i]))
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

([1.4456799924373627, 0.0, 0.0], 0.4818933308124542)

# SmoothL1Loss

In [132]:
x = torch.randn(2, 3)
y = torch.randn(2, 3)

In [133]:
nn.SmoothL1Loss()(x, y)

tensor(0.0526)

In [134]:
nn.SmoothL1Loss(reduction='none')(x, y)

tensor([[0.0449, 0.0405, 0.0590],
        [0.1032, 0.0035, 0.0645]])

In [135]:
x = x.numpy() 
y = y.numpy()

In [136]:
def smoothl1loss(x, y):
    if abs(x-y)<1: return 1/2*(x-y)**2
    else: return abs(x-y)-1/2

In [137]:
lst = []
for i in range(len(x)):
    lsti=[]
    for j in range(len(x[i])):
        lsti.append(smoothl1loss(x[i][j], y[i][j]))
    lst.append(lsti)
np.array(lst), np.mean(lst)

(array([[0.04493607, 0.04045547, 0.05898079],
        [0.10324773, 0.003454  , 0.06454041]]), 0.05260241034167371)

# SoftMarginLoss

In [138]:
x = torch.randn(2, 4)
y = torch.FloatTensor(np.random.choice([-1, 1], (2, 4)))
x

tensor([[-1.1685,  0.6843,  0.6146,  0.3489],
        [ 0.7830,  0.3562, -0.4967,  0.5175]])

In [139]:
y

tensor([[-1., -1., -1.,  1.],
        [-1.,  1., -1., -1.]])

In [140]:
nn.SoftMarginLoss()(x, y)

tensor(0.7618)

In [141]:
x = x.numpy()
y = y.numpy()

In [142]:
lst = []
for k in range(len(x)):
    sm = 0
    for i in range(len(x[k])):
        sm += np.log(1 + np.exp(-y[k][i]*x[k][i]))
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

([0.7360331374316078, 0.787649392499445], 0.7618412649655264)

In [143]:
x = torch.randn(2, 4)
y = torch.FloatTensor(2, 4).random_(2)
x

tensor([[-1.6744,  1.0909, -0.2122,  0.2262],
        [ 1.0869,  0.3578, -0.2447,  0.6086]])

In [144]:
y

tensor([[1., 0., 0., 0.],
        [1., 1., 0., 0.]])

In [145]:
nn.MultiLabelSoftMarginLoss()(x, y)

tensor(0.8843)

In [146]:
x = x.numpy()
y = y.numpy()

In [147]:
lst = []
for k in range(len(x)):
    sm = 0
    for i in range(len(x[k])):
        sm -= y[k, i]*np.log(np.exp(x[k, i])/(1+np.exp(x[k, i]))) +\
            (1-y[k, i])*np.log(1/(1+np.exp(x[k, i])))
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

([1.1580149049159205, 0.6105233018754658], 0.8842691033956931)

# CosineEmbeddingLoss

In [148]:
x1 = torch.randn(2, 3)
x2 = torch.randn(2, 3)
y = torch.FloatTensor(np.random.choice([1, -1], 2))

x1

tensor([[ 0.2333, -0.0910,  0.3032],
        [ 1.5083, -1.7052, -0.3622]])

In [149]:
x2

tensor([[-0.1864,  0.8042, -0.5174],
        [ 1.0617, -0.0511, -1.4875]])

In [150]:
y

tensor([-1.,  1.])

In [151]:
nn.CosineEmbeddingLoss(margin=0.1)(x1, x2, y)

tensor(0.2357)

In [152]:
x1 = x1.numpy()
x2 = x2.numpy()
y = y.numpy()
margin=0.1

In [153]:
from scipy.spatial.distance import cosine

def cos(x, y): return 1-cosine(x, y)

In [154]:
lst = []
for k in range(len(x1)):
    if y[k] == 1: lst.append(1-cos(x1[k], x2[k]))
    elif y[k] == -1: lst.append(max(0, cos(x1[k], x2[k])-margin))
lst, np.mean(lst)

([0, 0.4714839458465576], 0.2357419729232788)

# MultiMarginLoss

In [156]:
x = torch.randn(2, 4)
y = torch.LongTensor(2).random_(4)
x

tensor([[-0.3378, -0.7942, -0.1373,  1.5074],
        [ 0.2412, -1.7311,  0.2608, -0.6482]])

In [157]:
y

tensor([2, 1])

In [158]:
nn.MultiMarginLoss(margin=0.9, p=2)(x, y)

tensor(3.4462)

In [159]:
x = x.numpy()
y = y.numpy()
p=2
margin=0.9

In [160]:
lst = []
for k in range(len(x)):
    sm = 0
    for i in range(len(x[k])):
        if i!= y[k]:
            sm += max(0, (margin - x[k, y[k]] + x[k, i])**p)
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

([1.7560256290751362, 5.136282128949668], 3.446153879012402)

# TripletMarginLoss

In [162]:
x1 = torch.randn(2, 3)
x2 = torch.randn(2, 3)
x3 = torch.randn(2, 3)
margin = 0.9
p = 2

x1

tensor([[-0.2179, -1.5578,  0.0271],
        [ 1.4947,  1.5312, -1.1462]])

In [163]:
nn.TripletMarginLoss(margin=margin, p=p)(x1, x2, x3)

tensor(0.0265)

In [164]:
x1 = x1.numpy()
x2 = x2.numpy()
x3 = x3.numpy()

In [165]:
def d(x1, x2, p):
    return sum((x1-x2)**p)**(1/p)

In [166]:
lst = []
for k in range(len(x1)):
    sm = 0
    for i in range(len(x1[k])):
        sm += max(d(x1[k], x2[k], p)-d(x1[k], x3[k], p)+margin, 0) 
    lst.append(sm/len(x1[k]))

lst, np.mean(lst)

([0.0529652801530639, 0.0], 0.02648264007653195)

# References
- https://pytorch.org/docs/0.4.0/nn.html#loss-functions
- https://blog.csdn.net/zhangxb35/article/details/72464152