## MSELoss

In [1]:
import torch
import numpy as np

In [6]:
class MSELoss:
    """Mean-squared-error loss with a hand-written backward pass.

    The forward call caches both operands so that ``backward`` can
    produce the gradients afterwards.
    """

    def __init__(self):
        # Operands of the most recent forward call (None until first call).
        self.x = self.y = None

    def __call__(self, x, y):
        """Cache ``x`` and ``y`` and return ``sum((x - y)**2) / x.size``."""
        self.x, self.y = x, y
        residual = x - y
        return np.sum(np.square(residual)) / x.size

    def backward(self):
        """Return ``(d_loss/dx, d_loss/dy)`` for the cached operands.

        The two gradients differ only in sign because the loss depends
        on ``x - y``.
        """
        n_elements = self.x.size
        residual = self.x - self.y
        grad_x = 2 * residual / n_elements
        return grad_x, -grad_x

In [7]:
# Compact array printing and a fixed seed so the comparison is reproducible.
np.set_printoptions(precision=6, suppress=True, linewidth=80)
np.random.seed(123)

# x is drawn before y, so the seeded stream yields the same pair every run.
x_numpy, y_numpy = np.random.random(27), np.random.random(27)

# NumPy path: hand-written forward and backward.
loss_func_numpy = MSELoss()
loss_numpy = loss_func_numpy(x_numpy, y_numpy)
dx_numpy, dy_numpy = loss_func_numpy.backward()

# PyTorch path: autograd reference on the same data.
x_torch = torch.tensor(x_numpy, requires_grad=True)
y_torch = torch.tensor(y_numpy, requires_grad=True)
loss_func_torch = torch.nn.MSELoss().float()
loss_torch = loss_func_torch(x_torch, y_torch)
loss_torch.backward()

In [8]:
# Each group prints the NumPy result first, then the PyTorch reference:
# loss, gradient w.r.t. x, gradient w.r.t. y.
print(loss_numpy, loss_torch.detach().numpy(), sep="\n")
print("----------")
print(dx_numpy, x_torch.grad.numpy(), sep="\n")
print("----------")
print(dy_numpy, y_torch.grad.numpy(), sep="\n")

0.11696011656631419
0.1169601165663142
----------
[ 0.034682 -0.000561 -0.029935  0.034016  0.021168 -0.000575  0.03608
  0.019185  0.012494 -0.002536 -0.040756 -0.015934 -0.004686 -0.041798
  0.02092   0.031164 -0.01721  -0.051175  0.020822  0.003614 -0.026012
  0.02444   0.008264  0.036326 -0.007696 -0.020748 -0.013576]
[ 0.034682 -0.000561 -0.029935  0.034016  0.021168 -0.000575  0.03608
  0.019185  0.012494 -0.002536 -0.040756 -0.015934 -0.004686 -0.041798
  0.02092   0.031164 -0.01721  -0.051175  0.020822  0.003614 -0.026012
  0.02444   0.008264  0.036326 -0.007696 -0.020748 -0.013576]
----------
[-0.034682  0.000561  0.029935 -0.034016 -0.021168  0.000575 -0.03608
 -0.019185 -0.012494  0.002536  0.040756  0.015934  0.004686  0.041798
 -0.02092  -0.031164  0.01721   0.051175 -0.020822 -0.003614  0.026012
 -0.02444  -0.008264 -0.036326  0.007696  0.020748  0.013576]
[-0.034682  0.000561  0.029935 -0.034016 -0.021168  0.000575 -0.03608
 -0.019185 -0.012494  0.002536  0.040756  0.015

## cross-entropy

In [9]:
import torch
import numpy as np

In [16]:
class Entropy:
    """Cross-entropy ``sum(-ny * log(nx))`` with its gradient w.r.t. ``nx``.

    ``loss`` caches its operands; ``backward`` uses them to compute the
    element-wise gradient ``-ny / nx``.
    """

    def __init__(self):
        # Cached operands of the last ``loss`` call and the last gradient.
        self.nx = self.ny = self.ndx = None

    def loss(self, nx, ny):
        """Cache ``nx`` and ``ny`` and return the summed cross-entropy.

        ``nx`` is passed to ``np.log``; ``ny`` weights each log term.
        """
        self.nx, self.ny = nx, ny
        weighted_log = -ny * np.log(nx)
        return np.sum(weighted_log)

    def backward(self):
        """Return (and cache in ``self.ndx``) the gradient of the loss w.r.t. ``nx``."""
        gradient = -self.ny / self.nx
        self.ndx = gradient
        return gradient

In [17]:
# Compact printing for the 5x10 matrices and a fixed seed for reproducibility.
np.set_printoptions(precision=3, suppress=True, linewidth=120)
np.random.seed(123)

# x is drawn before y so the seeded stream yields the same pair every run.
x = np.random.random((5, 10))
y = np.random.random((5, 10))

# NumPy path: hand-written loss and gradient.
entropy = Entropy()
loss_numpy = entropy.loss(x, y)
grad_numpy = entropy.backward()

# PyTorch path: the same expression, differentiated by autograd.
x_tensor = torch.tensor(x, requires_grad=True)
y_tensor = torch.tensor(y, requires_grad=True)
loss_tensor = torch.sum(-y_tensor * torch.log(x_tensor))
loss_tensor.backward()
grad_tensor = x_tensor.grad

In [19]:
# Scalar losses side by side, then the two gradient matrices.
print("Python Loss :", loss_numpy)
print("PyTorch Loss :", loss_tensor.detach().numpy())

print("\nPython dx :", grad_numpy, sep="\n")
print("\nPyTorch dx :", grad_tensor.detach().numpy(), sep="\n")

Python Loss : 22.609416116382963
PyTorch Loss : 22.60941611638296

Python dx :
[[ -0.173  -2.888  -2.658  -0.989  -0.476  -0.719  -0.425  -0.995  -1.82   -1.302]
 [ -1.95   -0.804  -1.425 -11.306  -2.116  -0.113  -4.185  -1.389  -0.365  -1.076]
 [ -0.151  -1.042  -0.866  -1.184  -0.022  -1.841  -1.539  -0.696  -0.521  -1.102]
 [ -3.461  -1.596  -1.287  -0.788  -2.173  -2.695  -0.838  -0.049  -0.323  -0.793]
 [ -1.13   -8.609  -1.122  -1.838  -0.685  -2.762  -0.313  -0.405  -0.464  -0.56 ]]

PyTorch dx :
[[ -0.173  -2.888  -2.658  -0.989  -0.476  -0.719  -0.425  -0.995  -1.82   -1.302]
 [ -1.95   -0.804  -1.425 -11.306  -2.116  -0.113  -4.185  -1.389  -0.365  -1.076]
 [ -0.151  -1.042  -0.866  -1.184  -0.022  -1.841  -1.539  -0.696  -0.521  -1.102]
 [ -3.461  -1.596  -1.287  -0.788  -2.173  -2.695  -0.838  -0.049  -0.323  -0.793]
 [ -1.13   -8.609  -1.122  -1.838  -0.685  -2.762  -0.313  -0.405  -0.464  -0.56 ]]


## softmax

In [20]:
import numpy as np
import torch

In [41]:
class Softmax:
    """Softmax normalised over all elements of the input, with a backward pass.

    The forward result is cached in ``self.softmax``; ``get_grad`` builds
    the Jacobian from it and ``backward`` applies that Jacobian to an
    upstream gradient.
    """

    def __init__(self):
        self.softmax = None  # output of the last forward call
        self.grad = None     # Jacobian built by get_grad()
        self.dnx = None      # gradient returned by the last backward()

    def __call__(self, nx):
        """Return softmax(nx); the global max is subtracted before ``exp``."""
        exps = np.exp(nx - np.max(nx))
        self.softmax = exps / np.sum(exps)
        return self.softmax

    def get_grad(self):
        """Build and cache the Jacobian: s_i on the diagonal minus s_i * s_j everywhere."""
        probs = self.softmax
        jacobian = probs[:, np.newaxis] * probs[np.newaxis, :]
        # Subtract s_i from every diagonal entry in one indexed update.
        diag = np.arange(len(jacobian))
        jacobian[diag, diag] -= probs
        self.grad = -jacobian
        return self.grad

    def backward(self, dl):
        """Return sum_j grad[i, j] * dl[j], the gradient w.r.t. the input."""
        self.get_grad()
        self.dnx = (self.grad * dl).sum(axis=1)
        return self.dnx

In [45]:
# Print format and seed (no random draws happen in this cell).
np.set_printoptions(precision=8, suppress=True, linewidth=120)
np.random.seed(123)

# Inputs 1..9 and an upstream gradient 11..19, both as float arrays.
x_numpy = np.arange(1, 10, dtype=float)
d_loss = np.arange(11, 20, dtype=float)

# NumPy path: hand-written forward and backward.
softmax_numpy = Softmax()
soft_numpy = softmax_numpy(x_numpy)
x_grad_numpy = softmax_numpy.backward(d_loss)

# PyTorch path: autograd reference with the same upstream gradient.
x_tensor = torch.tensor(x_numpy, requires_grad=True)
d_loss_tensor = torch.tensor(d_loss, requires_grad=True)
soft_tensor = torch.nn.functional.softmax(x_tensor, dim=0)
soft_tensor.backward(d_loss_tensor)
x_grad_tensor = x_tensor.grad

In [46]:
# Softmax outputs (NumPy, then PyTorch), a blank line, then the input gradients.
print(soft_numpy, soft_tensor.detach().numpy(), sep="\n", end="\n\n")
print(x_grad_numpy, x_grad_tensor.detach().numpy(), sep="\n")

[0.00021208 0.00057649 0.00156706 0.00425972 0.01157912 0.03147531 0.08555877 0.23257286 0.63219858]
[0.00021208 0.00057649 0.00156706 0.00425972 0.01157912 0.03147531 0.08555877 0.23257286 0.63219858]

[-0.00157344 -0.00370057 -0.00849213 -0.01882428 -0.03959057 -0.07614301 -0.12141937 -0.09747922  0.36722258]
[-0.00157344 -0.00370057 -0.00849213 -0.01882428 -0.03959057 -0.07614301 -0.12141937 -0.09747922  0.36722258]


## softmax + cross-entropy

In [47]:
import torch
import numpy as np

In [48]:
class softmaxEntropy:
    """Fused softmax + cross-entropy loss with an analytic gradient.

    Forward: ``loss = sum(-ny * log_softmax(nx))`` where the softmax is
    normalised over all elements of ``nx``.
    Backward: ``d_loss/d_nx = softmax(nx) * sum(ny) - ny``.
    """

    def __init__(self):
        self.nx = None       # logits of the last forward call
        self.ny = None       # target weights of the last forward call
        self.ndx = None      # gradient returned by the last backward()
        self.softmax = None  # softmax(nx) cached for backward()
        self.entropy = None  # element-wise loss terms
        self.loss = None     # scalar loss of the last forward call

    def __call__(self, nx, ny):
        """Cache the operands and return the scalar loss.

        Args:
            nx: array of logits.
            ny: array of target weights, broadcastable against ``nx``.

        Returns:
            The sum over all elements of ``-ny * log_softmax(nx)``.
        """
        self.nx = nx
        self.ny = ny
        shifted_x = nx - np.max(nx)
        ex = np.exp(shifted_x)
        sum_ex = np.sum(ex)
        self.softmax = ex / sum_ex
        # Compute log-softmax directly as shifted_x - log(sum_ex) rather than
        # log(softmax): for widely spread logits the softmax underflows to 0,
        # and log(0) = -inf would turn the loss into inf/nan. sum_ex >= 1
        # because the largest shifted logit is 0, so this log is always finite.
        log_softmax = shifted_x - np.log(sum_ex)
        self.entropy = - log_softmax * ny
        self.loss = np.sum(self.entropy)
        return self.loss

    def backward(self):
        """Return (and cache in ``self.ndx``) the gradient of the loss w.r.t. ``nx``."""
        # The multiplication already allocates a new array, so the cached
        # softmax is not modified.
        self.ndx = self.softmax * np.sum(self.ny) - self.ny
        return self.ndx

In [50]:
np.set_printoptions(precision=8, suppress=True, linewidth=120)

# Use the builtin ``float`` as dtype: the ``np.float`` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x_numpy = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=float)
y_numpy = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], dtype=float)
x_tensor = torch.tensor(x_numpy, requires_grad=True)
y_tensor = torch.tensor(y_numpy)

# NumPy path: hand-written fused softmax + cross-entropy.
softmax_entropy_loss = softmaxEntropy()
loss_numpy = softmax_entropy_loss(x_numpy, y_numpy)
dx_numpy = softmax_entropy_loss.backward()

# PyTorch path: LogSoftmax followed by the weighted sum, differentiated by autograd.
log_softmax_loss = torch.nn.LogSoftmax(dim=0)
log_softmax_tensor = log_softmax_loss(x_tensor)
entropy_tensor = - log_softmax_tensor * y_tensor
loss_tensor = entropy_tensor.sum()
loss_tensor.backward()
dx_tensor = x_tensor.grad

# Losses first, then the gradients w.r.t. x (NumPy, then PyTorch).
print(loss_numpy)
print(loss_tensor.data.numpy())
print()
print(dx_numpy)
print(dx_tensor.data.numpy())

14.063482775853203
14.063482775853203

[-0.09904564 -0.19740579 -0.29294821 -0.38083126 -0.44789396 -0.45836109 -0.31498552  0.24657787  1.9448936 ]
[-0.09904564 -0.19740579 -0.29294821 -0.38083126 -0.44789396 -0.45836109 -0.31498552  0.24657787  1.9448936 ]
