## Custom Loss

- use nn.Module

- use functions

- use direct arithmetic operations

### Example using nn.Module
https://spandan-madan.github.io/A-Collection-of-important-tasks-in-pytorch/

Here I show a custom loss called Regress_Loss

- it takes two tensors as input, x and y.
- it checks that x and y have the same shape (the version below asserts this rather than reshaping x to match y).
- it finally returns the loss, computed as the sum of squared (L2) differences between x and y.


In [332]:
import torch

In [333]:
## define loss
class Regress_Loss(torch.nn.Module):
    """Sum-of-squared-errors loss between two tensors of identical shape."""

    def __init__(self):
        ## no parameters or buffers; only the base Module needs setting up
        super().__init__()

    def forward(self, Output, Target):
        """Return the summed squared difference of ``Output`` and ``Target``.

        Both tensors must have exactly the same shape; otherwise an
        ``AssertionError`` is raised. The result is a zero-dimensional tensor.
        """
        assert Output.shape == Target.shape
        residual = Output - Target
        return torch.square(residual).sum()

## pick the device here: in file order `device` is used below before any
## cell defines it; fall back to the CPU when CUDA is unavailable
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

## initialize tensors: X is the fixed all-ones tensor, Y is the tensor being optimized
X = torch.ones((1, 3), device=device, requires_grad=False)
Y = torch.randn_like(X, device=device, requires_grad=True)

## initialize loss
criterion = Regress_Loss()
lr = 0.01

## run optimization: plain gradient descent on Y
for epoch in range(500):
    ## call the module itself (not .forward) so that nn.Module hooks run
    loss = criterion(X, Y)
    loss.backward()
    with torch.no_grad():
        ## in-place update, kept out of the autograd graph
        Y -= Y.grad * lr
        ## gradients accumulate across backward() calls, so reset them
        Y.grad.zero_()
        if epoch % 100 == 99:
            print(Y)

tensor([[0.9837, 0.9752, 0.9653]], device='cuda:0', requires_grad=True)
tensor([[0.9978, 0.9967, 0.9954]], device='cuda:0', requires_grad=True)
tensor([[0.9997, 0.9996, 0.9994]], device='cuda:0', requires_grad=True)
tensor([[1.0000, 0.9999, 0.9999]], device='cuda:0', requires_grad=True)
tensor([[1.0000, 1.0000, 1.0000]], device='cuda:0', requires_grad=True)


### Custom loss with function

Note that backpropagation is handled by autograd on the tensors themselves (formerly called Variables), and not by nn.Module.

You don’t need to write an nn.Module for that; a simple function is enough.

In [334]:
## use the GPU when one is present, otherwise fall back to the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

lr = 0.01


def power(t, n):
    """Return ``t ** n`` (element-wise when ``t`` is a tensor)."""
    return t ** n


## X is the fixed all-ones tensor, Y is the tensor being optimized
X = torch.ones((1, 3), device=device, requires_grad=False)
Y = torch.randn_like(X, device=device, requires_grad=True)

for epoch in range(500):
    ## calculate loss: sum of squared differences, written as a plain function call
    loss = power(Y - X, 2.0).sum()
    loss.backward()
    ## update Y (gradient-descent step, kept out of the autograd graph)
    with torch.no_grad():
        Y -= Y.grad * lr
        Y.grad.zero_()  ## like optimizer.zero_grad()
        if epoch % 100 == 0:
            print(Y)

tensor([[ 0.4363, -1.4599,  0.0363]], device='cuda:0', requires_grad=True)
tensor([[0.9252, 0.6738, 0.8722]], device='cuda:0', requires_grad=True)
tensor([[0.9901, 0.9567, 0.9831]], device='cuda:0', requires_grad=True)
tensor([[0.9987, 0.9943, 0.9978]], device='cuda:0', requires_grad=True)
tensor([[0.9998, 0.9992, 0.9997]], device='cuda:0', requires_grad=True)


### Custom loss directly

In [356]:
# Problem dimensions for the two-layer network.
batch_size, input_dims, hidden_size, output_dims = 16, 128, 32, 8

# Random input and output tensors, laid out as (features, batch).
x = torch.randn((input_dims, batch_size), device=device)
y = torch.randn((output_dims, batch_size), device=device)

# Random initial weights, tracked by autograd so that backward() fills .grad.
w1 = torch.randn((hidden_size, input_dims), device=device).requires_grad_()
w2 = torch.randn((output_dims, hidden_size), device=device).requires_grad_()

# Step size for the manual gradient-descent updates.
learning_rate = 1e-4

In [357]:
for epoch in range(500):
    # Forward pass: matrix product, clamp at zero, second matrix product.
    hidden = torch.clamp(torch.mm(w1, x), min=0)
    y_pred = torch.mm(w2, hidden)

    # Loss written directly as tensor arithmetic: summed squared distance
    # of every prediction from the constant 1.
    # NOTE(review): a tensor `y` is created alongside `x`, but it is not used
    # here; confirm whether the target was meant to be `y` instead of 1.
    loss = torch.sum((y_pred - 1) ** 2)
    loss.backward()

    with torch.no_grad():
        for weight in (w1, w2):
            # Gradient-descent step, then clear the accumulated gradient
            # so the next backward pass starts from zero.
            weight.sub_(learning_rate * weight.grad)
            weight.grad.zero_()

        if epoch % 50 == 0:
            print(epoch, loss.item())


0 253814.78125
50 10.663934707641602
100 0.8452450633049011
150 0.0856180340051651
200 0.01071980595588684
250 0.0021665175445377827
300 0.0007943820091895759
350 0.00039197015576064587
400 0.0002094614173984155
450 0.00011387151607777923


In [370]:
T = torch.randn((5, 5))
# Shift every entry greater than 0.5 down by 10; all other entries are untouched.
above = T > 0.5
T[above] = T[above] - 10
T

tensor([[-0.8627,  0.2651, -0.9993, -9.2004, -9.4639],
        [-0.5937, -1.5699, -0.2681, -1.3841, -0.1898],
        [ 0.3971, -0.9961, -2.0465, -0.4930,  0.4887],
        [-0.5223,  0.0751, -8.9803, -1.3236, -0.7704],
        [-9.3694, -0.5970,  0.0582,  0.3238, -0.0877]])

tensor([[-0.4673,  0.4062, -0.7304,  0.0000, -0.6690],
        [-0.4327,  0.0000,  0.0000,  0.0000,  0.0000],
        [-0.7553, -0.0244, -0.6737, -0.2495, -1.5302],
        [-0.6705,  0.0000,  0.2208,  0.3384,  0.2874],
        [ 0.0000,  0.2077,  0.0000,  0.0000, -0.5580]])