### define a class

In [18]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [20]:
class Net(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Subclassing ``nn.Module`` registers the layers assigned in ``__init__`` as
    sub-modules, so their weights and biases show up in ``parameters()``.

    Args:
        nr_hidden: number of units in the hidden layer.
        nr_in: size of each input feature vector (defaults to 10).
        nr_out: size of each output vector (defaults to 10).
    """

    def __init__(self, nr_hidden, nr_in=10, nr_out=10):
        # nn.Module.__init__ must run before any layer is assigned as an attribute.
        super().__init__()
        # fully connected layer: nr_in input nodes -> nr_hidden hidden nodes
        self.fcl1 = nn.Linear(nr_in, nr_hidden)
        # fully connected layer: nr_hidden hidden nodes -> nr_out output nodes
        self.fcl2 = nn.Linear(nr_hidden, nr_out)

    def forward(self, x):
        """Run one forward pass.

        Args:
            x: input features whose last dimension has size ``nr_in``.

        Returns:
            The output of the second linear layer (last dimension ``nr_out``);
            no activation is applied to it.
        """
        hidden = F.relu(self.fcl1(x))  # relu: rectified linear unit
        return self.fcl2(hidden)

In [35]:
# instantiate the network with a hidden layer of 100 units
net = Net(nr_hidden=100)

### Key building blocks for optimizing the parameters of the model

In [36]:
# nn.Module.parameters() returns an iterator over the learnable tensors;
# collect them into a list so they can be indexed and reused below
params = [p for p in net.parameters()]
params[0]  # display the first parameter tensor

Parameter containing:
tensor([[-0.1379,  0.0740,  0.0613,  0.1706, -0.1336,  0.2667,  0.2037,  0.2657,
          0.3153,  0.2470],
        [-0.2808,  0.2118, -0.0219,  0.1452, -0.0506, -0.1045,  0.1048, -0.1641,
          0.2527, -0.2164],
        [-0.2917, -0.0732,  0.3020, -0.2936, -0.1194, -0.3004,  0.1378, -0.2207,
          0.2732,  0.0831],
        [ 0.2646, -0.1935, -0.0147,  0.0183, -0.2405,  0.1116, -0.0556, -0.2133,
         -0.0530,  0.1360],
        [ 0.2595, -0.2146,  0.2527, -0.3110, -0.0637, -0.2077, -0.3146,  0.2262,
          0.1218,  0.2614],
        [ 0.1955, -0.1347,  0.0704,  0.0945, -0.2789,  0.0539,  0.0954,  0.1961,
          0.2868, -0.3127],
        [ 0.3035,  0.2805, -0.2137, -0.1314,  0.1587, -0.2057, -0.0884, -0.1857,
         -0.0284,  0.1338],
        [-0.0233,  0.2245, -0.2470,  0.1978, -0.1255,  0.1971,  0.0121, -0.2260,
         -0.1676, -0.0915],
        [ 0.0531,  0.1576,  0.2063, -0.0602,  0.0722,  0.0258, -0.1116, -0.2604,
         -0.1658, -0.3031

#### pass the input through the forward function

In [37]:
# One random sample with 10 features (shape: 1 x 10).
# NOTE: the name `input` shadows the Python builtin; it is kept because the
# training-loop cells further down refer to it.
input = torch.randn(1, 10)

# Call the module itself instead of net.forward(input): nn.Module.__call__
# invokes forward() implicitly and also runs any registered hooks, so a single
# net(input) call is both sufficient and the recommended form.
output = net(input)
output


tensor([[ 0.0590, -0.0152, -0.1902,  0.2802, -0.2360,  0.2543,  0.0207, -0.1116,
          0.1063,  0.1237]], grad_fn=<AddmmBackward0>)

#### loss function computation: MSE

In [38]:
target = torch.randn(1, 10)  # random target with the same shape as the network output
criterion = nn.MSELoss()  # mean squared error loss
# argument order matters by convention: prediction first, target second
loss = criterion(output, target)

#### Gradient of the loss w.r.t. the parameters (the leaf nodes of the computational graph)

In [39]:
# backpropagation: computes d(loss)/d(param) for every parameter
loss.backward()

# each gradient is stored on the parameter's `grad` attribute
for p in params:
    print(p.grad)

tensor([[ 1.5774e-03,  4.0001e-04, -2.1005e-03, -4.1677e-04, -5.4968e-04,
         -5.4458e-04,  1.6546e-04, -1.2733e-03,  4.5772e-04,  2.2328e-04],
        [-0.0000e+00, -0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00, -0.0000e+00,  0.0000e+00, -0.0000e+00, -0.0000e+00],
        [ 3.2504e-03,  8.2425e-04, -4.3283e-03, -8.5880e-04, -1.1327e-03,
         -1.1222e-03,  3.4095e-04, -2.6237e-03,  9.4317e-04,  4.6009e-04],
        [-0.0000e+00, -0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00, -0.0000e+00,  0.0000e+00, -0.0000e+00, -0.0000e+00],
        [-4.4850e-02, -1.1373e-02,  5.9723e-02,  1.1850e-02,  1.5629e-02,
          1.5484e-02, -4.7045e-03,  3.6202e-02, -1.3014e-02, -6.3484e-03],
        [-9.0588e-03, -2.2971e-03,  1.2063e-02,  2.3934e-03,  3.1567e-03,
          3.1274e-03, -9.5021e-04,  7.3120e-03, -2.6285e-03, -1.2823e-03],
        [-0.0000e+00, -0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00, -0.0000e+0

#### a loop to update all parameters

##### OPT1: manual update inside `with torch.no_grad()`: the update step is not added to the COMPUTATIONAL GRAPH; written from scratch, this gives more control

In [None]:
n_iter = 100  # number of gradient-descent steps
learning_rate = 0.01  # step size of each update

for iteration in range(n_iter):
    net.zero_grad()  # clear the gradients left over from the previous backward() (not the parameters)
    output = net(input)
    loss = criterion(output, target)
    loss.backward()  # param.grad is updated
    # The in-place update must not be recorded by autograd, hence no_grad().
    with torch.no_grad():
        for param in net.parameters():
            param -= learning_rate * param.grad


##### OPT2: use a predefined optimizer from `torch.optim` instead of the manual `with torch.no_grad()` block

In [None]:
n_iter = 100  # number of optimization steps

# plain stochastic gradient descent over all parameters of the network
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

for iteration in range(n_iter):
    optimizer.zero_grad()  # clear the gradients left over from the previous backward() (not the parameters)
    output = net(input)
    loss = criterion(output, target)
    loss.backward()  # update param.grad
    optimizer.step()  # apply one update to every parameter using its .grad


#### Autograd: computing gradients