In [1]:
import torch
import numpy as np

In [2]:
torch.__version__

'1.8.1'

## Some definitions

In [3]:
a = torch.FloatTensor(3,2)

In [4]:
a

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])

In [5]:
a.zero_()

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])

In [7]:
b = torch.FloatTensor([[1,2,3], [3,2,1]])

In [8]:
b

tensor([[1., 2., 3.],
        [3., 2., 1.]])

In [9]:
b.zero_()

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [10]:
b

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [12]:
c = torch.FloatTensor([[1,2,3], [4,5,6]])


In [13]:
c.sum()

tensor(21.)

In [14]:
c.cuda()

AssertionError: Torch not compiled with CUDA enabled

## Gradients

In [15]:
v1 = torch.tensor([1.0, 1.0], requires_grad=True)
v2 = torch.tensor([2.0, 2.0])

In [16]:
v_sum = v1 + v2
v_res = (v_sum*2).sum()
v_res

tensor(12., grad_fn=<SumBackward0>)

In [None]:
"""
(v1)
    \
     (+) -- (*2) -- (sum)
    / v_sum          v_res
(v2)

"""

In [18]:
v1.is_leaf, v2.is_leaf

(True, True)

In [19]:
v_sum.is_leaf, v_res.is_leaf

(False, False)

In [20]:
v1.requires_grad

True

In [21]:
v2.requires_grad

False

In [22]:
v_res.requires_grad

True

### Calculate gradient

In [24]:
v_res.backward()

In [25]:
v1.grad

tensor([2., 2.])

This means that increasing any element of ```v1``` by one, the resulting value of ```v_res``` will grow by two. 

## NN building blocks

In [26]:
import torch.nn as nn

In [27]:
l = nn.Linear(2,5)
v = torch.FloatTensor([1,2])

In [28]:
l(v)

tensor([ 1.4664,  0.7280, -0.0961,  0.9867,  0.8560], grad_fn=<AddBackward0>)

In [29]:
l

Linear(in_features=2, out_features=5, bias=True)

In [30]:
l.state_dict()

OrderedDict([('weight',
              tensor([[ 0.2615,  0.4348],
                      [ 0.2027,  0.4945],
                      [-0.4220,  0.0338],
                      [-0.3141,  0.4801],
                      [ 0.2592,  0.0579]])),
             ('bias', tensor([ 0.3353, -0.4638,  0.2582,  0.3405,  0.4810]))])

In [38]:
(1 * 0.2615  + 2 * 0.4348 + 0.3353, 
 1 * 0.2027  + 2 * 0.4945 + (-0.4638),
 1 * -0.4220 + 2 * 0.0338 + 0.2583, 
 1 * -0.3141 + 2 * 0.4801 + 0.3405, 
 1 * 0.2592  + 2 * 0.0579 + 0.4810)

(1.4664, 0.7279, -0.09610000000000002, 0.9866000000000001, 0.856)

In [39]:
l(v)

tensor([ 1.4664,  0.7280, -0.0961,  0.9867,  0.8560], grad_fn=<AddBackward0>)

In [40]:
s = nn.Sequential(
    nn.Linear(2, 5), 
    nn.ReLU(), 
    nn.Linear(5, 20),
    nn.ReLU(),
    nn.Linear(20, 10),
    nn.Dropout(p=0.3), 
    nn.Softmax(dim=1)
)

In [41]:
s

Sequential(
  (0): Linear(in_features=2, out_features=5, bias=True)
  (1): ReLU()
  (2): Linear(in_features=5, out_features=20, bias=True)
  (3): ReLU()
  (4): Linear(in_features=20, out_features=10, bias=True)
  (5): Dropout(p=0.3, inplace=False)
  (6): Softmax(dim=1)
)

In [55]:
minibatch = torch.FloatTensor([[1,2], [3,4]])

In [56]:
s(minibatch)

tensor([[0.1762, 0.0860, 0.0850, 0.0718, 0.1025, 0.0974, 0.0864, 0.0850, 0.1363,
         0.0734],
        [0.2112, 0.0728, 0.2316, 0.0516, 0.1020, 0.0763, 0.0519, 0.0543, 0.0941,
         0.0543]], grad_fn=<SoftmaxBackward>)