In [1]:
import torch
torch.__version__

'2.1.1+cpu'

In [2]:
import numpy as np
np.__version__

'1.26.2'

In [3]:
# Build a 2x2 tensor straight from a nested Python list.
data = [[1, 2], [3, 4]]
x_data = torch.tensor(data)
x_data.ndim  # rank of the tensor (number of dimensions)

2

### From NumPy


In [4]:
# Go through NumPy: list -> ndarray -> tensor.
np_array = np.array(data)
# Wrap the NumPy array as a torch tensor.
x_np = torch.from_numpy(np_array)

### Random Tensor with the dimensions of data
### Ones Tensor with the dimensions of data

In [5]:
# Both tensors below borrow their shape from x_data.
x_ones = torch.ones_like(x_data)  # keeps x_data's dtype as well
# x_data holds integers, so the dtype is overridden to get random floats.
x_rand = torch.rand_like(x_data, dtype=torch.float32)

print(f"Ones Tensor: \n {x_ones} \n")
print(f"Random Tensor: \n {x_rand} \n")

Ones Tensor: 
 tensor([[1, 1],
        [1, 1]]) 

Random Tensor: 
 tensor([[0.7834, 0.9718],
        [0.7524, 0.0869]]) 



## Introduction to Autograd

In [6]:
from torchvision.models import resnet18, ResNet18_Weights
# Load ResNet-18 with pretrained weights. The `weights=` argument replaces the
# deprecated `pretrained=True` flag (which makes torchvision emit a warning)
# and matches how the model is loaded in the parameter-freezing section.
model = resnet18(weights=ResNet18_Weights.DEFAULT)
# One random 3-channel 224x224 input and a random target with 1000 values,
# used only to drive a single forward/backward pass.
data = torch.rand(1, 3, 224, 224)
labels = torch.rand(1, 1000)

  warn(
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /home/angel/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100.0%


Forward Pass

In [7]:
# Run the random input through the network to get its prediction.
prediction = model(data)

Backward Pass

In [8]:
# Toy loss: the element-wise difference between prediction and labels,
# summed down to a single scalar.
loss = torch.sum(prediction - labels)
# Backpropagate the scalar loss through the network.
loss.backward()

Loading an Optimizer (SGD)

In [9]:
# SGD over every parameter of the model, with learning rate 0.01 and momentum 0.9.
# NOTE(review): the name `optim` is rebound later by `from torch import nn, optim`.
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)

In [10]:
# Take one optimisation step with the SGD optimizer created above.
optim.step()

Differentiation in Autograd

In [11]:
import torch 

# Two tensors created with requires_grad=True so autograd tracks every
# operation performed on them.
a = torch.tensor([2.0, 3.0], requires_grad=True)
b = torch.tensor([6.0, 4.0], requires_grad=True)

In [12]:
# Q = 3a^3 - b^2, computed element-wise from a and b.
Q = 3*a**3 - b**2

In [13]:
# Q has two elements, so backward() is given an explicit gradient of the
# same shape; all ones here.
external_grad = torch.ones_like(Q)
Q.backward(gradient=external_grad)

In [14]:
# Sanity check: the hand-derived gradients dQ/da = 9a^2 and dQ/db = -2b
# should match what autograd stored in .grad.
print(a.grad == 9 * a**2)
print(b.grad == -2 * b)

tensor([True, True])
tensor([True, True])


## Introduction to Freezing Parameters

In [15]:
from torch import nn, optim

model = resnet18(weights=ResNet18_Weights.DEFAULT)

# Freeze the whole network: switch off requires_grad on every parameter
# in one call instead of looping over model.parameters().
model.requires_grad_(False)

In [16]:
# Swap the model's `fc` layer for a new linear layer mapping 512 features to 10 outputs.
# This layer is created after the freeze loop above, so that loop never touched it.
model.fc = nn.Linear(512, 10)

In [17]:
# Hand the optimizer only the parameters of the new `fc` layer.
optimizer = optim.SGD(model.fc.parameters(), lr=1e-2, momentum=0.9)


## Now we are getting into the good stuff
### Creating a Neural network with nn

In [18]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small convolutional network: two conv + max-pool stages, then three linear layers.

    For a 1x32x32 input the feature map is 16x5x5 by the time it is flattened
    (32 -> 28 -> 14 -> 10 -> 5 per side), which is where fc1's input size of
    16 * 5 * 5 comes from. The network produces 10 output values per sample.
    """

    def __init__(self):
        super().__init__()  # initialise nn.Module before registering any layers

        # Convolutions: 1 input image channel -> 6 channels -> 16 channels,
        # each using a 5x5 square kernel.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)

        # Affine layers; nn.Linear applies y = x A^T + b to the incoming data.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)  # 10 outputs

    def forward(self, x):
        """Run a batch `x` through the network and return the raw fc3 outputs."""
        # Convolution -> ReLU -> max pooling over a (2, 2) window.
        x = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=(2, 2))
        # For a square window a single number is enough.
        x = F.max_pool2d(F.relu(self.conv2(x)), kernel_size=2)

        # Flatten every dimension except the batch dimension.
        x = torch.flatten(x, start_dim=1)

        # The two hidden linear layers are each followed by ReLU; fc3 is not.
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)
    
# Instantiate the network and print its layer summary.
net = Net()
print(net)


Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [19]:
# Collect the learnable parameters of the network into a list.
params = [*net.parameters()]
print(len(params))
print(params[0].shape)  # conv1's weight

10
torch.Size([6, 1, 5, 5])


Trying the network on a random input

In [20]:
# One random single-channel 32x32 sample with a leading batch dimension of 1.
# NOTE(review): `input` shadows the Python builtin of the same name; the name
# is read again by a later cell (`output = net(input)`), so rename both together.
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor([[ 0.0231, -0.0506,  0.1119,  0.0956,  0.0191, -0.0771, -0.1032,  0.0847,
         -0.0130, -0.0889]], grad_fn=<AddmmBackward0>)


In [21]:
# Clear any existing gradients, then backpropagate from `out`.
# `out` is not a scalar, so backward() is given an explicit (random) gradient
# with the same (1, 10) shape.
net.zero_grad()
out.backward(torch.randn(1, 10))

**Note**:
torch.nn only supports mini-batches. The entire torch.nn package only supports inputs that are a mini-batch of samples, and not a single sample.

For example, nn.Conv2d will take in a 4D Tensor of nSamples x nChannels x Height x Width.

If you have a single sample, just use input.unsqueeze(0) to add a fake batch dimension.

### Loss Function

In [22]:
output = net(input)

# A dummy target, for example: 10 random values reshaped to a single row so
# it has the same shape as the output.
target = torch.randn(10).view(1, -1)
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)

tensor(0.7787, grad_fn=<MseLossBackward0>)


In [23]:
# Walk the autograd graph backwards from the loss, one node per line.
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # Linear (shows up as an Addmm node)
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # an AccumulateGrad node, not ReLU (presumably a parameter of the last linear layer)

<MseLossBackward0 object at 0x7f652513f280>
<AddmmBackward0 object at 0x7f652513d5a0>
<AccumulateGrad object at 0x7f652513f280>


### Backpropagation

In [24]:
net.zero_grad() # clears the gradient buffers of all parameters
print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)  # prints None in the recorded run

loss.backward()  # backpropagate the loss computed in the loss-function cell

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)  # now a tensor with one entry per conv1 output channel

conv1.bias.grad before backward
None
conv1.bias.grad after backward
tensor([-0.0035,  0.0063,  0.0007,  0.0040,  0.0075,  0.0137])


Update the weights

In [27]:
# Manual SGD step: weight = weight - learning_rate * gradient.
learning_rate = 0.01
# Update in place under no_grad() rather than through `.data`: the update is
# still kept out of the autograd graph, but autograd can see that the tensors
# were modified instead of being silently bypassed.
with torch.no_grad():
    for param in net.parameters():
        # Parameters that received no gradient (e.g. frozen or unused ones)
        # have .grad set to None and are left untouched.
        if param.grad is None:
            continue
        param.sub_(param.grad * learning_rate)
