In [2]:
'''
Tensor basics: ways to create tensors, their attributes, and common operations.
'''
import torch
import numpy as np

# --- Creation -------------------------------------------------------------
# The same 2x2 payload as a Python list, a NumPy array, and two tensors.
data = [[1, 2], [3, 4]]
np_array = np.array(data)
x_data = torch.tensor(data)
x_np = torch.from_numpy(np_array)

# "*_like" factories reuse the shape of an existing tensor.
x_ones = torch.ones_like(x_data)  # same dtype as x_data
x_rand = torch.rand_like(x_data, dtype=torch.float)  # dtype explicitly overridden

# Factories driven by an explicit shape tuple.
shape = (2, 3)
rand_tensor = torch.rand(shape)
ones_tensor = torch.ones(shape)
zeros_tensor = torch.zeros(shape)

# --- Device placement -----------------------------------------------------
tensor = torch.rand(3, 4)
if torch.cuda.is_available():
    # Move the tensor to the GPU when one is present.
    tensor = tensor.to('cuda')
    print(f"Device tensor is stored on: {tensor.device}\n")

# Stack three copies along dim 0 (rows); dim=1 would join along columns.
t1 = torch.cat([tensor] * 3, dim=0)

# --- Reporting ------------------------------------------------------------
# Type and value of each representation, each followed by a blank line.
for sample in (data, x_data, np_array, x_np):
    print(type(sample), sample, "", sep="\n")

print(f"Ones Tensor: \n {x_ones} \n")
print(f"Random Tensor: \n {x_rand} \n")
print(f"Random Tensor: \n {rand_tensor} \n")
print(f"Ones Tensor: \n {ones_tensor} \n")
print(f"Zeros Tensor: \n {zeros_tensor}\n")
print(f"Shape of rand_tensor: \n {rand_tensor.shape}\n")

# Attributes of the (possibly GPU-resident) tensor.
print(f"Shape of tensor: {tensor.shape}")
print(f"Datatype of tensor: {tensor.dtype}")
print(f"Device tensor is stored on: {tensor.device}\n")
print(f"{t1}\n")

# Element-wise product, method form and operator form.
print(f"tensor.mul(tensor) \n {tensor.mul(tensor)} \n")
print(f"tensor * tensor \n {tensor * tensor}\n")

# Matrix product with the transpose, method form and operator form.
print(f"tensor.matmul(tensor.T) \n {tensor.matmul(tensor.T)} \n")
print(f"tensor @ tensor.T \n {tensor @ tensor.T}\n")

# In-place addition: the trailing underscore means `tensor` itself is modified.
print(tensor, "\n")
tensor.add_(5)
print(tensor)

Device tensor is stored on: cuda:0

<class 'list'>
[[1, 2], [3, 4]]

<class 'torch.Tensor'>
tensor([[1, 2],
        [3, 4]])

<class 'numpy.ndarray'>
[[1 2]
 [3 4]]

<class 'torch.Tensor'>
tensor([[1, 2],
        [3, 4]])

Ones Tensor: 
 tensor([[1, 1],
        [1, 1]]) 

Random Tensor: 
 tensor([[0.7630, 0.0363],
        [0.2244, 0.7537]]) 

Random Tensor: 
 tensor([[0.9191, 0.3500, 0.8524],
        [0.6025, 0.1849, 0.6946]]) 

Ones Tensor: 
 tensor([[1., 1., 1.],
        [1., 1., 1.]]) 

Zeros Tensor: 
 tensor([[0., 0., 0.],
        [0., 0., 0.]])

Shape of rand_tensor: 
 torch.Size([2, 3])

Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cuda:0

tensor([[0.5874, 0.6082, 0.0659, 0.0480],
        [0.0012, 0.1443, 0.8151, 0.5404],
        [0.7090, 0.5043, 0.4078, 0.3298],
        [0.5874, 0.6082, 0.0659, 0.0480],
        [0.0012, 0.1443, 0.8151, 0.5404],
        [0.7090, 0.5043, 0.4078, 0.3298],
        [0.5874, 0.6082, 0.0659, 0.0480],

In [5]:
'''
How torch.autograd works
'''

import torch
from torchvision.models import resnet18, ResNet18_Weights


def _param_at(module, index=1):
    """Return the parameter found at position ``index`` of ``module.parameters()``."""
    return list(module.parameters())[index]


# Pretrained ResNet-18, one random 3x64x64 image and a random 1000-wide target.
model = resnet18(weights=ResNet18_Weights.DEFAULT)
data = torch.rand(1, 3, 64, 64)
labels = torch.rand(1, 1000)
print(labels)

# No backward pass has run yet, so the inspected parameter's .grad is printed as-is.
print("model parameter gradients before backwards propagation\n")
print(f"{_param_at(model).grad}\n")

# Forward pass, scalar loss, then backpropagation to populate .grad.
prediction = model(data)
loss = (prediction - labels).sum()
loss.backward()

print("model param grads after backprop\n")
print(f"{_param_at(model).grad}\n")

print("model parameter values before gradient descent propagation\n")
print(f"{_param_at(model).data}\n")

# One SGD step using the gradients computed above.
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
optim.step()

print("model parameter values after gradient descent propagation\n")
print(f"{_param_at(model).data}\n")

tensor([[1.5355e-02, 3.7194e-01, 4.8769e-01, 2.5854e-01, 1.0458e-01, 3.5576e-01,
         8.5156e-01, 3.8040e-01, 7.2108e-01, 6.1770e-02, 4.8500e-01, 7.7654e-01,
         1.6560e-01, 7.3190e-01, 4.6175e-01, 8.2499e-01, 9.1457e-01, 1.2807e-01,
         3.8200e-01, 8.9577e-01, 4.0654e-02, 5.8433e-02, 4.5897e-01, 8.9397e-01,
         8.9211e-01, 7.8768e-01, 7.3600e-01, 7.1633e-01, 8.9713e-01, 8.2204e-01,
         4.7312e-01, 4.4901e-01, 6.0948e-01, 1.0111e-01, 6.3374e-01, 4.4760e-01,
         9.8110e-01, 7.2244e-01, 4.9036e-01, 2.5769e-01, 2.1482e-01, 8.5950e-01,
         7.2001e-01, 7.5535e-01, 4.1619e-01, 8.6011e-01, 3.4187e-02, 6.6285e-01,
         6.0646e-01, 2.8766e-01, 6.5285e-01, 2.8390e-01, 7.9110e-01, 2.1669e-01,
         9.9682e-01, 3.9596e-01, 2.9570e-02, 4.9033e-01, 7.5687e-01, 3.0433e-01,
         2.0902e-01, 8.2820e-01, 7.6873e-02, 5.9432e-01, 8.2243e-02, 9.3149e-01,
         2.1099e-01, 2.0438e-01, 8.5684e-01, 5.9310e-01, 6.0987e-01, 8.9900e-01,
         8.7055e-02, 8.9929e

In [32]:
'''
Differentiation in autograd, checked against hand-derived gradients.
'''

import torch

# Two leaf tensors whose gradients autograd should track.
a = torch.tensor([2., 3.], requires_grad=True)
b = torch.tensor([6., 4.], requires_grad=True)

# Q = 3a^3 - b^2, evaluated element-wise.
Q = 3 * a.pow(3) - b.pow(2)
print(Q)

# Q is a vector, so backward() needs an explicit upstream gradient of the
# same shape; all ones weights each component of Q equally.
external_grad = torch.ones(2)
Q.backward(gradient=external_grad)

# Compare the collected gradients with the analytic ones:
# dQ/da = 9a^2 and dQ/db = -2b.
print(torch.eq(9 * a**2, a.grad))
print(torch.eq(-2 * b, b.grad))
print(Q)

tensor([-12.,  65.], grad_fn=<SubBackward0>)
tensor([True, True])
tensor([True, True])
tensor([-12.,  65.], grad_fn=<SubBackward0>)


In [2]:
'''
How to freeze (part of) a CNN
'''

from torch import nn, optim
from torchvision.models import resnet18, ResNet18_Weights

model = resnet18(weights=ResNet18_Weights.DEFAULT)

# Tip: iterate over model.named_parameters() to inspect the layer names.

# Freeze all the parameters in the network
for param in model.parameters():
    param.requires_grad = False

# Swap in a fresh 10-class head. A newly constructed nn.Linear is trainable by
# default, so it is the only part of the model that will receive gradients.
# The input width is read from the head being replaced instead of hard-coding it.
model.fc = nn.Linear(model.fc.in_features, 10)

# Optimize only the classifier: hand the optimizer just the head's parameters
# so the frozen backbone is not carried in its parameter groups.
optimizer = optim.SGD(model.fc.parameters(), lr=1e-2, momentum=0.9)

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Small LeNet-style CNN: two conv + max-pool stages, then three linear layers.

    The layer sizes assume a single-channel 32x32 input, which yields the
    16 * 5 * 5 = 400 features expected by ``fc1``; the output is (N, 10).
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: 1 -> 6 -> 16 channels, both with 5x5 kernels.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Classifier: affine maps y = Wx + b. 16 channels of 5x5 spatial
        # positions are flattened into 400 input features.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, input):
        """Run the network on a batch and return the raw (N, 10) outputs."""
        # C1 + S2: conv -> ReLU gives (N, 6, 28, 28); 2x2 max-pool gives (N, 6, 14, 14).
        x = F.max_pool2d(F.relu(self.conv1(input)), (2, 2))
        # C3 + S4: conv -> ReLU gives (N, 16, 10, 10); 2x2 max-pool gives (N, 16, 5, 5).
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Keep the batch dimension and flatten the rest: (N, 400).
        x = torch.flatten(x, 1)
        # F5: (N, 400) -> (N, 120) with ReLU.
        x = F.relu(self.fc1(x))
        # F6: (N, 120) -> (N, 84) with ReLU.
        x = F.relu(self.fc2(x))
        # Output layer: (N, 84) -> (N, 10), no activation applied.
        return self.fc3(x)


# Build one instance and show its layer summary.
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [27]:
# Materialize the network's learnable tensors so they can be counted and indexed.
# Net has 5 layers (2 conv + 3 linear), each with a weight and a bias.
params = list(net.parameters())
print(len(params))
print(params[0].size())  # conv1's .weight

10
torch.Size([6, 1, 5, 5])


In [31]:
from PIL import Image
from torchvision import transforms as transform

# Read the image from disk (path is relative to the notebook's working directory).
im = Image.open('./eagle.jpg')

# PIL image -> tensor, cast to float, then flag it so autograd tracks
# operations on it (requires_grad_ needs a floating-point tensor).
imTensor = transform.functional.pil_to_tensor(im).float().requires_grad_(True)
print(imTensor)

tensor([[[37., 36., 35.,  ..., 51., 52., 52.],
         [38., 37., 37.,  ..., 51., 51., 52.],
         [39., 39., 38.,  ..., 52., 51., 51.],
         ...,
         [89., 89., 89.,  ..., 26., 30., 32.],
         [88., 88., 88.,  ..., 31., 35., 37.],
         [87., 88., 88.,  ..., 33., 37., 39.]],

        [[47., 46., 44.,  ..., 59., 60., 60.],
         [48., 47., 46.,  ..., 59., 59., 60.],
         [49., 49., 48.,  ..., 60., 59., 59.],
         ...,
         [84., 84., 84.,  ..., 27., 29., 31.],
         [83., 83., 83.,  ..., 32., 34., 36.],
         [82., 83., 83.,  ..., 34., 36., 38.]],

        [[56., 55., 53.,  ..., 61., 62., 62.],
         [57., 56., 55.,  ..., 61., 61., 62.],
         [58., 58., 57.,  ..., 62., 61., 61.],
         ...,
         [54., 54., 54.,  ..., 32., 37., 39.],
         [53., 53., 53.,  ..., 37., 40., 42.],
         [52., 53., 53.,  ..., 39., 42., 44.]]], requires_grad=True)


In [20]:
# Random single-image batch laid out as (batch=1, channels=1, height=32, width=32),
# matching the 1-channel conv1 and the 16 * 5 * 5 features fc1 expects.
# NOTE(review): `input` shadows the Python builtin; the name is kept because
# later cells feed this same variable to the network.
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor([[ 0.0913,  0.0627,  0.0170, -0.0013, -0.0475, -0.0181,  0.0994, -0.0417,
         -0.0979, -0.1149]], grad_fn=<AddmmBackward0>)


In [21]:
# Reset the parameters' gradients, then backpropagate a random upstream
# gradient. `out` is not a scalar, so backward() needs an explicit gradient
# tensor; (1, 10) matches the shape of the network output.
net.zero_grad()
out.backward(torch.randn(1, 10))

In [22]:
# Fresh forward pass so the loss gets its own autograd graph.
output = net(input)

# Dummy regression target: 10 random values reshaped to (1, 10) so it lines
# up with the shape of `output`.
target = torch.randn(10).view(1, -1)

# Mean-squared error between prediction and target.
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)

tensor(1.0830, grad_fn=<MseLossBackward0>)


In [23]:
# Walk the autograd graph backwards from the loss, following the first input
# of each node.
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # Linear (shown as AddmmBackward0)
# NOTE: the recorded output for the next line is an AccumulateGrad node, not
# ReLU — the first input of the Addmm node is a leaf tensor (presumably the
# layer's bias; confirm), so this hop does not reach the activation.
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # AccumulateGrad

<MseLossBackward0 object at 0x73f1a5bc63b0>
<AddmmBackward0 object at 0x73f1a5bc4b50>
<AccumulateGrad object at 0x73f1a5bc4b50>


In [24]:
# Reset the gradients of every parameter so the values printed afterwards come
# from this backward pass only (the recorded output shows None at this point).
net.zero_grad()
print('conv1.bias.grad before backward', net.conv1.bias.grad, sep='\n')

# Backpropagate the loss; conv1's bias gradient is populated as a result.
loss.backward()
print('conv1.bias.grad after backward', net.conv1.bias.grad, sep='\n')

conv1.bias.grad before backward
None
conv1.bias.grad after backward
tensor([-0.0094,  0.0021, -0.0162, -0.0068, -0.0159, -0.0153])


In [25]:
# Hand-written SGD step: weight = weight - learning_rate * gradient.
learning_rate = 0.01

# Update under no_grad() rather than through `.data`: the update itself is not
# recorded in the graph, but autograd still sees that the tensor was modified
# in place, so a stale graph raises instead of silently using old values.
with torch.no_grad():
    for f in net.parameters():
        # Parameters that took no part in the last backward pass have no
        # gradient; leave them untouched instead of failing on None.
        if f.grad is None:
            continue
        f.sub_(f.grad * learning_rate)

In [26]:
import torch.optim as optim

# create your optimizer: plain SGD over every parameter of the network
optimizer = optim.SGD(net.parameters(), lr=0.01)

# in your training loop (a single iteration is shown here):
optimizer.zero_grad()   # zero the gradient buffers so old gradients do not accumulate
output = net(input)     # forward pass
loss = criterion(output, target)  # compare the prediction with the target
loss.backward()         # backpropagate to fill each parameter's .grad
optimizer.step()    # Does the update