In [5]:
import torch
import numpy as np


In [3]:
from torchvision.models import resnet18, ResNet18_Weights



  warn(
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /home/zhicun/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:01<00:00, 33.6MB/s]


In [4]:
model = resnet18(weights=ResNet18_Weights.DEFAULT)
data = torch.rand(1, 3, 64, 64)
labels = torch.rand(1, 1000)
prediction = model(data) # forward pass
loss = (prediction - labels).sum()
loss.backward() # callculates and stores the gradients for each model parameter in the parameter's .grid attribute
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
optim.step() # initiate gradient descent, adjust parameters by its gradient

In [37]:
# differentiation in Autograd
a = torch.tensor([2., 3.], requires_grad=True)
b = torch.tensor([6., 4.], requires_grad=True)
Q=3*a**3 - b**2
external_grad = torch.tensor([2., 2.])
Q.backward(external_grad)
print(a.grad)
print(b.grad)

tensor([ 72., 162.])
tensor([-24., -16.])


In [30]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1,6,5)
        self.conv2 = nn.Conv2d(6,16,5)
        # an affine operation: y=wx+b
        self.fc1 = nn.Linear(16*5*5, 120) # 5*5 from image dimension
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
    
    def forward(self, x):
        # max pooling over a (2,2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2,2)) # max_pool2d uses kernel_size as its default value of stride
        # if the size is a square, you can specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = torch.flatten(x,1) # flatten all dimensions except the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        return x
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [25]:
params = list(net.parameters())
print(len(params))
print(params[1].size())

10
torch.Size([6])


In [43]:
# forward
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor([[-0.0672, -0.1044, -0.0029, -0.0333,  0.0744, -0.0528, -0.0457,  0.0217,
         -0.0193, -0.1134]], grad_fn=<AddmmBackward0>)


In [44]:
net.zero_grad()
out.backward(torch.randn(1,10)) # g'(B)*Vn

In [46]:
# Loss function
output = net(input)
target = torch.randn(10)
print(target)
target = target.view(1,-1) # make it the same shape as output
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)

tensor([ 0.1726,  0.9573,  0.5493, -0.9286,  0.8087,  0.2284,  1.9443,  0.1414,
         0.5538,  0.2184])
tensor(0.7322, grad_fn=<MseLossBackward0>)


In [51]:
def print_grad_fn_chain(tensor):
    chain = []
    grad_fn = tensor.grad_fn

    while grad_fn is not None:
        chain.append(str(grad_fn).split('(')[0])
        if grad_fn.next_functions:
            grad_fn = grad_fn.next_functions[0][0]
        else:
            grad_fn = None

    print(" -> ".join(chain))

print_grad_fn_chain(loss)



<MseLossBackward0 object at 0x7fb87679aa70> -> <AddmmBackward0 object at 0x7fb87679a410> -> <AccumulateGrad object at 0x7fb87684a560>
<MseLossBackward0 object at 0x7fb876799510>


In [52]:
# Backprop
net.zero_grad()
print(net.conv1.bias.grad)

loss.backward()
print(net.conv1.bias.grad)


None
tensor([-5.9789e-03, -9.6068e-03,  8.3356e-03,  3.9717e-05, -5.2271e-04,
        -3.0644e-03])


# UPdating weights
weight = weight - learning_rate * gradient
```python
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)
```

Other different update rules: SGD, Adam, etc.
Use package `torch.optim`

In [None]:
# Updating the weights
import torch.optim as optim
optimizer = optim.SGD(net.parameters(), lr=0.01)
optimizer.zero_grad()
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step() # does the update

In [3]:
import numpy as np
terminated: np.ndarray = (
            (1<2)
            | (2>3)
        )
print(terminated)
reward = np.ones_like(terminated, dtype=np.float32)
print(reward)


True
1.0


tensor([[-5.9967e-21,  3.0812e-41,  0.0000e+00,  0.0000e+00, -5.2646e-21],
        [ 3.0812e-41, -6.5553e-22,  3.0812e-41, -9.4189e-27,  4.5864e-41],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00]])


NameError: name 'nn' is not defined