In [3]:
import torch 
import torchvision
import torch.nn as nn
import numpy as np
import torchvision.transforms as transforms

# 1. Basic autograd example 1

### Create a few tensors

In [24]:
# Three scalar leaf tensors (input, weight, bias), each tracked by autograd.
x, w, b = (torch.tensor(value, requires_grad=True) for value in (1., 2., 3.))

### Build a computational graph

In [25]:
# y = w * x^2 + b; every operation here is recorded so it can be differentiated.
y = w * x.pow(2) + b

### Compute gradients

In [26]:
# Backpropagate from the scalar y; this fills in .grad on x, w and b.
y.backward()

### Check gradients

In [27]:
# Expected values: dy/dx = 2*w*x = 4, dy/dw = x**2 = 1, dy/db = 1.
print(x.grad, w.grad, b.grad, sep='\n')

tensor(4.)
tensor(1.)
tensor(1.)


# 2. Basic autograd example 2

### Create tensors of shape (100, 3) and (100, 2)

In [66]:
# Fix the RNG seed so the random inputs/targets (and the layer initialisation
# further down) are the same on every Restart & Run All. Outputs recorded
# from an earlier, unseeded run will differ until the notebook is re-run.
torch.manual_seed(0)

# 100 samples: 3 input features each in x, 2 regression targets each in y.
x = torch.randn(100, 3)
y = torch.randn(100, 2)

In [67]:
# Display the input tensor (last expression of the cell is shown as output).
x

tensor([[ 0.9215, -1.4073, -0.5302],
        [ 0.5547, -0.2823, -0.3932],
        [ 0.0691, -0.3990, -0.2395],
        [ 1.2972, -0.0585, -0.0850],
        [ 0.6575,  0.9284,  0.1457],
        [-2.0764,  0.5302, -0.1363],
        [ 0.0066, -1.8347,  1.7140],
        [-1.2544, -0.6777, -0.2777],
        [-0.4475,  0.0069,  1.4448],
        [ 1.3214, -0.7625, -0.3772],
        [-0.1402, -0.9672,  0.2399],
        [ 1.9564,  0.9225, -0.2217],
        [ 1.8487,  1.2962, -0.1633],
        [-0.2877, -0.9131,  1.0923],
        [-0.0323, -0.8700,  1.0462],
        [ 0.6605, -0.3008, -1.0597],
        [ 0.9257,  0.9217,  1.2901],
        [-0.1252, -0.5676, -0.1535],
        [-0.2638,  0.7316, -2.3815],
        [-1.1360,  1.2347,  0.1379],
        [-1.0879,  0.0995,  0.0407],
        [-1.0970,  0.7856, -0.0886],
        [ 0.5573, -0.2244, -0.0288],
        [-2.6283, -0.6531,  0.1727],
        [ 0.2660,  0.1223,  0.6896],
        [ 0.8011, -0.6728,  0.7237],
        [-0.1747,  1.8706,  1.0029],
 

In [68]:
# Display the target tensor (last expression of the cell is shown as output).
y

tensor([[-1.2203,  0.5786],
        [-0.9259, -0.0552],
        [ 1.9616,  0.3412],
        [ 0.3131,  0.8968],
        [-0.9103,  0.8659],
        [-0.3770, -1.3565],
        [ 0.2604, -0.0728],
        [-1.7524,  1.7006],
        [-0.1987, -0.3497],
        [ 1.0093,  0.4641],
        [ 1.5778, -1.1819],
        [ 0.6105, -0.3671],
        [-0.6600, -0.1554],
        [ 0.5545, -1.3491],
        [-0.3230,  0.2650],
        [ 1.2099,  0.7716],
        [-0.2739, -0.0027],
        [ 1.1089, -0.8220],
        [ 2.4110,  1.4030],
        [ 0.9272, -1.3365],
        [-0.9447, -0.0034],
        [ 0.5677, -2.5220],
        [ 0.5993,  0.3751],
        [-2.1560, -2.2711],
        [-0.9242,  0.6587],
        [ 0.2043, -0.0402],
        [-0.6449, -1.0218],
        [ 1.2493, -0.3288],
        [ 0.2363, -0.3167],
        [-0.3108,  0.3837],
        [ 0.7967, -0.6776],
        [-0.2798,  0.4664],
        [-0.2482, -0.5944],
        [ 0.9108,  1.4136],
        [ 0.0081,  0.4763],
        [-1.5018,  0

### Build a fully connected layer

In [70]:
# Fully connected layer mapping 3 input features to 2 outputs.
linear = nn.Linear(in_features=3, out_features=2)

# Show the freshly initialised parameters.
print('w: ', linear.weight)
print('b: ', linear.bias)

w:  Parameter containing:
tensor([[ 0.4099,  0.0721, -0.1224],
        [-0.0996, -0.2001, -0.0365]], requires_grad=True)
b:  Parameter containing:
tensor([-0.2153,  0.2147], requires_grad=True)


In [71]:
# The module repr lists in_features, out_features and whether a bias is used.
linear

Linear(in_features=3, out_features=2, bias=True)

In [72]:
# The weight is stored as (out_features, in_features), i.e. (2, 3) for this layer.
linear.weight.shape

torch.Size([2, 3])

In [73]:
# One bias term per output feature, so the shape is (2,).
linear.bias.shape

torch.Size([2])

### Build loss function and optimizer

In [74]:
# Plain SGD over the layer's weight and bias with a fixed learning rate.
optimizer = torch.optim.SGD(linear.parameters(), lr=0.01)

# Mean-squared-error objective used to compare predictions with targets.
criterion = nn.MSELoss()

### Forward pass

In [75]:
# Forward pass: run x through the linear layer to get predictions for y.
y_pred = linear(x)

In [76]:
# Display the predictions (two output values per input row).
y_pred

tensor([[ 1.2594e-01,  4.2386e-01],
        [ 3.9844e-02,  2.3030e-01],
        [-1.8641e-01,  2.9637e-01],
        [ 3.2264e-01,  1.0032e-01],
        [ 1.0327e-01, -4.1869e-02],
        [-1.0116e+00,  3.2032e-01],
        [-5.5462e-01,  5.1847e-01],
        [-7.4441e-01,  4.8532e-01],
        [-5.7513e-01,  2.0507e-01],
        [ 3.1757e-01,  2.4944e-01],
        [-3.7185e-01,  4.1338e-01],
        [ 6.8029e-01, -1.5660e-01],
        [ 6.5592e-01, -2.2277e-01],
        [-5.3276e-01,  3.8609e-01],
        [-4.1931e-01,  3.5374e-01],
        [ 1.6347e-01,  2.4781e-01],
        [ 7.2651e-02, -1.0906e-01],
        [-2.8877e-01,  3.4633e-01],
        [ 2.0699e-02,  1.8161e-01],
        [-6.0894e-01,  7.5712e-02],
        [-6.5911e-01,  3.0161e-01],
        [-5.9759e-01,  1.6997e-01],
        [ 4.9755e-04,  2.0514e-01],
        [-1.3610e+00,  6.0075e-01],
        [-1.8188e-01,  1.3853e-01],
        [-2.3989e-02,  2.4307e-01],
        [-2.7491e-01, -1.7885e-01],
        [ 2.5532e-01,  1.281

### Compute loss

In [77]:
# Evaluate the MSE criterion on the predictions against the targets.
loss = criterion(y_pred, y)
# .item() converts the scalar loss tensor to a plain Python number for printing.
print('loss: ', loss.item())

loss:  1.061033010482788


### Backward pass

In [86]:
# PyTorch accumulates into .grad rather than overwriting it, so clear any
# gradients left over from an earlier forward/backward run of these cells.
# On the first run the gradients are still unset and this is a no-op.
optimizer.zero_grad()

# Backpropagate the loss to populate linear.weight.grad and linear.bias.grad.
loss.backward()

### Print out the gradients

In [87]:
# Gradients of the loss with respect to the layer's weight and bias,
# as populated by loss.backward().
print('dL/dw: ', linear.weight.grad)
print('dL/db: ', linear.bias.grad)

dL/dw:  tensor([[ 0.3284, -0.1349, -0.0966],
        [-0.2074, -0.1234,  0.0205]])
dL/db:  tensor([-0.0631,  0.1585])


### 1-step gradient descent

In [95]:
# Apply one SGD update to linear's parameters using the gradients
# computed by loss.backward().
optimizer.step()

### Print out the loss after 1-step gradient descent

In [96]:
# Re-run the forward pass with the updated parameters and recompute the loss
# so it can be compared with the value printed before the optimizer step.
pred = linear(x)
loss = criterion(pred, y)
print('loss after 1 step optimization: ', loss.item())

loss after 1 step optimization:  1.0438811779022217
