In [14]:
import torch

# Examples from Dr Data Science

Example 1

In [15]:
# Leaf tensor [0., 1., 2., 3.] whose operations autograd will record.
x = torch.arange(4.0, requires_grad=True)
print(x)
# x.grad is an attribute, not a function: it stores dy/dx after a backward
# pass and is None while no backward pass has run yet.
print(x.grad)

# Scalar output y = x . x = x1^2 + x2^2 + ... + xn^2
y = x.dot(x)
print(y)

# Backpropagate from y; this fills x.grad with dy/dx.
y.backward()
print(x.grad)

# Analytical solution is dy/dx = 2x, so every element should compare True.
print(2 * x == x.grad)

tensor([0., 1., 2., 3.], requires_grad=True)
None
tensor(14., grad_fn=<DotBackward0>)
tensor([0., 2., 4., 6.])
tensor([True, True, True, True])


Example 2

In [16]:
# backward() adds into x.grad instead of overwriting it, so reset the values
# left over from the previous backward pass before computing a new gradient.
x.grad.zero_()
# f(x) = x1 + x2 + ... + xn, hence df/dx_i = 1 for every element.
y = torch.sum(x)
y.backward()
print(x.grad)

tensor([1., 1., 1., 1.])


Example 3

In [23]:
x = torch.arange(3.0, requires_grad=True)
# Element-wise product: y = [x1^2, x2^2, x3^2] is a vector, not a scalar.
y = x * x
print(y)  # display the non-scalar output before differentiating it

# backward() on a non-scalar tensor needs a `gradient` argument with the same
# shape as y. Passing all ones yields the gradient of y.sum(), i.e. 2*x.
# ones_like keeps the shape in sync with y instead of hard-coding its length.
y.backward(gradient=torch.ones_like(y))
print(x.grad)

tensor([0., 1., 4.], grad_fn=<MulBackward0>)
tensor([0., 2., 4.])


Example 4

In [32]:
a = torch.tensor([2.0, 3.0], requires_grad=True)
b = torch.tensor([6.0, 4.0], requires_grad=True)
# Element-wise expression, so Q has two components: Q_i = 3*a_i^3 - b_i^2.
Q = 3 * a**3 - b**2

# Example 4.1
# Q is non-scalar, so backward() needs a `gradient` tensor with the same shape
# as Q (one weight per component of Q, not one per input variable). It is
# created as float so it matches Q's dtype instead of relying on a cast.
# All ones gives dQ/da = 9*a^2 and dQ/db = -2*b.
external_grad = torch.tensor([1.0, 1.0])
# retain_graph=True keeps the graph alive so backward() can be called on Q
# again without rebuilding the expression.
Q.backward(gradient=external_grad, retain_graph=True)
print(a.grad, b.grad)

# Example 4.2
# Gradients accumulate across backward() calls, so clear them first.
a.grad.zero_()
b.grad.zero_()
# A weight of 0 on the second component of Q removes its contribution, so only
# the first entries of a.grad and b.grad end up non-zero.
external_grad = torch.tensor([1.0, 0.0])
Q.backward(gradient=external_grad)
print(a.grad, b.grad)

tensor([36., 81.]) tensor([-12.,  -8.])
tensor([36.,  0.]) tensor([-12.,   0.])


Detaching computation

Example 5

In [38]:
# Example 5.1: no detach -- the whole chain x -> y -> u -> z stays differentiable
x = torch.arange(4., requires_grad=True)
y = torch.mul(x, x)
u = y  # plain alias of y, still attached to the graph
z = torch.mul(u, x)  # z = x^3
torch.sum(z).backward()
print(x.grad)  # dz/dx = 3*x^2

# Example 5.2: detach -- u holds y's values but carries no gradient history
x = torch.arange(4., requires_grad=True)
y = torch.mul(x, x)
u = y.detach()
z = torch.mul(u, x)  # u is treated as a constant here
torch.sum(z).backward()
print(x.grad)  # dz/dx = u = x^2

tensor([ 0.,  3., 12., 27.])
tensor([0., 1., 4., 9.])


# PyTorch Beginner Tutorials

In [18]:
# Seed the global RNG so the random weights and bias, and every number derived
# from them, are identical on each Restart & Run All.
torch.manual_seed(0)

x = torch.ones(5)  # inputs
y = torch.ones(3)  # labels
# Trainable parameters: autograd tracks them because requires_grad=True.
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)
# Single linear layer: (5,) @ (5, 3) + (3,) -> (3,)
z = x @ w + b

# Scalar mean-squared error between the outputs z and the labels y.
loss = torch.nn.functional.mse_loss(z, y)

In [19]:
# Show each tensor of the toy model under its own heading; sep="\n" puts the
# heading and the tensor on separate lines, exactly like two print() calls.
print("The weights:", w, sep="\n")
print("The inputs:", x, sep="\n")
print("The outputs:", z, sep="\n")
print("The labels:", y, sep="\n")

# Loss computed from the initial (untrained) parameters.
print("Loss at the 0th iteration = ", loss)

The weights:
tensor([[ 0.2067,  2.0519,  1.3838],
        [-1.5333, -2.2768, -0.8264],
        [ 1.2000,  0.5559,  0.2384],
        [-0.6258, -0.8957, -0.7719],
        [ 1.1750,  0.5825,  1.0196]], requires_grad=True)
The inputs:
tensor([1., 1., 1., 1., 1.])
The outputs:
tensor([-1.7978,  0.4972,  1.7200], grad_fn=<AddBackward0>)
The labels:
tensor([1., 1., 1.])
Loss at the 0th iteration =  tensor(2.8663, grad_fn=<MseLossBackward0>)


In [20]:
# grad_fn references the backward-graph node that produced each tensor.
print(
    f"Gradient function for z = {z.grad_fn}",
    f"Gradient function for loss = {loss.grad_fn}",
    sep="\n",
)

Gradient function for z = <AddBackward0 object at 0x000001BDC0650DC0>
Gradient function for loss = <MseLossBackward0 object at 0x000001BDC05F2950>


In [21]:
# .grad of w and b is still None at this point: no backward pass has run yet.
print(
    "Gradients dloss/dw and dloss/db before calling loss.backward()",
    w.grad,  # dloss/dw
    b.grad,  # dloss/db
    sep="\n",
)

Gradients dloss/dw and dloss/db before calling loss.backward()
None
None


In [22]:
print("Gradients dloss/dw and dloss/db after calling loss.backward()")
# Backpropagate from the scalar loss; this populates w.grad and b.grad.
loss.backward()
# dloss/dw first, then dloss/db, each on its own line.
print(w.grad, b.grad, sep="\n")

Gradients dloss/dw and dloss/db after calling loss.backward()
tensor([[-1.8652, -0.3352,  0.4800],
        [-1.8652, -0.3352,  0.4800],
        [-1.8652, -0.3352,  0.4800],
        [-1.8652, -0.3352,  0.4800],
        [-1.8652, -0.3352,  0.4800]])
tensor([-1.8652, -0.3352,  0.4800])


# Datasets and Dataloaders

In [39]:
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt

In [41]:
# Build the MNIST training split first, then the test split. Both use the same
# root folder, download flag, and a fresh ToTensor() transform; only the
# `train` flag differs between them.
training_data, test_data = (
    datasets.MNIST(
        root="data",
        train=is_train,
        download=True,
        transform=ToTensor(),
    )
    for is_train in (True, False)
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:02<00:00, 3533126.67it/s]


Extracting data\MNIST\raw\train-images-idx3-ubyte.gz to data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 24838157.44it/s]

Extracting data\MNIST\raw\train-labels-idx1-ubyte.gz to data\MNIST\raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 3420474.40it/s]


Extracting data\MNIST\raw\t10k-images-idx3-ubyte.gz to data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 4553185.65it/s]


Extracting data\MNIST\raw\t10k-labels-idx1-ubyte.gz to data\MNIST\raw

