In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import datasets, transforms

# Convert each image to a float tensor, then shift/scale it with (x - 0.5) / 0.5.
# MNIST images are single-channel (grayscale), so mean and std must be 1-tuples;
# 3-tuples describe an RGB image and are rejected by current torchvision because
# they cannot be applied to a one-channel tensor.
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,)),
                                ])
# Download (if not already cached) and load the training data
trainset = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=True, transform=transform)
# Serve the training set as shuffled mini-batches of 64 images
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

In [2]:
# Three fully-connected layers (784 -> 128 -> 64 -> 10) with ReLU in between.
# The last layer emits raw class scores; no softmax is applied inside the model.
model = nn.Sequential(
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
)

# Cross-entropy loss, fed with the raw scores and the class labels
criterion = nn.CrossEntropyLoss()

# Pull a single mini-batch from the loader
images, labels = next(iter(trainloader))

# Collapse every image into one row vector, keeping the batch dimension
images = images.view(images.size(0), -1)

# Run the batch through the network to obtain the class scores (logits)
logits = model(images)
# Compute the loss from the logits and the true labels
loss = criterion(logits, labels)

print(loss)

tensor(2.2997, grad_fn=<NllLossBackward>)


In [3]:
# Build a feed-forward network whose last layer is LogSoftmax, so the model
# outputs log-probabilities (not raw logits) across the 10 classes.
model = nn.Sequential(nn.Linear(784, 128),
                        nn.ReLU(),
                        nn.Linear(128, 64),
                        nn.ReLU(),
                        nn.Linear(64, 10),
                        nn.LogSoftmax(dim=1))

# Negative log-likelihood loss: expects log-probabilities as input, which is
# why it is paired with the LogSoftmax output layer above.
criterion = nn.NLLLoss()

# Get one mini-batch of data
images, labels = next(iter(trainloader))

# Flatten images to (batch, features)
images = images.view(images.shape[0], -1)

# Forward pass, get our log-probabilities
logps = model(images)
# Calculate the loss with the log-probabilities and the labels
loss = criterion(logps, labels)

print(loss)

tensor(2.2904, grad_fn=<NllLossBackward>)


In [4]:
# Random 2x2 tensor; requires_grad=True asks autograd to track operations on it
x = torch.randn((2, 2), requires_grad=True)
print(x)

tensor([[ 0.1774, -1.0488],
        [-0.3987,  0.0088]], requires_grad=True)


In [5]:
# Element-wise square of x; since x requires grad, the result carries a grad_fn
y = x**2
print(y)

tensor([[3.1476e-02, 1.0999e+00],
        [1.5896e-01, 7.6836e-05]], grad_fn=<PowBackward0>)


In [6]:
# grad_fn is the autograd node for the operation that produced y (the power op here)
print(y.grad_fn)

<PowBackward0 object at 0x00000219F7674B00>


In [7]:
# Reduce y to a single scalar value; this is what backward() is later called on
z = y.mean()
print(z)

tensor(0.3226, grad_fn=<MeanBackward1>)


In [8]:
# No backward pass has been run yet, so x has no gradient stored (prints None)
print(x.grad)

None


In [9]:
# Backpropagate from the scalar z; this fills in x.grad
z.backward()
print(x.grad)
# Analytic check: z = mean(x**2) over 4 elements, so dz/dx = 2x/4 = x/2
print(x/2)

tensor([[ 0.0887, -0.5244],
        [-0.1994,  0.0044]])
tensor([[ 0.0887, -0.5244],
        [-0.1994,  0.0044]], grad_fn=<DivBackward0>)


In [10]:
# Same architecture as before (784 -> 128 -> 64 -> 10), finished with LogSoftmax
# so the network's output is a row of log-probabilities per image.
model = nn.Sequential(
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
    nn.LogSoftmax(dim=1),
)

# Negative log-likelihood loss, applied to the log-probabilities
criterion = nn.NLLLoss()
# Pull a single mini-batch from the loader
images, labels = next(iter(trainloader))
# Collapse every image into one row vector, keeping the batch dimension
images = images.view(images.size(0), -1)
# Forward pass: log-probabilities for each class
logps = model(images)
# Loss between the predicted log-probabilities and the true labels
loss = criterion(logps, labels)

print(loss)

tensor(2.3046, grad_fn=<NllLossBackward>)


In [11]:
# The first layer's weight gradient is unset until a backward pass has run
print('Before backward pass: \n', model[0].weight.grad)
# Backpropagate the loss through the network, populating .grad on the parameters
loss.backward()
print('After backward pass: \n', model[0].weight.grad)

Before backward pass: 
 None
After backward pass: 
 tensor([[-0.0013, -0.0013, -0.0013,  ..., -0.0013, -0.0013, -0.0013],
        [ 0.0059,  0.0059,  0.0059,  ...,  0.0059,  0.0059,  0.0059],
        [-0.0011, -0.0011, -0.0011,  ..., -0.0011, -0.0011, -0.0011],
        ...,
        [-0.0021, -0.0021, -0.0021,  ..., -0.0021, -0.0021, -0.0021],
        [-0.0002, -0.0002, -0.0002,  ..., -0.0002, -0.0002, -0.0002],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])


In [12]:
from torch import optim

# Stochastic gradient descent over all of the model's parameters, learning rate 0.01
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [13]:
print('Initial weights - ', model[0].weight)
images, labels = next(iter(trainloader))
# Flatten to (batch, features) using the batch's real size. A hard-coded
# in-place resize_(64, 784) would break on any batch that is not exactly 64
# images (e.g. the final, smaller batch of an epoch).
images = images.view(images.shape[0], -1)

# Clear the gradients, do this because gradients are accumulated across
# backward passes rather than overwritten
optimizer.zero_grad()

# Forward pass, then backward pass; the weight update itself is done
# separately by optimizer.step()
output = model(images)  # call the module rather than .forward() so hooks run
loss = criterion(output, labels)
loss.backward()
print('Gradient -', model[0].weight.grad)

Initial weights -  Parameter containing:
tensor([[ 0.0267,  0.0014,  0.0325,  ..., -0.0158, -0.0240, -0.0008],
        [ 0.0106, -0.0123, -0.0140,  ...,  0.0140, -0.0105, -0.0207],
        [ 0.0173, -0.0337, -0.0060,  ..., -0.0191, -0.0211,  0.0284],
        ...,
        [ 0.0276,  0.0223, -0.0297,  ...,  0.0222,  0.0241, -0.0159],
        [-0.0152,  0.0143, -0.0239,  ..., -0.0207, -0.0329,  0.0008],
        [ 0.0118,  0.0167,  0.0290,  ..., -0.0135, -0.0130, -0.0036]],
       requires_grad=True)
Gradient - tensor([[ 0.0001,  0.0001,  0.0001,  ...,  0.0001,  0.0001,  0.0001],
        [ 0.0020,  0.0020,  0.0020,  ...,  0.0020,  0.0020,  0.0020],
        [ 0.0002,  0.0002,  0.0002,  ...,  0.0002,  0.0002,  0.0002],
        ...,
        [ 0.0056,  0.0056,  0.0056,  ...,  0.0056,  0.0056,  0.0056],
        [-0.0003, -0.0003, -0.0003,  ..., -0.0003, -0.0003, -0.0003],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])


In [14]:
# Take one optimizer step: update the parameters using the gradients stored in .grad
optimizer.step()
print('Update weights -', model[0].weight)

Update weights - Parameter containing:
tensor([[ 0.0267,  0.0014,  0.0325,  ..., -0.0158, -0.0240, -0.0008],
        [ 0.0106, -0.0123, -0.0140,  ...,  0.0140, -0.0105, -0.0208],
        [ 0.0173, -0.0337, -0.0060,  ..., -0.0191, -0.0211,  0.0284],
        ...,
        [ 0.0275,  0.0223, -0.0297,  ...,  0.0222,  0.0241, -0.0159],
        [-0.0152,  0.0143, -0.0239,  ..., -0.0207, -0.0329,  0.0008],
        [ 0.0118,  0.0167,  0.0290,  ..., -0.0135, -0.0130, -0.0036]],
       requires_grad=True)
