In [1]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import numpy as np
import torch
from torchvision import datasets, transforms
import helper
import matplotlib.pyplot as plt
from torch import nn, optim
import torch.nn.functional as F
from collections import OrderedDict

transform = transforms.Compose([transforms.ToTensor(),
                               transforms.Normalize((0.5), (0.5)) ])

trainset = datasets.MNIST('MNIST_data/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

input_size = 784
hidden_size = [128, 64]
output_size = 10

model = nn.Sequential(OrderedDict([
    ('fc1', nn.Linear(input_size, hidden_size[0])),
    ('relu1', nn.ReLU()),
    ('fc2', nn.Linear(hidden_size[0], hidden_size[1])),
    ('relu2', nn.ReLU()),
    ('output', nn.Linear(hidden_size[1], output_size)),
    ('softmax', nn.Softmax(dim=1)) 
]))

In [4]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)
print('Before ', model.fc1.weight)

Before  Parameter containing:
tensor([[-0.0331, -0.0077,  0.0099,  ...,  0.0333, -0.0076, -0.0044],
        [-0.0269,  0.0005, -0.0016,  ...,  0.0335, -0.0160, -0.0019],
        [ 0.0311,  0.0073, -0.0064,  ..., -0.0276, -0.0079, -0.0327],
        ...,
        [ 0.0314,  0.0341,  0.0145,  ..., -0.0276,  0.0254, -0.0074],
        [ 0.0287,  0.0218, -0.0033,  ...,  0.0287, -0.0207,  0.0237],
        [ 0.0027, -0.0003, -0.0326,  ..., -0.0011, -0.0002,  0.0319]],
       requires_grad=True)


In [5]:
images, labels = next(iter(trainloader))
images.resize_(64, 784)

optimizer.zero_grad()

# Forward pass
output = model.forward(images)
loss = criterion(output, labels)
loss.backward()
print('Gradient ', model.fc1.weight.grad)
optimizer.step()

Gradient  tensor([[-9.1989e-05, -9.1989e-05, -9.1989e-05,  ..., -9.1989e-05,
         -9.1989e-05, -9.1989e-05],
        [-9.7096e-05, -9.7096e-05, -9.7096e-05,  ..., -9.7096e-05,
         -9.7096e-05, -9.7096e-05],
        [-3.5609e-05, -3.5609e-05, -3.5609e-05,  ..., -3.5609e-05,
         -3.5609e-05, -3.5609e-05],
        ...,
        [-1.8508e-05, -1.8508e-05, -1.8508e-05,  ..., -1.8508e-05,
         -1.8508e-05, -1.8508e-05],
        [ 8.7466e-05,  8.7466e-05,  8.7466e-05,  ...,  8.7466e-05,
          8.7466e-05,  8.7466e-05],
        [ 7.0942e-06,  7.0942e-06,  7.0942e-06,  ...,  7.0942e-06,
          7.0942e-06,  7.0942e-06]])


In [6]:
print('Updated weights -', model.fc1.weight)

Updated weights - Parameter containing:
tensor([[-0.0331, -0.0077,  0.0099,  ...,  0.0333, -0.0076, -0.0044],
        [-0.0269,  0.0005, -0.0016,  ...,  0.0335, -0.0160, -0.0019],
        [ 0.0311,  0.0073, -0.0064,  ..., -0.0276, -0.0079, -0.0327],
        ...,
        [ 0.0314,  0.0341,  0.0145,  ..., -0.0276,  0.0254, -0.0074],
        [ 0.0287,  0.0218, -0.0033,  ...,  0.0287, -0.0207,  0.0237],
        [ 0.0027, -0.0003, -0.0326,  ..., -0.0011, -0.0002,  0.0319]],
       requires_grad=True)
