In [2]:
from fastai.imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class MNISTConvNet(nn.Module):
    """Small convolutional classifier held in a single ``nn.Sequential``.

    The stack is registered as ``self.sqn`` with child indices 0-10:

        conv(1->10, k=5) -> ReLU -> maxpool(2)
        conv(10->20, k=5) -> ReLU -> maxpool(2)
        Flatten -> linear(320->50) -> ReLU -> linear(50->10) -> ReLU

    For a 1x28x28 input the two conv/pool stages shrink the spatial size
    28 -> 24 -> 12 -> 8 -> 4, so the flattened feature vector has
    20 * 4 * 4 = 320 entries, matching the first linear layer.

    ``Flatten`` is not defined in this cell; presumably it is supplied by the
    fastai star imports at the top of the notebook.
    """

    def __init__(self):
        super().__init__()

        # Feature extractor: two conv -> ReLU -> max-pool stages.
        conv_stages = [
            nn.Conv2d(1, 10, 5), nn.ReLU(), nn.MaxPool2d(2, 2),
            nn.Conv2d(10, 20, 5), nn.ReLU(), nn.MaxPool2d(2, 2),
        ]

        # Classifier head operating on the flattened feature maps.
        # NOTE(review): the trailing ReLU clamps the 10 class scores to be
        # non-negative before they are returned -- confirm this is intended.
        dense_head = [
            Flatten(),
            nn.Linear(320, 50), nn.ReLU(),
            nn.Linear(50, 10), nn.ReLU(),
        ]

        # Layers are constructed in the same order as they are applied, so the
        # child indices inside `sqn` follow the listing in the class docstring.
        self.sqn = nn.Sequential(*(conv_stages + dense_head))

    def forward(self, input):
        """Run ``input`` through the sequential stack and return its output."""
        return self.sqn(input)


In [4]:
# Build a fresh model instance and print its layer structure.
net = MNISTConvNet()
print(net)

MNISTConvNet(
  (sqn): Sequential(
    (0): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
    (3): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
    (6): Flatten(
    )
    (7): Linear(in_features=320, out_features=50, bias=True)
    (8): ReLU()
    (9): Linear(in_features=50, out_features=10, bias=True)
    (10): ReLU()
  )
)


In [5]:
def printgradnorm(self, grad_input, grad_output):
    """Backward hook that prints diagnostics about a module's gradients.

    Intended to be passed to ``module.register_backward_hook``. It prints the
    module's class name, the container/element types of both gradient tuples,
    the sizes of their first entries and the norm of the first input gradient.

    Args:
        self: the module the hook is attached to (only its class name is used).
        grad_input: tuple of gradients w.r.t. the module's inputs.
        grad_output: tuple of gradients w.r.t. the module's outputs.

    Returns:
        None, so the gradients flowing through the module are left unchanged.
    """
    module_name = self.__class__.__name__
    print('Inside ' + module_name + ' backward')
    print('Inside class:' + module_name)
    print('')
    print('grad_input: ', type(grad_input))
    print('grad_input[0]: ', type(grad_input[0]))
    print('grad_output: ', type(grad_output))
    print('grad_output[0]: ', type(grad_output[0]))
    print('')

    first_in = grad_input[0]
    first_out = grad_output[0]
    # A gradient entry can be None (e.g. when the corresponding input does not
    # require grad); report None instead of crashing on .size() / .norm().
    print('grad_input size:', None if first_in is None else first_in.size())
    print('grad_output size:', None if first_out is None else first_out.size())
    print('grad_input norm:', None if first_in is None else first_in.norm())

In [6]:
# One random input sample shaped (1, 1, 28, 28): a single 1-channel 28x28 image.
# No random seed is set in the visible cells, so the values differ between runs.
inp = torch.randn(1, 1, 28, 28)

In [None]:
# The layers live inside the `sqn` Sequential, so the second convolution is
# reached by position (index 3); the model defines no `conv2` attribute.
net.sqn[3].register_backward_hook(printgradnorm)

In [7]:
# Pass the model through fastai's to_gpu helper (presumably moves it to CUDA
# when available -- the recorded outputs below show torch.cuda tensors).
net = to_gpu(net)

In [8]:
# Target class index 3 for the single sample, wrapped with V()
# (fastai helper; presumably builds a Variable on the model's device -- TODO confirm).
target = V(torch.LongTensor([3]))
# Cross-entropy loss applied to the network's 10 outputs.
loss_fn = nn.CrossEntropyLoss() 

In [25]:
# Forward pass, then backpropagate the loss; registered backward hooks fire here.
out = net(V(inp))
err = loss_fn(out, target)
# retain_graph=True keeps the graph alive so `out` can be backpropagated
# again in a later cell.
err.backward(retain_graph=True)

Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.autograd.variable.Variable'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.autograd.variable.Variable'>

grad_input size: torch.Size([1, 10, 12, 12])
grad_output size: torch.Size([1, 20, 8, 8])
grad_input norm: Variable containing:
 0.1056
[torch.cuda.FloatTensor of size 1 (GPU 0)]



In [None]:
# Shape of the network output for the single input sample.
out.shape

In [26]:
# Gradient seed for a direct backward pass from `out`: a (1, n_outputs) row of
# zeros with a 1.0 in column 0, so only output 0 contributes.
n_outputs = out.size(-1)
one_hot = torch.FloatTensor(1, n_outputs).zero_()
one_hot[0, 0] = 1.0
# Pass it through the same fastai helper that was used for the model.
one_hot = to_gpu(one_hot)

In [None]:
# Display the gradient seed tensor.
one_hot

In [27]:
# Backpropagate from `out` directly, with one_hot as the upstream gradient;
# retain_graph=True again leaves the graph usable for further backward calls.
out.backward(gradient=one_hot, retain_graph=True)

Inside Conv2d backward
Inside class:Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.autograd.variable.Variable'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.autograd.variable.Variable'>

grad_input size: torch.Size([1, 10, 12, 12])
grad_output size: torch.Size([1, 20, 8, 8])
grad_input norm: Variable containing:
 0
[torch.cuda.FloatTensor of size 1 (GPU 0)]



In [9]:
# Collect (name, module) pairs for the model itself and every nested submodule.
net_list = list(net.named_modules())

In [12]:
# Display every (name, module) pair collected from net.named_modules().
# (The previous `net_list[]` was a syntax error: an empty subscript.)
net_list

[('', MNISTConvNet(
    (sqn): Sequential(
      (0): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
      (1): ReLU()
      (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
      (3): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
      (4): ReLU()
      (5): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
      (6): Flatten(
      )
      (7): Linear(in_features=320, out_features=50, bias=True)
      (8): ReLU()
      (9): Linear(in_features=50, out_features=10, bias=True)
      (10): ReLU()
    )
  )), ('sqn', Sequential(
    (0): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
    (3): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
    (6): Flatten(
    )
    (7): Linear(in_features=320, out_features=50,

In [24]:
# Look the layer up by its registered name rather than a negative list offset,
# which would silently point at a different layer if the model gained or lost
# modules. 'sqn.3' is the second Conv2d -- the same entry as net_list[-8].
# This cell must run before the backward cells, otherwise no hook output appears.
dict(net_list)['sqn.3'].register_backward_hook(printgradnorm)

<torch.utils.hooks.RemovableHandle at 0x7f3e47caa208>