# More about PyTorch

A lot of implementation details are missing in the PyTorch Tutorial. We will work on a couple of things that will be important later on.


In [16]:
# As usual, a bit of setup
from __future__ import print_function

import platform
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

# Record the interpreter version the notebook was executed with.
print("Python version: ", platform.python_version())

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Python version:  3.6.3


# Weight Initialization at Tensor Level

At the tensor level, we can use nn.init to indirectly initialize the content of the Tensor.

There is another, more direct way: calling `.data.normal_()` on the tensor, which modifies it in place.

In [10]:
# Create a 3x5 tensor and fill it in place through nn.init.
# NOTE(review): newer PyTorch releases appear to spell this function
# `torch.nn.init.normal_` -- confirm against the installed version.
w = torch.Tensor(3, 5)
torch.nn.init.normal(w)  # This is like np.random.randn()

print (w)
print (type(w))

# nn.init works for Variables also
# NOTE(review): Variable(w) presumably shares storage with w, so the call
# below likely overwrites w's values as well -- confirm.
w2 = Variable(w)
torch.nn.init.normal(w2)

print (w2)
print (type(w2))


 0.9550 -0.4278 -0.2605  0.5902  0.6849
-0.0509 -0.6427 -2.3600  0.8400 -0.7931
 0.6833 -0.3046  0.3007 -0.5518 -0.4444
[torch.FloatTensor of size 3x5]

<class 'torch.FloatTensor'>
Variable containing:
 0.6825 -0.0320  0.0557  1.1974 -0.2419
 0.7679 -0.3447 -0.4307 -1.4092 -0.5008
 1.0139  1.9033  2.7284  0.1253  0.3340
[torch.FloatTensor of size 3x5]

<class 'torch.autograd.variable.Variable'>


In [11]:
# Old-style direct access through the Variable's .data attribute
w2.data.normal_()  # in-place: overwrites the values held by w2
print (w2)

Variable containing:
 1.1246 -1.0840 -0.0515 -0.2475  0.3643
-0.5712  1.1921 -0.8186  0.5920 -0.5610
 0.3099 -0.5385 -1.3637 -0.2489 -2.5477
[torch.FloatTensor of size 3x5]



# Weight Initialization at Module Level

At the module level, we can use `.apply(fn)` to initialize the weights in the module. Note that we need to write the function `fn` ourselves.


In [35]:
# These are the functions that will be used to operate on the submodules used by .apply(fn)

def weights_init(m):
    """Initialise the parameters of Conv* and Linear* modules in place.

    Meant to be passed to ``Module.apply``, which calls it once for every
    submodule. Modules are matched on their class name, so any class whose
    name contains ``Conv`` or ``Linear`` is affected; all others are ignored.

    Args:
        m: the module currently being visited.

    Effects:
        * Conv*:   weights are re-drawn via ``normal_(0.0, 0.01)``; the bias is
          left untouched.
        * Linear*: weights are re-drawn via ``normal_(0.0, 0.01)`` and the bias,
          when the layer has one, is filled with zeros.
    """
    classname = m.__class__.__name__
    if 'Conv' in classname:
        print("Apply weight init on ",classname)
        m.weight.data.normal_(0.0, 0.01)
    elif 'Linear' in classname:
        print("Apply weight init on ",classname)
        m.weight.data.normal_(0.0, 0.01)
        # A layer built with bias=False has `bias` set to None; skip it
        # instead of failing on `None.data`.
        if m.bias is not None:
            m.bias.data.fill_(0)
        
def display_weights(m):
    """Print the parameters of Conv*, Linear* and BatchNorm* modules.

    Meant to be passed to ``Module.apply``. Modules are matched on their class
    name; anything that is not a Conv*, Linear* or BatchNorm* is ignored.

    Args:
        m: the module currently being visited.

    Output:
        * Conv*:               class name, then the weight tensor.
        * Linear*, BatchNorm*: class name, then weight and bias, skipping
          whichever of the two the module does not have.
    """
    classname = m.__class__.__name__
    if 'Conv' in classname:
        print(classname)
        print(m.weight.data)
    elif 'Linear' in classname or 'BatchNorm' in classname:
        print(classname)
        # `weight` / `bias` are None when the layer was built without them
        # (e.g. Linear(bias=False), BatchNorm(affine=False)); skip those
        # rather than failing on `None.data`.
        for param in (m.weight, m.bias):
            if param is not None:
                print(param.data)


class Flatten(nn.Module):
    """Collapse every non-batch dimension of the input into a single one."""

    def forward(self, x):
        """Reshape ``x`` from ``(N, ...)`` to ``(N, prod(...))``.

        Args:
            x: tensor whose first dimension is the batch dimension, e.g. an
               ``(N, C, H, W)`` batch of images.

        Returns:
            A view of ``x`` with shape ``(N, -1)``: one flat vector per sample.
        """
        # Only the batch size is needed, so read it directly instead of
        # unpacking exactly four dimensions; this keeps the 4-D behaviour and
        # also accepts inputs of any other rank.
        return x.view(x.size(0), -1)
        
# Small conv -> ReLU -> flatten -> linear classifier used to demonstrate .apply(fn).
# NOTE(review): 5408 = 32 channels * 13 * 13, which is what the conv layer yields
# for 32x32 inputs -- assumes 3x32x32 images; confirm against the data used.
simple_model = nn.Sequential(
                nn.Conv2d(3, 32, kernel_size=7, stride=2),  # 3 input image channels, 32 output channels
                nn.ReLU(inplace=True),
                Flatten(), # turns each sample's feature maps into one flat vector
                nn.Linear(5408, 10), # affine layer: 5408 flattened features -> 10 outputs
              )

## .apply(fn)

Applies `fn` recursively to every submodule (as returned by `.children()`) as well as to the module itself. A typical use is initializing the parameters of a model (see also `torch.nn.init`).

http://pytorch.org/docs/master/nn.html

In [37]:
# Run weights_init on every submodule of simple_model; .apply returns the
# model, so its repr is displayed as the cell output.
simple_model.apply(weights_init)

# Optional: print the parameters after initialisation.
# simple_model.apply(display_weights)

Apply weight init on  Conv2d
Apply weight init on  Linear


Sequential(
  (0): Conv2d (3, 32, kernel_size=(7, 7), stride=(2, 2))
  (1): ReLU(inplace)
  (2): Flatten(
  )
  (3): Linear(in_features=5408, out_features=10)
)

## Direct Access


In [39]:
# The neural net is defined as a class
class Net(nn.Module):
    """Small convolutional network: two conv layers and three linear layers.

    The weights are initialised directly inside ``__init__`` by looping over
    ``self.modules()``, instead of through an external ``.apply(fn)`` call.

    ``fc1`` expects ``16 * 5 * 5`` input features, which is what the two
    5x5 conv + 2x2 max-pool stages produce for a ``1 x 32 x 32`` input.

    ``forward`` relies on ``torch.nn.functional`` being imported as ``F``.
    """

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

        # Direct initialisation: every conv/linear layer gets weights drawn
        # via normal_(0.0, 0.01); linear layers also get a zeroed bias.
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                print("Apply weight init on ", m.__class__.__name__)
                m.weight.data.normal_(0.0, 0.01)
                if isinstance(m, nn.Linear):
                    m.bias.data.fill_(0)

    def forward(self, x):
        """Compute the 10 output scores for a batch ``x``.

        Args:
            x: input batch; ``fc1`` requires the flattened conv output to have
               ``16 * 5 * 5`` features per sample.

        Returns:
            Tensor of shape ``(batch, 10)``.
        """
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the window is square, a single number is enough
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flatten everything except the batch dimension before the linear layers
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        """Return the number of elements per sample in ``x``.

        This is the product of all dimensions except the first (batch) one.
        """
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


# Constructing the network runs the initialisation loop in Net.__init__,
# which prints one line per conv/linear layer.
net = Net()

Apply weight init on  Conv2d
Apply weight init on  Conv2d
Apply weight init on  Linear
Apply weight init on  Linear
Apply weight init on  Linear
