# Pytorch 101 - 005
#### *Date* : 2019.05.26
#### *Author* :`Jen-Huan Hu`

#### Let's review ```nn.Conv2d``` first 

In [25]:
import torch

import numpy as np

# Select the compute device name: 'cuda' when PyTorch reports a usable CUDA
# runtime, otherwise 'cpu'. torch.cuda.is_available() is the documented
# availability check, so it is used instead of testing the device count.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('torch device using {}'.format( device ))

def add_batch_dim(x):
    """Prepend a singleton batch axis to a channel-first tensor.

    ``x`` is assumed to be ordered (C, H, W); the returned tensor is ordered
    (1, C, H, W) and holds the same values.
    """
    # Append a size-1 axis after W, giving (C, H, W, 1) ...
    with_trailing_axis = x.unsqueeze(3)
    # ... then rotate that new axis to the front: (1, C, H, W).
    return with_trailing_axis.permute(3, 0, 1, 2)

# Build a random channel-first tensor with C=3, H=5, W=4, scaled by 10.
C, H, W = 3, 5, 4
a = torch.randn((C, H, W)) * 10
# Prepend the batch axis so the tensor is (1, C, H, W) before it is fed to the layer.
a = add_batch_dim(a)
# 3 input channels, 2 output channels, 3x3 kernel, padding of 1 on each spatial side.
Conv1 = torch.nn.Conv2d(3, 2, 3, padding = 1) # Provide callable functor
# Show the layer configuration, then its weight and bias parameters.
print("We have a callable convolution %s" % Conv1)
print("with weights %s and bias %s" % (Conv1.weight, Conv1.bias) )
# DBG : print(Conv1.kernel_size, Conv1.stride, Conv1.padding)

def output_size_formula(x, conv):
    """Predict the spatial output size of a convolution layer applied to ``x``.

    For each spatial axis the size is computed as

        floor((in + 2 * padding - dilation * (kernel - 1) - 1) / stride) + 1

    which reduces to ``floor((in + 2 * padding - kernel) / stride) + 1`` when
    the dilation is 1.

    Args:
        x: Object with a ``shape`` whose trailing entries are the spatial
            sizes, e.g. a tensor ordered (N, C, H, W).
        conv: Layer exposing per-axis ``kernel_size``, ``stride`` and
            ``padding`` sequences of integers, and optionally ``dilation``.

    Returns:
        Tuple of ints with one entry per spatial axis of ``conv``.
    """
    n_spatial = len(conv.kernel_size)
    # A layer object without a ``dilation`` attribute is treated as undilated.
    dilation = getattr(conv, 'dilation', (1,) * n_spatial)
    # Take exactly as many trailing axes as the layer has kernel dimensions,
    # so leading batch/channel axes are never mistaken for spatial ones.
    spatial_shape = x.shape[-n_spatial:]
    # Integer floor division keeps the result an exact int (no float round trip).
    return tuple(
        (size + 2 * pad - dil * (k - 1) - 1) // stride + 1
        for size, pad, dil, k, stride in zip(
            spatial_shape, conv.padding, dilation, conv.kernel_size, conv.stride
        )
    )

# Report the spatial size predicted by the formula for this input and layer.
print("Estimated output size is {}".format( output_size_formula( a, Conv1 ) ) )

# Calling the module applies the convolution to the batched input.
print("Conv(a) = %s" % Conv1(a))
    

torch device using cuda
We have a callable convolution Conv2d(3, 2, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
with weights Parameter containing:
tensor([[[[ 0.1170,  0.1327,  0.1763],
          [-0.1010,  0.1747, -0.0708],
          [ 0.1855, -0.1766,  0.0171]],

         [[-0.0423,  0.1756,  0.1107],
          [-0.1809,  0.1849,  0.0903],
          [-0.0586, -0.0817, -0.1330]],

         [[-0.1005,  0.1882, -0.1384],
          [ 0.1120, -0.0500, -0.1065],
          [ 0.1580, -0.0640, -0.0662]]],


        [[[ 0.0221,  0.1694,  0.1233],
          [ 0.0340, -0.0098, -0.0784],
          [ 0.1606, -0.0991,  0.0396]],

         [[ 0.0662,  0.1246, -0.0520],
          [-0.0314, -0.0763, -0.1480],
          [-0.0309, -0.0221,  0.0753]],

         [[-0.1048, -0.0192,  0.1869],
          [ 0.0012,  0.0352, -0.0426],
          [-0.1420, -0.0227, -0.0885]]]], requires_grad=True) and bias Parameter containing:
tensor([0.0780, 0.0432], requires_grad=True)
Estimated output size is (5, 4)
C

#### And then take a glimpse of ```nn.MaxPool2d```
Ex. 
* m = nn.MaxPool2d(3, stride=2)  
* m = nn.MaxPool2d((3, 2), stride=(2, 1))

In [29]:
# Convolve the batched input with the layer created earlier.
b = Conv1( a )
# Max pooling with kernel size 2, stride 2 and padding 1.
Max1 = torch.nn.MaxPool2d(2, 2, padding = 1)
print( Max1(b))


tensor([[[[-0.0390,  4.2346,  1.6433],
          [10.0859,  1.2600,  3.6550],
          [ 2.0949, 15.6823,  0.4681]],

         [[-0.3159,  9.3239,  2.8936],
          [ 4.6777,  0.7106, -1.0388],
          [ 0.9390, 10.2308,  0.4887]]]],
       grad_fn=<MaxPool2DWithIndicesBackward>)


#### And also ```nn.BatchNorm2d```

* num_features – C from an expected input of size (N, C, H, W)

* eps – a value added to the denominator for numerical stability. Default: 1e-5

* momentum – the value used for the running_mean and running_var computation. Can be set to None for cumulative moving average (i.e. simple average). Default: 0.1

* affine – a boolean value that when set to True, this module has learnable affine parameters. Default: True

* track_running_stats – a boolean value that when set to True, this module tracks the running mean and variance, and when set to False, this module does not track such statistics and always uses batch statistics in both training and eval modes. Default: True

In [31]:
# BatchNorm2d is constructed with the channel count, read from axis 1 of the conv output.
channels = b.shape[1] # N, C, H, W
BN1 = torch.nn.BatchNorm2d( channels )
# Pool the conv output first, then normalize the pooled tensor.
c = Max1(b)
print(BN1(c))

tensor([[[[-0.7463, -0.0184, -0.4598],
          [ 0.9782, -0.5251, -0.1171],
          [-0.3829,  1.9314, -0.6600]],

         [[-0.0590,  0.1074, -0.0036],
          [ 0.0272, -0.0413, -0.0715],
          [-0.0373,  0.1231, -0.0451]]]], grad_fn=<NativeBatchNormBackward>)


#### Today we are going to get familiar with 
1. ```torch.nn.Module```
2. ```torch.nn.Sequential```

import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    """Two stacked convolutions, each followed by a ReLU."""

    def __init__(self):
        super(Model, self).__init__()
        # 1 -> 20 channels, then 20 -> 20 channels, both with kernel size 5.
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        """Apply conv1 -> ReLU -> conv2 -> ReLU to ``x`` and return the result."""
        hidden = F.relu(self.conv1(x))
        activated = F.relu(self.conv2(hidden))
        return activated

In [34]:
class DemoModel(torch.nn.Module):
    """Two conv -> ReLU -> max-pool -> batch-norm stages applied back to back.

    The first stage maps 3 channels to 4, the second maps 4 channels to 8.
    """

    def __init__(self):
        super(DemoModel, self).__init__()
        nn = torch.nn

        # Stage 1: 3 -> 4 channels.
        self.conv1 = nn.Conv2d(3, 4, 3, padding = 1)
        self.max1 = nn.MaxPool2d(2, 2, padding = 1)
        self.BN1 = nn.BatchNorm2d( 4 )

        # Stage 2: 4 -> 8 channels.
        self.conv2 = nn.Conv2d(4, 8, 3, padding = 1)
        self.max2 = nn.MaxPool2d(2, 2, padding = 1)
        self.BN2 = nn.BatchNorm2d( 8 )

    def forward(self, x):
        """Run ``x`` through both stages in order and return the result."""
        stages = (
            (self.conv1, self.max1, self.BN1),
            (self.conv2, self.max2, self.BN2),
        )
        for conv, pool, norm in stages:
            # Within a stage: convolve, rectify, pool, then normalize.
            x = norm(pool(torch.relu(conv(x))))
        return x

#### And now I want to know if the DemoModel is constructed as I expected
> I will print the model

In [36]:
# Instantiate the model; printing it lists its registered submodules.
A = DemoModel()
print(A)

DemoModel(
  (conv1): Conv2d(3, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (max1): MaxPool2d(kernel_size=2, stride=2, padding=1, dilation=1, ceil_mode=False)
  (BN1): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(4, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (max2): MaxPool2d(kernel_size=2, stride=2, padding=1, dilation=1, ceil_mode=False)
  (BN2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


#### Now let's do it with ```torch.nn.Sequential```
And borrow some code from the internet

In [66]:
class Flatten(torch.nn.Module):
    """Module that collapses every axis after the first into a single axis."""

    def forward(self, x):
        """Return ``x`` viewed as (x.size(0), -1)."""
        leading = x.size(0)
        flat = x.view(leading, -1)
        return flat

In [74]:
from collections import OrderedDict
class DemoModel2(torch.nn.Module):
    """Conv/pool/batch-norm stack with a linear head, run through one Sequential.

    Every layer is stored both as an attribute of this module and as a named
    entry of ``self.combined``; ``forward`` only goes through ``self.combined``.
    """

    def __init__(self):
        super(DemoModel2, self).__init__()
        nn = torch.nn

        # Stage 1: 3 -> 4 channels.
        self.conv1 = nn.Conv2d(3, 4, 3, padding = 1)
        self.max1 = nn.MaxPool2d(3, 3, padding = 1)
        self.BN1 = nn.BatchNorm2d( 4 )

        # Stage 2: 4 -> 8 channels.
        self.conv2 = nn.Conv2d(4, 8, 3, padding = 1)
        self.max2 = nn.MaxPool2d(3, 3, padding = 1)
        self.BN2 = nn.BatchNorm2d( 8 )

        # Head: flatten, then map 32 features to 5 outputs.
        self.flat2 = Flatten()
        self.fc2 = nn.Linear(32, 5, True)

        # Assemble the named pipeline in execution order; the ReLUs exist only here.
        pipeline = OrderedDict()
        pipeline['conv1'] = self.conv1
        pipeline['relu1'] = nn.ReLU()
        pipeline['max1'] = self.max1
        pipeline['BN1'] = self.BN1
        pipeline['conv2'] = self.conv2
        pipeline['relu2'] = nn.ReLU()
        pipeline['max2'] = self.max2
        pipeline['BN2'] = self.BN2
        pipeline['flat2'] = self.flat2
        pipeline['fc2'] = self.fc2
        self.combined = nn.Sequential(pipeline)

    def forward(self, x):
        """Feed ``x`` through the combined Sequential and return its output."""
        return self.combined(x)

In [75]:
# Instantiate the Sequential-based model; the printout shows the layers both as
# direct attributes and again inside `combined`.
B = DemoModel2()
print(B)

DemoModel2(
  (conv1): Conv2d(3, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (max1): MaxPool2d(kernel_size=3, stride=3, padding=1, dilation=1, ceil_mode=False)
  (BN1): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(4, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (max2): MaxPool2d(kernel_size=3, stride=3, padding=1, dilation=1, ceil_mode=False)
  (BN2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (flat2): Flatten()
  (fc2): Linear(in_features=32, out_features=5, bias=True)
  (combined): Sequential(
    (conv1): Conv2d(3, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU()
    (max1): MaxPool2d(kernel_size=3, stride=3, padding=1, dilation=1, ceil_mode=False)
    (BN1): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(4, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu2): ReLU()
    (max2): Max

In [76]:
# Random batch of shape (5, 3, 10, 10) fed to both models.
x = torch.randn((5, 3, 10, 10))
print(x.shape)

# Run the same batch through each model and compare the output shapes.
a1 = A(x)
b1 = B(x)
print(a1.shape)
print(b1.shape)
print(b1)

torch.Size([5, 3, 10, 10])
torch.Size([5, 8, 4, 4])
torch.Size([5, 5])
tensor([[-0.2187, -0.2665, -0.1491,  0.0903, -0.2394],
        [ 0.1285, -0.0998,  0.1162,  0.1700, -0.2390],
        [-0.3168, -0.0243,  0.0458, -0.2027, -0.0322],
        [-0.1828, -0.4088,  0.0997, -0.3462, -0.1795],
        [-0.1259, -0.4868, -0.0021, -0.2592, -0.2578]],
       grad_fn=<AddmmBackward>)
