In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Fixed seed so the random tensors and layer initialisations in this notebook are repeatable.
seed = 1337

# Seed the default (CPU) generator ...
torch.manual_seed(seed)
# ... and the generators of all CUDA devices.
torch.cuda.manual_seed_all(seed)

### Example 4-1

In [None]:
class MultilayerPerceptron(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear, with an optional softmax."""

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Create the two fully connected layers.

        Args:
            input_dim (int): number of features in each input vector
            hidden_dim (int): number of units produced by the first Linear layer
            output_dim (int): number of values produced by the second Linear layer
        """
        super(MultilayerPerceptron, self).__init__()
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x_in, apply_softmax=False):
        """Compute the MLP output for a batch of inputs.

        Args:
            x_in (torch.Tensor): the input batch;
                x_in.shape should be (batch, input_dim)
            apply_softmax (bool): if True, a softmax over dim 1 is applied to
                the output; keep it False when the result is fed to one of
                the Cross Entropy losses
        Returns:
            torch.Tensor: the result; its shape should be (batch, output_dim)
        """
        hidden = torch.relu(self.fc1(x_in))
        logits = self.fc2(hidden)

        # Guard clause: the common case returns the raw scores untouched.
        if not apply_softmax:
            return logits
        return F.softmax(logits, dim=1)

### Example 4-2

In [None]:
# Sizes used throughout the toy example
batch_size = 2  # number of samples input at once
input_dim, hidden_dim, output_dim = 3, 100, 4

# Initialize model
mlp = MultilayerPerceptron(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
)
print(mlp)

MultilayerPerceptron(
  (fc1): Linear(in_features=3, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=4, bias=True)
)


### Example 4-3

In [None]:
def describe(x):
    """Print the type, shape and values of a tensor.

    Args:
        x: an object exposing ``type()`` and ``shape`` (e.g. a torch.Tensor)
    """
    type_line = "Type: {}".format(x.type())
    print(type_line)

    shape_line = "Shape/size: {}".format(x.shape)
    print(shape_line)

    values_block = "Values: \n{}".format(x)
    print(values_block)

In [None]:
# Inputs
# Random input batch of shape (batch_size, input_dim)
x_input = torch.rand(batch_size, input_dim)
describe(x_input)

Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[0.8329, 0.4277, 0.4363],
        [0.9686, 0.6316, 0.8494]])


In [None]:
# Forward pass without the softmax: the values come straight from the second Linear layer
y_output = mlp(x_input, apply_softmax=False)
describe(y_output)

Type: torch.FloatTensor
Shape/size: torch.Size([2, 4])
Values: 
tensor([[-0.2456,  0.0723,  0.1589, -0.3294],
        [-0.3497,  0.0828,  0.3391, -0.4271]], grad_fn=<AddmmBackward>)


### Example 4-4

In [None]:
# Same forward pass, but with the softmax applied over dim 1 of the output
y_output = mlp(x_input, apply_softmax=True)
describe(y_output)

Type: torch.FloatTensor
Shape/size: torch.Size([2, 4])
Values: 
tensor([[0.2087, 0.2868, 0.3127, 0.1919],
        [0.1832, 0.2824, 0.3649, 0.1696]], grad_fn=<SoftmaxBackward>)


### Example 4-13

In [None]:
class MultilayerPerceptron(nn.Module):
    """Two-layer perceptron with dropout applied to the hidden representation."""

    def __init__(self, input_dim, hidden_dim, output_dim):
        """
        Args:
            input_dim (int): the size of the input vectors
            hidden_dim (int): the output size of the first Linear layer
            output_dim (int): the output size of the second Linear layer
        """
        super(MultilayerPerceptron, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x_in, apply_softmax=False):
        """The forward pass of the MLP

        Dropout (p=0.5) is applied to the hidden activations only while the
        module is in training mode; after ``model.eval()`` the forward pass
        is deterministic.

        Args:
            x_in (torch.Tensor): an input data tensor.
                x_in.shape should be (batch, input_dim)
            apply_softmax (bool): a flag for the softmax activation
                should be false if used with the Cross Entropy losses
        Returns:
            the resulting tensor. tensor.shape should be (batch, output_dim)
        """
        intermediate = F.relu(self.fc1(x_in))
        # F.dropout defaults to training=True, so the module's own mode must be
        # forwarded explicitly; otherwise dropout stays active during evaluation.
        dropped = F.dropout(intermediate, p=0.5, training=self.training)
        output = self.fc2(dropped)

        if apply_softmax:
            output = F.softmax(output, dim=1)
        return output

# Sizes used for the dropout variant of the toy example
batch_size = 2  # number of samples input at once
input_dim, hidden_dim, output_dim = 3, 100, 4

# Initialize model
mlp = MultilayerPerceptron(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
)
print(mlp)

# Forward pass on the existing input batch, without the softmax
y_output = mlp(x_input, apply_softmax=False)
describe(y_output)

MultilayerPerceptron(
  (fc1): Linear(in_features=3, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=4, bias=True)
)
Type: torch.FloatTensor
Shape/size: torch.Size([2, 4])
Values: 
tensor([[ 0.0193,  0.0275,  0.2319,  0.3032],
        [-0.5323,  0.3183,  0.4194, -0.0205]], grad_fn=<AddmmBackward>)


### Example 4-14

In [None]:
batch_size, one_hot_size, sequence_width = 2, 10, 7

# Random batch laid out as (batch, channels, length) for Conv1d
data = torch.randn((batch_size, one_hot_size, sequence_width))

# First convolution: one_hot_size input channels -> 16 output channels
conv1 = nn.Conv1d(in_channels=one_hot_size, out_channels=16, kernel_size=3)
intermediate1 = conv1(data)

print(data.shape)
print(intermediate1.shape)

torch.Size([2, 10, 7])
torch.Size([2, 16, 5])


### Example 4-15

In [None]:
# Two further convolutions; each one takes the previous layer's out_channels as its in_channels
conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3)
conv3 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3)

# Chain them on top of the first convolution's output
intermediate2 = conv2(intermediate1)
intermediate3 = conv3(intermediate2)

# Show how the channel and length axes change after each layer
print(intermediate2.size())
print(intermediate3.size())

torch.Size([2, 32, 3])
torch.Size([2, 64, 1])


In [None]:
# Remove only the trailing length axis (dim 2). A bare squeeze() drops every
# size-1 axis, which would also remove the batch axis when batch_size is 1.
y_output = intermediate3.squeeze(dim=2)
print(y_output.size())

torch.Size([2, 64])


In [None]:
# Average over dim 0, then over dim 1 of the reduced tensor, and sum what remains into a scalar
intermediate2.mean(dim=0).mean(dim=1).sum()

tensor(-0.0493, grad_fn=<SumBackward0>)

### Example 4-16

In [None]:
# Method 2 of reducing to feature vectors: flatten everything after the batch axis
print(intermediate1.view(batch_size, -1).size())

# Method 3 of reducing to feature vectors: aggregate over the length axis (dim 2)
print(torch.mean(intermediate1, dim=2).size())
# Alternative aggregations. Note that torch.max with a dim argument returns a
# (values, indices) pair, so the values must be selected before calling .size().
# print(torch.max(intermediate1, dim=2)[0].size())
# print(torch.sum(intermediate1, dim=2).size())

torch.Size([2, 80])
torch.Size([2, 16])


### Example 4-22

The full model will not be reproduced here. Instead, we will just show batch norm being used.  

In [None]:
# Three stacked 1-D convolutions
conv1 = nn.Conv1d(in_channels=one_hot_size, out_channels=16, kernel_size=3)
conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3)
conv3 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3)

# One batch-norm layer per hidden convolution, sized to that convolution's output channels
conv1_bn = nn.BatchNorm1d(conv1.out_channels)
conv2_bn = nn.BatchNorm1d(conv2.out_channels)

# conv -> ReLU -> batch norm for the first two stages; the last convolution is used as is
intermediate1 = conv1_bn(torch.relu(conv1(data)))
intermediate2 = conv2_bn(torch.relu(conv2(intermediate1)))
intermediate3 = conv3(intermediate2)

print(intermediate1.shape)
print(intermediate2.shape)
print(intermediate3.shape)

torch.Size([2, 16, 5])
torch.Size([2, 32, 3])
torch.Size([2, 64, 1])


Note: BatchNorm computes its statistics over the batch and sequence dimensions. In other words, the input to each `BatchNorm1d` is a tensor of size `(B, C, L)` (where B=batch, C=channels, and L=length), and for each channel the corresponding `(B, L)` slice should have zero mean after normalization. This reduces covariate shift.

In [None]:
# Mean over dims 0 and 2 at once, leaving one value per channel (dim 1)
intermediate2.mean(dim=(0, 2))

tensor([-2.9802e-08,  1.2418e-09,  0.0000e+00, -1.9868e-08, -9.9341e-09,
         9.9341e-09,  2.4835e-09, -9.9341e-09, -1.2418e-09, -1.9868e-08,
         1.8626e-09,  0.0000e+00, -1.2418e-09,  0.0000e+00,  1.9868e-08,
         9.3132e-10, -4.9671e-09, -9.9341e-09,  2.9802e-08,  9.9341e-09,
        -9.7013e-11,  9.9341e-09,  1.3970e-09,  0.0000e+00,  9.9341e-09,
         9.9341e-09,  1.9868e-08, -4.9671e-09, -1.2418e-09,  4.4703e-08,
         4.9671e-09, -5.9605e-08], grad_fn=<MeanBackward0>)


## Bonus Examples

In chapter 4, we cover convolutions. Below are code examples that instantiate convolutions with various hyperparameter settings.

In [None]:
# Random 4-D input of size (1, 2, 3, 3); dim 1 holds the 2 input channels
x = torch.randn(1, 2, 3, 3)
describe(x)

# 2-D convolution mapping 2 input channels to 1 output channel with kernel_size=2
conv1 = nn.Conv2d(in_channels=2, out_channels=1, kernel_size=2)
describe(conv1.weight)
describe(conv1(x))

Type: torch.FloatTensor
Shape/size: torch.Size([1, 2, 3, 3])
Values: 
tensor([[[[-1.3831,  0.5164,  0.2551],
          [-0.4873,  1.1319,  1.4091],
          [-1.0097,  1.3822,  2.5432]],

         [[-0.5137, -0.1112, -0.7682],
          [ 1.0231, -1.3065,  0.2210],
          [-0.3294,  0.6213,  2.1973]]]])
Type: torch.FloatTensor
Shape/size: torch.Size([1, 2, 2, 2])
Values: 
Parameter containing:
tensor([[[[-0.0395, -0.2742],
          [-0.1382,  0.0440]],

         [[-0.2400,  0.1503],
          [ 0.1100, -0.1167]]]], requires_grad=True)
Type: torch.FloatTensor
Shape/size: torch.Size([1, 1, 2, 2])
Values: 
tensor([[[[ 0.5734, -0.2716],
          [-0.4697, -0.1801]]]], grad_fn=<MkldnnConvolutionBackward>)


In [None]:
# Random 4-D input of size (1, 1, 3, 3); dim 1 holds the single input channel
x = torch.randn(1, 1, 3, 3)
describe(x)

# 2-D convolution mapping 1 input channel to 2 output channels with kernel_size=2
conv1 = nn.Conv2d(in_channels=1, out_channels=2, kernel_size=2)
describe(conv1.weight)
describe(conv1(x))

Type: torch.FloatTensor
Shape/size: torch.Size([1, 1, 3, 3])
Values: 
tensor([[[[-0.2682,  0.4390,  1.3682],
          [ 0.3038,  0.8558, -0.5000],
          [ 1.5619, -0.5929,  0.6817]]]])
Type: torch.FloatTensor
Shape/size: torch.Size([2, 1, 2, 2])
Values: 
Parameter containing:
tensor([[[[-0.0029, -0.3377],
          [-0.3707,  0.3836]]],


        [[[ 0.2779, -0.3865],
          [-0.1691,  0.4410]]]], requires_grad=True)
Type: torch.FloatTensor
Shape/size: torch.Size([1, 2, 2, 2])
Values: 
tensor([[[[ 0.4943, -0.5463],
          [-0.6703,  1.0738]],

         [[-0.1458, -0.9997],
          [-0.9996,  0.6042]]]], grad_fn=<MkldnnConvolutionBackward>)
