# Layers and Blocks

## Construct an MLP

In [1]:
import torch
import torch.nn as nn

# Two-layer perceptron: 20 input features -> 256 hidden units (ReLU) -> 10 outputs.
net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)

### Forward

In [2]:
# Batch of 2 samples with 20 features each, drawn uniformly from [0, 1).
x = torch.rand(2, 20)
def init_weights(m):
    """Re-initialize a fully connected layer in place.

    Meant to be handed to ``nn.Module.apply``. Any module whose type is not
    exactly ``nn.Linear`` is left untouched.

    Args:
        m: The module to (possibly) re-initialize.
    """
    # Exact type match (rather than isinstance) is kept on purpose so that
    # subclasses of nn.Linear are not re-initialized.
    if type(m) is not nn.Linear:
        return
    # Draw the weights from a normal distribution with a mean of 0 and a
    # standard deviation of 0.01. nn.init operates in place on the parameter,
    # so there is no need to go through `.data`.
    nn.init.normal_(m.weight, mean=0.0, std=0.01)
    # nn.Linear does not zero its bias by default, so set it explicitly.
    # Layers built with bias=False have `bias is None` and are skipped.
    if m.bias is not None:
        nn.init.zeros_(m.bias)

# Run init_weights over the network's modules via nn.Module.apply.
net.apply(init_weights)
# Forward pass on the batch x; as the cell's last expression its result is displayed.
net(x)

tensor([[-2.6703e-04, -1.1188e-03, -1.5502e-03, -3.7548e-03, -1.3014e-04,
          4.5263e-03,  1.2260e-03, -1.9469e-03, -1.0568e-03,  8.9062e-04],
        [ 2.9756e-05,  1.4792e-04, -4.5574e-04, -6.7872e-04,  5.3927e-04,
          4.6126e-03,  5.9986e-04,  1.3424e-03, -2.8244e-03,  2.9399e-03]],
       grad_fn=<AddmmBackward>)

## Implement the Same MLP with a Custom Block

In [3]:
class MLP(nn.Module):
    """Multilayer perceptron: Linear(20, 256) -> ReLU -> Linear(256, 10)."""

    def __init__(self):
        super().__init__()
        # Hidden layer, its activation, and the output layer.
        self.hidden = nn.Linear(in_features=20, out_features=256)
        self.relu = nn.ReLU()
        self.output = nn.Linear(in_features=256, out_features=10)

    def forward(self, x):
        """Pass ``x`` through the hidden layer, the ReLU and the output layer."""
        hidden_activations = self.relu(self.hidden(x))
        return self.output(hidden_activations)

### Forward 

In [4]:
# Module.apply returns the module itself, so construction and
# initialization can be written as a single expression.
net = MLP().apply(init_weights)
net(x)

tensor([[ 0.0001,  0.0007, -0.0021,  0.0013,  0.0041, -0.0046,  0.0039,  0.0027,
         -0.0002, -0.0017],
        [ 0.0035,  0.0029, -0.0016,  0.0032,  0.0027, -0.0026,  0.0025,  0.0016,
         -0.0010, -0.0016]], grad_fn=<AddmmBackward>)

## Blocks with Code

In [5]:
class FancyMLP(nn.Module):
    """Block whose forward pass mixes layers with arbitrary Python code.

    The input goes through ``fc1``, is multiplied by a fixed random matrix,
    shifted by 1 and passed through ReLU, then goes through ``fc2``. The
    result is rescaled with ordinary Python control flow and reduced to a
    scalar by summation.
    """

    def __init__(self):
        super().__init__()
        # Constant random weights drawn from U(0, 1). They remain a registered
        # parameter (still present in state_dict() and parameters()), but
        # requires_grad=False means autograd never computes a gradient for
        # them, so training leaves them unchanged.
        self.rand_weight = nn.Parameter(
            torch.empty(20, 20).uniform_(0, 1), requires_grad=False
        )
        self.fc1 = nn.Linear(20, 20)
        self.fc2 = nn.Linear(20, 256)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return a scalar tensor computed from the batch ``x``.

        Args:
            x: Input whose last dimension is 20 (the input size of ``fc1``).

        Returns:
            The sum of all elements of the rescaled ``fc2`` output.
        """
        x = self.fc1(x)
        # Multiply by the frozen weights. Gradients still flow back to fc1
        # through x, but none are produced for rand_weight.
        x = self.relu(torch.matmul(x, self.rand_weight) + 1)
        # A second, separate fully connected layer (20 -> 256).
        x = self.fc2(x)
        # Plain Python control flow driven by tensor values: halve until the
        # norm is at most 1, then scale up by 10 if it ended below 0.8.
        # Out-of-place ops avoid mutating a tensor tracked by autograd.
        while x.norm().item() > 1:
            x = x / 2
        if x.norm().item() < 0.8:
            x = x * 10
        return x.sum()

### Forward

In [6]:
# Module.apply returns the module itself, so construction and
# initialization can be written as a single expression.
net = FancyMLP().apply(init_weights)
net(x)

tensor(-3.3616, grad_fn=<SumBackward0>)

## Mix Things Together

In [7]:
class NestMLP(nn.Module):
    """Block that nests an ``nn.Sequential`` next to individually held layers.

    Data flows through ``net`` (Linear 20->64, ReLU, Linear 64->32, ReLU),
    then ``fc`` (Linear 32->16), then a final ReLU.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        feature_layers = [
            nn.Linear(20, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
        ]
        self.net = nn.Sequential(*feature_layers)
        self.fc = nn.Linear(32, 16)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run ``x`` through the nested stack, the final linear layer and ReLU."""
        features = self.net(x)
        return self.relu(self.fc(features))
    
# Module.apply returns the module itself, so construction and
# initialization can be written as a single expression.
chimera = NestMLP().apply(init_weights)
print(chimera)
chimera(x)

NestMLP(
  (net): Sequential(
    (0): Linear(in_features=20, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
  )
  (fc): Linear(in_features=32, out_features=16, bias=True)
  (relu): ReLU()
)


tensor([[0.0000e+00, 4.2913e-05, 0.0000e+00, 7.0529e-05, 0.0000e+00, 1.8853e-05,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         7.3905e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 3.8530e-05, 0.0000e+00, 8.4401e-05, 0.0000e+00, 7.9739e-06,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         6.3707e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00]],
       grad_fn=<ReluBackward0>)