# Models

Here we look define and look through pretrained models in Pytorch as well as some famous architectures not in Pytorch.

## Table of Contents

## MLP

In [1]:
# Define a simple MLP using numpy with one hidden layer and one node
# The input is a vector of m features
# The output is a scalar
# The hidden layer has 1 node
# ReLU is used as the activation function for the hidden layer
# Loss function is the mean squared error

# Import the required libraries
import numpy as np

class SimpleMLP():
    def __init__(self, m_features):
        self.W1 = np.random.randn(m_features, 1)
        self.b1 = np.random.randn(1)
        
    def forward(self, X):
        Z1 = np.dot(X, self.W1) + self.b1
        H1 = np.maximum(0, Z1)
        Y_hat = H1
        return Y_hat, Z1
    
    def backward(self, X, Z1, Y, Y_hat):
        m = X.shape[0]
        dY_hat = Y_hat - Y              
        dZ1 = dY_hat * (Z1 > 0)         # Derivative of ReLU, element-wise
        dW1 = np.dot(X.T, dZ1) / m      # Derivative of W1
        db1 = np.sum(dZ1) / m           # Derivative of b1
        print("dW1: {}, db1: {}".format(dW1.shape, db1.shape))
        return dW1, db1
    
    def update(self, dW1, db1):
        self.W1 -= dW1 
        self.b1 -= db1
        
    def loss(self, Y, Y_hat):
        m = Y.shape[0]
        return np.sum((Y - Y_hat)**2) / (m)
    
    def train(self, X, Y, epochs):
        for i in range(epochs):
            Y_hat, Z1 = self.forward(X)
            loss = self.loss(Y, Y_hat)
            dW1, db1 = self.backward(X, Z1, Y, Y_hat)
            self.update(dW1, db1)
            print("Epoch: {}, Loss: {:.4f}".format(i, loss))

# Create a dataset
m = 100                   # Number of samples
n = 10                    # Number of features
X = np.random.randn(m, n) # 
Y = np.random.randn(m, 1)

# Create a model
model = SimpleMLP(n)

# Train the model
model.train(X, Y, 100)

dW1: (10, 1), db1: ()
Epoch: 0, Loss: 2.5399
dW1: (10, 1), db1: ()
Epoch: 1, Loss: 1.1929
dW1: (10, 1), db1: ()
Epoch: 2, Loss: 1.0066
dW1: (10, 1), db1: ()
Epoch: 3, Loss: 0.9559
dW1: (10, 1), db1: ()
Epoch: 4, Loss: 0.9391
dW1: (10, 1), db1: ()
Epoch: 5, Loss: 0.9319
dW1: (10, 1), db1: ()
Epoch: 6, Loss: 0.9259
dW1: (10, 1), db1: ()
Epoch: 7, Loss: 0.9223
dW1: (10, 1), db1: ()
Epoch: 8, Loss: 0.9198
dW1: (10, 1), db1: ()
Epoch: 9, Loss: 0.9179
dW1: (10, 1), db1: ()
Epoch: 10, Loss: 0.9165
dW1: (10, 1), db1: ()
Epoch: 11, Loss: 0.9153
dW1: (10, 1), db1: ()
Epoch: 12, Loss: 0.9144
dW1: (10, 1), db1: ()
Epoch: 13, Loss: 0.9136
dW1: (10, 1), db1: ()
Epoch: 14, Loss: 0.9130
dW1: (10, 1), db1: ()
Epoch: 15, Loss: 0.9124
dW1: (10, 1), db1: ()
Epoch: 16, Loss: 0.9119
dW1: (10, 1), db1: ()
Epoch: 17, Loss: 0.9104
dW1: (10, 1), db1: ()
Epoch: 18, Loss: 0.9096
dW1: (10, 1), db1: ()
Epoch: 19, Loss: 0.9090
dW1: (10, 1), db1: ()
Epoch: 20, Loss: 0.9087
dW1: (10, 1), db1: ()
Epoch: 21, Loss: 0.908

## SqueezeNet

In [114]:
import torch
import torchvision.models as models
import torchsummary as summary
import torchinfo
from torchinfo import summary
import os

model_name = "squeezenet1_0"
model_path = os.path.expanduser(f"~/Developer/Models/{model_name}.pth")
if not os.path.exists(model_path):
    model = models.squeezenet1_0(weights='IMAGENET1K_V1')
else:
    model = models.squeezenet1_0(weights='IMAGENET1K_V1')
    model.load_state_dict(torch.load(model_path))
    
print(summary(model, input_size=(1, 3, 224, 224)))
    
torch.save(model.state_dict(), model_path)

Layer (type:depth-idx)                   Output Shape              Param #
SqueezeNet                               [1, 1000]                 --
├─Sequential: 1-1                        [1, 512, 13, 13]          --
│    └─Conv2d: 2-1                       [1, 96, 109, 109]         14,208
│    └─ReLU: 2-2                         [1, 96, 109, 109]         --
│    └─MaxPool2d: 2-3                    [1, 96, 54, 54]           --
│    └─Fire: 2-4                         [1, 128, 54, 54]          --
│    │    └─Conv2d: 3-1                  [1, 16, 54, 54]           1,552
│    │    └─ReLU: 3-2                    [1, 16, 54, 54]           --
│    │    └─Conv2d: 3-3                  [1, 64, 54, 54]           1,088
│    │    └─ReLU: 3-4                    [1, 64, 54, 54]           --
│    │    └─Conv2d: 3-5                  [1, 64, 54, 54]           9,280
│    │    └─ReLU: 3-6                    [1, 64, 54, 54]           --
│    └─Fire: 2-5                         [1, 128, 54, 54]          --
│ 

## MobileNetV1


In [2]:
import torch
import torchvision.models as models
import torchsummary as summary
import os

model_name = "mobilenet_v1"
model_path = os.path.expanduser(f"~/Developer/Models/{model_name}.pth")
if not os.path.exists(model_path):
    model = models.mobilenet.
else:
    model = models.mobilenet(weighs='IMAGENET1K_V1')
    model.load_state_dict(torch.load(model_path))

print(summary(model, input_size=(1, 3, 224, 224)))
torch.save(model.state_dict(), model_path)

TypeError: 'module' object is not callable

## SeNet (2017)
Because SeNet is not definde in Pytorch, we design a simple one ourselves.

In [102]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchsummary as summary
import os

class ResidualBlock(nn.Module):
    def __init__(self):
        """Residual Block"""
        

class SEBasicBlock(nn.Module):
    def __init__(self, inplanes, planes, stride=1, 
                 downsample=None, groups=1, base_width=64, dilation=1, norm_layer=None, *, reduction=16):
        """Squeeze Excitation Block with Fully Connected Layers
        
        Args:
            inplanes (int): input channels
            planes (int): output channels
        """
        
        super(SEBasicBlock, self).__init__()
        
        
        self.downsample = downsample
        
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        
        return x

class SELayer(nn.Module): # Also called SE-Module
    """Squeeze and Excitation Layer as described in https://arxiv.org/pdf/1709.01507.pdf"""
    def __init__(self, channels, reduction):
        super(SELayer, self).__init__()
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x: torch.Tensor) -> torch.Tensor :
        module_input = x
        x = self.global_pool(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.sigmoid(x)
        print("5.4.", x.shape, module_input.shape)
        return module_input * x

class SEBottleNeckBlock(nn.Module):
    """ """
    expansion = 4
    
    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(SEBottleNeckBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=inplanes, out_channels=planes,
                               kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(num_features=planes)
        self.conv2 = nn.Conv2d(in_channels=planes, out_channels=planes,
                               kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(num_features=64)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.se_module = SELayer(channels=planes * 4, reduction=4)
        
        self.downsample = downsample
        self.stride = stride
        
    
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out1 = self.se_module(out)
        out2 = out1 + residual
        out2 = self.relu(out)

        return out2


class SEResNet(nn.Module):
    def __init__(self, block, layers, num_classes=1000):
        super(SEResNet, self).__init__()
        """SeNet Neural Network Architecture
        
        Args:
            block (): 
            layers (): 
            num_classes (int): number of resulting classes, i.e. ImageNet has 1000 classes
        """
        
        self.inplanes = 64
        
        # Regular Block
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=True)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1)
        # SELayers
        self.layer1 = self._make_layer(block, 64, layers[0])
        # self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 
        # self.layer2 = self._make_layer(block, 256, layers[2], stride=2) 
        # self.layer2 = self._make_layer(block, 512, layers[3], stride=2) 
        
        # Output
        self.avgpool = nn.AvgPool2d(kernel_size=7)
        self.fc = nn.Linear(in_features=64 * 256, out_features=num_classes) # Recalculate this
        
    def _make_layer(self, block, planes, block_count, stride=1):
        """ 
        block : block type
        planes : number of output channels
        blocks : number of blocks per layer
        stride : stride
        """
        
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )
            
        se_blocks = []
        se_blocks.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, block_count):
            se_blocks.append(block(self.inplanes, planes))
            
        return nn.Sequential(*se_blocks)

        
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)
        x = self.layer1(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
    

In [103]:
model_name = "SeResNet_v1"
model_path = os.path.expanduser(f"~/Developer/Models/{model_name}.pth")
if True or not os.path.exists(model_path):
    model = SEResNet(block=SEBottleNeckBlock, layers=[3], num_classes=1000)
else:
    model = torch.load(model_path)
    
# print(model.eval())
torch.save(model, model_path)

In [104]:
tensor = torch.randn(1, 3, 224, 224)
y_pred = model(tensor)


1. torch.Size([1, 3, 224, 224])
2. torch.Size([1, 64, 112, 112])
3. torch.Size([1, 64, 112, 112])
4. torch.Size([1, 64, 112, 112])
5. torch.Size([1, 64, 56, 56])
5.05 torch.Size([1, 64, 56, 56])
5.1 torch.Size([1, 64, 56, 56])
5.2 torch.Size([1, 64, 56, 56])
5.3 torch.Size([1, 256, 56, 56])
5.4. torch.Size([1, 256, 1, 1]) torch.Size([1, 256, 56, 56])
6. torch.Size([1, 256, 56, 56]) torch.Size([1, 256, 56, 56])
5.05 torch.Size([1, 256, 56, 56])
5.1 torch.Size([1, 64, 56, 56])
5.3 torch.Size([1, 256, 56, 56])
5.4. torch.Size([1, 256, 1, 1]) torch.Size([1, 256, 56, 56])
6. torch.Size([1, 256, 56, 56]) torch.Size([1, 256, 56, 56])
5.05 torch.Size([1, 256, 56, 56])
5.1 torch.Size([1, 64, 56, 56])
5.3 torch.Size([1, 256, 56, 56])
5.4. torch.Size([1, 256, 1, 1]) torch.Size([1, 256, 56, 56])
6. torch.Size([1, 256, 56, 56]) torch.Size([1, 256, 56, 56])
7. torch.Size([1, 256, 56, 56])
8. torch.Size([1, 256, 8, 8])
9. torch.Size([1, 16384])


In [None]:
# Train the model
import torch
import torch.nn as nn
import torchvision.models as models
import torchsummary as summary
import os 

