In [2]:
import os
import numpy as np
import pandas as pd
import PIL.Image as Image

import torch 
import torch.nn as nn
import torch.nn.functional as F 
from torchvision.transforms import Compose, ToTensor, Resize


In [3]:
test_image = os.path.join('..','data','down-stream','binary','Normal','1.jpg')

In [4]:
test_image = Image.open(test_image)

In [5]:
transforms = Compose([Resize((299,299)),
    ToTensor()
    ])

In [6]:
test_image = transforms(test_image).unsqueeze(0)
test_image.shape

torch.Size([1, 3, 299, 299])

In [7]:
class ConvBlock(nn.Module):
    """Convolution followed by batch normalisation and an in-place ReLU.

    Every keyword argument is forwarded unchanged to ``nn.Conv2d``. The
    ``out_channels`` keyword is required because it is also used to size the
    ``nn.BatchNorm2d`` layer.
    """

    def __init__(self, **kwargs):
        super().__init__()

        # Built in conv -> norm -> activation order, then wrapped in a single
        # Sequential so the sub-modules are addressed as block.0 / block.1 / block.2.
        conv = nn.Conv2d(**kwargs)
        norm = nn.BatchNorm2d(kwargs['out_channels'])
        activation = nn.ReLU(inplace=True)
        self.block = nn.Sequential(conv, norm, activation)

    def forward(self, x):
        """Apply conv -> batch norm -> ReLU to ``x`` and return the result."""
        return self.block(x)

In [8]:
class InceptionBlock(nn.Module):
    """Four parallel branches whose outputs are concatenated along dim 1.

    Args:
        in_channels: channels of the incoming feature map.
        ch1x1: output channels of the 1x1 branch.
        ch3x3red: channels of the 1x1 reduction preceding the 3x3 convolution.
        ch3x3: output channels of the 3x3 branch.
        ch5x5red: channels of the 1x1 reduction preceding the 5x5 convolution.
        ch5x5: output channels of the 5x5 branch.
        pool_proj: output channels of the 1x1 projection after max pooling.

    The concatenated output has ``ch1x1 + ch3x3 + ch5x5 + pool_proj`` channels.
    """

    def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj):
        super().__init__()

        # Branch 1: a single 1x1 convolution.
        self.branch1 = ConvBlock(in_channels=in_channels, out_channels=ch1x1,
                                 kernel_size=1, bias=False)

        # Branch 2: 1x1 reduction, then 3x3 convolution (padding 1 keeps H x W).
        reduce_3x3 = ConvBlock(in_channels=in_channels, out_channels=ch3x3red,
                               kernel_size=1, bias=False)
        conv_3x3 = ConvBlock(in_channels=ch3x3red, out_channels=ch3x3,
                             kernel_size=3, padding=1, bias=False)
        self.branch2 = nn.Sequential(reduce_3x3, conv_3x3)

        # Branch 3: 1x1 reduction, then 5x5 convolution (padding 2 keeps H x W).
        reduce_5x5 = ConvBlock(in_channels=in_channels, out_channels=ch5x5red,
                               kernel_size=1, bias=False)
        conv_5x5 = ConvBlock(in_channels=ch5x5red, out_channels=ch5x5,
                             kernel_size=5, padding=2, bias=False)
        self.branch3 = nn.Sequential(reduce_5x5, conv_5x5)

        # Branch 4: stride-1 3x3 max pool, then 1x1 projection.
        pool = nn.MaxPool2d(kernel_size=3, padding=1, stride=1, ceil_mode=True)
        projection = ConvBlock(in_channels=in_channels, out_channels=pool_proj,
                               kernel_size=1, bias=False)
        self.branch4 = nn.Sequential(pool, projection)

    def forward(self, x):
        """Run all four branches on ``x`` and concatenate them on dim 1."""
        branch_modules = (self.branch1, self.branch2, self.branch3, self.branch4)
        outputs = [branch(x) for branch in branch_modules]
        return torch.cat(outputs, 1)

In [9]:
class InceptionAux(nn.Module):
    """Auxiliary classifier head: pool -> 1x1 conv -> two linear layers.

    Args:
        in_channels: channels of the feature map fed to the head.
        output_dim: number of output logits.
        aux_clf: when truthy, dropout (p=0.7) is applied between the two
            linear layers; when falsy the dropout layer is skipped.
    """

    def __init__(self, in_channels, output_dim, aux_clf):
        super().__init__()
        self.aux_clf = aux_clf

        self.conv1 = ConvBlock(in_channels=in_channels, out_channels=128,
                               kernel_size=1, bias=False)

        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(4, 4))

        # 2048 = 128 channels from conv1 * 4 * 4 spatial positions from avgpool.
        self.fc1 = nn.Linear(in_features=2048, out_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=output_dim)

        self.dropout = nn.Dropout(p=0.7)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return the auxiliary logits for the feature map ``x``."""
        pooled = self.avgpool(x)
        features = torch.flatten(self.conv1(pooled), 1)
        hidden = self.relu(self.fc1(features))
        if self.aux_clf:
            hidden = self.dropout(hidden)
        return self.fc2(hidden)
        
        

In [10]:
# Smoke test: run the auxiliary head directly on the 3-channel sample image.
model = InceptionAux(in_channels=3, output_dim=3, aux_clf=True)
model(test_image)

tensor([[-0.0014,  0.0684,  0.5694]], grad_fn=<AddmmBackward>)

In [11]:
class GoogleNet(nn.Module):
    """GoogLeNet-style network assembled from ConvBlock / InceptionBlock modules.

    Args:
        image_channels: channels of the input image.
        output_dim: number of logits produced by the main and auxiliary heads.
        clf: when truthy, the main head (global average pool -> flatten ->
            dropout -> linear) is applied; otherwise the raw output of
            ``inception5b`` is returned in its place.
        aux_clf: when truthy, two ``InceptionAux`` heads are created (after
            ``inception4a`` and ``inception4d``) and ``forward`` returns a
            3-tuple ``(main, aux1, aux2)``; otherwise only the main output
            is returned.
    """

    def __init__(self, image_channels=3, output_dim=1000, clf=True, aux_clf=True):
        super().__init__()

        self.aux_clf = aux_clf
        self.clf = clf

        def downsample():
            # Every down-sampling step in the network uses the same pooling setup.
            return nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        # Stem.
        self.conv1 = ConvBlock(in_channels=image_channels, out_channels=64,
                               kernel_size=7, stride=2, padding=3, bias=False)
        self.max_pool1 = downsample()

        self.conv2 = ConvBlock(in_channels=64, out_channels=64,
                               kernel_size=1, bias=False)
        self.conv3 = ConvBlock(in_channels=64, out_channels=192,
                               kernel_size=3, padding=1, bias=False)
        self.max_pool2 = downsample()

        # InceptionBlock arguments, in order:
        #   in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj
        self.inception3a = InceptionBlock(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = InceptionBlock(256, 128, 128, 192, 32, 96, 64)
        self.max_pool3 = downsample()

        self.inception4a = InceptionBlock(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = InceptionBlock(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = InceptionBlock(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = InceptionBlock(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = InceptionBlock(528, 256, 160, 320, 32, 128, 128)
        self.max_pool4 = downsample()

        self.inception5a = InceptionBlock(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = InceptionBlock(832, 384, 192, 384, 48, 128, 128)

        # Main classification head.
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.dropout = nn.Dropout(p=0.2)
        self.fc1 = nn.Linear(in_features=1024, out_features=output_dim)

        # Auxiliary heads consume the 512-channel output of inception4a and
        # the 528-channel output of inception4d.
        if aux_clf:
            self.aux1 = InceptionAux(in_channels=512, output_dim=output_dim, aux_clf=aux_clf)
            self.aux2 = InceptionAux(in_channels=528, output_dim=output_dim, aux_clf=aux_clf)

    def forward(self, x):
        """Run the network on ``x``.

        Returns ``(main, aux1, aux2)`` when ``self.aux_clf`` is truthy and
        just ``main`` otherwise. ``main`` is the logits of the main head when
        ``self.clf`` is truthy, else the output of ``inception5b``.
        """
        # Stem and stage 3, up to the tap point of the first auxiliary head.
        for layer in (self.conv1, self.max_pool1, self.conv2, self.conv3,
                      self.max_pool2, self.inception3a, self.inception3b,
                      self.max_pool3, self.inception4a):
            x = layer(x)
        aux1_out = self.aux1(x) if self.aux_clf else None

        # Middle of stage 4, up to the tap point of the second auxiliary head.
        for layer in (self.inception4b, self.inception4c, self.inception4d):
            x = layer(x)
        aux2_out = self.aux2(x) if self.aux_clf else None

        # Remainder of stage 4 and stage 5.
        for layer in (self.inception4e, self.max_pool4,
                      self.inception5a, self.inception5b):
            x = layer(x)

        if self.clf:
            x = torch.flatten(self.avgpool(x), 1)
            x = self.fc1(self.dropout(x))

        if not self.aux_clf:
            return x
        return x, aux1_out, aux2_out

In [12]:
# Build the full network with its default 1000-way main and auxiliary heads.
model = GoogleNet(image_channels=3)

# Forward-pass smoke test. A bare expression in the middle of a cell is
# evaluated and thrown away, so the shape check is made explicit: index 2 of
# the returned tuple is the second auxiliary head's logits for the single
# sample image, one row of 1000 values.
assert model(test_image)[2].shape == (1, 1000)

# Display the module tree.
model

GoogleNet(
  (conv1): ConvBlock(
    (block): Sequential(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
  )
  (max_pool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (conv2): ConvBlock(
    (block): Sequential(
      (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
  )
  (conv3): ConvBlock(
    (block): Sequential(
      (0): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
  )
  (max_pool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (inception3a): Inceptio

In [13]:
def count_parameters(model):
    """Return the total element count of ``model``'s trainable parameters.

    Only parameters whose ``requires_grad`` flag is set are included.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Report the trainable-parameter count of `model` with thousands separators.
print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 13,385,816 trainable parameters


In [14]:
from torchvision.models import GoogLeNet

In [15]:
# Instantiate torchvision's GoogLeNet with its default arguments and display
# its module tree for comparison.
model1 = GoogLeNet()
model1



GoogLeNet(
  (conv1): BasicConv2d(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (conv2): BasicConv2d(
    (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv3): BasicConv2d(
    (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (inception3a): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track

In [16]:
def count_parameters(model):
    """Return the total element count of ``model``'s trainable parameters.

    Only parameters whose ``requires_grad`` flag is set are included.
    The count is taken from the ``model`` argument itself rather than from
    any notebook-level variable, so the function works for whichever module
    is passed in.
    """
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

# Report the trainable-parameter count of `model1` with thousands separators.
print(f'The model has {count_parameters(model1):,} trainable parameters')

The model has 13,004,888 trainable parameters


In [17]:
# Scratch arithmetic: the 13,004,888 count reported for `model1` minus 13,012,424.
# NOTE(review): the origin of 13,012,424 is not shown in this notebook — confirm or remove.
13004888-13012424

-7536