In [None]:

from torch import nn
from torchvision import models
from torchinfo import summary
import torch
# Fail fast when no CUDA device is visible: the rest of this cell assumes a GPU.
assert torch.cuda.is_available()
# Handle for the first CUDA device.
device = torch.device('cuda:0')
# Global cuDNN switch: allow cuDNN to benchmark convolution algorithms and
# pick the fastest one for the shapes it sees.
torch.backends.cudnn.benchmark = True
# MNASNet (depth multiplier 0.5) from torchvision, requested with pretrained
# weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=True` in
# favour of the `weights=` argument - confirm against the installed version.
pretrained = models.mnasnet0_5(pretrained=True)


class MyAlexNet(nn.Module):
    """Wrap an existing model and append an 18-output linear head.

    Despite the class name, nothing here is specific to AlexNet: any module
    whose output has 1000 features in its last dimension can be used as the
    backbone.

    Args:
        my_pretrained_model: module applied to the input first; it is stored
            as ``self.pretrained`` and its output is fed to the new head.
    """

    def __init__(self, my_pretrained_model):
        super().__init__()
        self.pretrained = my_pretrained_model
        # Extra trainable layer mapping the 1000 backbone outputs to 18 values.
        self.my_new_layers = nn.Linear(in_features=1000, out_features=18)

    def forward(self, x):
        """Run the backbone on ``x`` and pass its output through the new head."""
        backbone_out = self.pretrained(x)
        return self.my_new_layers(backbone_out)


In [2]:
# %%writefile WillyNet.py

from torch import nn
from torch.nn import functional as F
import torch
# Handle for the first CUDA device.
# NOTE(review): not referenced by any of the definitions visible in this cell.
device = torch.device('cuda:0')

def conv(inChannels,outChannels,dilation,groups,dropProbability=0,kernalSize=3):
    """
        Build the four layers that make up one convolution stage.

        Returns a tuple, in application order, of:
        Conv2d (stride 1, no padding argument), BatchNorm2d, an in-place Mish
        activation and an in-place Dropout2d with probability dropProbability.
    """
    convolution = nn.Conv2d(
        inChannels, outChannels, kernalSize,
        stride=1, dilation=dilation, groups=groups,
    )
    normalisation = nn.BatchNorm2d(outChannels)
    activation = nn.Mish(inplace=True)
    dropout = nn.Dropout2d(p=dropProbability, inplace=True)
    return (convolution, normalisation, activation, dropout)

def willy_block(inChannels,outChannels,dilation,groups,dropProbability=0,kernalSize=3):
    """
        Bundle the layers produced by conv() into a single nn.Sequential so
        the whole convolution stage can be called as one module.
    """
    stage_layers = conv(
        inChannels, outChannels, dilation, groups,
        dropProbability=dropProbability, kernalSize=kernalSize,
    )
    return nn.Sequential(*stage_layers)


def fullyConnected(inFeatures,outFeatures):
    """
        Build the three layers of one fully connected stage.

        Returns a tuple, in application order, of:
        Linear, BatchNorm1d and an in-place Mish activation.
    """
    linear = nn.Linear(inFeatures, outFeatures)
    normalisation = nn.BatchNorm1d(outFeatures)
    activation = nn.Mish(inplace=True)
    return (linear, normalisation, activation)

def willy_fullyConnected_block(inFeatures,outFeatures):
    """
        Bundle the layers produced by fullyConnected() into a single
        nn.Sequential.
    """
    stage_layers = fullyConnected(inFeatures, outFeatures)
    return nn.Sequential(*stage_layers)

def willy_prediction_block(inFeatures,outFeatures):
    """
        Build the output head as one nn.Sequential of:
        Linear, BatchNorm1d and an in-place Mish activation.

        No softmax is applied here; the values returned by the head are the
        Mish outputs.
    """
    head_layers = [
        nn.Linear(inFeatures, outFeatures),
        nn.BatchNorm1d(outFeatures),
        nn.Mish(inplace=True),
    ]
    return nn.Sequential(*head_layers)

class WillyNet(nn.Module):
    """
    A CNN designed by Mustafa A-Hussein to predict which category the product in an image belongs to.
    The goal of the design was to satisfy my curiosity and to gain experience in designing Neural Network Architectures.
    I also wanted to explore how the Mish activation function compares with others such as ReLU.

    The network is five unpadded, stride-1 convolution stages (built with
    willy_block), followed by a flatten, one fully connected stage and a
    prediction head. The fully connected stage is sized for 198x198 inputs,
    for which the last convolution stage produces a 19x19 feature map.

    Args:
        s (int): base channel width. The convolution stages emit
            9*s, 7*s, 5*s, 3*s and s channels. Must be a positive multiple
            of 4, because the second stage is a grouped convolution with
            groups=4 over 9*s input and 7*s output channels.
        numClasses (int): number of values produced by the prediction head
            (defaults to 18).
        ToDo add desired shape i.e., how many convolution layers you want with their shape
        ToDo cont: decide how many fully connected layers you want.

    Raises:
        ValueError: if s is not a positive multiple of 4.
    """

    # Side length of the feature map after conv5 for a 198x198 input. Each
    # unpadded stride-1 stage shrinks the side by dilation*(kernel-1):
    # 198 -> 114 -> 63 -> 33 -> 22 -> 19.
    _FINAL_MAP_SIDE = 19

    def __init__(self,s=20,numClasses=18) -> None: # s = nr_channels_out
        super(WillyNet,self).__init__()

        # conv2 uses groups=4, so both 9*s and 7*s must be divisible by 4;
        # report that up front instead of failing inside nn.Conv2d.
        if s <= 0 or s % 4 != 0:
            raise ValueError(f"s must be a positive multiple of 4, got {s!r}")

        # the first layer, takes in 3 channel input (RGB) image
        self.conv1 = willy_block(inChannels=3,outChannels=s*9,
         dilation=4, groups=3, dropProbability=0.5,
         kernalSize=22)

        self.conv2 = willy_block(inChannels= s*9, outChannels= s*7,
        dilation= 3, groups=4, dropProbability=0.25,
        kernalSize=18)

        self.conv3 = willy_block(inChannels=s*7,outChannels= s*5,
        dilation=2, groups=2, dropProbability=0.25,
        kernalSize=16)

        self.conv4 = willy_block(inChannels=s*5,outChannels= s*3,
        dilation=1, groups=1, dropProbability=0.25,
        kernalSize=12)

        self.conv5 = willy_block(inChannels= s*3,outChannels=s,
        dilation=1, groups=1, dropProbability=0,
         kernalSize=4)

        # The head sizes were tuned by hand until the architecture was
        # satisfactory. The flattened width follows from s (s channels of
        # 19x19), so it is derived rather than hard-coded; for the default
        # s=20 this is the original 7220.
        self.flatten = nn.Flatten()
        flatFeatures = s * self._FINAL_MAP_SIDE ** 2
        self.fullyConnectedLayer = willy_fullyConnected_block(inFeatures=flatFeatures,outFeatures=100)
        self.predictionLayer = willy_prediction_block(inFeatures=100,outFeatures=numClasses)

    def forward(self,x):
        """
        Run a batch through the five convolution stages, flatten it, and
        apply the fully connected stage and the prediction head.

        The notebook exercises this with a (2, 3, 198, 198) batch and gets a
        (2, 18) result with the default arguments.
        """
        # Convolution part
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)

        # fully connected part
        x = self.flatten(x)
        x = self.fullyConnectedLayer(x)
        out = self.predictionLayer(x)

        return out

        

# myNet = WillyNet()

Overwriting WillyNet.py


In [2]:
from torchinfo import summary
# Instantiate the network with its default arguments and print torchinfo's
# per-layer parameter counts.
myNet = WillyNet()
summary(myNet)
# Alternative call that additionally passes an input size (a batch of two
# 198x198 RGB images) to torchinfo:
# summary(myNet,input_size=(2,3,198,198))


Layer (type:depth-idx)                   Param #
WillyNet                                 --
├─Sequential: 1-1                        --
│    └─Conv2d: 2-1                       87,300
│    └─BatchNorm2d: 2-2                  360
│    └─Mish: 2-3                         --
│    └─Dropout2d: 2-4                    --
├─Sequential: 1-2                        --
│    └─Conv2d: 2-5                       2,041,340
│    └─BatchNorm2d: 2-6                  280
│    └─Mish: 2-7                         --
│    └─Dropout2d: 2-8                    --
├─Sequential: 1-3                        --
│    └─Conv2d: 2-9                       1,792,100
│    └─BatchNorm2d: 2-10                 200
│    └─Mish: 2-11                        --
│    └─Dropout2d: 2-12                   --
├─Sequential: 1-4                        --
│    └─Conv2d: 2-13                      864,060
│    └─BatchNorm2d: 2-14                 120
│    └─Mish: 2-15                        --
│    └─Dropout2d: 2-16                   --


In [3]:

# Show the module tree (layer types and their constructor settings).
print(myNet)

WillyNet(
  (conv1): Sequential(
    (0): Conv2d(3, 180, kernel_size=(22, 22), stride=(1, 1), dilation=(4, 4), groups=3)
    (1): BatchNorm2d(180, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Mish(inplace=True)
    (3): Dropout2d(p=0.5, inplace=True)
  )
  (conv2): Sequential(
    (0): Conv2d(180, 140, kernel_size=(18, 18), stride=(1, 1), dilation=(3, 3), groups=4)
    (1): BatchNorm2d(140, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Mish(inplace=True)
    (3): Dropout2d(p=0.25, inplace=True)
  )
  (conv3): Sequential(
    (0): Conv2d(140, 100, kernel_size=(16, 16), stride=(1, 1), dilation=(2, 2), groups=2)
    (1): BatchNorm2d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Mish(inplace=True)
    (3): Dropout2d(p=0.25, inplace=True)
  )
  (conv4): Sequential(
    (0): Conv2d(100, 60, kernel_size=(12, 12), stride=(1, 1))
    (1): BatchNorm2d(60, eps=1e-05, momentum=0.1, affine=True, track_running_sta

In [9]:
# Smoke test: push a random batch of two 198x198 RGB images through the
# network and draw one random class index in [0, 18) per sample as a target.
random_data = torch.randn((2, 3, 198, 198), requires_grad=True)
target = torch.empty(2, dtype=torch.long).random_(18)
result = myNet(random_data)

# Print the full output with its shape, then each sample's scores, the target
# shape and the first target, each preceded by a '-----' separator line.
print(result, result.shape)
print('-----')
print(result[0], result[1], target.shape, target[0], sep='\n-----\n')


tensor([[-0.3034,  0.8643,  0.8642, -0.3034, -0.3028,  0.8649, -0.3028, -0.3032,
         -0.3034,  0.8608,  0.8647, -0.3034, -0.3034,  0.8650,  0.8650, -0.3034,
          0.8651,  0.8650],
        [ 0.8651, -0.3034, -0.3034,  0.8645,  0.8551, -0.3034,  0.8549,  0.8619,
          0.8651, -0.3032, -0.3034,  0.8651,  0.8650, -0.3034, -0.3034,  0.8651,
         -0.3034, -0.3034]], grad_fn=<MishBackward>) torch.Size([2, 18])
-----
tensor([-0.3034,  0.8643,  0.8642, -0.3034, -0.3028,  0.8649, -0.3028, -0.3032,
        -0.3034,  0.8608,  0.8647, -0.3034, -0.3034,  0.8650,  0.8650, -0.3034,
         0.8651,  0.8650], grad_fn=<SelectBackward>)
-----
tensor([ 0.8651, -0.3034, -0.3034,  0.8645,  0.8551, -0.3034,  0.8549,  0.8619,
         0.8651, -0.3032, -0.3034,  0.8651,  0.8650, -0.3034, -0.3034,  0.8651,
        -0.3034, -0.3034], grad_fn=<SelectBackward>)
-----
torch.Size([2])
-----
tensor(15)


In [10]:
# Turn the network outputs into log-probabilities over the class axis.
# dim=1 is spelled out because relying on the implicit dimension is deprecated
# and emits a warning; for this 2-D (2, 18) result it is the same axis that
# was previously chosen implicitly, so the values are unchanged.
softm = F.log_softmax(result, dim=1)
softm[0], softm.shape


  softm = F.log_softmax(result)


(tensor([-3.6358, -2.4681, -2.4682, -3.6358, -3.6353, -2.4676, -3.6353, -3.6357,
         -3.6358, -2.4716, -2.4677, -3.6358, -3.6358, -2.4675, -2.4674, -3.6358,
         -2.4674, -2.4674], grad_fn=<SelectBackward>),
 torch.Size([2, 18]))

In [11]:
# `softm` already holds log-probabilities (it is the output of
# F.log_softmax), so the matching criterion is NLLLoss. CrossEntropyLoss
# expects raw scores and would apply log_softmax a second time, which is
# redundant. Both use mean reduction by default, so the loss value is the same.
loss = nn.NLLLoss()
outt = loss(softm,target)
# Back-propagate to check that gradients flow through the whole network.
outt.backward()

In [12]:
# Display the scalar loss computed in the previous cell.
outt

tensor(3.6350, grad_fn=<NllLossBackward>)