In [1]:
# Record the author plus the Python, IPython and torch versions used for this run.
%load_ext watermark
%watermark -a 'NavinKumarMNK' -v -p torch

Author: NavinKumarMNK

Python implementation: CPython
Python version       : 3.10.8
IPython version      : 8.8.0

torch: 1.13.1



In [3]:
# Import the required modules
import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
from torchvision import models
import wandb
import torchmetrics


In [4]:
# Build the EfficientNetV2-M backbone with its default pretrained weights.
# `include_top` is a Keras keyword that torchvision's builder does not define,
# so it is not passed; the classification head is removed explicitly below.
model = models.efficientnet_v2_m(weights=models.EfficientNet_V2_M_Weights.DEFAULT)

# Alternative backbone:
# model = models.efficientnet_b3(weights=models.EfficientNet_B3_Weights.DEFAULT)

# Remove the last layer: with an identity head the network returns the
# feature vector instead of class scores.
model.classifier = nn.Identity()
model

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): FusedMBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
        )
        (stochastic_depth): StochasticDepth(p=0.0, mode=row)
      )
      (1): FusedMBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  

In [5]:
# Count every parameter of the headless encoder.
total_params = sum(p.numel() for p in model.parameters())
print(total_params)

# Shape check on a random batch. Run it in eval mode and without autograd:
# in training mode the BatchNorm layers would fold this random batch into
# their running statistics, changing the pretrained encoder before it is saved.
input = torch.randn(16, 3, 256, 256)
was_training = model.training
model.eval()
with torch.no_grad():
    output = model(input)
model.train(was_training)  # restore whatever mode the model was in
output.shape

52858356


torch.Size([16, 1280])

In [2]:
# Serialize the whole encoder module object (not just its state_dict).
torch.save(model, '../weights/EfficientNetv2Encoder.pt')

NameError: name 'torch' is not defined

In [6]:
# Load the object stored at this path back into memory.
# NOTE(review): torch.load unpickles the file, so only load files you trust.
model_1 = torch.load('../weights/EfficientNetv2Encoder.pt')

In [8]:
import torch.nn as nn

class SEAttention(nn.Module):
    """Squeeze-and-Excitation style channel attention.

    Each channel of the input is averaged over its spatial extent, the
    resulting per-channel vector is passed through a two-layer bottleneck
    ending in a sigmoid, and the input is rescaled channel-wise by that gate.

    Args:
        in_channels: Number of channels of the input feature map.
        reduction: Bottleneck ratio. The hidden width is
            ``in_channels // reduction``, clamped to at least 1.
    """

    def __init__(self, in_channels, reduction=16):
        super().__init__()
        # Clamp the bottleneck: with in_channels < reduction the integer
        # division gives 0, and a zero-width bottleneck would make the gate a
        # constant 0.5 regardless of the input.
        hidden_channels = max(1, in_channels // reduction)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(in_channels, hidden_channels, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_channels, in_channels, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``x`` scaled per channel by the learned gate.

        Args:
            x: 4-D tensor laid out as (batch, channels, height, width).

        Returns:
            Tensor with the same shape as ``x``.
        """
        batch, channels, _, _ = x.size()
        # Squeeze: one scalar per (sample, channel).
        gate = self.avg_pool(x).view(batch, channels)
        # Excite: gate in (0, 1), reshaped so it broadcasts over H and W.
        gate = self.fc(gate).view(batch, channels, 1, 1)
        return x * gate
    
class Decoder(nn.Module):
    """Convolutional decoder that turns a feature vector into a 3-channel image.

    The input vector is projected to a 1024 x 4 x 4 map and then upsampled by
    six stride-2 transposed convolutions, each doubling height and width
    (4 -> 8 -> 16 -> 32 -> 64 -> 128 -> 256). Two residual branches with SE
    attention are applied at the 256-channel and 64-channel stages. A final
    1x1 convolution followed by a sigmoid yields values in (0, 1).

    Args:
        latent_dim: Width of the input feature vector. Defaults to 1280, the
            value that was previously hard-coded.
    """

    def __init__(self, latent_dim=1280):
        super().__init__()

        # Initial representation: latent vector -> flattened 1024 x 4 x 4 map.
        self.fc = nn.Linear(latent_dim, 4 * 4 * 1024)
        self.bn1d = nn.BatchNorm1d(4 * 4 * 1024)
        self.gelu = nn.GELU()

        # Decoder layers. Attribute names and creation order are kept stable
        # so state_dict keys stay the same.
        self.conv1 = self._upsample(1024, 512)
        self.bn1 = nn.BatchNorm2d(512)
        self.relu1 = nn.ReLU()

        self.conv2 = self._upsample(512, 256)
        self.bn2 = nn.BatchNorm2d(256)
        self.relu2 = nn.ReLU()

        self.conv3 = self._upsample(256, 128)
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.ReLU()

        self.conv4 = self._upsample(128, 64)
        self.bn4 = nn.BatchNorm2d(64)
        self.relu4 = nn.ReLU()

        self.conv5 = self._upsample(64, 32)
        self.bn5 = nn.BatchNorm2d(32)
        self.relu5 = nn.ReLU()

        self.conv6 = self._upsample(32, 32)
        self.bn6 = nn.BatchNorm2d(32)
        self.relu6 = nn.ReLU()

        # Residual blocks with SE attention (res1 is applied first in forward).
        self.res2 = self._residual_branch(64)
        self.res1 = self._residual_branch(256)

        # Map the 32 remaining channels to a 3-channel output in (0, 1).
        self.conv7 = nn.Conv2d(32, 3, kernel_size=1, stride=1, padding=0)
        self.sigmoid = nn.Sigmoid()

    @staticmethod
    def _upsample(in_channels, out_channels):
        """Build a transposed convolution that doubles height and width."""
        return nn.ConvTranspose2d(
            in_channels, out_channels,
            kernel_size=4, stride=2, padding=1, output_padding=0,
        )

    @staticmethod
    def _residual_branch(channels):
        """Build the conv-BN-ReLU-conv-BN-Sigmoid-SE-ReLU branch of a residual block."""
        return nn.Sequential(
            nn.Conv2d(channels, channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(channels),
            nn.ReLU(),
            nn.Conv2d(channels, channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(channels),
            nn.Sigmoid(),
            SEAttention(channels),
            nn.ReLU(),
        )

    def forward(self, x):
        """Decode a batch of feature vectors.

        Args:
            x: Tensor of shape (batch, latent_dim).

        Returns:
            Tensor of shape (batch, 3, 256, 256) with values in (0, 1).
        """
        x = self.gelu(self.bn1d(self.fc(x)))
        x = x.view(-1, 1024, 4, 4)

        x = self.relu1(self.bn1(self.conv1(x)))  # 512 x 8 x 8
        x = self.relu2(self.bn2(self.conv2(x)))  # 256 x 16 x 16

        x = self.res1(x) + x

        x = self.relu3(self.bn3(self.conv3(x)))  # 128 x 32 x 32
        x = self.relu4(self.bn4(self.conv4(x)))  # 64 x 64 x 64

        x = self.res2(x) + x

        x = self.relu5(self.bn5(self.conv5(x)))  # 32 x 128 x 128
        x = self.relu6(self.bn6(self.conv6(x)))  # 32 x 256 x 256

        x = self.conv7(x)
        x = self.sigmoid(x)

        return x


In [9]:
# Smoke-test the encoder -> decoder round trip on a random batch.
input = torch.randn(16, 3, 256, 256)
encoder = model
decoder = Decoder()

# Run in eval mode and without autograd: in training mode every BatchNorm
# layer would fold this random batch into its running statistics, and the
# forward pass would also build an autograd graph that is never used.
encoder_was_training = encoder.training
decoder_was_training = decoder.training
encoder.eval()
decoder.eval()
with torch.no_grad():
    feature = encoder(input)
    print(feature.shape)
    output = decoder(feature)
    print(output.shape)
# Restore the modes the modules were in before the check.
encoder.train(encoder_was_training)
decoder.train(decoder_was_training)
# difference between input and output

model2 = decoder
total_params = sum(p.numel() for p in model2.parameters())
print(total_params)


torch.Size([16, 1280])
torch.Size([16, 3, 256, 256])
33478115


In [20]:
# Save the decoder's parameters. `decoder` is used rather than `model`,
# because `model` is bound to the encoder in the cells above and would put
# encoder weights into the decoder's file.
torch.save(decoder.state_dict(), '../weights/EfficientNetv2DecoderLarge.pt')

In [22]:
# classifier
class EfficientNetv2Classifier(nn.Module):
    """Three-layer MLP head that maps a feature vector to class probabilities.

    Args:
        no_of_classes: Number of output classes.
        in_features: Width of the input feature vector. Defaults to 1536, the
            value that was previously hard-coded, so existing checkpoints keep
            loading. Pass the encoder's actual feature width when it differs
            (for example 1280).

    Note:
        The last layer is a Softmax, so ``forward`` returns probabilities.
        NOTE(review): if this head is trained with ``nn.CrossEntropyLoss``,
        which expects raw logits, the Softmax would be applied twice.
        Confirm against the training code.
    """

    def __init__(self, no_of_classes, in_features=1536):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(in_features, 512),
            nn.ReLU(),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, no_of_classes),
            nn.Softmax(dim=1),
        )

    def forward(self, x):
        """Return per-class probabilities for ``x`` of shape (batch, in_features)."""
        return self.model(x)


In [23]:
import sys
import os
# Make the parent directory importable, adding it to sys.path only once.
parent_dir = os.path.abspath('../')
already_listed = parent_dir in sys.path
if not already_listed:
    sys.path.append(parent_dir)
from utils import utils

In [24]:
# Look up 'no_of_classes' in the result of utils.config_parse('GENERAL')
# and coerce it to an integer.
general_section = utils.config_parse('GENERAL')
classes = int(general_section['no_of_classes'])
classes

14

In [25]:

# Instantiate the classification head, show its layout, then report its size.
model = EfficientNetv2Classifier(classes)
print(model)

total_params = 0
for weight in model.parameters():
    total_params += weight.numel()
print(total_params)

EfficientNetv2Classifier(
  (model): Sequential(
    (0): Linear(in_features=1536, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=14, bias=True)
    (5): Softmax(dim=1)
  )
)
854414


In [26]:
# Save only the parameters (state_dict) of `model`, not the module object itself.
torch.save(model.state_dict(), '../weights/EfficientNetv2Classifier.pt')