# Training the proposed network

In [1]:
!pip install mmcv==1.7.1


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mmcv==1.7.1
  Downloading mmcv-1.7.1.tar.gz (605 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m605.4/605.4 kB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting addict (from mmcv==1.7.1)
  Downloading addict-2.4.0-py3-none-any.whl (3.8 kB)
Collecting yapf (from mmcv==1.7.1)
  Downloading yapf-0.40.0-py3-none-any.whl (250 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.3/250.3 kB[0m [31m26.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting importlib-metadata>=6.6.0 (from yapf->mmcv==1.7.1)
  Downloading importlib_metadata-6.6.0-py3-none-any.whl (22 kB)
Collecting platformdirs>=3.5.1 (from yapf->mmcv==1.7.1)
  Downloading platformdirs-3.5.3-py3-none-any.whl (15 kB)
Building wheels for collected packages: mmcv
  Building wheel for mmcv (setup.py) ... [?25l[?25hdone
  Created 

## Importing necessary libraries

In [2]:
from google.colab import drive
import shutil
import torch
from torchvision import transforms
import torchvision
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable

import math
from mmcv.cnn import constant_init, kaiming_init
from torch.nn import Module, Conv2d, Linear, MaxPool2d, ReLU, Flatten, BatchNorm2d, Dropout

from torchvision.utils import make_grid
import matplotlib.pyplot as plt

from torchsummary import summary



In [3]:
# Mount Google Drive at /content/gdrive (remounting if it is already mounted);
# the dataset and checkpoint paths used in this notebook live under that mount point.
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [4]:
## Hyper Parameters
# Mini-batch size; passed as `batch_size` to the training and validation DataLoaders.
batch_size = 32

## Loading the training and validation datasets

In [5]:
# Preprocessing applied to every image: resize to 256x256, then convert to a tensor.
transformer = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
    ]
)

In [8]:
def load_dataset(d_path):
    """Build an ``ImageFolder`` dataset rooted at ``d_path`` and wrap it in a ``DataLoader``.

    The module-level ``transformer`` is applied to every image, and the class
    names discovered by ``ImageFolder`` are printed.

    Args:
        d_path: Root directory handed to ``torchvision.datasets.ImageFolder``.

    Returns:
        A ``torch.utils.data.DataLoader`` created with default settings; the
        underlying dataset is reachable through its ``.dataset`` attribute.
    """
    image_folder = torchvision.datasets.ImageFolder(d_path, transform=transformer)
    print("Follwing classes are there : \n", image_folder.classes)
    return torch.utils.data.DataLoader(image_folder)

Loading the dataset from Drive

In [9]:
# load_dataset prints the class names of each split and returns a DataLoader.
train_dataset = load_dataset(
    '/content/gdrive/MyDrive/Dataset/training_dataset'
)
val_dataset = load_dataset(
    '/content/gdrive/MyDrive/Dataset/validation_dataset'
)


Follwing classes are there : 
 ['Cercospora', 'Healthy', 'Miner', 'Phoma', 'Rust']
Follwing classes are there : 
 ['Cercospora', 'Healthy', 'Miner', 'Phoma', 'Rust']


In [10]:
# load_dataset returns a DataLoader; keep only the dataset it wraps so that the
# batched loaders can be built from it.
# getattr with a fallback keeps this cell re-runnable: once the names already
# hold the unwrapped datasets (which have no `.dataset` attribute) they are
# left unchanged instead of raising AttributeError.
train_dataset = getattr(train_dataset, 'dataset', train_dataset)
val_dataset = getattr(val_dataset, 'dataset', val_dataset)

In [11]:
# Seed PyTorch's global random number generator; the returned Generator is this cell's output.
torch.manual_seed(42)

<torch._C.Generator at 0x7f34b8375e50>

In [12]:
# Batched, shuffled loaders over the two splits.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)

In [13]:
# Number of images in each split; the training loop divides its epoch totals by these.
train_count = len(train_dataset)
val_count = len(val_dataset)


In [14]:
# Report how many images and batches each split contains.
print(f'Train Set- {len(train_dataset)} images in {len(train_loader)} batches')
print(f'Validation Set - {len(val_dataset)} images in {len(val_loader)} batches')

Train Set- 5020 images in 157 batches
Validation Set - 2500 images in 79 batches


In [15]:
# Fetch a single batch to check the tensor shapes produced by the training loader.
for images, labels in train_loader:
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break  # only the first batch is needed

Image batch dimensions: torch.Size([32, 3, 256, 256])
Image label dimensions: torch.Size([32])


In [16]:
# Print the integer class labels of a single training batch.
for images, labels in train_loader:
    print (labels)
    break  # only the first batch is needed

tensor([1, 4, 2, 4, 2, 1, 1, 1, 4, 0, 2, 1, 2, 1, 3, 4, 0, 1, 1, 0, 4, 4, 4, 0,
        2, 4, 3, 2, 3, 3, 2, 3])


## Building the Proposed Network architecture

In [17]:
class ScaledDotProductAttention(nn.Module):
    """Scaled dot-product attention, used by the multi-head attention module.

    The matrix products and the softmax act on the trailing dimensions of the
    inputs; the scaling factor is derived from dimension 1 of ``query``.
    """

    def forward(self, query, key, value, mask=None):
        """Return ``softmax(query @ key^T / sqrt(d)) @ value`` with ``d = query.size(1)``.

        Args:
            query, key, value: Tensors combined with ``torch.matmul``; ``key`` is
                transposed over its last two dimensions first.
            mask: Optional tensor. Wherever ``mask == 0`` the score is replaced
                by ``-1e9`` before the softmax.

        Returns:
            The attention-weighted combination of ``value``.
        """
        scale = math.sqrt(query.size(1))
        logits = torch.matmul(query, key.transpose(-2, -1)) / scale
        if mask is not None:
            # A large negative score becomes a (near-)zero weight after softmax.
            logits = logits.masked_fill(mask == 0, -1e9)
        weights = F.softmax(logits, dim=-1)
        return torch.matmul(weights, value)


class MultiHeadAttention(nn.Module):
    """Multi-head attention over 4-D feature maps using 1x1 convolutions.

    Query, key, value and output projections are ``nn.Conv2d`` layers with
    ``kernel_size=1`` that keep the channel count unchanged. The projected maps
    are regrouped by ``_reshape_to_batches``, passed through
    ``ScaledDotProductAttention`` and regrouped back by ``_reshape_from_batches``.

    Args:
        in_channels: Number of input (and output) channels; must be divisible
            by ``head_num``.
        head_num: Number of attention heads.
        bias: Whether the 1x1 convolutions use a bias term.
        activation: Callable applied to q, k, v and to the output projection,
            or ``None`` to skip it.

    Raises:
        ValueError: If ``in_channels`` is not divisible by ``head_num``.
    """

    def __init__(self, in_channels, head_num, bias=True, activation=nn.ReLU()):
        super().__init__()
        if in_channels % head_num != 0:
            raise ValueError(
                '`in_channels`({}) should be divisible by `head_num`({})'.format(in_channels, head_num)
            )
        self.in_channels = in_channels
        self.head_num = head_num
        self.activation = activation
        self.bias = bias
        # Created in q, k, v, o order so parameter names and initialisation
        # order match existing checkpoints.
        self.linear_q = nn.Conv2d(in_channels, in_channels, kernel_size=1, bias=bias)
        self.linear_k = nn.Conv2d(in_channels, in_channels, kernel_size=1, bias=bias)
        self.linear_v = nn.Conv2d(in_channels, in_channels, kernel_size=1, bias=bias)
        self.linear_o = nn.Conv2d(in_channels, in_channels, kernel_size=1, bias=bias)

    def forward(self, x, mask=None):
        """Apply attention to ``x`` of shape (batch, channels, height, width).

        Args:
            x: Input feature map.
            mask: Optional mask; it is repeated ``head_num`` times along
                dimension 0 and forwarded to the attention step.

        Returns:
            A tensor with the same shape as ``x``.
        """
        projected = [layer(x) for layer in (self.linear_q, self.linear_k, self.linear_v)]
        if self.activation is not None:
            projected = [self.activation(t) for t in projected]
        q, k, v = [self._reshape_to_batches(t) for t in projected]
        if mask is not None:
            mask = mask.repeat(self.head_num, 1, 1, 1)
        attended = ScaledDotProductAttention()(q, k, v, mask)
        out = self.linear_o(self._reshape_from_batches(attended))
        if self.activation is not None:
            out = self.activation(out)
        return out

    def _reshape_to_batches(self, x):
        """Regroup (batch, channels, H, W) into (batch * head_num, channels // head_num, H, W)."""
        n, c, h, w = x.size()
        per_head = c // self.head_num
        grouped = x.reshape(n, self.head_num, per_head, h, w)
        swapped = grouped.permute(0, 2, 1, 3, 4)
        return swapped.reshape(n * self.head_num, per_head, h, w)

    def _reshape_from_batches(self, x):
        """Regroup (batch * head_num, channels, H, W) into (batch, channels * head_num, H, W)."""
        stacked, c, h, w = x.size()
        n = stacked // self.head_num
        grouped = x.reshape(n, self.head_num, c, h, w)
        swapped = grouped.permute(0, 2, 1, 3, 4)
        return swapped.reshape(n, c * self.head_num, h, w)

In [18]:
class ConvBlock(nn.Module):
    """Convolution followed by ReLU and batch normalisation (in that order).

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by the convolution.
        kernel_size: Kernel size of the convolution.
        **kwargs: Extra keyword arguments forwarded to ``nn.Conv2d``
            (e.g. ``padding``).
    """

    def __init__(self, in_channels, out_channels, kernel_size, **kwargs):
        super().__init__()
        layers = [
            nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, **kwargs),
            nn.ReLU(),
            nn.BatchNorm2d(out_channels),
        ]
        self.simple_conv = nn.Sequential(*layers)

    def forward(self, xb):
        """Run ``xb`` through the Conv2d -> ReLU -> BatchNorm2d stack."""
        return self.simple_conv(xb)

In [19]:
class ConvWithMaxDrop(nn.Module):
    """Conv2d -> ReLU -> BatchNorm2d -> MaxPool2d(2, 2) -> Dropout(0.7).

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by the convolution.
        kernel_size: Kernel size of the convolution (no padding is applied).
    """

    def __init__(self, in_channels, out_channels, kernel_size):
        super().__init__()
        stages = [
            nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size),
            nn.ReLU(),
            nn.BatchNorm2d(out_channels),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.7),
        ]
        self.conv_maxp_drop = nn.Sequential(*stages)

    def forward(self, xb):
        """Run ``xb`` through the convolution, pooling and dropout stack."""
        return self.conv_maxp_drop(xb)

In [20]:
class InceptionBlock(nn.Module):
    """Inception module: four parallel branches concatenated along the channel axis.

    Every branch keeps the spatial size of its input, so the output has
    ``out_1x1 + out_3x3 + out_5x5 + out_pool`` channels.

    Args:
        in_channels: Channels of the input feature map.
        out_1x1: Output channels of the 1x1 branch.
        red_3x3: Channels of the 1x1 reduction placed before the 3x3 convolution.
        out_3x3: Output channels of the 3x3 branch.
        red_5x5: Channels of the 1x1 reduction placed before the 5x5 convolution.
        out_5x5: Output channels of the 5x5 branch.
        out_pool: Output channels of the pooling branch.
    """

    def __init__(self, in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_pool):
        super().__init__()
        # Branch 1: a single 1x1 convolution.
        self.branch1 = ConvBlock(in_channels, out_1x1, kernel_size=1)
        # Branch 2: 1x1 reduction, then a 3x3 convolution padded to keep the size.
        reduce_3x3 = ConvBlock(in_channels, red_3x3, kernel_size=1, padding=0)
        conv_3x3 = ConvBlock(red_3x3, out_3x3, kernel_size=3, padding=1)
        self.branch2 = nn.Sequential(reduce_3x3, conv_3x3)
        # Branch 3: 1x1 reduction, then a 5x5 convolution padded to keep the size.
        reduce_5x5 = ConvBlock(in_channels, red_5x5, kernel_size=1)
        conv_5x5 = ConvBlock(red_5x5, out_5x5, kernel_size=5, padding=2)
        self.branch3 = nn.Sequential(reduce_5x5, conv_5x5)
        # Branch 4: stride-1 3x3 max pooling, then a 1x1 convolution.
        pool = nn.MaxPool2d(kernel_size=3, padding=1, stride=1)
        pool_proj = ConvBlock(in_channels, out_pool, kernel_size=1)
        self.branch4 = nn.Sequential(pool, pool_proj)

    def forward(self, x):
        """Apply all four branches to ``x`` and concatenate their outputs on dim 1."""
        outputs = [
            self.branch1(x),
            self.branch2(x),
            self.branch3(x),
            self.branch4(x),
        ]
        return torch.cat(outputs, 1)

In [21]:
def last_zero_init(m):
    """Helper for the Global Context block: initialise a module with ``constant_init(val=0)``.

    Args:
        m: A module. If it is an ``nn.Sequential`` only its last layer is
            initialised; otherwise ``m`` itself is.
    """
    target = m[-1] if isinstance(m, nn.Sequential) else m
    constant_init(target, val=0)

class ContextBlock(nn.Module):
    """Global Context block.

    A per-channel context vector is pooled from the input (attention-weighted
    or plain average) and fused back into every spatial position through a
    bottleneck transform, by addition and/or sigmoid-gated multiplication.

    Args:
        inplanes: Number of channels of the input feature map.
        ratio: Bottleneck width factor; the hidden width is
            ``int(inplanes * ratio)``.
        pooling_type: ``'att'`` for attention pooling or ``'avg'`` for global
            average pooling.
        fusion_types: Non-empty list/tuple drawn from ``'channel_add'`` and
            ``'channel_mul'``.
    """

    def __init__(self,
                 inplanes,
                 ratio,
                 pooling_type='att',
                 fusion_types=('channel_add', )):
        super().__init__()
        valid_fusion_types = ['channel_add', 'channel_mul']
        assert pooling_type in ['avg', 'att']
        assert isinstance(fusion_types, (list, tuple))
        assert all(f in valid_fusion_types for f in fusion_types)
        assert len(fusion_types) > 0, 'at least one fusion should be used'

        self.inplanes = inplanes
        self.ratio = ratio
        self.planes = int(inplanes * ratio)
        self.pooling_type = pooling_type
        self.fusion_types = fusion_types

        if pooling_type == 'att':
            # 1x1 conv yields one attention logit per spatial position.
            self.conv_mask = nn.Conv2d(inplanes, 1, kernel_size=1)
            self.softmax = nn.Softmax(dim=2)
        else:
            self.avg_pool = nn.AdaptiveAvgPool2d(1)

        # Built in add-then-mul order so parameter creation order is unchanged.
        self.channel_add_conv = (
            self._make_transform() if 'channel_add' in fusion_types else None)
        self.channel_mul_conv = (
            self._make_transform() if 'channel_mul' in fusion_types else None)
        self.reset_parameters()

    def _make_transform(self):
        """Build the Conv1x1 -> LayerNorm -> ReLU -> Conv1x1 bottleneck transform."""
        return nn.Sequential(
            nn.Conv2d(self.inplanes, self.planes, kernel_size=1),
            nn.LayerNorm([self.planes, 1, 1]),
            nn.ReLU(inplace=True),  # yapf: disable
            nn.Conv2d(self.planes, self.inplanes, kernel_size=1))

    def reset_parameters(self):
        """Initialise the attention conv and the last layer of each fusion transform."""
        if self.pooling_type == 'att':
            kaiming_init(self.conv_mask, mode='fan_in')
            self.conv_mask.inited = True

        for transform in (self.channel_add_conv, self.channel_mul_conv):
            if transform is not None:
                last_zero_init(transform)

    def spatial_pool(self, x):
        """Pool ``x`` of shape [N, C, H, W] into a context tensor of shape [N, C, 1, 1]."""
        batch, channel, height, width = x.size()
        if self.pooling_type != 'att':
            # [N, C, 1, 1]
            return self.avg_pool(x)

        positions = height * width
        # [N, C, H * W] -> [N, 1, C, H * W]
        features = x.view(batch, channel, positions).unsqueeze(1)
        # [N, 1, H, W] -> [N, 1, H * W]
        weights = self.conv_mask(x).view(batch, 1, positions)
        # Softmax over positions, then [N, 1, H * W, 1]
        weights = self.softmax(weights).unsqueeze(-1)
        # [N, 1, C, 1]: attention-weighted sum over all positions
        context = torch.matmul(features, weights)
        # [N, C, 1, 1]
        return context.view(batch, channel, 1, 1)

    def forward(self, x):
        """Fuse the pooled context into ``x``; the output has the same shape as ``x``."""
        # [N, C, 1, 1]
        context = self.spatial_pool(x)

        out = x
        if self.channel_mul_conv is not None:
            # Sigmoid-gated channel-wise scaling, broadcast over H and W.
            out = out * torch.sigmoid(self.channel_mul_conv(context))
        if self.channel_add_conv is not None:
            # Channel-wise bias, broadcast over H and W.
            out = out + self.channel_add_conv(context)
        return out

In [22]:
class ProposedModel(nn.Module):
    """The proposed classification network.

    Layer order: ConvBlock -> Inception -> ConvBlock -> Inception ->
    ConvWithMaxDrop -> ConvBlock -> Global Context block -> ConvWithMaxDrop ->
    ConvBlock -> multi-head attention -> ConvWithMaxDrop -> two fully
    connected layers -> softmax.

    Args:
        aux_logits: Stored on the instance; not otherwise used by this class.
        num_classes: Number of output classes (width of the final layer).

    Note:
        ``fc1`` expects 25088 input features, i.e. the 32 x 28 x 28 map that
        ``conv7`` produces for a 3 x 256 x 256 input; other input sizes will
        not match this layer.
    """

    def __init__(self, aux_logits=True, num_classes=5):
        super().__init__()
        self.aux_logits = aux_logits

        self.conv1 = ConvBlock(in_channels=3, out_channels=8, kernel_size=3)

        # First Inception module: 4 branches x 8 channels = 32 output channels.
        self.inception1 = InceptionBlock(in_channels=8, out_1x1=8, red_3x3=8, out_3x3=8, red_5x5=8, out_5x5=8, out_pool=8)

        self.conv2 = ConvBlock(in_channels=32, out_channels=64, kernel_size=3)

        # Second Inception module: 4 branches x 32 channels = 128 output channels.
        self.inception2 = InceptionBlock(in_channels=64, out_1x1=32, red_3x3=32, out_3x3=32, red_5x5=32, out_5x5=32, out_pool=32)

        self.conv3 = ConvWithMaxDrop(in_channels=128, out_channels=192, kernel_size=3)

        self.conv4 = ConvBlock(in_channels=192, out_channels=160, kernel_size=3)

        # Global Context block; with ratio=8 its bottleneck has 160 * 8 channels.
        self.gcblock = ContextBlock(inplanes=160, ratio=8)

        self.conv5 = ConvWithMaxDrop(in_channels=160, out_channels=80, kernel_size=3)

        self.conv6 = ConvBlock(in_channels=80, out_channels=64, kernel_size=3)

        # Multi-head attention module with 4 heads over 64 channels.
        self.multi_head = MultiHeadAttention(in_channels=64, head_num=4)

        self.conv7 = ConvWithMaxDrop(in_channels=64, out_channels=32, kernel_size=3)

        self.fc1 = nn.Linear(25088, 256)

        # Previously hard-coded to 5 outputs; the default num_classes=5 keeps
        # existing checkpoints loadable.
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return class probabilities of shape (batch, num_classes) for image batch ``x``."""
        x = self.conv1(x)
        x = self.inception1(x)
        x = self.conv2(x)
        x = self.inception2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.gcblock(x)
        x = self.conv5(x)
        x = self.conv6(x)
        x = self.multi_head(x)
        x = self.conv7(x)
        # Flatten everything except the batch dimension.
        x = x.reshape(x.shape[0], -1)
        x = F.relu(self.fc1(x))
        # dim=1 is what the implicit form selected for a 2-D input; stating it
        # removes the "implicit dimension choice" deprecation warning.
        # NOTE(review): the notebook trains with nn.CrossEntropyLoss, which
        # expects raw logits. The softmax is kept so that outputs stay
        # compatible with the saved checkpoint — confirm whether it should be
        # dropped for future training runs.
        x = F.softmax(self.fc2(x), dim=1)
        return x


## Building the Model

In [23]:
# When a CUDA device is present, set the cuDNN backend's `deterministic` flag.
if torch.cuda.is_available():
  torch.backends.cudnn.deterministic = True

In [24]:
# Use the first CUDA device when available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [25]:
# Instantiate the network with its default arguments and move it to the selected device.
model = ProposedModel().to(device) #building the proposed network architecture

In [26]:
# Switches the model to evaluation mode; the value returned by eval() is what this cell displays (the layer listing).
model.eval() # the training loop calls model.train() again before each training pass

ProposedModel(
  (conv1): ConvBlock(
    (simple_conv): Sequential(
      (0): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1))
      (1): ReLU()
      (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (inception1): InceptionBlock(
    (branch1): ConvBlock(
      (simple_conv): Sequential(
        (0): Conv2d(8, 8, kernel_size=(1, 1), stride=(1, 1))
        (1): ReLU()
        (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (branch2): Sequential(
      (0): ConvBlock(
        (simple_conv): Sequential(
          (0): Conv2d(8, 8, kernel_size=(1, 1), stride=(1, 1))
          (1): ReLU()
          (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
      (1): ConvBlock(
        (simple_conv): Sequential(
          (0): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): ReLU()
          (2): BatchNorm2d(8, eps=1e-0

In [98]:
summary(model,(3,256,256)) #per-layer output shapes and parameter counts for a channels-first (3, 256, 256) input

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 8, 254, 254]             224
              ReLU-2          [-1, 8, 254, 254]               0
       BatchNorm2d-3          [-1, 8, 254, 254]              16
         ConvBlock-4          [-1, 8, 254, 254]               0
            Conv2d-5          [-1, 8, 254, 254]              72
              ReLU-6          [-1, 8, 254, 254]               0
       BatchNorm2d-7          [-1, 8, 254, 254]              16
         ConvBlock-8          [-1, 8, 254, 254]               0
            Conv2d-9          [-1, 8, 254, 254]              72
             ReLU-10          [-1, 8, 254, 254]               0
      BatchNorm2d-11          [-1, 8, 254, 254]              16
        ConvBlock-12          [-1, 8, 254, 254]               0
           Conv2d-13          [-1, 8, 254, 254]             584
             ReLU-14          [-1, 8, 2

  x = F.softmax(self.fc2(x))


## Model training

In [27]:
num_epochs = 10 # number of epochs run by the training loop; a checkpoint is saved after each one
LEARNING_RATE = 0.0001 # passed to Adam as `lr`
WEIGHT_DECAY = 0.0001 # passed to Adam as `weight_decay`

In [28]:
# Adam over all model parameters, with the learning rate and weight decay defined above.
optimizer=optim.Adam(model.parameters(),lr=LEARNING_RATE,weight_decay=WEIGHT_DECAY) #optimization algorithm
# NOTE(review): nn.CrossEntropyLoss expects raw logits, while ProposedModel.forward
# ends in a softmax — confirm that feeding probabilities here is intended.
loss_function=nn.CrossEntropyLoss()

In [29]:
# Checkpoint file on the mounted Drive: read when resuming and overwritten after every training epoch.
PATH = '/content/gdrive/MyDrive/proposed_model_size256_final.pt'

### Loading model weights and other checkpoint parameters

In [30]:
# Resume from the checkpoint at PATH. Tensors are first mapped to the CPU;
# load_state_dict then copies them into the existing model/optimizer.
# NOTE(review): this cell requires the checkpoint file to already exist, so a
# first-ever run on a fresh Drive will fail here — confirm that is intended.
checkpoint = torch.load(PATH, map_location=torch.device('cpu'))
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# Metrics recorded when the checkpoint was written.
train_loss = checkpoint['train_loss']
valid_loss = checkpoint['valid_loss']
train_accuracy = checkpoint['train_accuracy']
valid_accuracy = checkpoint['valid_accuracy']
# Running total of epochs trained so far; the training loop adds to it when saving.
epochs_completed = checkpoint['epochs_completed']

In [31]:
# Metrics stored in the loaded checkpoint: train loss, validation loss, train accuracy, validation accuracy.
print(train_loss, valid_loss, train_accuracy, valid_accuracy)

0.05702671682687179 0.05770041843945335 0.9955312612416418 0.9866077561529295


In [32]:
# Number of epochs recorded as completed in the loaded checkpoint.
print(epochs_completed)

120


In [33]:
from torch.utils.tensorboard import SummaryWriter # to store accuracy and loss values in tensorboard
# Writer created with default arguments; the TensorBoard cell at the end of the
# notebook reads the `runs` directory — presumably where these logs are written.
writer = SummaryWriter()

In [34]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:33:58_PDT_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0


In [47]:
# Training loop: one training pass and one validation pass per epoch, followed
# by TensorBoard logging and a checkpoint save to PATH.
#
# Reported losses are per-sample means: each batch-mean loss is weighted by its
# batch size before the total is divided by the dataset size. Earlier versions
# divided a sum of batch means by the sample count, which gave values roughly
# `batch_size` times smaller.
for epoch in range(num_epochs): #model training

    # ---- Training pass ----
    model.train()
    print("\nrunning epoch " + str(epoch))
    train_accuracy=0.0
    train_loss=0.0

    for i, (images,labels) in enumerate(train_loader):
        print("running batch " + str(i), end=" ")
        # Move the batch to the device the model lives on (works on CPU as well).
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs=model(images)

        loss=loss_function(outputs,labels)
        loss.backward()
        optimizer.step()

        # loss is the mean over the batch; weight it by the batch size so the
        # epoch total is a sum over samples (the last batch may be smaller).
        train_loss += loss.item() * labels.size(0)
        _,prediction=torch.max(outputs.detach(),1)

        train_accuracy+=int(torch.sum(prediction==labels))

    train_accuracy=train_accuracy/train_count
    train_loss=train_loss/train_count


    # ---- Validation pass ----
    model.eval()

    valid_accuracy = 0.0
    valid_loss = 0.0

    # No gradients are needed for evaluation; this avoids building the graph.
    with torch.no_grad():
        for images,labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs=model(images)

            loss = loss_function (outputs, labels)
            _,prediction=torch.max(outputs,1)
            valid_accuracy += int(torch.sum(prediction == labels))

            valid_loss += loss.item() * labels.size(0)

    # Normalise once, after all batches. Doing this inside the batch loop (as
    # before) repeatedly shrank the running totals and corrupted both metrics.
    valid_accuracy = valid_accuracy/val_count
    valid_loss = valid_loss/val_count

    writer.add_scalar("Training Loss/epoch", train_loss, epoch)
    writer.add_scalar("Validation Loss/epoch", valid_loss, epoch)
    writer.add_scalar("Training Accuracy/epoch", train_accuracy, epoch)
    writer.add_scalar("Validation Accuracy/epoch", valid_accuracy, epoch)

    # Overwrite the checkpoint after every epoch. `epochs_completed` is the
    # count loaded from the previous checkpoint, so the stored total keeps
    # growing across resumed runs.
    torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'train_loss': train_loss,
            'valid_loss': valid_loss,
            'train_accuracy': train_accuracy,
            'valid_accuracy': valid_accuracy,
            'epochs_completed': epochs_completed + epoch + 1
            }, PATH)

    print('\nEpoch: %d Train Loss: %.6f Train Accuracy: %.6f Validation Loss: %.6f  Validation Accuracy: %.6f' % (epoch, train_loss, train_accuracy, valid_loss, valid_accuracy))

In [None]:
writer.flush() #write any values still buffered in the writer object
writer.close() #then close the writer object

### TensorBoard

In [None]:
!pip install tensorboard #tensorboard

In [None]:
%load_ext tensorboard

In [None]:
%tensorboard --logdir runs #displays accuracy and loss vs epoch graphs in tensorboard