In [1]:
# Import packages

import numpy as np    
import matplotlib.pyplot as plt       
from torchvision.transforms import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torchvision
import torch.nn.functional as F
from fastai.vision.all import *
set_seed(42, reproducible= True)
source = untar_data(URLs.IMAGENETTE)

In [6]:
# Load and resize data

# check learning rate decay, improve architecture (recent research papers)

classes = ("Tench", "English Springer", "Cassette Player", "Chain Saw", "Church", "French Horn", "Garbage Truck", "Gas Pump", "Golf Ball", "Parachute")

def load_data():
  train = source/"train"
  val = source/"val"

  train_dataset = ImageFolder(
    train,
    transforms.Compose([
        transforms.Resize(128),
        transforms.RandomCrop(128), 
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4663, 0.4590, 0.4300), (0.2764, 0.2721, 0.2951)),
        transforms.RandomErasing()
    ]))

  val_dataset = ImageFolder(
    val,
    transforms.Compose([
        transforms.Resize(128),
        transforms.RandomCrop(128), 
        transforms.ToTensor(),
        transforms.Normalize((0.4663, 0.4590, 0.4300), (0.2764, 0.2721, 0.2951))
    ]))

  train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
  val_dataloader = DataLoader(val_dataset, batch_size=32)
  
  return train_dataloader, val_dataloader

In [None]:
def get_mean_std(dl):
    sum_, squared_sum, batches = 0,0,0
    for data, _ in dl:
        sum_ += torch.mean(data,dim=([0,2,3]))
        squared_sum += torch.mean(data**2, dim=([0,2,3]))
        batches += 1
    mean = sum_/batches
    std = (squared_sum/batches-mean**2)**0.5
    return mean,std

train_dl, test_dl = load_data()
mean, std = get_mean_std(train_dl)
print(mean, std)

tensor([-0.0002,  0.0017,  0.0015]) tensor([0.9574, 0.9580, 0.9581])


In [3]:
"""
Taken from https://github.com/digantamisra98/Mish/tree/master/Mish/Torch
Applies the mish function element-wise:
mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + exp(x)))
"""

class Mish(nn.Module):
    """
    Applies the mish function element-wise:
    mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + exp(x)))
    Shape:
        - Input: (N, *) where * means, any number of additional
          dimensions
        - Output: (N, *), same shape as the input
    Examples:
        >>> m = Mish()
        >>> input = torch.randn(2)
        >>> output = m(input)
    Reference: https://pytorch.org/docs/stable/generated/torch.nn.Mish.html
    """

    def __init__(self):
        """
        Init method.
        """
        super().__init__()

    def forward(self, input):
        """
        Forward pass of the function.
        """
        if torch.__version__ >= "1.9":
            return F.mish(input)
        else:
            return input * torch.tanh(F.softplus(input))

In [None]:
# Define a convolution neural network (simple)
class Network(nn.Module):
  def __init__(self):
    super(Network, self).__init__()
        
    self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=5, stride=1, padding=1)
    self.bn1 = nn.BatchNorm2d(12)
    self.conv2 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=5, stride=1, padding=1)
    self.bn2 = nn.BatchNorm2d(12)
    self.pool = nn.MaxPool2d(2,2)
    self.conv4 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=5, stride=1, padding=1)
    self.bn4 = nn.BatchNorm2d(24)
    self.conv5 = nn.Conv2d(in_channels=24, out_channels=24, kernel_size=5, stride=1, padding=1)
    self.bn5 = nn.BatchNorm2d(24)
    self.conv6 = nn.Conv2d(in_channels=24, out_channels=32, kernel_size=5, stride=1, padding=1)
    self.bn6 = nn.BatchNorm2d(32)
    self.conv7 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=1)
    self.fc1 = nn.Linear(64*25*25, 128)
    self.fc2 = nn.Linear(128, 10)
  
  def forward(self, input):
    m = Mish()
    output = m(self.bn1(self.conv1(input.cuda())))      
    output = m(self.bn2(self.conv2(output)))     
    output = self.pool(output)                        
    output = m(self.bn4(self.conv4(output)))     
    output = m(self.bn5(self.conv5(output)))     
    output = self.pool(output)
    output = m(self.bn6(self.conv6(output)))
    output = m(self.conv7(output))
    output = output.view(-1, 64*25*25)
    output = m(self.fc1(output))
    output = self.fc2(output)
    output = F.log_softmax(output, dim=1)

    return output

# Instantiate a neural network model 
model2 = Network()

In [4]:
""" GoogLeNet From pytorch official website """
from collections import namedtuple
from typing import Optional, Tuple, List, Callable, Any
from torch import Tensor


class GoogLeNet(nn.Module):

    def __init__(
        self,
        num_classes: int = 1000,
        transform_input: bool = False,
        init_weights: Optional[bool] = None,
        blocks: Optional[List[Callable[..., nn.Module]]] = None,
        dropout: float = 0.2
    ) -> None:
        super().__init__()
        if blocks is None:
            blocks = [BasicConv2d, Inception]
        if init_weights is None:
            init_weights = True
        assert len(blocks) == 2
        conv_block = blocks[0]
        inception_block = blocks[1]

        self.transform_input = transform_input

        self.conv1 = conv_block(3, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
        self.conv2 = conv_block(64, 64, kernel_size=1)
        self.conv3 = conv_block(64, 192, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        self.inception3a = inception_block(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = inception_block(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        self.inception4a = inception_block(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = inception_block(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = inception_block(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = inception_block(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = inception_block(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.inception5a = inception_block(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = inception_block(832, 384, 192, 384, 48, 128, 128)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(p=dropout)
        self.fc = nn.Linear(1024, num_classes)

        if init_weights:
            for m in self.modules():
                if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                    torch.nn.init.trunc_normal_(m.weight, mean=0.0, std=0.01, a=-2, b=2)
                elif isinstance(m, nn.BatchNorm2d):
                    nn.init.constant_(m.weight, 1)
                    nn.init.constant_(m.bias, 0)

    def _transform_input(self, x: Tensor) -> Tensor:
        if self.transform_input:
            x_ch0 = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
            x_ch1 = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
            x_ch2 = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
            x = torch.cat((x_ch0, x_ch1, x_ch2), 1)
        return x

    def forward(self, x: Tensor) -> Tensor:
        # N x 3 x 224 x 224
        x = self.conv1(x)
        # N x 64 x 112 x 112
        x = self.maxpool1(x)
        # N x 64 x 56 x 56
        x = self.conv2(x)
        # N x 64 x 56 x 56
        x = self.conv3(x)
        # N x 192 x 56 x 56
        x = self.maxpool2(x)

        # N x 192 x 28 x 28
        x = self.inception3a(x)
        # N x 256 x 28 x 28
        x = self.inception3b(x)
        # N x 480 x 28 x 28
        x = self.maxpool3(x)
        # N x 480 x 14 x 14
        x = self.inception4a(x)
        # N x 512 x 14 x 14

        x = self.inception4b(x)
        # N x 512 x 14 x 14
        x = self.inception4c(x)
        # N x 512 x 14 x 14
        x = self.inception4d(x)
        # N x 528 x 14 x 14
        x = self.inception4e(x)
        # N x 832 x 14 x 14
        x = self.maxpool4(x)
        # N x 832 x 7 x 7
        x = self.inception5a(x)
        # N x 832 x 7 x 7
        x = self.inception5b(x)
        # N x 1024 x 7 x 7

        x = self.avgpool(x)
        # N x 1024 x 1 x 1
        x = torch.flatten(x, 1)
        # N x 1024
        x = self.dropout(x)
        x = self.fc(x)
        # N x 1000 (num_classes)
        return x

class Inception(nn.Module):
    def __init__(
        self,
        in_channels: int,
        ch1x1: int,
        ch3x3red: int,
        ch3x3: int,
        ch5x5red: int,
        ch5x5: int,
        pool_proj: int,
        conv_block: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        super().__init__()
        if conv_block is None:
            conv_block = BasicConv2d
        self.branch1 = conv_block(in_channels, ch1x1, kernel_size=1)

        self.branch2 = nn.Sequential(
            conv_block(in_channels, ch3x3red, kernel_size=1), conv_block(ch3x3red, ch3x3, kernel_size=3, padding=1)
        )

        self.branch3 = nn.Sequential(
            conv_block(in_channels, ch5x5red, kernel_size=1),
            # Here, kernel_size=3 instead of kernel_size=5 is a known bug.
            # Please see https://github.com/pytorch/vision/issues/906 for details.
            conv_block(ch5x5red, ch5x5, kernel_size=3, padding=1),
        )

        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=True),
            conv_block(in_channels, pool_proj, kernel_size=1),
        )

    def _forward(self, x: Tensor) -> List[Tensor]:
        branch1 = self.branch1(x)
        branch2 = self.branch2(x)
        branch3 = self.branch3(x)
        branch4 = self.branch4(x)

        outputs = [branch1, branch2, branch3, branch4]
        return outputs

    def forward(self, x: Tensor) -> Tensor:
        outputs = self._forward(x)
        return torch.cat(outputs, 1)

class BasicConv2d(nn.Module):
    def __init__(self, in_channels: int, out_channels: int, **kwargs: Any) -> None:
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)

    def forward(self, x: Tensor) -> Tensor:
        m = Mish()
        x = self.conv(x)
        x = self.bn(x)
        return m(x)

model = GoogLeNet()

In [54]:
""" 
    ResNet50D based on 'Bag of Tricks for Image Classification with Convolutional Neural Networks' 
    (https://openaccess.thecvf.com/content_CVPR_2019/papers/He_Bag_of_Tricks_for_Image_Classification_with_Convolutional_Neural_Networks_CVPR_2019_paper.pdf) 
    replacing relu with mish
    modified from https://github.com/JayPatwardhan/ResNet-PyTorch/blob/master/ResNet/ResNet.py
"""
class Bottleneck(nn.Module):
    expansion = 4
    def __init__(self, in_channels, out_channels, i_downsample=None, stride=1):
        super(Bottleneck, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
        self.batch_norm1 = nn.BatchNorm2d(out_channels)
        
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.batch_norm2 = nn.BatchNorm2d(out_channels)
        
        self.conv3 = nn.Conv2d(out_channels, out_channels*self.expansion, kernel_size=1, stride=1, padding=0)
        self.batch_norm3 = nn.BatchNorm2d(out_channels*self.expansion)
        
        self.i_downsample = i_downsample
        self.stride = stride
        
    def forward(self, x):
        identity = x.clone()
        m = Mish()
        x = m(self.batch_norm1(self.conv1(x)))
        
        x = m(self.batch_norm2(self.conv2(x)))
        
        x = self.conv3(x)
        x = self.batch_norm3(x)
        
        if self.i_downsample is not None:
            identity = self.i_downsample(identity)
        
        x += identity
        x = m(x)
        
        return x

class ResNet50D(nn.Module):
    def __init__(self, block):
        super(ResNet50D, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        self.batch_norm1 = nn.BatchNorm2d(64)
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        self.layer1 = self._make_layer(block, 3, planes=64)
        self.layer2 = self._make_layer(block, 4, planes=128, stride=2)
        self.layer3 = self._make_layer(block, 6, planes=256, stride=2)
        self.layer4 = self._make_layer(block, 3, planes=512, stride=2)
        
        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(512*block.expansion, 10)
        
    def forward(self, x):
        m = Mish()
        x = m(self.batch_norm1(self.conv1(x)))
        x = self.max_pool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        
        x = self.avgpool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc(x)
        
        return x
        
    def _make_layer(self, ResBlock, blocks, planes, stride=1):
        ii_downsample = None
        layers = []
        
        if stride != 1:
          ii_downsample = nn.Sequential(
                nn.AvgPool2d(2, stride=2),
                nn.Conv2d(self.in_channels, planes*ResBlock.expansion, kernel_size=1, stride=1),
                nn.BatchNorm2d(planes*ResBlock.expansion)
            )
        elif self.in_channels != planes*ResBlock.expansion:
            ii_downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, planes*ResBlock.expansion, kernel_size=1, stride=stride),
                nn.BatchNorm2d(planes*ResBlock.expansion)
            )
            
        layers.append(ResBlock(self.in_channels, planes, i_downsample=ii_downsample, stride=stride))
        self.in_channels = planes*ResBlock.expansion
        
        for i in range(blocks-1):
            layers.append(ResBlock(self.in_channels, planes))
            
        return nn.Sequential(*layers)

In [10]:
from torch.autograd import Variable
from torch.optim import Adam

# Function to save the model
def saveModel(cnn, p):
    path = "./"+p
    torch.save(cnn.state_dict(), path)

# Function to test the model with the test dataset and print the accuracy for the test images
def testAccuracy(cnn, device):
    
    cnn.eval()
    accuracy = 0.0
    total = 0.0
    
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            images = Variable(images.to(device))
            labels = Variable(labels.to(device))
            # run the model on the test set to predict labels
            outputs = cnn(images)
            # the label with the highest energy will be our prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            accuracy += (predicted == labels).sum().item()
    
    # compute the accuracy over all test images
    accuracy = (100 * accuracy / total)
    return(accuracy)

def trainAccuracy(cnn, device):

    cnn.eval()
    accuracy = 0.0
    total = 0.0
    
    with torch.no_grad():
        for data in train_loader:
            images, labels = data
            images = Variable(images.to(device))
            labels = Variable(labels.to(device))
            # run the model on the test set to predict labels
            outputs = cnn(images)
            # the label with the highest energy will be our prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            accuracy += (predicted == labels).sum().item()
    
    # compute the accuracy over all test images
    accuracy = (100 * accuracy / total)
    return(accuracy)


# Training function. We simply have to loop over our data iterator and feed the inputs to the network and optimize.
def train(cnn, num_epochs, path):
    
    best_accuracy = 0.0

    # Define your execution device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("The model will be running on", device, "device")
    # Convert model parameters and buffers to CPU or Cuda
    cnn.to(device)
 
    # Define the loss function with Classification Cross-Entropy loss and an optimizer with Adam optimizer
    loss_fn = nn.CrossEntropyLoss()
    optimizer = Adam(cnn.parameters(), lr=0.001, weight_decay=0.0001)

    all_accuracy = []

    for epoch in range(num_epochs):  # loop over the dataset multiple times
        running_loss = 0.0

        for i, (images, labels) in enumerate(train_loader, 0):
            # get the inputs
            images = Variable(images.to(device))
            labels = Variable(labels.to(device))

            # zero the parameter gradients
            optimizer.zero_grad()
            # predict classes using images from the training set
            outputs = cnn(images)
            # compute the loss based on model output and real labels
            loss = loss_fn(outputs, labels)
            # backpropagate the loss
            loss.backward()
            # adjust parameters based on the calculated gradients
            optimizer.step()

            # Let's print statistics for every 1000 images
            running_loss += loss.item()     # extract the loss value
            if i % 1000 == 999:    
                # print every 50 (twice per epoch) 
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 1000))
                # zero the loss
                running_loss = 0.0

        # Compute and print the average accuracy for this epoch when tested over all test images
        accuracy = testAccuracy(cnn, device)
        train_accuracy = trainAccuracy(cnn, device)
        all_accuracy.append(accuracy)
        print('For epoch', epoch+1,'the train accuracy is %d %%' % (train_accuracy), 'the test accuracy over the whole test set is %d %%' % (accuracy))
        
        # we want to save the model if the accuracy is the best
        if accuracy > best_accuracy:
            saveModel(cnn, path)
            best_accuracy = accuracy

In [None]:
# Function to show the images
def imageshow(img):
    img = img / 2 + 0.5     # unnormalize
    if torch.cuda.is_available():
      npimg = img.cpu().numpy()
    else:
      npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# Function to test the model with a batch of images and show the labels predictions
def testBatch(batch_size):
    # get batch of images from the test DataLoader  
    images, labels = next(iter(test_loader))
    if torch.cuda.is_available():
      images = images.cuda()
      labels = labels.cuda()

    # show all images as one image grid
    imageshow(torchvision.utils.make_grid(images))
   
    # Show the real labels on the screen 
    print('Real labels: ', ', '.join('%5s' % classes[labels[j]] 
                               for j in range(batch_size)))
  
    # Let's see what if the model identifiers the  labels of those example
    outputs = model(images)
    
    # We got the probability for every 10 labels. The highest (max) probability should be correct label
    _, predicted = torch.max(outputs, 1)
    
    # Let's show the predicted labels on the screen to compare with the real ones
    print('Predicted: ', ', '.join('%5s' % classes[predicted[j]] 
                              for j in range(batch_size)))

def testClassess(batch_size, number_of_labels):
    class_correct = list(0. for i in range(number_of_labels))
    class_total = list(0. for i in range(number_of_labels))
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            if torch.cuda.is_available():
              images = images.cuda()
              labels = labels.cuda()
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            c = (predicted == labels).squeeze()
            for i in range(batch_size):
                label = labels[i]
                class_correct[label] += c[i].item()
                class_total[label] += 1

    for i in range(number_of_labels):
        print('Accuracy of %5s : %2d %%' % (
            classes[i], 100 * class_correct[i] / class_total[i]))

In [8]:
# Let's build our model
train_loader, test_loader = load_data()

In [11]:
model = GoogLeNet()

train(model, 80, 'classifier_GoogLeNet.pth')

The model will be running on cuda:0 device
For epoch 1 the train accuracy is 41 % the test accuracy over the whole test set is 40 %
For epoch 2 the train accuracy is 36 % the test accuracy over the whole test set is 35 %
For epoch 3 the train accuracy is 41 % the test accuracy over the whole test set is 41 %
For epoch 4 the train accuracy is 43 % the test accuracy over the whole test set is 42 %
For epoch 5 the train accuracy is 52 % the test accuracy over the whole test set is 52 %
For epoch 6 the train accuracy is 52 % the test accuracy over the whole test set is 50 %
For epoch 7 the train accuracy is 57 % the test accuracy over the whole test set is 58 %
For epoch 8 the train accuracy is 60 % the test accuracy over the whole test set is 60 %
For epoch 9 the train accuracy is 65 % the test accuracy over the whole test set is 65 %
For epoch 10 the train accuracy is 61 % the test accuracy over the whole test set is 61 %
For epoch 11 the train accuracy is 64 % the test accuracy over the

KeyboardInterrupt: ignored

In [None]:
model = ResNet50D(Bottleneck)

train(model, 80, 'classifier_ResNet50D.pth')

The model will be running on cuda:0 device
For epoch 1 the train accuracy is 40 % the test accuracy over the whole test set is 40 %
For epoch 2 the train accuracy is 42 % the test accuracy over the whole test set is 41 %
For epoch 3 the train accuracy is 43 % the test accuracy over the whole test set is 42 %
For epoch 4 the train accuracy is 51 % the test accuracy over the whole test set is 52 %
For epoch 5 the train accuracy is 59 % the test accuracy over the whole test set is 60 %
For epoch 6 the train accuracy is 61 % the test accuracy over the whole test set is 62 %
For epoch 7 the train accuracy is 65 % the test accuracy over the whole test set is 65 %
For epoch 8 the train accuracy is 68 % the test accuracy over the whole test set is 69 %
For epoch 9 the train accuracy is 67 % the test accuracy over the whole test set is 66 %
For epoch 10 the train accuracy is 66 % the test accuracy over the whole test set is 65 %
For epoch 11 the train accuracy is 71 % the test accuracy over the

In [None]:
train(20) # epochs 40 - 60 for GoogLeNet

The model will be running on cuda:0 device
For epoch 1 the train accuracy is 82 % the test accuracy over the whole test set is 77 %
For epoch 2 the train accuracy is 85 % the test accuracy over the whole test set is 79 %
For epoch 3 the train accuracy is 86 % the test accuracy over the whole test set is 80 %
For epoch 4 the train accuracy is 86 % the test accuracy over the whole test set is 80 %
For epoch 5 the train accuracy is 86 % the test accuracy over the whole test set is 80 %
For epoch 6 the train accuracy is 85 % the test accuracy over the whole test set is 79 %
For epoch 7 the train accuracy is 85 % the test accuracy over the whole test set is 79 %
For epoch 8 the train accuracy is 87 % the test accuracy over the whole test set is 80 %
For epoch 9 the train accuracy is 88 % the test accuracy over the whole test set is 80 %
For epoch 10 the train accuracy is 87 % the test accuracy over the whole test set is 80 %
For epoch 11 the train accuracy is 88 % the test accuracy over the

In [None]:
from google.colab import drive
drive.mount('/content/drive')