In [31]:
#Load libraries
import os
import numpy as np
import torch
import glob
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import torchvision
import pathlib
import math
import torch.nn.functional as F

In [32]:
#checking for device
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [33]:
print(device)

cuda


In [34]:
## Hyperparameters
batch_size_ = 36
num_epochs = 35
size = 224

In [35]:
#Transforms
transformer=transforms.Compose([
    transforms.Resize((size,size)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.8),
    # transforms.RandomRotation(degrees=45),
    transforms.ToTensor(),  #0-255 to 0-1, numpy to tensors
    transforms.Normalize([0.5,0.5,0.5], # 0-1 to [-1,1] , formula (x-mean)/std
                        [0.5,0.5,0.5])
])


In [36]:
#Dataloader

#Path for training and testing directory
train_path='Dataset\\train'
test_path='Dataset\\val'

train_loader=DataLoader(
    torchvision.datasets.ImageFolder(train_path,transform=transformer),
    batch_size=batch_size_, shuffle=True
)
test_loader=DataLoader(
    torchvision.datasets.ImageFolder(test_path,transform=transformer),
    batch_size=batch_size_, shuffle=True
)

In [37]:
#categories
root=pathlib.Path(train_path)
classes=sorted([j.name.split('/')[-1] for j in root.iterdir()])

In [38]:
print(classes)

['basketball_court', 'bridge', 'crosswalk', 'golf_course', 'oil_well', 'overpass', 'railway', 'runway', 'swimming_pool', 'tennis_court']


In [39]:
#CNN Network


class ConvNet(nn.Module):
    def __init__(self,num_classes):
        super(ConvNet,self).__init__()
        
        #Output size after convolution filter
        #((w-f+2P)/s) +1
        
        #Input shape= (256,3,256,256)
        
        self.conv1=nn.Conv2d(in_channels=3,out_channels=12,kernel_size=3,stride=1,padding=1)
        self.bn1=nn.BatchNorm2d(num_features=12)
        self.relu1=nn.ReLU()
        
        self.pool1=nn.MaxPool2d(kernel_size=2)
        
        self.conv2=nn.Conv2d(in_channels=12,out_channels=20,kernel_size=3,stride=1,padding=1)
        self.relu2=nn.ReLU()
        
        
        
        self.conv3=nn.Conv2d(in_channels=20,out_channels=32,kernel_size=3,stride=1,padding=1)
        self.bn3=nn.BatchNorm2d(num_features=32)
        self.relu3=nn.ReLU()
        #Shape= (batch_size,32,128,128)

        self.pool2 = nn.MaxPool2d(kernel_size=2)

        self.conv4 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(num_features=64)
        self.relu4 = nn.ReLU()
        # Shape = (batch_size,64,64,64)

        self.conv5 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.bn5 = nn.BatchNorm2d(num_features=128)
        self.relu5 = nn.ReLU()
        # Shape = (batch_size,128,64,64)

        
        self.fc=nn.Linear(in_features=128*64*64 ,out_features=num_classes)
        
        
        
        #Feed forwad function
        
    def forward(self,input):
        output=self.conv1(input)
        output=self.bn1(output)
        output=self.relu1(output)
            
        output=self.pool1(output)
            
        output=self.conv2(output)
        output=self.relu2(output)
            
        output=self.conv3(output)
        output=self.bn3(output)
        output=self.relu3(output)
        
        output=self.pool2(output)

        output=self.conv4(output)
        output=self.bn4(output)
        output=self.relu4(output)

        output=self.conv5(output)
        output=self.bn5(output)
        output=self.relu5(output)

            
        #Above output will be in matrix form, with shape (256,128,64,64)
        
        output=output.view(-1,128*64*64)
            
            
        output=self.fc(output)
            
        return output
            
        


In [40]:
## VGG Network

VGG_types = {
    "MyConv": [16,"M",32,32,"M",64,64,"M",128,128,"M"],
    "MyVGG" : [32,32,"M",64,64,"M",128,128,"M",256,256,"M"],
    "VGGmod":[16, "M", 32, "M", 64,64, "M", 128,128, "M", 256, 256, "M",256,256, "M"],   
    "VGG11": [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
    "VGG13": [64, 64, "M", 128, 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
    "VGG16": [64,64,"M",128,128,"M",256,256,256,"M",512,512,512,"M",512,512,512,"M"],
    "VGG19": [64,64,"M",128,128,"M",256,256,256,256,"M",512,512,512,512,"M",512,512,512,512,"M"],
}


class VGG_net(nn.Module):
    def __init__(self, in_channels, num_classes, type="VGG16"):
        super(VGG_net, self).__init__()
        self.in_channels = in_channels
        self.conv_layers = self.create_conv_layers(VGG_types[type])

        self.fcs = nn.Sequential(
            nn.Linear(512*7*7, num_classes),
            # nn.ReLU(),
            # nn.Dropout(p=0.5),
            # nn.Linear(4096, num_classes),
            # nn.ReLU(),
            # nn.Dropout(p=0.5),
            # nn.Linear(128, num_classes),
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fcs(x)
        return x

    def create_conv_layers(self, architecture):
        layers = []
        in_channels = self.in_channels

        for x in architecture:
            if type(x) == int:
                out_channels = x

                layers += [
                    nn.Conv2d(
                        in_channels=in_channels,
                        out_channels=out_channels,
                        kernel_size=(3, 3),
                        stride=(1, 1),
                        padding=(1, 1),
                    ),
                    nn.BatchNorm2d(x),
                    nn.ReLU(),
                ]
                in_channels = x
            elif x == "M":
                layers += [nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))]

        return nn.Sequential(*layers)

In [41]:
## Inception Net

class GoogLeNet(nn.Module):
    def __init__(self, aux_logits=True, num_classes=10):
        super(GoogLeNet, self).__init__()
        assert aux_logits == True or aux_logits == False
        self.aux_logits = aux_logits

        # Write in_channels, etc, all explicit in self.conv1, rest will write to
        # make everything as compact as possible, kernel_size=3 instead of (3,3)
        self.conv1 = conv_block(
            in_channels=3,
            out_channels=64,
            kernel_size=(7, 7),
            stride=(2, 2),
            padding=(3, 3),
        )

        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.conv2 = conv_block(64, 192, kernel_size=3, stride=1, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # In this order: in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool
        self.inception3a = Inception_block(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = Inception_block(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(kernel_size=(3, 3), stride=2, padding=1)

        self.inception4a = Inception_block(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = Inception_block(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = Inception_block(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = Inception_block(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = Inception_block(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.inception5a = Inception_block(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = Inception_block(832, 384, 192, 384, 48, 128, 128)

        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.dropout = nn.Dropout(p=0.4)
        self.fc1 = nn.Linear(1024, num_classes)

        if self.aux_logits:
            self.aux1 = InceptionAux(512, num_classes)
            self.aux2 = InceptionAux(528, num_classes)
        else:
            self.aux1 = self.aux2 = None

    def forward(self, x):
        x = self.conv1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        # x = self.conv3(x)
        x = self.maxpool2(x)

        x = self.inception3a(x)
        x = self.inception3b(x)
        x = self.maxpool3(x)

        x = self.inception4a(x)

        # Auxiliary Softmax classifier 1
        if self.aux_logits and self.training:
            aux1 = self.aux1(x)

        x = self.inception4b(x)
        x = self.inception4c(x)
        x = self.inception4d(x)

        # Auxiliary Softmax classifier 2
        if self.aux_logits and self.training:
            aux2 = self.aux2(x)

        x = self.inception4e(x)
        x = self.maxpool4(x)
        x = self.inception5a(x)
        x = self.inception5b(x)
        x = self.avgpool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.dropout(x)
        x = self.fc1(x)

        if self.aux_logits and self.training:
            return aux1, aux2, x
        else:
            return x


class Inception_block(nn.Module):
    def __init__(
        self, in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool
    ):
        super(Inception_block, self).__init__()
        self.branch1 = conv_block(in_channels, out_1x1, kernel_size=(1, 1))

        self.branch2 = nn.Sequential(
            conv_block(in_channels, red_3x3, kernel_size=(1, 1)),
            conv_block(red_3x3, out_3x3, kernel_size=(3, 3), padding=(1, 1)),
        )

        self.branch3 = nn.Sequential(
            conv_block(in_channels, red_5x5, kernel_size=(1, 1)),
            conv_block(red_5x5, out_5x5, kernel_size=(5, 5), padding=(2, 2)),
        )

        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            conv_block(in_channels, out_1x1pool, kernel_size=(1, 1)),
        )

    def forward(self, x):
        return torch.cat(
            [self.branch1(x), self.branch2(x), self.branch3(x), self.branch4(x)], 1
        )


class InceptionAux(nn.Module):
    def __init__(self, in_channels, num_classes):
        super(InceptionAux, self).__init__()
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.7)
        self.pool = nn.AvgPool2d(kernel_size=5, stride=3)
        self.conv = conv_block(in_channels, 128, kernel_size=1)
        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.pool(x)
        x = self.conv(x)
        x = x.reshape(x.shape[0], -1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)

        return x


class conv_block(nn.Module):
    def __init__(self, in_channels, out_channels, **kwargs):
        super(conv_block, self).__init__()
        self.relu = nn.ReLU()
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.batchnorm = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        return self.relu(self.batchnorm(self.conv(x)))


In [42]:
## Resnet

class block(nn.Module):
    def __init__(
        self, in_channels, intermediate_channels, identity_downsample=None, stride=1
    ):
        super(block, self).__init__()
        self.expansion = 4
        self.conv1 = nn.Conv2d(
            in_channels, intermediate_channels, kernel_size=1, stride=1, padding=0, bias=False
        )
        self.bn1 = nn.BatchNorm2d(intermediate_channels)
        self.conv2 = nn.Conv2d(
            intermediate_channels,
            intermediate_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False
        )
        self.bn2 = nn.BatchNorm2d(intermediate_channels)
        self.conv3 = nn.Conv2d(
            intermediate_channels,
            intermediate_channels * self.expansion,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False
        )
        self.bn3 = nn.BatchNorm2d(intermediate_channels * self.expansion)
        self.relu = nn.ReLU()
        self.identity_downsample = identity_downsample
        self.stride = stride

    def forward(self, x):
        identity = x.clone()

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.conv3(x)
        x = self.bn3(x)

        if self.identity_downsample is not None:
            identity = self.identity_downsample(identity)

        x += identity
        x = self.relu(x)
        return x


class ResNet(nn.Module):
    def __init__(self, block, layers, image_channels, num_classes):
        super(ResNet, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(image_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Essentially the entire ResNet architecture are in these 4 lines below
        self.layer1 = self._make_layer(
            block, layers[0], intermediate_channels=64, stride=1
        )
        self.layer2 = self._make_layer(
            block, layers[1], intermediate_channels=128, stride=2
        )
        self.layer3 = self._make_layer(
            block, layers[2], intermediate_channels=256, stride=2
        )
        self.layer4 = self._make_layer(
            block, layers[3], intermediate_channels=512, stride=2
        )

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * 4, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc(x)

        return x

    def _make_layer(self, block, num_residual_blocks, intermediate_channels, stride):
        identity_downsample = None
        layers = []

        # Either if we half the input space for ex, 56x56 -> 28x28 (stride=2), or channels changes
        # we need to adapt the Identity (skip connection) so it will be able to be added
        # to the layer that's ahead
        if stride != 1 or self.in_channels != intermediate_channels * 4:
            identity_downsample = nn.Sequential(
                nn.Conv2d(
                    self.in_channels,
                    intermediate_channels * 4,
                    kernel_size=1,
                    stride=stride,
                    bias=False
                ),
                nn.BatchNorm2d(intermediate_channels * 4),
            )

        layers.append(
            block(self.in_channels, intermediate_channels, identity_downsample, stride)
        )

        # The expansion size is always 4 for ResNet 50,101,152
        self.in_channels = intermediate_channels * 4

        # For example for first resnet layer: 256 will be mapped to 64 as intermediate layer,
        # then finally back to 256. Hence no identity downsample is needed, since stride = 1,
        # and also same amount of channels.
        for i in range(num_residual_blocks - 1):
            layers.append(block(self.in_channels, intermediate_channels))

        return nn.Sequential(*layers)


def ResNet50(img_channel=3, num_classes=1000):
    return ResNet(block, [3, 4, 6, 3], img_channel, num_classes)


def ResNet101(img_channel=3, num_classes=1000):
    return ResNet(block, [3, 4, 23, 3], img_channel, num_classes)


def ResNet152(img_channel=3, num_classes=1000):
    return ResNet(block, [3, 8, 36, 3], img_channel, num_classes)

In [43]:
## Efficient Net

from math import ceil

base_model = [
    # expand_ratio, channels, repeats, stride, kernel_size
    [1, 16, 1, 1, 3],
    [6, 24, 2, 2, 3],
    [6, 40, 2, 2, 5],
    [6, 80, 3, 2, 3],
    [6, 112, 3, 1, 5],
    [6, 192, 4, 2, 5],
    [6, 320, 1, 1, 3],
]

phi_values = {
    # tuple of: (phi_value, resolution, drop_rate)
    "b0": (0, 224, 0.2),  # alpha, beta, gamma, depth = alpha ** phi
    "b1": (0.5, 240, 0.2),
    "b2": (1, 260, 0.3),
    "b3": (2, 300, 0.3),
    "b4": (3, 380, 0.4),
    "b5": (4, 456, 0.4),
    "b6": (5, 528, 0.5),
    "b7": (6, 600, 0.5),
}

class CNNBlock(nn.Module):
    def __init__(
            self, in_channels, out_channels, kernel_size, stride, padding, groups=1
    ):
        super(CNNBlock, self).__init__()
        self.cnn = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            groups=groups,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.silu = nn.SiLU() # SiLU <-> Swish

    def forward(self, x):
        return self.silu(self.bn(self.cnn(x)))

class SqueezeExcitation(nn.Module):
    def __init__(self, in_channels, reduced_dim):
        super(SqueezeExcitation, self).__init__()
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1), # C x H x W -> C x 1 x 1
            nn.Conv2d(in_channels, reduced_dim, 1),
            nn.SiLU(),
            nn.Conv2d(reduced_dim, in_channels, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        return x * self.se(x)

class InvertedResidualBlock(nn.Module):
    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            expand_ratio,
            reduction=4, # squeeze excitation
            survival_prob=0.8, # for stochastic depth
    ):
        super(InvertedResidualBlock, self).__init__()
        self.survival_prob = 0.8
        self.use_residual = in_channels == out_channels and stride == 1
        hidden_dim = in_channels * expand_ratio
        self.expand = in_channels != hidden_dim
        reduced_dim = int(in_channels / reduction)

        if self.expand:
            self.expand_conv = CNNBlock(
                in_channels, hidden_dim, kernel_size=3, stride=1, padding=1,
            )

        self.conv = nn.Sequential(
            CNNBlock(
                hidden_dim, hidden_dim, kernel_size, stride, padding, groups=hidden_dim,
            ),
            SqueezeExcitation(hidden_dim, reduced_dim),
            nn.Conv2d(hidden_dim, out_channels, 1, bias=False),
            nn.BatchNorm2d(out_channels),
        )

    def stochastic_depth(self, x):
        if not self.training:
            return x

        binary_tensor = torch.rand(x.shape[0], 1, 1, 1, device=x.device) < self.survival_prob
        return torch.div(x, self.survival_prob) * binary_tensor

    def forward(self, inputs):
        x = self.expand_conv(inputs) if self.expand else inputs

        if self.use_residual:
            return self.stochastic_depth(self.conv(x)) + inputs
        else:
            return self.conv(x)


class EfficientNet(nn.Module):
    def __init__(self, version, num_classes):
        super(EfficientNet, self).__init__()
        width_factor, depth_factor, dropout_rate = self.calculate_factors(version)
        last_channels = ceil(1280 * width_factor)
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.features = self.create_features(width_factor, depth_factor, last_channels)
        self.classifier = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(last_channels, num_classes),
        )

    def calculate_factors(self, version, alpha=1.2, beta=1.1):
        phi, res, drop_rate = phi_values[version]
        depth_factor = alpha ** phi
        width_factor = beta ** phi
        return width_factor, depth_factor, drop_rate

    def create_features(self, width_factor, depth_factor, last_channels):
        channels = int(32 * width_factor)
        features = [CNNBlock(3, channels, 3, stride=2, padding=1)]
        in_channels = channels

        for expand_ratio, channels, repeats, stride, kernel_size in base_model:
            out_channels = 4*ceil(int(channels*width_factor) / 4)
            layers_repeats = ceil(repeats * depth_factor)

            for layer in range(layers_repeats):
                features.append(
                    InvertedResidualBlock(
                        in_channels,
                        out_channels,
                        expand_ratio=expand_ratio,
                        stride = stride if layer == 0 else 1,
                        kernel_size=kernel_size,
                        padding=kernel_size//2, # if k=1:pad=0, k=3:pad=1, k=5:pad=2
                    )
                )
                in_channels = out_channels

        features.append(
            CNNBlock(in_channels, last_channels, kernel_size=1, stride=1, padding=0)
        )

        return nn.Sequential(*features)

    def forward(self, x):
        x = self.pool(self.features(x))
        return self.classifier(x.view(x.shape[0], -1))



In [44]:
# model = VGG_net(3, len(classes) , "VGG13").to(device)     #!All models out of race 

model = GoogLeNet(aux_logits=False, num_classes=len(classes)).to(device)  #* Without logits works best

# model = GoogLeNet(aux_logits=True, num_classes=len(classes)).to(device)  #* Without logits works best

# model = EfficientNet(version= "b0" ,num_classes=len(classes)).to(device)   #* Useless Model

# model = ResNet101(img_channel=3, num_classes=len(classes)).to(device) #!All models out of race


### Observations

For GoogLeNet 
- batch size of 32 and 20 epochs 
  - the training accuracy is about 0.98. It gives tennis courts and basket ball courts as Runway. 117 --> Crosswalk, 199--> Runway.

- Batch size 40 out of memory

- Batch size 36, epochs 20 
  - training accuracy 0.99. Again gives tennis courts and basket ball courts as Runway. 117 --> basketball_court, 199--> Runway.
  - Better than batch size 32.

- Batch size 36, epochs 30
  - training accuracy 1. Gives tennis courts and basket ball courts correct. 117 --> Railway, 120--> railway 199--> Runway.
  - Maybe the model is overfitting.

- With more Data Augmentation
  - Batch size 32, epochs 30
   - Seems training did not complete
   - Wrong output for images with shadows.

  - added image crop, epochs 50
   - - Shadows issue seems to be resoloved.
   - - But Oilwells not classified properly.
   - - 117--> Tennis_court, 120 --> Tennis_court, 199 --> Basketball_court. # All are correct.
   - - Seems to me that random crop is not correct.
  
  - Removed image crop, epochs 50
   - - Oilwells not classified properly again.
   - - Seems crop is not the issue
   - - The issue lies with higher epochs. 
   - - 117--> Basketball_court, 120 --> Basketball_court, 199 --> Crosswalk. 

  - Epochs 40
  - - Didn't get so good results
  - - 117--> Basketball_court, 120 --> Tennis_court, 199 --> Crosswalk. # Cross walk is wrong.

  - CenterCrop instead of RandomCrop
  - - Not so good results
  - - Oilwells issue persists.
  - - 117--> Basketball_court, 120 --> Tennis_court, 199 --> Basketball_court. # All correct
  - - Seems training did not complete

  Observation: Increasing epochs is making oilwell detection more difficult.

For GoogleNet with Aux Logits True
- batch size of 36 and 30 epochs
  - Pretty bad model. Gives bridge as Basketball court. 117--> Basket ball court ,120--> Railway ,199--> Runway

- batch size of 32 and 20 epochs
  - Better than batch size 36 and 30 epochs. Good classification for basketball court and tennis court. 117 -->oilwell, 120 --> Railway, 199 --> Basketball court.

For EffecientNet 
- batch size of 32 and 20 epochs
  - out of memory

- batch size of 16 and 20 epochs
  - Didn't complete training.

- batch size of 16 and 30 epochs
  - Pretty bad model. Training accuracy is 1. Oil well not learnt at all. Lots of overpass and Crosswalk.
   Gives bridge as Basketball court. 117--> Overpass ,120--> Overpass ,199--> Crosswalk

In [45]:
#Optmizer and loss function
optimizer=Adam(model.parameters(),lr=0.001,weight_decay=0.0001)
loss_function=nn.CrossEntropyLoss()

In [46]:
#calculating the size of training and testing images
train_count=len(glob.glob(train_path+'/**/*.jpg'))
test_count=len(glob.glob(test_path+'/**/*.jpg'))

In [47]:
print(train_count,test_count)

500 100


In [48]:
#Model training and saving best model 

best_accuracy=0.0

for epoch in range(num_epochs):
    
    #Evaluation and training on training dataset
    model.train()
    train_accuracy=0.0
    train_loss=0.0
    
    for i, (images,labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        optimizer.zero_grad()
        
        outputs=model(images)
        loss=loss_function(outputs,labels)
        loss.backward()
        optimizer.step()
        
        
        train_loss+= loss.cpu().data*images.size(0)
        _,prediction=torch.max(outputs.data,1)
        
        train_accuracy+=int(torch.sum(prediction==labels.data))
        
    train_accuracy=train_accuracy/train_count
    train_loss=train_loss/train_count
    
    
    # Evaluation on testing dataset
    model.eval()
    
    test_accuracy=0.0
    for i, (images,labels) in enumerate(test_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        outputs=model(images)
        _,prediction=torch.max(outputs.data,1)
        test_accuracy+=int(torch.sum(prediction==labels.data))
    
    test_accuracy=test_accuracy/test_count
    
    
    print('Epoch: '+str(epoch+1)+' Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy)+' Test Accuracy: '+str(test_accuracy))
    
    #Save the best model
    if test_accuracy>=best_accuracy:
        torch.save(model.state_dict(),'best_checkpoint.model')
        best_accuracy=test_accuracy
    
       


Epoch: 1 Train Loss: tensor(1.9219) Train Accuracy: 0.3 Test Accuracy: 0.1
Epoch: 2 Train Loss: tensor(1.3685) Train Accuracy: 0.51 Test Accuracy: 0.23
Epoch: 3 Train Loss: tensor(1.1703) Train Accuracy: 0.59 Test Accuracy: 0.44
Epoch: 4 Train Loss: tensor(0.9829) Train Accuracy: 0.658 Test Accuracy: 0.59
Epoch: 5 Train Loss: tensor(1.0385) Train Accuracy: 0.658 Test Accuracy: 0.52
Epoch: 6 Train Loss: tensor(0.9253) Train Accuracy: 0.68 Test Accuracy: 0.34
Epoch: 7 Train Loss: tensor(0.8724) Train Accuracy: 0.702 Test Accuracy: 0.47
Epoch: 8 Train Loss: tensor(0.7218) Train Accuracy: 0.75 Test Accuracy: 0.6
Epoch: 9 Train Loss: tensor(0.6709) Train Accuracy: 0.748 Test Accuracy: 0.75
Epoch: 10 Train Loss: tensor(0.8583) Train Accuracy: 0.696 Test Accuracy: 0.49


## Inference


In [None]:
checkpoint = torch.load('best_checkpoint.model')
model.load_state_dict(checkpoint)
model.eval()

GoogLeNet(
  (conv1): conv_block(
    (relu): ReLU()
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (batchnorm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (conv2): conv_block(
    (relu): ReLU()
    (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (batchnorm): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (inception3a): Inception_block(
    (branch1): conv_block(
      (relu): ReLU()
      (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1))
      (batchnorm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (branch2): Sequential(
      (0): conv_block(
        (relu): ReLU()
        (conv): Conv2d(192, 96, kernel_size

In [None]:
#Transforms
transformer=transforms.Compose([
    transforms.Resize((size,size)),
    transforms.ToTensor(),  #0-255 to 0-1, numpy to tensors
    transforms.Normalize([0.5,0.5,0.5], # 0-1 to [-1,1] , formula (x-mean)/std
                        [0.5,0.5,0.5])
])

In [None]:
## Making predictions
from PIL import Image
from io import open


def prediction(path, transform):
    image=Image.open(path)
    image_tensor=transformer(image).float()
    image_tensor=image_tensor.unsqueeze_(0)
    image_tensor=image_tensor.to(device)
    output=model(image_tensor)
    _,prediction=torch.max(output.data,1)
    return classes[prediction[0]]

In [None]:
test_path = "Dataset\\test"
image_path=glob.glob(test_path+'/*.jpg')

In [None]:
predictions={}
for i,path in enumerate(image_path):
    predictions[path[len(test_path)+1:]]=prediction(path,transformer)


In [None]:
predictions

{'101.jpg': 'oil_well',
 '102.jpg': 'tennis_court',
 '103.jpg': 'crosswalk',
 '104.jpg': 'basketball_court',
 '105.jpg': 'basketball_court',
 '106.jpg': 'overpass',
 '107.jpg': 'tennis_court',
 '108.jpg': 'golf_course',
 '109.jpg': 'bridge',
 '110.jpg': 'bridge',
 '111.jpg': 'basketball_court',
 '112.jpg': 'basketball_court',
 '113.jpg': 'oil_well',
 '114.jpg': 'overpass',
 '115.jpg': 'runway',
 '116.jpg': 'swimming_pool',
 '117.jpg': 'basketball_court',
 '118.jpg': 'basketball_court',
 '119.jpg': 'runway',
 '120.jpg': 'tennis_court',
 '121.jpg': 'basketball_court',
 '122.jpg': 'bridge',
 '123.jpg': 'overpass',
 '124.jpg': 'golf_course',
 '125.jpg': 'crosswalk',
 '126.jpg': 'overpass',
 '127.jpg': 'basketball_court',
 '128.jpg': 'basketball_court',
 '129.jpg': 'basketball_court',
 '130.jpg': 'crosswalk',
 '131.jpg': 'golf_course',
 '132.jpg': 'basketball_court',
 '133.jpg': 'crosswalk',
 '134.jpg': 'runway',
 '135.jpg': 'bridge',
 '136.jpg': 'golf_course',
 '137.jpg': 'bridge',
 '138.j

In [None]:
label_dict = {
    "basketball_court": 1, 
    "bridge":2, 
    "crosswalk":3, 
    "golf_course":4, 
    "oil_well":5, 
    "overpass":6, 
    "railway":7, 
    "runway":8, 
    "swimming_pool":9, 
    "tennis_court":10
}

In [None]:
## Convert the predictions to labels from label_dict
predictions_labels={}
for key,value in predictions.items():
    predictions_labels[key]=label_dict[value]


In [None]:
predictions_labels

{'101.jpg': 5,
 '102.jpg': 10,
 '103.jpg': 3,
 '104.jpg': 1,
 '105.jpg': 1,
 '106.jpg': 6,
 '107.jpg': 10,
 '108.jpg': 4,
 '109.jpg': 2,
 '110.jpg': 2,
 '111.jpg': 1,
 '112.jpg': 1,
 '113.jpg': 5,
 '114.jpg': 6,
 '115.jpg': 8,
 '116.jpg': 9,
 '117.jpg': 1,
 '118.jpg': 1,
 '119.jpg': 8,
 '120.jpg': 10,
 '121.jpg': 1,
 '122.jpg': 2,
 '123.jpg': 6,
 '124.jpg': 4,
 '125.jpg': 3,
 '126.jpg': 6,
 '127.jpg': 1,
 '128.jpg': 1,
 '129.jpg': 1,
 '130.jpg': 3,
 '131.jpg': 4,
 '132.jpg': 1,
 '133.jpg': 3,
 '134.jpg': 8,
 '135.jpg': 2,
 '136.jpg': 4,
 '137.jpg': 2,
 '138.jpg': 2,
 '139.jpg': 6,
 '140.jpg': 4,
 '141.jpg': 10,
 '142.jpg': 10,
 '143.jpg': 1,
 '144.jpg': 4,
 '145.jpg': 1,
 '146.jpg': 4,
 '147.jpg': 10,
 '148.jpg': 5,
 '149.jpg': 1,
 '150.jpg': 1,
 '151.jpg': 8,
 '152.jpg': 8,
 '153.jpg': 1,
 '154.jpg': 6,
 '155.jpg': 1,
 '156.jpg': 4,
 '157.jpg': 10,
 '158.jpg': 3,
 '159.jpg': 7,
 '160.jpg': 2,
 '161.jpg': 10,
 '162.jpg': 6,
 '163.jpg': 4,
 '164.jpg': 2,
 '165.jpg': 1,
 '166.jpg': 4,
 '

## Converting the predictions to CSV format

In [None]:
# Convert predictions to dataframe
import pandas as pd

# Make a dataframe with the predictions with column names as ImageID and LabelID
predictions_df = pd.DataFrame.from_dict(predictions_labels, orient='index')

# Removing the .jpg from the file names
predictions_df.index=predictions_df.index.str.replace('.jpg','')

# Naming the columns as ImageID and Label
predictions_df.reset_index(level=0, inplace=True)
predictions_df.columns = ['ImageID', 'LabelID']

# Removing the index from the dataframe
predictions_df.reset_index(drop= True, inplace=True)

  


In [None]:
predictions_df

Unnamed: 0,ImageID,LabelID
0,101,5
1,102,10
2,103,3
3,104,1
4,105,1
...,...,...
95,196,7
96,197,1
97,198,4
98,199,10


In [None]:
## Saving the predictions to csv
predictions_df.to_csv('18D070067.csv',index=False)   #18D070067.csv is the name of the csv file and the index have been dropped

# Hence, the implementation of the model is complete and the predictions are converted to CSV format.