In [1]:
#Load libraries
import os
import numpy as np
import torch
import glob
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import torchvision
import pathlib

In [2]:
#checking for device
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
print(device)

cuda


In [4]:
## Hyperparameters
batch_size_ = 4
num_epochs = 10
size = 256

In [5]:
#Transforms
transformer=transforms.Compose([
    transforms.Resize((size,size)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),  #0-255 to 0-1, numpy to tensors
    transforms.Normalize([0.5,0.5,0.5], # 0-1 to [-1,1] , formula (x-mean)/std
                        [0.5,0.5,0.5])
])

In [6]:
#Dataloader

#Path for training and testing directory
train_path='Dataset\\train'
test_path='Dataset\\val'

train_loader=DataLoader(
    torchvision.datasets.ImageFolder(train_path,transform=transformer),
    batch_size=batch_size_, shuffle=True
)
test_loader=DataLoader(
    torchvision.datasets.ImageFolder(test_path,transform=transformer),
    batch_size=batch_size_, shuffle=True
)

In [7]:
#categories
root=pathlib.Path(train_path)
classes=sorted([j.name.split('/')[-1] for j in root.iterdir()])

In [8]:
print(classes)

['basketball_court', 'bridge', 'crosswalk', 'golf_course', 'oil_well', 'overpass', 'railway', 'runway', 'swimming_pool', 'tennis_court']


In [9]:
#CNN Network


class ConvNet(nn.Module):
    def __init__(self,num_classes):
        super(ConvNet,self).__init__()
        
        #Output size after convolution filter
        #((w-f+2P)/s) +1
        
        #Input shape= (256,3,150,150)
        
        self.conv1=nn.Conv2d(in_channels=3,out_channels=12,kernel_size=3,stride=1,padding=1)
        self.bn1=nn.BatchNorm2d(num_features=12)
        self.relu1=nn.ReLU()
        
        self.pool=nn.MaxPool2d(kernel_size=2)
        
        self.conv2=nn.Conv2d(in_channels=12,out_channels=20,kernel_size=3,stride=1,padding=1)
        self.relu2=nn.ReLU()
        
        
        
        self.conv3=nn.Conv2d(in_channels=20,out_channels=32,kernel_size=3,stride=1,padding=1)
        self.bn3=nn.BatchNorm2d(num_features=32)
        self.relu3=nn.ReLU()
        #Shape= (batch_size,32,128,128)
        
        
        self.fc=nn.Linear(in_features=128*128*32,out_features=num_classes)
        
        
        
        #Feed forwad function
        
    def forward(self,input):
        output=self.conv1(input)
        output=self.bn1(output)
        output=self.relu1(output)
            
        output=self.pool(output)
            
        output=self.conv2(output)
        output=self.relu2(output)
            
        output=self.conv3(output)
        output=self.bn3(output)
        output=self.relu3(output)
            
            
        #Above output will be in matrix form, with shape (256,32,128,128)
        
        output=output.view(-1,128*128*32)
            
            
        output=self.fc(output)
            
        return output
            
        


In [10]:
## VGG Network

VGG_types = {
    "VGGmod": [32, "M", 64, "M", 128, 128, "M", 256, 256, "M", 256, 256, "M"],   
    "VGG11": [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
    "VGG13": [64, 64, "M", 128, 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
    "VGG16": [64,64,"M",128,128,"M",256,256,256,"M",512,512,512,"M",512,512,512,"M"],
    "VGG19": [64,64,"M",128,128,"M",256,256,256,256,"M",512,512,512,512,"M",512,512,512,512,"M"],
}


class VGG_net(nn.Module):
    def __init__(self, in_channels, num_classes, type="VGG16"):
        super(VGG_net, self).__init__()
        self.in_channels = in_channels
        self.conv_layers = self.create_conv_layers(VGG_types[type])

        self.fcs = nn.Sequential(
            nn.Linear(256 * 7 * 7, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            # nn.Linear(4096, 4096),
            # nn.ReLU(),
            # nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fcs(x)
        return x

    def create_conv_layers(self, architecture):
        layers = []
        in_channels = self.in_channels

        for x in architecture:
            if type(x) == int:
                out_channels = x

                layers += [
                    nn.Conv2d(
                        in_channels=in_channels,
                        out_channels=out_channels,
                        kernel_size=(3, 3),
                        stride=(1, 1),
                        padding=(1, 1),
                    ),
                    nn.BatchNorm2d(x),
                    nn.ReLU(),
                ]
                in_channels = x
            elif x == "M":
                layers += [nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))]

        return nn.Sequential(*layers)

In [11]:
## AlexNet implementation

class AlexNet(nn.Module):
    """
    Neural network model consisting of layers propsed by AlexNet paper.
    """
    def __init__(self, num_classes):
        """
        Define and allocate layers for this neural net.
        Args:
            num_classes (int): number of classes to predict with this model
        """
        super().__init__()
        # input size should be : (b x 3 x 227 x 227)
        # The image in the original paper states that width and height are 224 pixels, but
        # the dimensions after first convolution layer do not lead to 55 x 55.
        self.net = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4),  # (b x 96 x 55 x 55)
            nn.ReLU(),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),  # section 3.3
            nn.MaxPool2d(kernel_size=3, stride=2),  # (b x 96 x 27 x 27)
            nn.Conv2d(96, 256, 5, padding=2),  # (b x 256 x 27 x 27)
            nn.ReLU(),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),  # (b x 256 x 13 x 13)
            nn.Conv2d(256, 384, 3, padding=1),  # (b x 384 x 13 x 13)
            nn.ReLU(),
            nn.Conv2d(384, 384, 3, padding=1),  # (b x 384 x 13 x 13)
            nn.ReLU(),
            nn.Conv2d(384, 256, 3, padding=1),  # (b x 256 x 13 x 13)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),  # (b x 256 x 6 x 6)
        )
        # classifier is just a name for linear layers
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5, inplace=True),
            nn.Linear(in_features=(256 * 6 * 6), out_features=4096),
            nn.ReLU(),
            nn.Dropout(p=0.5, inplace=True),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=num_classes),
        )
        self.init_bias()  # initialize bias

    def init_bias(self):
        for layer in self.net:
            if isinstance(layer, nn.Conv2d):
                nn.init.normal_(layer.weight, mean=0, std=0.01)
                nn.init.constant_(layer.bias, 0)
        # original paper = 1 for Conv2d layers 2nd, 4th, and 5th conv layers
        nn.init.constant_(self.net[4].bias, 1)
        nn.init.constant_(self.net[10].bias, 1)
        nn.init.constant_(self.net[12].bias, 1)

    def forward(self, x):
        """
        Pass the input through the net.
        Args:
            x (Tensor): input tensor
        Returns:
            output (Tensor): output tensor
        """
        x = self.net(x)
        x = x.view(-1, 256 * 6 * 6)  # reduce the dimensions for linear layer input
        return self.classifier(x)

In [12]:
## Le Net 5 implementation
class LeNet(nn.Module):
    def __init__(self, num_classes):
        super(LeNet, self).__init__()
        self.relu = nn.ReLU()
        self.pool = nn.AvgPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv1 = nn.Conv2d(
            in_channels=1,
            out_channels=6,
            kernel_size=(5, 5),
            stride=(1, 1),
            padding=(0, 0),
        )
        self.conv2 = nn.Conv2d(
            in_channels=6,
            out_channels=16,
            kernel_size=(5, 5),
            stride=(1, 1),
            padding=(0, 0),
        )
        self.conv3 = nn.Conv2d(
            in_channels=16,
            out_channels=120,
            kernel_size=(5, 5),
            stride=(1, 1),
            padding=(0, 0),
        )
        self.linear1 = nn.Linear(120, 84)
        self.linear2 = nn.Linear(84, num_classes)

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.pool(x)
        x = self.relu(self.conv2(x))
        x = self.pool(x)
        x = self.relu(
            self.conv3(x)
        )  # num_examples x 120 x 1 x 1 --> num_examples x 120
        x = x.reshape(x.shape[0], -1)
        x = self.relu(self.linear1(x))
        x = self.linear2(x)
        return x

In [13]:
model=ConvNet(num_classes=len(classes)).to(device)

# model = VGG_net(3, len(classes) , "VGGmod").to(device)

# model = MyVGG16(num_classes=len(classes)).to(device)

# model = AlexNet(num_classes=len(classes)).to(device)

# model = LeNet(num_classes=len(classes)).to(device)


In [14]:
#Optmizer and loss function
optimizer=Adam(model.parameters(),lr=0.001,weight_decay=0.0001)
loss_function=nn.CrossEntropyLoss()

In [15]:
#calculating the size of training and testing images
train_count=len(glob.glob(train_path+'/**/*.jpg'))
test_count=len(glob.glob(test_path+'/**/*.jpg'))

In [16]:
print(train_count,test_count)

500 100


In [17]:
#Model training and saving best model

best_accuracy=0.0

for epoch in range(num_epochs):
    
    #Evaluation and training on training dataset
    model.train()
    train_accuracy=0.0
    train_loss=0.0
    
    for i, (images,labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        optimizer.zero_grad()
        
        outputs=model(images)
        loss=loss_function(outputs,labels)
        loss.backward()
        optimizer.step()
        
        
        train_loss+= loss.cpu().data*images.size(0)
        _,prediction=torch.max(outputs.data,1)
        
        train_accuracy+=int(torch.sum(prediction==labels.data))
        
    train_accuracy=train_accuracy/train_count
    train_loss=train_loss/train_count
    
    
    # Evaluation on testing dataset
    model.eval()
    
    test_accuracy=0.0
    for i, (images,labels) in enumerate(test_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        outputs=model(images)
        _,prediction=torch.max(outputs.data,1)
        test_accuracy+=int(torch.sum(prediction==labels.data))
    
    test_accuracy=test_accuracy/test_count
    
    
    print('Epoch: '+str(epoch+1)+' Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy)+' Test Accuracy: '+str(test_accuracy))
    
    #Save the best model
    if test_accuracy>=best_accuracy:
        torch.save(model.state_dict(),'best_checkpoint.model')
        best_accuracy=test_accuracy
    
       


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Epoch: 1 Train Loss: tensor(62.6844) Train Accuracy: 0.444 Test Accuracy: 0.77
Epoch: 2 Train Loss: tensor(24.3858) Train Accuracy: 0.748 Test Accuracy: 0.78
Epoch: 3 Train Loss: tensor(12.0709) Train Accuracy: 0.85 Test Accuracy: 0.96
Epoch: 4 Train Loss: tensor(8.0377) Train Accuracy: 0.902 Test Accuracy: 0.89
Epoch: 5 Train Loss: tensor(21.2205) Train Accuracy: 0.82 Test Accuracy: 0.97
Epoch: 6 Train Loss: tensor(9.5564) Train Accuracy: 0.9 Test Accuracy: 0.85
Epoch: 7 Train Loss: tensor(2.4029) Train Accuracy: 0.952 Test Accuracy: 1.0
Epoch: 8 Train Loss: tensor(3.1653) Train Accuracy: 0.966 Test Accuracy: 0.99
Epoch: 9 Train Loss: tensor(3.2964) Train Accuracy: 0.962 Test Accuracy: 0.94
Epoch: 10 Train Loss: tensor(3.3660) Train Accuracy: 0.956 Test Accuracy: 0.96


## Inference


In [18]:
checkpoint = torch.load('best_checkpoint.model')
model.load_state_dict(checkpoint)
model.eval()

ConvNet(
  (conv1): Conv2d(3, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(12, 20, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU()
  (conv3): Conv2d(20, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu3): ReLU()
  (fc): Linear(in_features=524288, out_features=10, bias=True)
)

In [19]:
#Transforms
transformer=transforms.Compose([
    transforms.Resize((size,size)),
    transforms.ToTensor(),  #0-255 to 0-1, numpy to tensors
    transforms.Normalize([0.5,0.5,0.5], # 0-1 to [-1,1] , formula (x-mean)/std
                        [0.5,0.5,0.5])
])

In [20]:
## Making predictions
from PIL import Image
from io import open


def prediction(path, transform):
    image=Image.open(path)
    image_tensor=transformer(image).float()
    image_tensor=image_tensor.unsqueeze_(0)
    image_tensor=image_tensor.to(device)
    output=model(image_tensor)
    _,prediction=torch.max(output.data,1)
    return classes[prediction[0]]

In [21]:
test_path = "Dataset\\test"
image_path=glob.glob(test_path+'/*.jpg')

In [22]:
predictions={}
for i,path in enumerate(image_path):
    predictions[path[len(test_path)+1:]]=prediction(path,transformer)


In [23]:
predictions

{'101.jpg': 'oil_well',
 '102.jpg': 'swimming_pool',
 '103.jpg': 'crosswalk',
 '104.jpg': 'oil_well',
 '105.jpg': 'basketball_court',
 '106.jpg': 'basketball_court',
 '107.jpg': 'overpass',
 '108.jpg': 'golf_course',
 '109.jpg': 'basketball_court',
 '110.jpg': 'basketball_court',
 '111.jpg': 'basketball_court',
 '112.jpg': 'basketball_court',
 '113.jpg': 'oil_well',
 '114.jpg': 'overpass',
 '115.jpg': 'runway',
 '116.jpg': 'swimming_pool',
 '117.jpg': 'basketball_court',
 '118.jpg': 'oil_well',
 '119.jpg': 'runway',
 '120.jpg': 'crosswalk',
 '121.jpg': 'oil_well',
 '122.jpg': 'basketball_court',
 '123.jpg': 'overpass',
 '124.jpg': 'golf_course',
 '125.jpg': 'crosswalk',
 '126.jpg': 'overpass',
 '127.jpg': 'railway',
 '128.jpg': 'railway',
 '129.jpg': 'oil_well',
 '130.jpg': 'crosswalk',
 '131.jpg': 'overpass',
 '132.jpg': 'basketball_court',
 '133.jpg': 'crosswalk',
 '134.jpg': 'runway',
 '135.jpg': 'basketball_court',
 '136.jpg': 'golf_course',
 '137.jpg': 'basketball_court',
 '138.jp

In [24]:
label_dict = {
    "basketball_court": 1, 
    "bridge":2, 
    "crosswalk":3, 
    "golf_course":4, 
    "oil_well":5, 
    "overpass":6, 
    "railway":7, 
    "runway":8, 
    "swimming_pool":9, 
    "tennis_court":10
}

In [25]:
## Convert the predictions to labels from label_dict
predictions_labels={}
for key,value in predictions.items():
    predictions_labels[key]=label_dict[value]


In [26]:
predictions_labels

{'101.jpg': 5,
 '102.jpg': 9,
 '103.jpg': 3,
 '104.jpg': 5,
 '105.jpg': 1,
 '106.jpg': 1,
 '107.jpg': 6,
 '108.jpg': 4,
 '109.jpg': 1,
 '110.jpg': 1,
 '111.jpg': 1,
 '112.jpg': 1,
 '113.jpg': 5,
 '114.jpg': 6,
 '115.jpg': 8,
 '116.jpg': 9,
 '117.jpg': 1,
 '118.jpg': 5,
 '119.jpg': 8,
 '120.jpg': 3,
 '121.jpg': 5,
 '122.jpg': 1,
 '123.jpg': 6,
 '124.jpg': 4,
 '125.jpg': 3,
 '126.jpg': 6,
 '127.jpg': 7,
 '128.jpg': 7,
 '129.jpg': 5,
 '130.jpg': 3,
 '131.jpg': 6,
 '132.jpg': 1,
 '133.jpg': 3,
 '134.jpg': 8,
 '135.jpg': 1,
 '136.jpg': 4,
 '137.jpg': 1,
 '138.jpg': 2,
 '139.jpg': 6,
 '140.jpg': 9,
 '141.jpg': 1,
 '142.jpg': 8,
 '143.jpg': 1,
 '144.jpg': 4,
 '145.jpg': 7,
 '146.jpg': 6,
 '147.jpg': 8,
 '148.jpg': 5,
 '149.jpg': 1,
 '150.jpg': 6,
 '151.jpg': 1,
 '152.jpg': 8,
 '153.jpg': 1,
 '154.jpg': 6,
 '155.jpg': 5,
 '156.jpg': 4,
 '157.jpg': 9,
 '158.jpg': 3,
 '159.jpg': 1,
 '160.jpg': 1,
 '161.jpg': 8,
 '162.jpg': 6,
 '163.jpg': 1,
 '164.jpg': 1,
 '165.jpg': 1,
 '166.jpg': 4,
 '167.jpg'

## Converting the predictions to CSV format

In [27]:
# Convert predictions to dataframe
import pandas as pd

# Make a dataframe with the predictions with column names as ImageID and LabelID
predictions_df = pd.DataFrame.from_dict(predictions_labels, orient='index')

# Removing the .jpg from the file names
predictions_df.index=predictions_df.index.str.replace('.jpg','')

# Naming the columns as ImageID and Label
predictions_df.reset_index(level=0, inplace=True)
predictions_df.columns = ['ImageID', 'LabelID']

# Removing the index from the dataframe
predictions_df.reset_index(drop= True, inplace=True)

  


In [28]:
predictions_df

Unnamed: 0,ImageID,LabelID
0,101,5
1,102,9
2,103,3
3,104,5
4,105,1
...,...,...
95,196,7
96,197,5
97,198,6
98,199,8


In [29]:
## Saving the predictions to csv
predictions_df.to_csv('18D070067.csv',index=False)   #18D070067.csv is the name of the csv file and the index have been dropped

PermissionError: [Errno 13] Permission denied: '18D070067.csv'

# Hence, the implementation of the model is complete and the predictions are converted to CSV format.