In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import torch.optim as optim
import skimage.io as io
from torchvision import datasets
from skimage.transform import rotate, AffineTransform, warp
from skimage.util import random_noise
from skimage.filters import gaussian
from torchvision import models
from torchsummary import summary

In [2]:
#Training model
class Network(nn.Module):
    def __init__(self):
        super().__init__()
        
        # (1) conv.layer 1
        self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 6, kernel_size = 5)
        self.conv1_bn = nn.BatchNorm2d(6)
        # (2) conv.layer 2
        self.conv2 = nn.Conv2d(in_channels = 6, out_channels = 8, kernel_size = 5)
        self.conv2_bn = nn.BatchNorm2d(8)
        # (3) conv.layer 3
        self.conv3 = nn.Conv2d(in_channels = 8, out_channels = 11, kernel_size = 3)
        self.conv3_bn = nn.BatchNorm2d(11)
        # (4) conv.layer 4
        self.conv4 = nn.Conv2d(in_channels = 11, out_channels = 13, kernel_size = 3)
        self.conv4_bn = nn.BatchNorm2d(13)
        # (5) conv.layer 5
        self.conv5 = nn.Conv2d(in_channels = 13, out_channels = 15, kernel_size = 3)
        self.conv5_bn = nn.BatchNorm2d(15)
        
        # (1) fully connected layer 1
        self.fc1 = nn.Linear(in_features=15*25*25, out_features=3000)
        self.fc1_bn = nn.BatchNorm1d(3000)
        # (2) fully connected layer 2
        self.fc2 = nn.Linear(in_features=3000, out_features=500)
        self.fc2_bn = nn.BatchNorm1d(500)
        
        # output layer
        self.out = nn.Linear(in_features=500, out_features=7)

    def forward(self, t):
        
        #conv. layers
        t = self.conv1(t)
        t = self.conv1_bn(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        
        t = self.conv2(t)
        t = self.conv2_bn(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        
        t = self.conv3(t)
        t = self.conv3_bn(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        
        #conv4 and conv5 layer do not have maxpool to improve accuracy
        t = self.conv4(t)
        t = self.conv4_bn(t)
        t = F.relu(t)
                
        t = self.conv5(t)
        t = self.conv5_bn(t)
        t = F.relu(t)
                
        #linear layers
        t = t.reshape(-1, 15 *25 * 25)
        t = self.fc1(t)
        t = self.fc1_bn(t)
        t = F.relu(t)
       
        t = self.fc2(t)
        t = self.fc2_bn(t)
        t = F.relu(t)
        
        #output layer
        t = self.out(t)
        t = F.softmax(t, dim=1)
        return t

In [3]:
#loading dataset of images
train_set=datasets.ImageFolder(root=r'C:\Users\Rushikesh J. Metkar\Desktop\Acads\GNR638\train\train', transform=transforms.Compose([
        transforms.ToTensor()
    ]), target_transform=None, is_valid_file=None)

In [4]:
network = Network().cuda()
train_loader = torch.utils.data.DataLoader(train_set, batch_size=5, shuffle=True)


In [5]:
#Augmentation of dataset

lists=[]
for i in range(560):
    temp=[train_set[i][0],train_set[i][1]]
    lists.append(temp)
    for j in range(1,12):
        #rotating each image in the dataset by an angle which is mutiple of 30degree
        images=rotate(np.transpose(temp[0].numpy(), (1, 2, 0)) , angle = 30*j, mode = 'wrap')
        images=torch.tensor(np.transpose(images,(2 , 0 , 1)))
        lists.append([images,temp[1]])
    
    #shifting each image in the dataset by (25px,25px)
    transform = AffineTransform(translation=(25,25))
    wrapShift1 = warp(np.transpose(temp[0].numpy(), (1, 2, 0)) , transform , mode='wrap')
    wrapShift1 = torch.tensor(np.transpose(wrapShift1,(2 , 0 , 1)))
    lists.append([wrapShift1,temp[1]])
    
    #shifting each image in the dataset by (-25px,-25px)
    transform = AffineTransform(translation=(-25,-25))
    wrapShift2 = warp(np.transpose(temp[0].numpy(), (1, 2, 0)) , transform , mode='wrap')
    wrapShift2 = torch.tensor(np.transpose(wrapShift2 , (2 , 0 , 1)))
    lists.append([wrapShift2,temp[1]])
    
    # I've not used blurring of images as it blurrs the markings of the basketball and tennis courts and the model doesn't
    # distinguish between the two 
    # Also since most of the images are symmetrical, did not flip the images for augmentation  

In [6]:
#Training_dataset

Training_dataset = torch.utils.data.DataLoader(lists, batch_size = 10, shuffle = True)

In [7]:
#Training the model

optimizer = optim.SGD(network.parameters(), lr=0.008, momentum = 0.5)
for epoch in range(50): #50 no. of epochs
    total_loss=0
    total_preds = 0
    for batch in Training_dataset:
        images, labels = batch
        optimizer.zero_grad()                                                  # assining the gradients value 0
        preds = network(images.cuda())                                         # Pass Batch
        loss = F.cross_entropy(preds, labels.cuda())                           # Calculate Loss
        loss.backward()                                                        # Calculate Gradients
        optimizer.step()                                                       # Update Weights
        total_loss += loss.item()                                              # Calculating total loss 
        total_preds += preds.argmax(dim=1).eq(labels.cuda()).sum().item()      # Calculating total no. of correct preditions   
    print(epoch)
    print(total_loss)

0
1160.432070851326
1
1058.5934487581253
2
1023.8758972883224
3
1004.796567440033
4
993.2620408535004
5
985.7865858078003
6
978.0725890398026
7
970.0047948360443
8
966.1873159408569
9
956.9965070486069
10
955.9949917793274
11
950.3593146800995
12
947.8977760076523
13
941.7905030250549
14
935.4961730241776
15
935.6714873313904
16
934.9468305110931
17
935.1481719017029
18
934.3316670656204
19
930.4808365106583
20
930.6399060487747
21
929.304603934288
22
927.9039134979248
23
925.7532404661179
24
924.167882680893
25
923.7262268066406
26
924.4387308359146
27
921.0095748901367
28
920.0617240667343
29
921.4118163585663
30
920.178387761116
31
919.5169363021851
32
920.5306876897812
33
920.6465982198715
34
919.0524392127991
35
919.2364993095398
36
919.1665225028992
37
919.5062716007233
38
918.6611522436142
39
918.7990086078644
40
918.0694148540497
41
917.2836139202118
42
917.2286076545715
43
917.466367483139
44
917.729948759079
45
917.7942723035812
46
916.0277420282364
47
916.56312084198
48
916.

In [8]:
#Testing dataset

test_set = datasets.ImageFolder(root=r'C:\Users\Rushikesh J. Metkar\Desktop\Acads\GNR638\test_set1', transform=transforms.Compose([
        transforms.ToTensor()
    ]), target_transform=None, is_valid_file=None)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=5, shuffle=False)

In [9]:
#calculating predictions with batch_size = 5

for i in test_loader:
    my_image,label = i
    prediction = network(my_image.cuda())
    print(prediction.argmax(dim = 1))

tensor([6, 4, 3, 2, 3], device='cuda:0')
tensor([6, 1, 4, 0, 5], device='cuda:0')
tensor([3, 1, 6, 3, 3], device='cuda:0')
tensor([6, 1, 5, 4, 3], device='cuda:0')
tensor([3, 0, 4, 2, 4], device='cuda:0')
tensor([1, 3, 3, 5, 2], device='cuda:0')
tensor([3, 3, 6, 5, 0], device='cuda:0')
tensor([1, 4, 3, 0, 0], device='cuda:0')
tensor([3, 6, 3, 0, 3], device='cuda:0')
tensor([3, 5, 6, 2, 5], device='cuda:0')
tensor([5, 0, 2, 3, 1], device='cuda:0')
tensor([2, 4, 3, 3, 6], device='cuda:0')
tensor([3, 0, 2, 1, 5], device='cuda:0')
tensor([3, 1, 0, 6, 2], device='cuda:0')
tensor([3, 4, 3, 0, 2], device='cuda:0')
tensor([3, 1, 5, 2, 0], device='cuda:0')
tensor([6, 1, 1, 4, 5], device='cuda:0')
tensor([6, 1, 4, 2, 3], device='cuda:0')
tensor([4, 3, 5, 0, 3], device='cuda:0')


In [10]:
#labels of the test_set to check accuracy

label_test_set = [6,3,3,2,3,6,1,3,3,5,3,1,6,3,3,6,1,5,4,3,3,3,4,3,4,3,3,3,5,2,1,3,6,5,3,0,3,3,0,3,3,3,1,4,3,1,5,0,2,5,5,4,2,3,0,2,6,3,3,6,3,4,2,1,3,3,3,0,6,2,3,4,3,6,2,3,1,5,2,3,0,1,1,4,5,6,3,4,2,3,4,3,5,0,3]

In [11]:
label_test_set = torch.tensor(np.array(label_test_set))

#Concatenating the preditions and counting the total no of correct predictions

result = torch.tensor([])       #
for batch in test_loader:
    image1,label1 = batch
    prediction = network(image1.cuda())
    result = torch.cat((result.cuda(), prediction.argmax(dim=1)))
    
print(torch.eq(label_test_set.cuda(), result).sum())

tensor(69, device='cuda:0')


In [12]:
final_result = np.zeros(95)
for i in range(95):
    if result[i] == 3:
        final_result[i] = 7
    if result[i] == 2:
        final_result[i] = 3
    if result[i] == 1:
        final_result[i] = 2
    if result[i] == 0:
        final_result[i] = 1
    if result[i] == 4:
        final_result[i] = 4
    if result[i] == 5:
        final_result[i] = 6
    elif result[i] == 6:
        final_result[i] = 5

In [20]:
for i in range(95):
    if label_test_set[i] == 3:
        label_test_set[i] = 7
    if label_test_set[i] == 2:
        label_test_set[i] = 3
    if label_test_set[i] == 1:
        label_test_set[i] = 2
    if label_test_set[i] == 0:
        label_test_set[i] = 1
    if label_test_set[i] == 4:
        label_test_set[i] = 4
    if label_test_set[i] == 5:
        label_test_set[i] = 6
    elif label_test_set[i] == 6:
        label_test_set[i] = 5

In [14]:
#converting to csv file

import pandas as pd
final_result = final_result.astype(int)
a = {'ImageID': range(1001, 1096), 'Label': final_result}
b = pd.DataFrame(a)
print(b)

b.to_csv('19D070034_gnr2.csv', index = False)


    ImageID  Label
0      1001      5
1      1002      7
2      1003      7
3      1004      3
4      1005      7
..      ...    ...
90     1091      4
91     1092      7
92     1093      6
93     1094      1
94     1095      1

[95 rows x 2 columns]


In [7]:
print(network)

Note: you may need to restart the kernel to use updated packages.


'C:\Users\Rushikesh' is not recognized as an internal or external command,
operable program or batch file.


In [21]:
summary(network, (3,256,256))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 6, 252, 252]             456
       BatchNorm2d-2          [-1, 6, 252, 252]              12
            Conv2d-3          [-1, 8, 124, 124]             440
       BatchNorm2d-4          [-1, 8, 124, 124]              16
            Conv2d-5           [-1, 11, 60, 60]             803
       BatchNorm2d-6           [-1, 11, 60, 60]              22
            Conv2d-7           [-1, 13, 28, 28]           1,300
       BatchNorm2d-8           [-1, 13, 28, 28]              26
            Conv2d-9           [-1, 15, 26, 26]           1,770
      BatchNorm2d-10           [-1, 15, 26, 26]              30
           Linear-11                 [-1, 3000]      30,423,000
      BatchNorm1d-12                 [-1, 3000]           6,000
           Linear-13                  [-1, 500]       1,500,500
      BatchNorm1d-14                  [