In [None]:
import numpy as np
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
import torch.nn as nn
import torch.nn.functional as F
import itertools
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.metrics import roc_auc_score

#### The transforms are to resize the image to the size 224x224 because, the architecture implemented here is the original ResNet18 as described in the research paper whereby the first convolution kernel size is 7x7, the stride is 2 and the padding is 3. So if we use this kernel on our original 64x64 image, then it would not be as effective. Hence the resizing of the image is performed. 

#### The next transform is to perform a Random horizontal flip with a probability of 0.3 for more robust detection.

In [2]:
transform = transforms.Compose([torchvision.transforms.Resize(size = (224,224)),
                               torchvision.transforms.RandomHorizontalFlip(p=0.3),
                               torchvision.transforms.ToTensor()])
transform_val = transforms.Compose([torchvision.transforms.Resize(size = (224,224)),
                                   torchvision.transforms.ToTensor()])

In [3]:
train_data = datasets.ImageFolder('classification_data/train_data/',transform=transform)
train_loader = DataLoader(train_data,batch_size=256,shuffle=True,num_workers=3)

In [4]:
val_data = datasets.ImageFolder('classification_data/val_data/',transform=transform_val)
val_loader = DataLoader(val_data,batch_size=256,num_workers=3,shuffle = False)


In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
num_epochs = 15
learning_rate = 0.15


#### The architecture below is ResNet18 architecture which uses the basic blocks in the format [2,2,2,2].

![alt text](ResNet18.png "Fig1")

###### Reference: https://i.imgur.com/XwcnU5x.png
The image sizes referred here is to be ignored as it is not pertaining to our problem

In [None]:
class basic_block(nn.Module):
    def __init__(self,in_channels,out_channels, stride = 1, dimension_change = None):
        super(basic_block,self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels,out_channels, kernel_size=3, stride = stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels,out_channels, kernel_size=3, stride = 1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        ''''dimension change is an indicator of the fact that either a step size greater than 1 has occured
            or the number of output channels have changed with respect to the previous layer'''
        
        self.dimension_change = dimension_change
        
        self.stride = stride
        
    def forward(self,x):
        # Residue is the value that the skip connection would add. 
        op = self.relu(self.bn1(self.conv1(x)))
        op = self.bn2(self.conv2(op))
        #self.dimension change is a 1x1 convolution to take into account the effect of dimension change
        if(self.dimension_change is not None):
            residue = self.dimension_change(x)
        
            op+=residue
        else:
            op+=x
        op = self.relu(op)
        
        return op


class ResNet(nn.Module):
    def __init__(self,block,num_classes = 4000):
        super(ResNet,self).__init__()
        
        self.conv1 = nn.Conv2d(3,64,kernel_size=7, stride=2, padding=3,bias = False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.drop = nn.Dropout2d(0.3)
        
        block_seq = [2,2,2,2]
        channel_size = [64,128,256,512]
        strides = [1,2,2,2]
        sections = []
        self.in_channels = channel_size[0]
        
         
        for i,block_val in enumerate(block_seq):
            layers = []
            if strides[i] > 1 or channel_size[i]!=self.in_channels:
                dimension_change = nn.Sequential(nn.Conv2d(self.in_channels, channel_size[i], kernel_size= 1,
                                                           stride = strides[i], bias = False),
                                             nn.BatchNorm2d(channel_size[i]))
            else:
                dimension_change = None
            layers.append(block(self.in_channels,channel_size[i],strides[i],dimension_change))
            self.in_channels = channel_size[i]
            for j in range(block_val-1):
                layers.append(block(self.in_channels,channel_size[i]))
            sections.append(layers)
        # These sections represent the skip connections consisting of two basic blocks each.
        # Look at the layers indicated in the figure to visualize.
        self.section1 = nn.Sequential(*sections[0])
        self.section2 = nn.Sequential(*sections[1])
        self.section3 = nn.Sequential(*sections[2])
        self.section4 = nn.Sequential(*sections[3])
        self.avgPool = nn.AdaptiveAvgPool2d((1,1))
        self.fc1 = nn.Linear(512,num_classes)
        
        
    
    def forward(self,x):
        
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.maxpool(x)
        
        x = self.section1(x)
        
        # Dropout is implemented as a regularization.
        x = self.drop(x)
        
        x = self.section2(x)
        
       
        x = self.section3(x)
        
        
        x = self.section4(x)
        
        
        x = self.avgPool(x)
        
        x = torch.flatten(x, 1)
        
        x = self.fc1(x)
        
        return x


In [38]:
model = ResNet(basic_block).to(device)


#### Hyperparameters:
Loss- Cross Entropy Loss <br>
Optimizer- SGD with Momentum as 0.9 <br>
Scheduler- StepLR with stepsize 1 and gamma (Multiplicative factor of learning rate decay) as 0.85 <br>

In [9]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, momentum=0.9, weight_decay=5e-5)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 1, gamma = 0.85) 

In [10]:
for epoch in range(num_epochs):
    model.train()
    correct = 0
    total = 0
    train_loss = 0
    print("EPOCH: ",epoch)
    for idx, (img,target) in enumerate(train_loader):
        img = img.to(device = device)
        
        target = target.to(device = device)
        optimizer.zero_grad()
        
        out = model(img)
        
        
        loss = criterion(out,target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        total = total + len(target)
        correct += (torch.argmax(out,dim = 1)==target).float().sum()
        if(idx%100==0):
            print(correct,total,"<<<<<<<<<<<,")
            acc = (correct/total)*100
            print(idx,"--->""Accuracy:",acc)
        
    model.eval()
    valid_loss = 0
    correct_v = 0
    total_v = 0
    with torch.no_grad():
        for i,(data,target) in enumerate(val_loader):

            data = data.to(device)
            target = target.to(device)
            

            output = model(data.float())

            loss = criterion(output,target)

            valid_loss += loss.item()

            total_v = total_v + len(target)

            correct_v += (torch.argmax(output,dim = 1)==target).float().sum()

            if(i%10==0):
                acc = (correct_v/total_v)*100
                print("Validation Accuracy:",acc)  
                print(i,"<-------")
    train_loss /= len(train_loader)
    valid_loss /= len(val_loader)
    print('Validation loss = ',valid_loss)
    print('Training loss = ',train_loss)
    file = "/home/ubuntu/hw2p2/ckpt_ResNet18/epoch_ckpt{0}.pth".format(epoch+1)
    torch.save(model.state_dict(),file)

    scheduler.step()    

EPOCH:  0
tensor(0., device='cuda:0') 256 <<<<<<<<<<<,
0 --->Accuracy: tensor(0., device='cuda:0')
tensor(3., device='cuda:0') 25856 <<<<<<<<<<<,
100 --->Accuracy: tensor(0.0116, device='cuda:0')
tensor(22., device='cuda:0') 51456 <<<<<<<<<<<,
200 --->Accuracy: tensor(0.0428, device='cuda:0')
tensor(54., device='cuda:0') 77056 <<<<<<<<<<<,
300 --->Accuracy: tensor(0.0701, device='cuda:0')
tensor(88., device='cuda:0') 102656 <<<<<<<<<<<,
400 --->Accuracy: tensor(0.0857, device='cuda:0')
tensor(140., device='cuda:0') 128256 <<<<<<<<<<<,
500 --->Accuracy: tensor(0.1092, device='cuda:0')
tensor(205., device='cuda:0') 153856 <<<<<<<<<<<,
600 --->Accuracy: tensor(0.1332, device='cuda:0')
tensor(309., device='cuda:0') 179456 <<<<<<<<<<<,
700 --->Accuracy: tensor(0.1722, device='cuda:0')
tensor(429., device='cuda:0') 205056 <<<<<<<<<<<,
800 --->Accuracy: tensor(0.2092, device='cuda:0')
tensor(611., device='cuda:0') 230656 <<<<<<<<<<<,
900 --->Accuracy: tensor(0.2649, device='cuda:0')
tensor(84

KeyboardInterrupt: 

In [12]:
state_dict = torch.load('/home/ubuntu/hw2p2/ckpt_ResNet18/epoch_ckpt13.pth')

In [13]:
model.load_state_dict(state_dict)

<All keys matched successfully>

#### Getting the embeddings.
This section helps us get the features learnt by the penultimate layer of the model.

In [12]:
model_cpy_list = list(model.children())[:-1]
model_cpy = nn.Sequential(*model_cpy_list).to(device=device)


#### Dataloader designed for getting the images required to be compared.

In [26]:
class get_imgs(Dataset):
    #Dataloader for the data
   def __init__(self,txt):
    self.s = txt.split('_')
    txt = open(txt)
    self.im1 = []
    self.im2 = []
    self.transforms = transforms.Compose([torchvision.transforms.Resize(size = (224,224)),
                                   torchvision.transforms.ToTensor()])
    if(self.s[2]=='val.txt'):
        self.label = []
        for i in txt:
            img1,img2,label = i.split(" ")
            self.im1.append(img1)
            self.im2.append(img2)
            self.label.append(label.replace('\n',''))
    else:
        for i in txt:
            i = i.replace('\n','')
            img1,img2 = i.split(" ")
            self.im1.append(img1)
            self.im2.append(img2)
        
    
   def __len__(self):
    #return length of any of the image lists
    return len(self.im2)
        
   def __getitem__(self,index):
        #convert into pil
        image1 = Image.open(self.im1[index])
        image2 = Image.open(self.im2[index])
        if(self.s[2] =='val.txt'):
            label = self.label[index]


            return self.transforms(image1), self.transforms(image2), label
        else:

            return self.transforms(image1), self.transforms(image2), self.im1[index], self.im2[index]
          

In [20]:
images = get_imgs('verification_pairs_val.txt')
images_loader = DataLoader(images,batch_size=128,num_workers=3)

#### This step helps us get the cosine similarities as the metric of comparison between the two images.

In [21]:
similarities = []
l = []
model_cpy.eval()
with torch.no_grad():
    for i, (img1,img2,label) in enumerate(images_loader):
        i1 = img1.to(device = device)
        i2 = img2.to(device = device)
        sim1 = model_cpy(i1)
        sim2 = model_cpy(i2)
        l.append(label)

        cos = nn.CosineSimilarity()
        cossim = cos(sim1,sim2)
       
        similarities.append([x.item() for x in cossim])

In [22]:
from itertools import chain 
label_flat = list(chain.from_iterable(l))
similarity_flat = list(chain.from_iterable(similarities))

In [23]:
roc_auc_score(label_flat, similarity_flat)

0.932955376567472

In [27]:
test_images = get_imgs('verification_pairs_test.txt')
test_images_loader = DataLoader(test_images,batch_size=128,shuffle=False, num_workers=3)

In [28]:
tst_similarities = []
image1_name = []
image2_name = []
model_cpy.eval()
with torch.no_grad():
    for (img1, img2, im_name1, im_name2) in test_images_loader:
        im1 = img1.to(device = device)
        im2 = img2.to(device = device)
        sim1 = model_cpy(im1)
        sim2 = model_cpy(im2)
        image1_name.append(im_name1)
        image2_name.append(im_name2)
        cos = nn.CosineSimilarity()
        tst_similarities.append(cos(sim1,sim2))
        


In [29]:
image1_name_flat = list(chain.from_iterable(image1_name))
image2_name_flat = list(chain.from_iterable(image2_name))
test_sim_flat = list(chain.from_iterable(tst_similarities))

In [30]:
sim_values = []
for val in test_sim_flat:
    sim_values.append(val.item())

In [31]:
image_names = res = [i + " " + j for i, j in zip(image1_name_flat, image2_name_flat)]

In [33]:
from pandas import DataFrame
df = DataFrame (image_names,columns=['Id'])


In [34]:
df['Category'] = sim_values

In [35]:
df

Unnamed: 0,Id,Category
0,verification_data/00020839.jpg verification_da...,0.768152
1,verification_data/00002921.jpg verification_da...,0.387351
2,verification_data/00011732.jpg verification_da...,0.361730
3,verification_data/00052778.jpg verification_da...,0.547782
4,verification_data/00053948.jpg verification_da...,0.692625
...,...,...
51830,verification_data/00041961.jpg verification_da...,0.432748
51831,verification_data/00060107.jpg verification_da...,0.283507
51832,verification_data/00003205.jpg verification_da...,0.601663
51833,verification_data/00068054.jpg verification_da...,0.653149


In [36]:
df.to_csv('please_workResNet18.csv',index = False)

In [37]:
!kaggle competitions submit -c 11785-hw2p2-slack-kaggle -f please_workResNet18.csv -m "Resnet18ckpt12"

100%|██████████████████████████████████████| 4.01M/4.01M [00:01<00:00, 2.88MB/s]
Successfully submitted to 11785-HW2p2-slack-kaggle

6

1
