Connecting to Colab and importing libraries


In [None]:
# Mount Google Drive at /content/drive; the dataset path used later lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from os import walk
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import os
from PIL import Image
import cv2
from torchsummary import summary
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd
import numpy as np
import random
import pickle

Creating Dictionary with Keys as Classes

In [None]:
# Root folder of the dataset on the mounted Drive; image paths are appended to it.
data_dir='/content/drive/My Drive/Colab Notebooks/WaDaBa/'
# category.pkl holds a dict: class id -> list of image paths relative to data_dir.
# NOTE(security): pickle.load can execute arbitrary code; only load a trusted file.
with open(data_dir+"category.pkl", "rb") as a_file:
    categories = pickle.load(a_file)
# Class 5 is excluded from the experiment; the default avoids a KeyError if it is absent.
categories.pop(5, None)
categories.keys()

dict_keys([1, 2, 6, 7])

Function that returns Images from the Dataset

In [None]:
class WadabaDataset(Dataset):
    """Dataset of randomly drawn image pairs for training the siamese network.

    Items are not tied to fixed files: every ``__getitem__`` call draws fresh
    random images from the module-level ``categories`` dict, and ``idx`` only
    decides whether a same-class or a different-class pair is produced.
    """

    def __init__(self, setSize, transform=None):
        """
        Args:
            setSize: number of pairs reported by ``len()``.
            transform: optional callable applied to both images of a pair.
        """
        self.transform = transform
        self.setSize = setSize

    def __len__(self):
        return self.setSize

    def __getitem__(self, idx):
        """Return ``(img1, img2, label)``.

        ``label`` is a float32 tensor of shape (1,): 0.0 when both images come
        from the same class, 1.0 when they come from two different classes.

        Raises:
            ValueError: for an odd ``idx`` when fewer than two classes exist.
        """
        class_ids = list(categories)
        if idx % 2 == 0:  # even index: both images from the same class
            category1 = category2 = random.choice(class_ids)
            label = 0.0
        else:  # odd index: images from two distinct classes
            # sample() draws without replacement, so the two ids always differ
            # (no retry loop, and no endless loop when only one class exists).
            category1, category2 = random.sample(class_ids, 2)
            label = 1.0
        img1 = Image.open(data_dir + random.choice(categories[category1]))
        img2 = Image.open(data_dir + random.choice(categories[category2]))

        if self.transform:
            img1 = self.transform(img1)
            img2 = self.transform(img2)
        return img1, img2, torch.from_numpy(np.array([label], dtype=np.float32))


Function for N-way evaluation of the dataset

In [None]:
class NWayAll(Dataset):
    """N-way evaluation set: one query image plus one random candidate per class.

    Query images are read from the module-level ``all_files`` list by index;
    candidates are drawn at random from the module-level ``categories`` dict.
    """

    def __init__(self, setSize,transform=None):
        """
        Args:
            setSize: requested number of query images.
            transform: optional callable applied to every loaded image.
        """
        self.setSize = setSize
        self.transform = transform

    def __len__(self):
        # Items are indexed into all_files, so never report more items than
        # the list holds (a larger setSize would raise IndexError in __getitem__).
        return min(self.setSize, len(all_files))

    def _load(self, relPath):
        """Open ``data_dir + relPath`` and apply the transform when one is set."""
        img = Image.open(data_dir + relPath)
        if self.transform:
            img = self.transform(img)
        return img

    def __getitem__(self, idx):
        """Return ``(category, mainImg, testSet, label)``.

        ``testSet`` holds one image per class in the key order of
        ``categories``; ``label`` (integer tensor of shape (1,)) is the
        position in ``testSet`` whose class matches the query image.
        """
        # find one main image
        imgName = all_files[idx]
        # The class id is parsed from the two characters that start two
        # positions after the last '_' (searched from offset 3).
        # NOTE(review): relies on the dataset's file naming scheme -- confirm.
        index = imgName.rfind('_',3)
        category = int(imgName[index+2:index+4])
        mainImg = self._load(imgName)

        # one random candidate per class; remember which slot matches the query
        testSet = []
        label = 0
        for i, j in enumerate(categories):
            if j == category:
                label = i
            testSet.append(self._load(random.choice(categories[j])))
        return category,mainImg, testSet, torch.from_numpy(np.array([label], dtype = int))

Flat list containing the image locations of all categories

In [None]:
# Concatenate the per-class path lists into one flat list, in the dict's key order.
all_files = []
for k in categories:
  all_files.extend(categories[k])

Initializing Train and validation sets

In [None]:
# choose a training dataset size and further divide it into train and validation set 80:20
# (dataSize is the number of random pairs WadabaDataset reports, not a number of image files)
dataSize = 4000 # self-defined dataset size
TRAIN_PCT = 0.8 # percentage of entire dataset for training
train_size = int(dataSize * TRAIN_PCT)
val_size = dataSize - train_size

# Resize every image to 105x105, convert it to a tensor and normalise the three
# channels with the mean/std values commonly used for ImageNet-pretrained models.
transformations = transforms.Compose([
        transforms.Resize((105,105)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

wadabadataset = WadabaDataset(dataSize, transformations)
# NOTE(review): WadabaDataset draws random images on every access, so this split
# only divides indices; both subsets sample from the same pool of images.
train_set, val_set = random_split(wadabadataset, [train_size, val_size])
# No shuffle on the train loader; the pairs themselves are drawn at random by the dataset.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=25, num_workers=16)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=1, num_workers=16, shuffle=True)

Initializing Test Set

In [None]:
# Small N-way evaluation set over the first `testSize` entries of all_files.
# NWayAll is the N-way evaluation dataset defined in this notebook; it takes
# the same (setSize, transform) arguments.
testSize = 200
test_set = NWayAll(testSize,transformations)
test_loader = torch.utils.data.DataLoader(test_set, batch_size = 1, num_workers = 2, shuffle=True)

In [None]:
import torch.nn as nn
import torch.nn.functional as F

Creating Network

In [None]:
# Siamese network using the convolutional layout marked "Koch et al." below
class Net(nn.Module):
    """Siamese network: both inputs share one embedding branch and the
    element-wise absolute difference of the embeddings is mapped to one logit.

    ``forward`` returns the raw logit (no final sigmoid), so it is meant to be
    paired with a logits-based loss such as ``nn.BCEWithLogitsLoss``.
    """

    def __init__(self):
        super(Net, self).__init__()

        # Koch et al.
        # Conv2d(input_channels, output_channels, kernel_size)
        self.conv1 = nn.Conv2d(3, 64, 10)
        self.conv2 = nn.Conv2d(64, 128, 7)
        self.conv3 = nn.Conv2d(128, 128, 4)
        self.conv4 = nn.Conv2d(128, 256, 4)
        self.bn1 = nn.BatchNorm2d(64)
        self.bn2 = nn.BatchNorm2d(128)
        self.bn3 = nn.BatchNorm2d(128)
        self.bn4 = nn.BatchNorm2d(256)
        # The two dropout layers are registered but not applied in forward().
        self.dropout1 = nn.Dropout(0.1)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(256 * 6 * 6, 4096)
        self.fcOut = nn.Linear(4096, 1)
        self.sigmoid = nn.Sigmoid()

    def convs(self, x):
        """Run the convolutional stack on a batch of 3x105x105 images.

        Shapes in the comments are (channels, height, width) per sample;
        without padding, out_dim = in_dim - kernel_size + 1.
        """
        # 3, 105, 105
        x = F.relu(self.bn1(self.conv1(x)))
        # 64, 96, 96
        x = F.max_pool2d(x, (2,2))
        # 64, 48, 48
        x = F.relu(self.bn2(self.conv2(x)))
        # 128, 42, 42
        x = F.max_pool2d(x, (2,2))
        # 128, 21, 21
        x = F.relu(self.bn3(self.conv3(x)))
        # 128, 18, 18
        x = F.max_pool2d(x, (2,2))
        # 128, 9, 9
        x = F.relu(self.bn4(self.conv4(x)))
        # 256, 6, 6
        return x

    def _embed(self, x):
        """Map one image batch to its 4096-dimensional sigmoid embedding."""
        x = self.convs(x)
        x = x.view(-1, 256 * 6 * 6)
        return self.sigmoid(self.fc1(x))

    def forward(self, x1, x2):
        """Return one logit per pair; both inputs go through the same branch."""
        x1 = self._embed(x1)
        x2 = self._embed(x2)
        # L1 distance between the embeddings, weighted by the output layer
        x = torch.abs(x1 - x2)
        x = self.fcOut(x)
        return x

Creating the network and counting the parameters

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Build the siamese network and move its parameters to the selected device.
siameseBaseLine = Net()
siameseBaseLine = siameseBaseLine.to(device)

def count_parameters(model):
    """Print the model's structure followed by its number of trainable parameters."""
    trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable += param.numel()
    print('The model architecture:\n\n', model)
    print(f'\nThe model has {trainable:,} trainable parameters')
    
count_parameters(siameseBaseLine)

The model architecture:

 Net(
  (conv1): Conv2d(3, 64, kernel_size=(10, 10), stride=(1, 1))
  (conv2): Conv2d(64, 128, kernel_size=(7, 7), stride=(1, 1))
  (conv3): Conv2d(128, 128, kernel_size=(4, 4), stride=(1, 1))
  (conv4): Conv2d(128, 256, kernel_size=(4, 4), stride=(1, 1))
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout1): Dropout(p=0.1, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=9216, out_features=4096, bias=True)
  (fcOut): Linear(in_features=4096, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

The model has 38,965,697 trainable parameters


saving and loading checkpoint mechanisms

In [None]:
# saving and loading checkpoint mechanisms
def save_checkpoint(save_path, model, optimizer, val_loss):
    """Write the model and optimizer state plus the validation loss to ``save_path``.

    Args:
        save_path: destination file; when ``None`` nothing is saved.
        model: module whose ``state_dict()`` is stored under 'model_state_dict'.
        optimizer: optimizer whose ``state_dict()`` is stored under 'optimizer_state_dict'.
        val_loss: value stored under 'val_loss'.
    """
    if save_path is None:
        return
    state_dict = {'model_state_dict': model.state_dict(),
                  'optimizer_state_dict': optimizer.state_dict(),
                  'val_loss': val_loss}

    torch.save(state_dict, save_path)

    print(f'Model saved to ==> {save_path}')

def load_checkpoint(model, optimizer, save_path=None):
    """Restore model and optimizer state from a checkpoint and return its validation loss.

    Args:
        model: module that receives the stored 'model_state_dict'.
        optimizer: optimizer that receives the stored 'optimizer_state_dict'.
        save_path: checkpoint file; defaults to the notebook's
            ``data_dir + 'Weights/5-siameseNet-batchnorm50.pt'``.

    Returns:
        The 'val_loss' value stored in the checkpoint.
    """
    if save_path is None:
        save_path = data_dir + 'Weights/5-siameseNet-batchnorm50.pt'
    # Load onto the CPU first so a checkpoint written on a GPU runtime can also
    # be read on a CPU-only runtime; load_state_dict copies the values onto the
    # device the model's parameters already live on.
    state_dict = torch.load(save_path, map_location='cpu')
    model.load_state_dict(state_dict['model_state_dict'])
    optimizer.load_state_dict(state_dict['optimizer_state_dict'])
    val_loss = state_dict['val_loss']
    print(f'Model loaded from <== {save_path}')

    return val_loss

training and validation after every epoch

In [None]:
import time
# Notebook-level loss history; train() appends one value per epoch to each list.
train_loss = []
validation_loss = []
def train(model, train_loader, val_loader, num_epochs, criterion, save_name, optimizer=None):
    """Train ``model`` and validate it after every epoch.

    Args:
        model: network called as ``model(img1, img2)``.
        train_loader: iterable of ``(img1, img2, labels)`` training batches.
        val_loader: iterable of ``(img1, img2, labels)`` validation batches.
        num_epochs: number of passes over ``train_loader``.
        criterion: loss called as ``criterion(outputs, labels)``.
        save_name: checkpoint path handed to ``save_checkpoint`` whenever the
            validation loss improves.
        optimizer: optimizer that updates ``model``. When omitted, the
            module-level variable named ``optimizer`` is used, which is how
            existing calls without this argument behave.

    Returns:
        ``(train_losses, val_losses)``: the per-epoch average losses. The same
        values are also appended to the module-level ``train_loss`` and
        ``validation_loss`` lists.

    Raises:
        NameError: if no optimizer is passed and no global ``optimizer`` exists.
    """
    if optimizer is None:
        try:
            optimizer = globals()['optimizer']
        except KeyError:
            raise NameError("name 'optimizer' is not defined; pass optimizer=... to train()") from None

    best_val_loss = float("Inf")
    train_losses = []
    val_losses = []
    for epoch in range(num_epochs):
        start_time = time.time()
        running_loss = 0.0
        model.train()
        print("Starting epoch " + str(epoch+1))
        for img1, img2, labels in train_loader:

            # Forward
            img1 = img1.to(device)
            img2 = img2.to(device)
            labels = labels.to(device)
            outputs = model(img1, img2)
            loss = criterion(outputs, labels)
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # average over the number of batches, not the number of samples
        avg_train_loss = running_loss / len(train_loader)
        train_losses.append(avg_train_loss)

        val_running_loss = 0.0
        with torch.no_grad():
            model.eval()
            for img1, img2, labels in val_loader:
                img1 = img1.to(device)
                img2 = img2.to(device)
                labels = labels.to(device)
                outputs = model(img1, img2)
                loss = criterion(outputs, labels)
                val_running_loss += loss.item()
        avg_val_loss = val_running_loss / len(val_loader)
        val_losses.append(avg_val_loss)

        print('Epoch [{}/{}],Train Loss: {:.4f}, Valid Loss: {:.8f}'
            .format(epoch+1, num_epochs, avg_train_loss, avg_val_loss))
        train_loss.append(avg_train_loss)
        validation_loss.append(avg_val_loss)
        print("Time taken for epoch = ",(time.time()-start_time))
        # keep only the checkpoint with the lowest validation loss seen so far
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            save_checkpoint(save_name, model, optimizer, best_val_loss)

    print("Finished Training")
    return train_losses, val_losses

# evaluation metrics
def eval(model, test_loader):
    """N-way evaluation: count, per class, how often the matching candidate wins.

    For each query image the model scores the query against every candidate;
    the candidate with the lowest output is the prediction (pairs of the same
    class are trained towards label 0, so lower means more similar).

    Args:
        model: network called as ``model(mainImg, testImg)`` that returns a
            single value per call (batch size 1).
        test_loader: iterable of ``(category, mainImg, imgSets, label)`` with
            single-element ``category`` and ``label`` tensors.

    Returns:
        dict mapping each class id seen in ``test_loader`` to the number of
        correctly classified query images of that class.
    """
    # Run on whatever device the model lives on (CPU for a parameter-free model).
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    with torch.no_grad():
        model.eval()
        correct = 0
        print('Starting Iteration')
        count = 0
        # Filled from the data, so the keys are exactly the classes evaluated.
        acc_category = {}
        for category, mainImg, imgSets, label in test_loader:
            class_id = int(category.item())
            acc_category.setdefault(class_id, 0)
            mainImg = mainImg.to(target_device)
            # The model outputs unbounded logits, so start from +inf: any
            # finite score must be able to become the current best.
            best_score = float('inf')
            pred = -1
            for i, testImg in enumerate(imgSets):
                score = model(mainImg, testImg.to(target_device)).item()
                if score < best_score:
                    pred = i
                    best_score = score
            if pred == int(label.item()):
                correct += 1
                acc_category[class_id] += 1
            count += 1
            if count % 200 == 0:
                print("Current Count is: {}".format(count))
                print('Accuracy on n way: {}'.format(correct/count))
    return acc_category

Running the Network

In [None]:
import torch.optim as optim

# Training configuration.
num_epochs = 50
# Net.forward returns the raw output of its last linear layer (no final sigmoid),
# so the loss that applies the sigmoid itself is used.
criterion = nn.BCEWithLogitsLoss()
# Checkpoint file for the best validation loss; load_checkpoint reads the same path by default.
save_path = data_dir+'Weights/5-siameseNet-batchnorm50.pt'
# NOTE: `optimizer` is not passed in the call below; train() picks up this
# module-level variable, so it must be defined before train() runs.
optimizer = optim.SGD(siameseBaseLine.parameters(), lr=0.001, momentum=0.9)
train_losses, val_losses = train(siameseBaseLine, train_loader, val_loader, num_epochs, criterion, save_path)
# Alternative: resume training from the saved checkpoint instead of starting fresh.
#load_model = Net().to(device)
#optimizer = optim.SGD(load_model.parameters(), lr=0.001)
#load_checkpoint(load_model,optimizer)
#train_losses, val_losses = train(load_model, train_loader, val_loader, num_epochs, criterion, save_path)

Starting epoch 1
Epoch [1/50],Train Loss: 0.6887, Valid Loss: 0.68410975
Time taken for epoch =  362.51566648483276
Model saved to ==> /content/drive/My Drive/Colab Notebooks/WaDaBa/Weights/5-siameseNet-batchnorm50.pt
Starting epoch 2
Epoch [2/50],Train Loss: 0.6761, Valid Loss: 0.66835142
Time taken for epoch =  351.4178624153137
Model saved to ==> /content/drive/My Drive/Colab Notebooks/WaDaBa/Weights/5-siameseNet-batchnorm50.pt
Starting epoch 3
Epoch [3/50],Train Loss: 0.6321, Valid Loss: 0.60940485
Time taken for epoch =  350.1337926387787
Model saved to ==> /content/drive/My Drive/Colab Notebooks/WaDaBa/Weights/5-siameseNet-batchnorm50.pt
Starting epoch 4
Epoch [4/50],Train Loss: 0.5680, Valid Loss: 0.54943763
Time taken for epoch =  348.8320252895355
Model saved to ==> /content/drive/My Drive/Colab Notebooks/WaDaBa/Weights/5-siameseNet-batchnorm50.pt
Starting epoch 5
Epoch [5/50],Train Loss: 0.5262, Valid Loss: 0.46923578
Time taken for epoch =  349.955858707428
Model saved to ==

Testing the Model

In [None]:
# Evaluate on every image exactly once. NWayAll reads all_files[idx], so the
# set size is taken from the list itself: a hard-coded size can run past the
# end of the list or leave part of it unevaluated.
testSize_all = len(all_files)
test_set_all = NWayAll(testSize_all,transformations)
test_loader_all = torch.utils.data.DataLoader(test_set_all, batch_size = 1, num_workers = 2, shuffle=True)
import torch.optim as optim
# Fresh network and optimizer that receive the state stored in the checkpoint.
load_model = Net().to(device)
load_optimizer = optim.SGD(load_model.parameters(), lr=0.001)

best_val_loss = load_checkpoint(load_model, load_optimizer)

print(best_val_loss)
acc_category = eval(load_model, test_loader_all)

In [None]:
# Per-class accuracy: correct predictions divided by the number of images in the class.
# Iterate over the classes that are still in `categories`; a class that was
# removed from it has no image list to divide by.
for k in categories:
  print(k,":",acc_category.get(k, 0)/len(categories[k]))