In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from torchvision import models

import cv2
import random
import os

device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [2]:
class SiameseNetwork(nn.Module):
    def __init__(self, backbone="resnet18"):
        '''
        Creates a siamese network with a network from torchvision.models as backbone.
            Parameters:
                    backbone (str): Options of the backbone networks can be found at https://pytorch.org/vision/stable/models.html
        '''

        super().__init__()

        if backbone not in models.__dict__:
            raise Exception("No model named {} exists in torchvision.models.".format(backbone))

        # Create a backbone network from the pretrained models provided in torchvision.models 
        self.backbone = models.__dict__[backbone](pretrained=True, progress=True)

        # Get the number of features that are outputted by the last layer of backbone network.
        out_features = list(self.backbone.modules())[-1].out_features

        # Create an MLP (multi-layer perceptron) as the classification head. 
        # Classifies if provided combined feature vector of the 2 images represent same player or different.
        self.cls_head = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(out_features, 512),
            # nn.BatchNorm1d(512),
            nn.ReLU(),

            nn.Dropout(p=0.5),
            nn.Linear(512, 64),
            # nn.BatchNorm1d(64),
            nn.Sigmoid(),
            nn.Dropout(p=0.5),

            nn.Linear(64, 1),
            nn.Sigmoid(),
        )

    def forward(self, img1, img2):
        '''
        Returns the similarity value between two images.
            Parameters:
                    img1 (torch.Tensor): shape=[b, 3, 224, 224]
                    img2 (torch.Tensor): shape=[b, 3, 224, 224]
            where b = batch size
            Returns:
                    output (torch.Tensor): shape=[b, 1], Similarity of each pair of images
        '''

        # Pass the both images through the backbone network to get their seperate feature vectors
        feat1 = self.backbone(img1)
        feat2 = self.backbone(img2)
        
        # Multiply (element-wise) the feature vectors of the two images together, 
        # to generate a combined feature vector representing the similarity between the two.
        combined_features = feat1 * feat2

        # Pass the combined feature vector through classification head to get similarity value in the range of 0 to 1.
        output = self.cls_head(combined_features)
        return output

In [3]:
def get_image(path):
    img = cv2.imread(path)
    img = cv2.resize(
        img,
        (224, 224)
    )
    return torch.tensor(img.reshape(1, 3, 224, 224)).float().to('cuda')

healthy = [get_image('./data_siamese/0/' + img) for img in os.listdir('./data_siamese/0/')]
unhealthy = [get_image('./data_siamese/1/' + img) for img in os.listdir('./data_siamese/1/')]

shfl0 = list(range(len(healthy)))
random.shuffle(shfl0)
 
shfl1 = list(range(len(unhealthy)))
random.shuffle(shfl1)

test = []
for i in range(3):
    test.append( (healthy[shfl0[3*i]], unhealthy[shfl1[3*i]], torch.zeros(1, 1).to('cuda')) )
    test.append( (healthy[shfl0[3*i + 1]], healthy[shfl0[3*i + 2]], torch.ones(1, 1).to('cuda')) )
    test.append( (unhealthy[shfl1[3*i + 1]], unhealthy[shfl1[3*i + 2]], torch.ones(1, 1).to('cuda')) )

shfl0 = shfl0[9:]
shfl1 = shfl1[9:]

train = []

for i in range(7):
    train.append( (unhealthy[shfl1[2*i]], unhealthy[shfl1[2*i + 1]], torch.ones(1, 1).to('cuda')) )

shfl1 = shfl1[14:]

for i in range(len(shfl1)):
    train.append( (healthy[shfl0[i]], unhealthy[shfl1[i]], torch.zeros(1, 1).to('cuda')) )

shfl0 = shfl0[len(shfl1):]

for i in range(0, len(shfl0), 2):
    train.append( (healthy[shfl0[i + 1]], healthy[shfl0[i]], torch.ones(1, 1).to('cuda')) )


len(test), len(train)

(9, 50)

In [8]:
siamese_net = torch.load('siamese.pt').to('cuda')

In [5]:
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(siamese_net.parameters(), lr=0.001)

for epoch in range(100):
    for i, (input1, input2, label) in enumerate(train):
        optimizer.zero_grad()
        output = siamese_net(input1, input2).mean(axis=0)[None]
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        
        if i % 100 == 0:
            print(f"Epoch {epoch+1}, Batch {i+1}, Loss {loss.item()}")

Epoch 1, Batch 1, Loss 0.8777173757553101
Epoch 2, Batch 1, Loss 0.4291377663612366
Epoch 3, Batch 1, Loss 0.48325228691101074
Epoch 4, Batch 1, Loss 0.4521399736404419
Epoch 5, Batch 1, Loss 0.37431690096855164
Epoch 6, Batch 1, Loss 0.33896368741989136
Epoch 7, Batch 1, Loss 0.42783495783805847
Epoch 8, Batch 1, Loss 0.706821084022522
Epoch 9, Batch 1, Loss 0.2912171483039856
Epoch 10, Batch 1, Loss 0.3966604471206665
Epoch 11, Batch 1, Loss 0.31122878193855286
Epoch 12, Batch 1, Loss 0.44941118359565735
Epoch 13, Batch 1, Loss 0.4331289827823639
Epoch 14, Batch 1, Loss 0.41475710272789
Epoch 15, Batch 1, Loss 0.37424376606941223
Epoch 16, Batch 1, Loss 0.5292100310325623
Epoch 17, Batch 1, Loss 0.5178886651992798
Epoch 18, Batch 1, Loss 0.5192611217498779
Epoch 19, Batch 1, Loss 0.48622962832450867
Epoch 20, Batch 1, Loss 0.30855676531791687
Epoch 21, Batch 1, Loss 0.4852754473686218
Epoch 22, Batch 1, Loss 0.2949196398258209
Epoch 23, Batch 1, Loss 0.4820440411567688
Epoch 24, Batc

In [9]:
for x in test:
    print(siamese_net(*x[:2]).item(), x[2].item())

0.6104997992515564 0.0
0.6881439089775085 1.0
0.6378397345542908 1.0
0.6590268015861511 0.0
0.6929436326026917 1.0
0.6062656044960022 1.0
0.5993179082870483 0.0
0.6854667663574219 1.0
0.6659497618675232 1.0
