In [206]:
import os
import numpy as np
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import torch.nn as nn
import torch.optim as optim

if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print(f'Using device: {device} - {torch.cuda.get_device_name(0)}')
else:
    raise Exception("GPU is not available. Please run this notebook on a system with a GPU.")

Using device: cuda:0 - NVIDIA GeForce RTX 3080


In [207]:
# Python
class FingerprintDataset(Dataset):
    def __init__(self, real_dir, altered_dir, transform=None, limit=None):
        self.real_dir = real_dir
        self.altered_dir = altered_dir
        self.transform = transform
        self.limit = limit

        self.real_images = os.listdir(real_dir)[:limit] if limit else os.listdir(real_dir)

    def __len__(self):
        return len(self.real_images)

    def __getitem__(self, idx):
        if idx >= len(self.real_images):
            raise IndexError('Index out of range')

        real_img = self.real_images[idx]
        real_image = Image.open(os.path.join(self.real_dir, real_img))
        altered_image = Image.open(os.path.join(self.altered_dir, real_img))

        if self.transform:
            real_image = self.transform(real_image)
            altered_image = self.transform(altered_image)

        # Add an extra dimension for the batch size
        real_image = real_image.unsqueeze(0)
        altered_image = altered_image.unsqueeze(0)

        return real_image, altered_image, 1

In [208]:
transform = transforms.Compose([
    transforms.Grayscale(), 
    transforms.ToTensor(), 
    transforms.Normalize((0.5,), (0.5,))  # Normalize pixel values to [-1, 1]
])

In [209]:
real_dir = 'dataset/Real'
altered_dir = 'dataset/Altered/Real'

dataset = FingerprintDataset(real_dir, altered_dir, transform=transform)

dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [210]:
# Python
class SiameseNetwork(nn.Module):
    def __init__(self):
        super(SiameseNetwork, self).__init__()
        
        # Define the feature extractor
        self.feature_extractor = models.resnet18(pretrained=True)
        
        # Replace the first convolutional layer to accept grayscale images
        self.feature_extractor.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        
        # Remove the final layer (since we're not doing classification)
        self.feature_extractor.fc = nn.Identity()
        
        # Define the final layer that computes the absolute difference between the two outputs
        self.final_layer = nn.Linear(512, 1)

    def forward(self, input1, input2):
        # Pass both inputs through the feature extractor
        output1 = self.feature_extractor(input1)
        output2 = self.feature_extractor(input2)
        
        # Compute the absolute difference between the two outputs
        diff = torch.abs(output1 - output2)
        
        # Pass the difference through the final layer to get the output
        output = self.final_layer(diff)
        
        return output

In [211]:
model = SiameseNetwork().to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [212]:
# Python
for epoch in range(num_epochs):
    for i, (real_images, altered_images_lists, labels) in enumerate(dataloader):
        for altered_images in altered_images_lists:
            # Move the tensors to the GPU
            real_images = real_images.to(device)
            altered_images = altered_images.to(device)  # This line was missing
            labels = labels.to(device)

            # Remove the extra dimension
            real_images = real_images.squeeze(1)
            altered_images = altered_images.squeeze(1)

            optimizer.zero_grad()

            outputs = model(real_images, altered_images)

            loss = criterion(outputs, labels.float().unsqueeze(1))

            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(i)

        if i % 5 == 4:  
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print('Finished Training')

RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [32, 1, 1, 103, 96]