In [1]:
import cv2
import os
import random
import numpy as np
from matplotlib import pyplot as plt
import glob

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader, ConcatDataset, random_split, Subset
from torch.nn import Linear, Conv2d, MaxPool2d, Flatten, BatchNorm2d, LayerNorm

import torch.optim as optim

from torchvision import transforms

In [3]:
# Report whether CUDA is usable and, if so, list each visible GPU with its
# currently allocated / reserved memory (bytes divided by 1e9, printed as GB).
if not torch.cuda.is_available():
    print("CUDA is not available.")
else:
    print("CUDA is available. Details of available GPU(s):")
    num_gpus = torch.cuda.device_count()
    print(f"Number of GPU(s) available: {num_gpus}")
    for i in range(num_gpus):
        # One print call per device; sep="\n" keeps the three-line layout.
        print(
            f"GPU {i}: {torch.cuda.get_device_name(i)}",
            f"  Memory Allocated: {torch.cuda.memory_allocated(i) / 1e9} GB",
            f"  Memory Cached: {torch.cuda.memory_reserved(i) / 1e9} GB",
            sep="\n",
        )


CUDA is available. Details of available GPU(s):
Number of GPU(s) available: 1
GPU 0: NVIDIA GeForce RTX 4090
  Memory Allocated: 0.0 GB
  Memory Cached: 0.0 GB


In [4]:
# Set up paths
# The positive, negative and anchor image folders all live under ./data
POS_PATH, NEG_PATH, ANC_PATH = (
    os.path.join('data', subdir)
    for subdir in ('positive', 'negative', 'anchor')
)

In [5]:
# Make sure every data folder exists; exist_ok=True makes re-running this cell a no-op
for _data_dir in (POS_PATH, NEG_PATH, ANC_PATH):
    os.makedirs(_data_dir, exist_ok=True)

In [6]:
# Extract the Labeled Faces in the Wild (LFW) dataset from its .tgz archive
# dataset: https://vis-www.cs.umass.edu/lfw/
# !tar -xf lfw.tgz

In [7]:
# move images from the dataset directory to data\negative
# for directory in os.listdir('lfw'):
#     for file in os.listdir(os.path.join('lfw', directory)):
#         FROM_PATH = os.path.join('lfw', directory, file)
#         TO_PATH = os.path.join(NEG_PATH, file)
#         os.replace(FROM_PATH, TO_PATH)

In [8]:
# universally unique identifier for naming collected images
import uuid

In [9]:
# # Collect positive and anchor images
# cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)

# while cap.isOpened():
#     _, frame = cap.read()
    
#     # display image
#     cv2.imshow('Frame', frame)
    
#     # extract 250, 250 block from image
#     frame = frame[60:310, 200:450 :]
    
#     # display extracted section
#     cv2.imshow('Tiny Frame' , frame)
    
#     # Collect anchors
#     if cv2.waitKey(1) & 0xFF == ord('a'):
#         # generate file path
#         IMG_PATH = os.path.join(ANC_PATH, "{}.jpg".format(uuid.uuid1()))
#         # save file to anchors
#         cv2.imwrite(IMG_PATH, frame)
    
#     # Collect positives
#     if cv2.waitKey(1) & 0xFF == ord('p'):
#         # generate file path
#         IMG_PATH = os.path.join(POS_PATH, "{}.jpg".format(uuid.uuid1()))
#         # save file to positives
#         cv2.imwrite(IMG_PATH, frame)
    
#     # Break loop
#     if cv2.waitKey(1) & 0xFF == ord('q'):
#         break

# # release webcam and destroy window
# cap.release()
# cv2.destroyAllWindows()

In [10]:
# Grab up to 300 file paths each from the anchor, negative and positive folders.
# - os.path.join builds the pattern with the platform's separator; the previous
#   hard-coded '\\' only matched on Windows and silently found nothing elsewhere.
# - glob returns matches in arbitrary order, so sort before slicing to make the
#   selected subset (and therefore the anchor/other pairing) reproducible.
anchor_files = sorted(glob.glob(os.path.join(ANC_PATH, '*.jpg')))[:300]
negative_files = sorted(glob.glob(os.path.join(NEG_PATH, '*.jpg')))[:300]
positive_files = sorted(glob.glob(os.path.join(POS_PATH, '*.jpg')))[:300]

In [11]:
class ImageDataset(Dataset):
    """Dataset of (anchor, other, label) image pairs that all share one label.

    Item ``i`` pairs ``anchor_paths[i]`` with ``other_paths[i]``. Each image is
    read with OpenCV, converted from BGR to RGB, resized to 105x105 and returned
    as a tensor of shape (channels, height, width) with values scaled to [0, 1].
    """

    def __init__(self, anchor_paths, other_paths, label):
        """
        anchor_paths: List of paths to anchor images
        other_paths: List of paths to either positive or negative images
        label: 0 or 1 (0 for negative pairs, 1 for positive pairs)
        """
        self.anchor_paths = anchor_paths
        self.other_paths = other_paths
        self.label = label
        self.transform = transforms.Compose([
            # Convert to PIL Image
            transforms.ToPILImage(),
            # Resize to match the Siamese paper input size
            transforms.Resize((105, 105)),
            # Convert to PyTorch tensor of shape (channels, height, width) which also scales values between [0, 1]
            transforms.ToTensor(),
        ])

    def __len__(self):
        """Return the number of complete pairs.

        Pairs are formed index-by-index, so the shorter of the two path lists
        bounds the dataset; reporting only ``len(anchor_paths)`` would let a
        DataLoader request indices that do not exist in ``other_paths``.
        """
        return min(len(self.anchor_paths), len(self.other_paths))

    def _load_image(self, path):
        """Read the image at ``path`` and return it as a transformed RGB tensor.

        Raises:
            OSError: if OpenCV cannot read the file. ``cv2.imread`` signals
                failure by returning ``None`` rather than raising, which would
                otherwise surface as an unrelated error inside ``cvtColor``.
        """
        img = cv2.imread(path)
        if img is None:
            raise OSError(f"Could not read image (missing or not decodable): {path!r}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; convert to RGB
        return self.transform(img)

    def __getitem__(self, index):
        """Return ``(anchor_tensor, other_tensor, label_tensor)`` for pair ``index``.

        The label is returned as a float32 scalar tensor.
        """
        anchor_img = self._load_image(self.anchor_paths[index])
        other_img = self._load_image(self.other_paths[index])
        return anchor_img, other_img, torch.tensor(self.label, dtype=torch.float32)

In [12]:
# Build one dataset per pair type: anchors paired with negatives are labelled 0,
# anchors paired with positives are labelled 1.
negative_dataset = ImageDataset(
    anchor_paths=anchor_files,
    other_paths=negative_files,
    label=0,
)
positive_dataset = ImageDataset(
    anchor_paths=anchor_files,
    other_paths=positive_files,
    label=1,
)

In [14]:
# for batch in positive_loader:
#     for val in batch:
#         print(val.shape)
#     # print(batch)
#     break

In [41]:
# Concatenate the positive-pair and negative-pair datasets into one
combined_dataset = ConcatDataset([positive_dataset, negative_dataset])

# Calculate absolute sizes for a 70-30 split
total_size = len(combined_dataset)
train_size = int(0.7 * total_size)
test_size = total_size - train_size  # remainder, so the two sizes always sum to total_size

# Randomly split dataset into train and test.
# A seeded generator makes the train/test membership identical on every run;
# without it the split changes each time the cell is executed, so results
# measured on test_loader would not be comparable between runs.
SPLIT_SEED = 42
train_dataset, test_dataset = random_split(
    combined_dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Create DataLoader for training data
# NOTE(review): worker processes (num_workers > 0) may not work from a notebook
# on every platform — confirm, or use num_workers=0 if iteration hangs or fails.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=8)

# Create DataLoader for testing data
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=8) # no shuffling for repeatability

In [42]:
# # set num_workers to 0 before doing this, and set batch_size to 1
# for anchor, other, label in train_loader:
#     fig, ax = plt.subplots(1, 2)
#     print(label)
#     ax[0].imshow(anchor.squeeze().permute(1, 2, 0))
#     ax[1].imshow(other.squeeze().permute(1, 2, 0))
#     break