In [1]:
import os
import random
import torch
import torch.nn as nn
from PIL import Image
import torch.optim as optim
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import random_split
from torch.utils.data import Dataset, DataLoader
from concurrent.futures import ThreadPoolExecutor
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(torch.cuda.get_device_name(device))

NVIDIA GeForce RTX 2060


In [18]:
class ImageFolderDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []

        for img_name in os.listdir(root_dir):
            img_path = os.path.join(root_dir, img_name)
            if os.path.isfile(img_path) and img_path.lower().endswith(('png', 'jpg', 'jpeg', 'bmp', 'tiff')):
                self.image_paths.append(img_path)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert("RGB")

        if self.transform:
            image = self.transform(image)
        
        return image, img_path

transform = transforms.Compose([transforms.Resize((224, 224)),transforms.ToTensor(),transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),])

root_dir = r"D:\Computer Vision\FYP\TASK 1\env\TrackNet-X\DataSet\VeRi\image_train"

dataset = ImageFolderDataset(root_dir, transform=transform)

In [19]:
for idx in range(144):
    image, img_path = dataset[idx]
    print(f"Name: {os.path.basename(img_path)}, Image shape: {image.shape}")

Name: 0001_c001_00016450_0.jpg, Image shape: torch.Size([3, 224, 224])
Name: 0001_c001_00016460_0.jpg, Image shape: torch.Size([3, 224, 224])
Name: 0001_c001_00016470_0.jpg, Image shape: torch.Size([3, 224, 224])
Name: 0001_c001_00016480_0.jpg, Image shape: torch.Size([3, 224, 224])
Name: 0001_c001_00016490_0.jpg, Image shape: torch.Size([3, 224, 224])
Name: 0001_c001_00016500_0.jpg, Image shape: torch.Size([3, 224, 224])
Name: 0001_c002_00016885_0.jpg, Image shape: torch.Size([3, 224, 224])
Name: 0001_c002_00016895_0.jpg, Image shape: torch.Size([3, 224, 224])
Name: 0001_c002_00016905_0.jpg, Image shape: torch.Size([3, 224, 224])
Name: 0001_c002_00016915_0.jpg, Image shape: torch.Size([3, 224, 224])
Name: 0001_c002_00016945_0.jpg, Image shape: torch.Size([3, 224, 224])
Name: 0001_c002_00016955_0.jpg, Image shape: torch.Size([3, 224, 224])
Name: 0001_c012_00014680_0.jpg, Image shape: torch.Size([3, 224, 224])
Name: 0001_c012_00014685_0.jpg, Image shape: torch.Size([3, 224, 224])
Name: 

In [20]:
class CustomCNN(nn.Module):
    def __init__(self):
        super(CustomCNN, self).__init__()

        # First convolution layer: 7x7 kernel, stride 5, padding 3, 12 channels
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=7, stride=5, padding=3)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=1)  # No downsampling here

        # Second convolution layer: 3x3 kernel, stride 1, padding 1, 24 channels
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=3, stride=1, padding=1)

        # Third convolution layer: 3x3 kernel, stride 1, padding 1, 32 channels (no downsampling)
        self.conv3 = nn.Conv2d(in_channels=24, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # Reduces to 32x32

        # Fourth convolution layer: 3x3 kernel, stride 1, padding 1, 64 channels (no downsampling)
        self.conv4 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)

        # Adaptive pooling to prevent size issues
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Flatten()

    def forward(self, x):
        x1 = F.relu(self.conv1(x))
        x2 = self.pool1(x1)

        x3 = F.relu(self.conv2(x2))
        x4 = F.relu(self.conv3(x3))
        x5 = self.pool2(x4)

        x6 = F.relu(self.conv4(x5))
        x7 = self.global_pool(x6)
        x8 = self.fc(x7)

        return x8

In [27]:
def GetRandomImage(dataset):
    return random.choice(dataset)

def Generate_Triplets(dataset):
    anchor, anchor_id = GetRandomImage(dataset)

    # Find a positive image (same class as anchor)
    while True:
        positive, positive_id = GetRandomImage(dataset)
        if positive_id == anchor_id:
            break

    # Find a negative image (different class than anchor)
    while True:
        negative, negative_id = GetRandomImage(dataset)
        if negative_id != anchor_id:
            break

    return anchor, positive, negative

anchor, positive, negative = Generate_Triplets(dataset)
print(f"Anchor: {anchor}, Positive: {positive}, Negative: {negative}")

Anchor: tensor([[[ 0.5707,  0.5536,  0.5536,  ..., -1.4329, -1.4672, -1.4672],
         [ 0.5707,  0.5707,  0.5707,  ..., -1.4500, -1.4843, -1.4843],
         [ 0.5707,  0.5707,  0.5707,  ..., -1.4672, -1.4672, -1.4672],
         ...,
         [ 0.9303,  0.8961,  0.8961,  ...,  0.1939,  0.2282,  0.2453],
         [ 0.9303,  0.9303,  0.9474,  ...,  0.1597,  0.1768,  0.2111],
         [ 0.9132,  0.9303,  0.9817,  ...,  0.0912,  0.1254,  0.1597]],

        [[ 0.7829,  0.7654,  0.7654,  ..., -1.2479, -1.2479, -1.2479],
         [ 0.7829,  0.7829,  0.7829,  ..., -1.2654, -1.2654, -1.2654],
         [ 0.7829,  0.7829,  0.7829,  ..., -1.2829, -1.2829, -1.2654],
         ...,
         [ 0.9405,  0.9055,  0.9055,  ...,  0.3978,  0.4328,  0.4503],
         [ 0.9405,  0.9230,  0.9580,  ...,  0.3627,  0.3803,  0.4153],
         [ 0.9055,  0.9230,  0.9755,  ...,  0.2927,  0.3277,  0.3627]],

        [[ 1.0539,  1.0365,  1.0365,  ..., -0.9156, -0.9156, -0.9156],
         [ 1.0539,  1.0539,  1.0539, 