In [1]:
import numpy as np
import os
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image

class ImagePairDataset(Dataset):
    """Dataset of labelled image pairs listed in a ``.npy`` metadata file.

    Every metadata entry is read positionally: ``entry[0]`` and ``entry[1]``
    are image paths relative to ``image_root`` and ``entry[2]`` is the pair
    label (1 for positive, 0 for negative).

    Args:
        npy_file: Path to the ``.npy`` file containing the pair entries.
        image_root: Directory the relative image paths are joined onto.
        transform: Optional callable applied to each loaded RGB image.
        target_name: Optional substring filter. When given, a pair is kept
            only if BOTH relative paths contain it. When ``None`` (the
            default) no filtering is applied and every pair is kept.
        skip_missing: When ``True``, pairs for which either image file does
            not exist on disk are dropped at construction time instead of
            raising ``FileNotFoundError`` later in ``__getitem__``.
            Defaults to ``False`` (no filesystem check).
    """

    def __init__(self, npy_file, image_root, transform=None, target_name=None,
                 skip_missing=False):
        self.image_pairs = []  # (img1_path, img2_path, label) tuples
        self.transform = transform
        self.target_name = target_name

        # NOTE(review): allow_pickle=True unpickles arbitrary objects; only
        # load metadata files that come from a trusted source.
        pair_data = np.load(npy_file, allow_pickle=True)

        for entry in pair_data:
            img1_rel_path = entry[0]
            img2_rel_path = entry[1]
            label = entry[2]  # 1 for positive, 0 for negative

            # Keep the pair only when both images match the requested target.
            if not self._matches_target(img1_rel_path, img2_rel_path):
                continue

            # Create the full paths from the relative paths.
            img1_path = os.path.join(image_root, img1_rel_path)
            img2_path = os.path.join(image_root, img2_rel_path)

            if skip_missing and not (
                os.path.isfile(img1_path) and os.path.isfile(img2_path)
            ):
                continue

            self.image_pairs.append((img1_path, img2_path, label))

    def _matches_target(self, img1_rel_path, img2_rel_path):
        """Return True when the pair passes the ``target_name`` filter."""
        # Without this guard, ``None in "<path>"`` raises TypeError.
        if self.target_name is None:
            return True
        return (
            self.target_name in img1_rel_path
            and self.target_name in img2_rel_path
        )

    @staticmethod
    def _load_rgb(path):
        """Open ``path`` and return an RGB copy, closing the file promptly."""
        # The context manager releases the file handle right away rather
        # than waiting for garbage collection.
        with Image.open(path) as img:
            return img.convert("RGB")

    def __len__(self):
        """Return the number of image pairs kept after filtering."""
        return len(self.image_pairs)

    def __getitem__(self, idx):
        """Return ``(img1, img2, label)`` for the pair at position ``idx``.

        Both images are loaded as RGB and, when a transform was supplied,
        passed through it independently.
        """
        img1_path, img2_path, label = self.image_pairs[idx]
        img1 = self._load_rgb(img1_path)
        img2 = self._load_rgb(img2_path)

        if self.transform:
            img1 = self.transform(img1)
            img2 = self.transform(img2)

        return img1, img2, label

# Training data setup: both paths are relative to the working directory.
image_root = "doppelgangers/images"  # directory the relative image paths hang off
npy_file = "doppelgangers/pairs_metadata/train_pairs_noflip.npy"

# Preprocessing shared by both images of a pair: fixed 224x224 resize,
# tensor conversion, then per-channel normalisation (the mean/std values are
# presumably the usual ImageNet statistics).
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Restrict training to pairs whose two paths both contain "Arc_de".
dataset = ImagePairDataset(
    npy_file=npy_file,
    image_root=image_root,
    transform=transform,
    target_name="Arc_de",
)
dataloader = torch.utils.data.DataLoader(
    dataset=dataset,
    batch_size=32,
    shuffle=True,
)


# Model

In [2]:
import torch.nn as nn
import torch

class VisualDisambiguationModel(nn.Module):
    """Pair classifier: a shared backbone followed by a small MLP head.

    Args:
        backbone: Module mapping an image batch to a feature batch whose
            second dimension has size ``feature_dim``.
        feature_dim: Width of the feature vector produced by ``backbone``
            for a single image.
    """

    def __init__(self, backbone, feature_dim):
        super().__init__()
        self.backbone = backbone  # shared by both images of a pair

        # Head consumes the two feature vectors side by side and emits a
        # single probability per pair.
        head_layers = [
            nn.Linear(2 * feature_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 1),
            nn.Sigmoid(),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, img1, img2):
        """Return the sigmoid score for each (img1, img2) pair in the batch."""
        # Run each image through the same backbone, keeping img1 first.
        features = [self.backbone(image) for image in (img1, img2)]

        # Join the two feature batches along the feature dimension.
        pair_features = torch.cat(features, dim=1)
        return self.fc(pair_features)

# Fetch the DINOv2 backbone via torch.hub, then move it onto the GPU.
backbone = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitl14')
backbone = backbone.to('cuda')

# The classifier head is sized from the backbone's embedding width.
feature_dim = backbone.embed_dim

# Wrap the backbone in the pair classifier and place the whole model on the GPU.
model = VisualDisambiguationModel(backbone=backbone, feature_dim=feature_dim)
model = model.to('cuda')


Using cache found in C:\Users\user/.cache\torch\hub\facebookresearch_dinov2_main
A matching Triton is not available, some optimizations will not be enabled.
Error caught was: No module named 'triton'


In [3]:
from torch.optim import Adam
import torch.nn.functional as F

# Loss and optimizer
# BCELoss expects probabilities; the model's head already ends in a sigmoid.
criterion = nn.BCELoss()
optimizer = Adam(model.parameters(), lr=1e-4)

# Training loop
for epoch in range(10):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for img1, img2, label in dataloader:
        # The dataloader yields image tensors, not file paths, so no
        # filesystem checks are possible (or needed) here; missing files
        # surface as FileNotFoundError from the dataset itself.
        img1, img2, label = img1.to('cuda'), img2.to('cuda'), label.float().to('cuda')

        # Forward pass
        optimizer.zero_grad()
        # Drop only the trailing size-1 dimension: a bare squeeze() would
        # also remove the batch dimension when the last batch holds a single
        # pair, and BCELoss would then reject the shape mismatch.
        outputs = model(img1, img2).squeeze(1)
        loss = criterion(outputs, label)

        # Backward pass
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # max(..., 1) avoids ZeroDivisionError when the dataloader is empty.
    print(f"Epoch {epoch+1}, Loss: {running_loss / max(len(dataloader), 1)}")


FileNotFoundError: [Errno 2] No such file or directory: 'D:\\CodesNStuffs\\DinoV2_Classification\\doppelgangers_data\\doppelgangers\\images\\Arc_de_Triomphe_du_Carrousel_by_angle\\front\\0\\pictures\\Arco - panoramio - anibal amaro.jpg'

In [None]:
# Load test dataset
# NOTE(review): placeholder path - point this at the real test metadata file.
npy_test_file = "path/to/pairs_metadata/test_pairs.npy"
# target_name="" is a substring of every path, so all test pairs are kept.
# Omitting target_name would pass None into the dataset's substring check.
test_dataset = ImagePairDataset(npy_test_file, image_root, transform, target_name="")
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for img1, img2, label in test_dataloader:
        img1, img2, label = img1.to('cuda'), img2.to('cuda'), label.to('cuda')
        # squeeze(1) keeps the batch dimension even for a batch of one pair.
        outputs = model(img1, img2).squeeze(1)
        predicted = (outputs > 0.5).float()  # Threshold the output
        correct += (predicted == label).sum().item()
        total += label.size(0)

# An empty test set has no defined accuracy; report NaN instead of crashing.
accuracy = correct / total if total else float("nan")
print(f"Accuracy on test set: {accuracy * 100:.2f}%")
