# Data

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
import torchvision.transforms as T
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torch

# Dataset Definition
class ImagePairsDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.data = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        img1_path, img2_path, label = row["Image1"], row["Image2"], row["Label"]

        # Load images
        img1 = Image.open(img1_path).convert("RGB")
        img2 = Image.open(img2_path).convert("RGB")

        if self.transform:
            img1 = self.transform(img1)
            img2 = self.transform(img2)

        return img1, img2, torch.tensor(label, dtype=torch.float32)

# K-Fold Data Preparation
def prepare_data_kfold(df_path, k=5, batch_size=8):
    df = pd.read_csv(df_path)
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    splits = list(kf.split(df))

    transform = T.Compose([
        T.Resize((224, 224)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Prepare loaders for each fold
    loaders = []
    for i, (train_idx, test_idx) in enumerate(splits):
        train_df = df.iloc[train_idx]
        test_df = df.iloc[test_idx]

        train_dataset = ImagePairsDataset(train_df, transform=transform)
        test_dataset = ImagePairsDataset(test_df, transform=transform)

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

        loaders.append((train_loader, test_loader))

    return loaders


# Model

In [None]:
import torch
import torch.nn as nn

# ResNet Block Definition
class ResNetBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(ResNetBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.skip_connection = nn.Identity() if in_channels == out_channels else nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1)
    
    def forward(self, x):
        identity = self.skip_connection(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += identity
        return self.relu(out)

# ResNet with 3 Residual Blocks
class ResNet(nn.Module):
    def __init__(self, in_channels, out_channels, num_blocks=3):
        super(ResNet, self).__init__()
        layers = []
        for _ in range(num_blocks):
            layers.append(ResNetBlock(in_channels, out_channels))
            in_channels = out_channels  # Output of one block becomes input for the next
        self.resnet_blocks = nn.Sequential(*layers)
    
    def forward(self, x):
        return self.resnet_blocks(x)

# Feature Extractor using DINOv2
class DINOFeatureExtractor(torch.nn.Module):
    def __init__(self, backbone):
        super().__init__()
        self.backbone = backbone

    def forward(self, x):
        return self.backbone(x)

# Model for Visual Disambiguation with ResNet blocks
class VisualDisambiguationModelWithResNet(torch.nn.Module):
    def __init__(self, feature_dim, backbone, resnet_in_channels, resnet_out_channels):
        super().__init__()
        # DINO backbone feature extractor
        self.dino_feature_extractor = DINOFeatureExtractor(backbone)
        
        # ResNet with 3 residual blocks
        self.resnet = ResNet(resnet_in_channels, resnet_out_channels, num_blocks=3)

        # Visual Disambiguation classifier
        self.classifier = torch.nn.Sequential(
            torch.nn.Linear(feature_dim * 2, 512),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(512, 1),
            torch.nn.Sigmoid(),
        )

    def forward(self, x1, x2):
        # Pass both inputs through DINO backbone and ResNet blocks
        features1 = self.dino_feature_extractor(x1)
        features2 = self.dino_feature_extractor(x2)

        features1 = self.resnet(features1)
        features2 = self.resnet(features2)

        # Flatten the features
        features1 = features1.flatten(start_dim=1)
        features2 = features2.flatten(start_dim=1)

        # Pass features to the classifier
        combined_features = torch.cat([features1, features2], dim=1)
        return self.classifier(combined_features)



# Train and eval function

In [3]:
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm  # Import tqdm for progress bar

# Training Loop
def train_model(train_loader, model, feature_extractor, optimizer, criterion, device):
    model.train()
    feature_extractor.eval()  # Freeze DINO
    running_loss = 0.0

    # Use tqdm to show progress bar
    with tqdm(train_loader, unit="batch") as tepoch:
        for img1, img2, labels in tepoch:
            img1, img2, labels = img1.to(device), img2.to(device), labels.to(device)

            features1 = feature_extractor(img1)
            features2 = feature_extractor(img2)

            optimizer.zero_grad()
            outputs = model(features1, features2).squeeze()
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # Update progress bar description with loss information
            tepoch.set_postfix(loss=running_loss / (tepoch.n + 1))

    return running_loss / len(train_loader)

# Evaluation
def evaluate_model(test_loader, model, feature_extractor, criterion, device):
    model.eval()
    feature_extractor.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    # Use tqdm to show progress bar for evaluation
    with tqdm(test_loader, unit="batch") as tepoch:
        with torch.no_grad():
            for img1, img2, labels in tepoch:
                img1, img2, labels = img1.to(device), img2.to(device), labels.to(device)

                features1 = feature_extractor(img1)
                features2 = feature_extractor(img2)

                outputs = model(features1, features2).squeeze()
                loss = criterion(outputs, labels)
                running_loss += loss.item()

                preds = (outputs > 0.5).float()
                correct += (preds == labels).sum().item()
                total += labels.size(0)

                # Update progress bar description with loss information
                tepoch.set_postfix(loss=running_loss / (tepoch.n + 1))

    accuracy = correct / total
    return running_loss / len(test_loader), accuracy


# Train and test

In [4]:
import torch.hub

df_path = "Arc_de_T_6k.csv"
k = 3
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load data with K-Fold
loaders = prepare_data_kfold(df_path, k)

# Load DINOv2 Backbone
backbone = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitb14').to(device)
feature_extractor = DINOFeatureExtractor(backbone).to(device)

# Model, Optimizer, Loss
model = VisualDisambiguationModel(feature_dim=768).to(device)  # vitl14 output dim is 1024, vitb14 is 768 
optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.BCELoss()

for fold, (train_loader, test_loader) in enumerate(loaders):
    print(f"Fold {fold + 1}/{k}")

    # Train
    for epoch in range(5):  #5 epochs per fold
        train_loss = train_model(train_loader, model, feature_extractor, optimizer, criterion, device)
        print(f"Epoch {epoch + 1}, Train Loss: {train_loss:.4f}")

    # Evaluate
    test_loss, accuracy = evaluate_model(test_loader, model, feature_extractor, criterion, device)
    print(f"Fold {fold + 1} - Test Loss: {test_loss:.4f}, Accuracy: {accuracy:.4%}")



Using cache found in C:\Users\user/.cache\torch\hub\facebookresearch_dinov2_main
A matching Triton is not available, some optimizations will not be enabled.
Error caught was: No module named 'triton'


Fold 1/3


  storage_data_ptr = tensors[0].storage().data_ptr()
  if x.storage().data_ptr() != storage_data_ptr:
100%|██████████| 500/500 [16:57<00:00,  2.03s/batch, loss=0.613]


Epoch 1, Train Loss: 0.6132


100%|██████████| 500/500 [17:51<00:00,  2.14s/batch, loss=0.525]


Epoch 2, Train Loss: 0.5254


100%|██████████| 500/500 [17:52<00:00,  2.15s/batch, loss=0.467]


Epoch 3, Train Loss: 0.4668


100%|██████████| 500/500 [18:13<00:00,  2.19s/batch, loss=0.419]


Epoch 4, Train Loss: 0.4187


100%|██████████| 500/500 [17:20<00:00,  2.08s/batch, loss=0.367]


Epoch 5, Train Loss: 0.3674


100%|██████████| 250/250 [03:57<00:00,  1.05batch/s, loss=0.356]


Fold 1 - Test Loss: 0.3557, Accuracy: 86.2000%
Fold 2/3


100%|██████████| 500/500 [17:26<00:00,  2.09s/batch, loss=0.342]


Epoch 1, Train Loss: 0.3420


100%|██████████| 500/500 [17:05<00:00,  2.05s/batch, loss=0.294]


Epoch 2, Train Loss: 0.2943


100%|██████████| 500/500 [15:20<00:00,  1.84s/batch, loss=0.259]


Epoch 3, Train Loss: 0.2586


100%|██████████| 500/500 [15:13<00:00,  1.83s/batch, loss=0.241]


Epoch 4, Train Loss: 0.2409


100%|██████████| 500/500 [15:16<00:00,  1.83s/batch, loss=0.22] 


Epoch 5, Train Loss: 0.2197


100%|██████████| 250/250 [03:33<00:00,  1.17batch/s, loss=0.211]


Fold 2 - Test Loss: 0.2111, Accuracy: 90.8000%
Fold 3/3


100%|██████████| 500/500 [15:35<00:00,  1.87s/batch, loss=0.223]


Epoch 1, Train Loss: 0.2234


100%|██████████| 500/500 [15:17<00:00,  1.84s/batch, loss=0.205]


Epoch 2, Train Loss: 0.2050


100%|██████████| 500/500 [15:28<00:00,  1.86s/batch, loss=0.189]


Epoch 3, Train Loss: 0.1890


100%|██████████| 500/500 [15:19<00:00,  1.84s/batch, loss=0.18] 


Epoch 4, Train Loss: 0.1798


100%|██████████| 500/500 [15:16<00:00,  1.83s/batch, loss=0.171]


Epoch 5, Train Loss: 0.1711


100%|██████████| 250/250 [03:26<00:00,  1.21batch/s, loss=0.171]

Fold 3 - Test Loss: 0.1707, Accuracy: 92.3500%





In [5]:
model_save_path = "Arc_de_T_model.pth"

# Save both the model's state_dict and the feature extractor (optional)
torch.save({
    'model_state_dict': model.state_dict(),
    'feature_extractor_state_dict': feature_extractor.state_dict(),
}, model_save_path)

print(f"Model saved to {model_save_path}")


Model saved to Arc_de_T_model.pth
