In [1]:
!pip install kagglehub



In [5]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("abhyudaya12/veri-vehicle-re-identification-dataset")

print("Path to dataset files:", path)

ValueError: '/content' is not present in the dataset files. You can access the other files of the attached dataset at '/kaggle/input/veri-vehicle-re-identification-dataset'

In [6]:
!mv "/root/.cache/kagglehub/datasets/abhyudaya12/veri-vehicle-re-identification-dataset/versions/1/VeRi" "/content"

In [7]:
!pip install torch torchvision tqdm pillow

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Using cached nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Using cached nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Using cached nvidia_curand_cu12

In [2]:
import os
import random
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

def read_image_list(file_path):
    with open(file_path, 'r') as f:
        return [line.strip() for line in f.readlines()]

def build_label_dict(image_names):
    label_dict = {}
    label_id = 0
    for name in image_names:
        vid = name.split('_')[0]  # vehicle ID is the prefix
        if vid not in label_dict:
            label_dict[vid] = label_id
            label_id += 1
    return {name: label_dict[name.split('_')[0]] for name in image_names}

class TripletDataset(Dataset):
    def __init__(self, root_dir, list_file):
        self.root_dir = root_dir
        self.image_list = read_image_list(list_file)
        self.label_map = build_label_dict(self.image_list)
        self.transform = transform

        # Group by class for faster sampling
        self.class_to_indices = {}
        for i, img_name in enumerate(self.image_list):
            label = self.label_map[img_name]
            self.class_to_indices.setdefault(label, []).append(i)

    def __len__(self):
        return len(self.image_list)

    def __getitem__(self, idx):
        anchor_name = self.image_list[idx]
        anchor_label = self.label_map[anchor_name]
        anchor_img = self._load_image(anchor_name)

        # Positive
        pos_idx = idx
        while pos_idx == idx or self.label_map[self.image_list[pos_idx]] != anchor_label:
            pos_idx = random.choice(self.class_to_indices[anchor_label])
        positive_img = self._load_image(self.image_list[pos_idx])

        # Negative
        neg_label = anchor_label
        while neg_label == anchor_label:
            neg_label = random.choice(list(self.class_to_indices.keys()))
        neg_idx = random.choice(self.class_to_indices[neg_label])
        negative_img = self._load_image(self.image_list[neg_idx])

        return anchor_img, positive_img, negative_img

    def _load_image(self, img_name):
        img_path = os.path.join(self.root_dir, img_name)
        img = Image.open(img_path).convert('RGB')
        return self.transform(img)


In [3]:
import torch.nn as nn
from torchvision import models

class FeatureExtractor(nn.Module):
    def __init__(self):
        super().__init__()
        resnet = models.resnet50(pretrained=True)
        self.backbone = nn.Sequential(*list(resnet.children())[:-1])  # Remove final FC
        self.embedding = nn.Linear(2048, 256)  # 256-D feature space

    def forward(self, x):
        x = self.backbone(x).squeeze()  # (batch, 2048)
        x = self.embedding(x)           # (batch, 256)
        return x


In [4]:
import torch
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = FeatureExtractor().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

def triplet_loss(anchor, positive, negative, margin=1.0):
    pos_dist = F.pairwise_distance(anchor, positive)
    neg_dist = F.pairwise_distance(anchor, negative)
    loss = torch.relu(pos_dist - neg_dist + margin)
    return loss.mean()

# Define paths
image_root = "VeRi/image_train"
train_list = "VeRi/name_train.txt"

# Create dataset & loader
train_dataset = TripletDataset(image_root, train_list)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)


for epoch in range(5):
    model.train()
    total_loss = 0
    for a, p, n in train_loader:
        a, p, n = a.to(device), p.to(device), n.to(device)
        feat_a = model(a)
        feat_p = model(p)
        feat_n = model(n)

        loss = triplet_loss(feat_a, feat_p, feat_n)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")




Epoch 1, Loss: 0.0881
Epoch 2, Loss: 0.0376
Epoch 3, Loss: 0.0208
Epoch 4, Loss: 0.0212
Epoch 5, Loss: 0.0127


In [5]:
class VeRiEvalDataset(Dataset):
    def __init__(self, root_dir, list_file, transform):
        self.root_dir = root_dir
        self.image_list = read_image_list(list_file)
        self.label_map = build_label_dict(self.image_list)
        self.transform = transform

    def __len__(self):
        return len(self.image_list)

    def __getitem__(self, idx):
        img_name = self.image_list[idx]
        label = self.label_map[img_name]
        img_path = os.path.join(self.root_dir, img_name)
        img = Image.open(img_path).convert('RGB')
        return self.transform(img), label


In [6]:
from torch.utils.data import DataLoader
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Build loaders
query_dataset = VeRiEvalDataset("VeRi/image_query", "VeRi/name_query.txt", transform)
gallery_dataset = VeRiEvalDataset("VeRi/image_test", "VeRi/name_test.txt", transform)

query_loader = DataLoader(query_dataset, batch_size=32, shuffle=False)
gallery_loader = DataLoader(gallery_dataset, batch_size=32, shuffle=False)

# Feature extraction
def extract_features(loader, model):
    model.eval()
    features, labels = [], []
    with torch.no_grad():
        for imgs, lbls in loader:
            imgs = imgs.to(device)
            feats = model(imgs).cpu().numpy()
            features.append(feats)
            labels.extend(lbls.numpy())
    return np.vstack(features), np.array(labels)

query_feats, query_labels = extract_features(query_loader, model)
gallery_feats, gallery_labels = extract_features(gallery_loader, model)

# Cosine similarity matching
sims = cosine_similarity(query_feats, gallery_feats)
top1 = (gallery_labels[np.argmax(sims, axis=1)] == query_labels).mean()

print(f"Top-1 Accuracy: {top1 * 100:.2f}%")


Top-1 Accuracy: 99.94%


In [11]:
print(TripletDataset.len())
# Save entire model
torch.save(model.state_dict(), "veri_reid_model.pth")

1180
