## Baseline
This is the baseline solution you are given in the competition. Your objective is to improve it. It has been slightly adapted to run smoothly outside of Bohrium (the 2025 competition platform).

In [None]:
import os
import clip
import numpy as np
from PIL import Image
from tqdm import tqdm
from pathlib import Path

import torch
from torch.nn import functional as F

In [None]:
import random
import numpy as np
import torch

seed = 42

# Feed the same seed to every random number generator used in this notebook:
# Python's built-in `random`, NumPy, PyTorch on CPU, and PyTorch on one / all GPUs.
for _seed_fn in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_fn(seed)

# Ask cuDNN for deterministic kernels and turn off its benchmark-based
# algorithm selection so repeated runs behave the same way.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [None]:
def sort_paths_by_number(path_list):
    """
    Reorder `path_list` in place by the integer value of each file's base name.

    The directory part and the extension of every path are ignored, so
    "a/10.png" is placed after "b/2.png". Every base name must be parseable
    by int(); otherwise the ValueError from int() propagates.

    The list is modified in place and nothing is returned.
    """
    def numeric_stem(path):
        # Drop the directories first, then the extension, and read what is left as an integer.
        _directory, file_name = os.path.split(path)
        stem, _extension = os.path.splitext(file_name)
        return int(stem)

    path_list.sort(key=numeric_stem)


In [None]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Local filesystem locations of the dataset root and of the CLIP checkpoint.
# DATA_PATH keeps its trailing slash because later cells build sub-paths from it
# by plain string concatenation.
model_path = "C:/Users/Home/Documents/python]/Problem-Solutions/ViT-B-32.pt" # TODO: Upload model somewhere else
DATA_PATH = "C:/Users/Home/Downloads/train_v1/train/" # TODO: Upload data somewhere else

# clip.load hands back the network together with its image preprocessing transform.
model, preprocess = clip.load(model_path, device=DEVICE)
# Put the network in evaluation mode; it is only used for inference below.
model.eval()

In [None]:
def infer(img_paths):
    """
    Compute L2-normalized feature embeddings for a list of image file paths using the CLIP visual encoder.

    Each image is opened, passed through the module-level `preprocess` transform
    and encoded on its own (batch size 1) by `model.visual`. The per-image
    outputs are concatenated along dim 0 and L2-normalized along dim 1.

    Args:
        img_paths: Iterable of image file paths accepted by `PIL.Image.open`.
            Must not be empty, because `torch.cat` rejects an empty list.

    Returns:
        A 2-D tensor on `DEVICE` with one unit-length row per input path, in
        the same order as `img_paths`.
    """
    # Feed the encoder inputs in the precision of its own weights instead of a
    # hard-coded float16. The OpenAI CLIP loader keeps float16 weights on CUDA
    # but converts them to float32 on CPU, where a float16 input would fail
    # with a dtype mismatch.
    weight_dtype = next(model.visual.parameters()).dtype

    embeddings = []
    # Inference only: disable autograd once for the whole loop.
    with torch.no_grad():
        for path in tqdm(img_paths):
            # Close the image file as soon as it has been turned into a tensor.
            with Image.open(path) as img:
                x = preprocess(img)
            x = x.type(weight_dtype).unsqueeze(0).to(DEVICE)

            embeddings.append(model.visual(x))

    embeddings = torch.cat(embeddings)
    embeddings = F.normalize(embeddings, p=2, dim=1)

    return embeddings

In [None]:
def match_images(BASE_DATA_DIR, result_path):
    """
    Match every query image to a gallery image by CLIP embedding similarity
    and save the resulting 1-based gallery indices to `result_path`.

    The "*.png" files in BASE_DATA_DIR/query and BASE_DATA_DIR/gallery are
    ordered by their numeric file names, embedded with `infer`, and compared
    through the matrix product of the L2-normalized embeddings (cosine
    similarity), rescaled from [-1, 1] to [0, 1].

    For each query the gallery entry at rank 1 of the top-11 list is selected,
    i.e. the one with the second-highest similarity.
    NOTE(review): the top-ranked candidate (rank 0) is skipped on purpose by
    this baseline as far as can be told from the code - confirm whether that
    is intended.

    Args:
        BASE_DATA_DIR: `pathlib.Path` of a directory containing "query" and
            "gallery" sub-directories. The gallery must hold at least 11
            images because `torch.topk` is called with k=11.
        result_path: Destination passed to `np.save`.

    Returns:
        None. The matches are written to `result_path` as a 1-D integer array
        with one entry per query, where index 1 refers to the first gallery
        image in numeric file-name order.
    """
    query_dir = BASE_DATA_DIR / "query"
    gallery_dir = BASE_DATA_DIR / "gallery"

    query_paths = [str(p) for p in query_dir.glob("*.png")]
    gallery_paths = [str(p) for p in gallery_dir.glob("*.png")]

    # glob() order is arbitrary; sort numerically so row/column i corresponds
    # to the i-th image by file number.
    sort_paths_by_number(query_paths)
    sort_paths_by_number(gallery_paths)

    query_embeddings = infer(query_paths)
    gallery_embeddings = infer(gallery_paths)

    # Rows are unit vectors, so the product is the cosine similarity matrix of
    # shape (num_queries, num_gallery); larger means more similar.
    similarities = torch.mm(query_embeddings, gallery_embeddings.t())
    similarities = (similarities + 1.) / 2.

    _, topk_idxs = torch.topk(similarities, 11, dim=1)

    # Column 1 is the second-ranked candidate; "+ 1" converts the 0-based
    # gallery position into the 1-based index expected in the result file.
    matches_idxs = topk_idxs[:, 1].cpu().numpy() + 1

    np.save(result_path, matches_idxs)

In [None]:
import tempfile
import os

# Reserve two temporary ".npy" files for the predictions. delete=False keeps each
# file on disk after its handle is closed, so only the name is needed from here on.
with tempfile.NamedTemporaryFile(suffix=".npy", delete=False) as tmp_a:
    submission_a_path = tmp_a.name
with tempfile.NamedTemporaryFile(suffix=".npy", delete=False) as tmp_b:
    submission_b_path = tmp_b.name

# Run the matching pipeline on both test splits, one output file per split.
for split_name, out_path in (
    ("test_a", submission_a_path),
    ("test_b", submission_b_path),
):
    match_images(Path(DATA_PATH + split_name), out_path)

## Scoring model
This simulates how leaderboards A and B would behave in the competition.

In [None]:
from scoring import score

# Score the prediction files that match_images actually wrote. The predictions
# were saved to the temporary paths held in submission_a_path / submission_b_path,
# not to "submission_a.npy" / "submission_b.npy" in the working directory.
score_a = score(submission_a_path, DATA_PATH + "test_data.npy")
score_b = score(submission_b_path, DATA_PATH + "validation_data.npy")

print(f"Score A: {score_a:.4f}\n Score B: {score_b:.4f}")

# The temporary files were created with delete=False, so remove them explicitly.
os.remove(submission_a_path)
os.remove(submission_b_path)