# **Task 1: Pre-trained CNN**

**Imports and Drive Mount**

In [None]:
from google.colab import drive
import sys, os

import torch
import torchvision.transforms as transforms

In [None]:
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


**Macros**

In [None]:
# Path to project folder in Drive
project_path = "/content/drive/MyDrive/projectcv2"
sys.path.append(project_path)

# Paths to resources
image_dir = os.path.join(project_path, "data")
class_json = os.path.join(image_dir, "classes.json")
model_path = os.path.join(project_path, "resnet50_embedding_head.pth")

image_size = 224

device = "cuda" if torch.cuda.is_available() else "cpu"

In [4]:
from utils import (
    load_dataset,
    get_embedding_model,
    extract_embeddings,
    retrieve_top_k,
    evaluate_retrieval,
    set_seed
)

set_seed(42)

**Dataset Preparation**

In [None]:
transform = transforms.Compose([
    transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)),  # zoom/crop
    transforms.RandomHorizontalFlip(p=0.5),                      # mirror
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),  # lighting
    transforms.RandomAffine(degrees=15, translate=(0.05, 0.05)),  # slight rotation & shift
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

In [None]:
dataset = load_dataset(image_dir, class_json, transform)
query_dataset = dataset["query"]
reference_dataset = dataset["reference"]

**Model Loading**

In [None]:
if os.path.exists(model_path):
    print("Loading saved pretrained model...")
    model = get_embedding_model("resnet50", pretrained=False)
    model.load_state_dict(torch.load(model_path, map_location=device))
else:
    print("Initializing and saving pretrained model...")
    model = get_embedding_model("resnet50", pretrained=True)
    torch.save(model.state_dict(), model_path)

model = model.to(device)
model.eval()

Loading saved pretrained model...




ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

**Embedding Extraction**

In [None]:
query_data = extract_embeddings(model, query_dataset, device=device)
ref_data = extract_embeddings(model, reference_dataset, device=device)

Extracting embeddings: 100%|██████████| 1/1 [00:07<00:00,  7.72s/it]
Extracting embeddings: 100%|██████████| 3/3 [00:23<00:00,  7.72s/it]


**Top-k Retrieval**

In [None]:
retrieval_result_cos1 = retrieve_top_k(query_data, ref_data, k=1, metric="cosine")
retrieval_result_cos5 = retrieve_top_k(query_data, ref_data, k=5, metric="cosine")
retrieval_result_cos10 = retrieve_top_k(query_data, ref_data, k=10, metric="cosine")

retrieval_result_euc1 = retrieve_top_k(query_data, ref_data, k=1, metric="euclidean")
retrieval_result_euc5 = retrieve_top_k(query_data, ref_data, k=5, metric="euclidean")
retrieval_result_euc10 = retrieve_top_k(query_data, ref_data, k=10, metric="euclidean")

**Evaluation**

In [None]:
metrics_cos1 = evaluate_retrieval(retrieval_result_cos1, k=1)
metrics_cos5 = evaluate_retrieval(retrieval_result_cos5, k=5)
metrics_cos10 = evaluate_retrieval(retrieval_result_cos10, k=10)

metrics_euc1 = evaluate_retrieval(retrieval_result_euc1, k=1)
metrics_euc5 = evaluate_retrieval(retrieval_result_euc5, k=5)
metrics_euc10 = evaluate_retrieval(retrieval_result_euc10, k=10)

print("Cosine Similarity")
print("Top-1 :", metrics_cos1)
print("Top-5 :", metrics_cos5)
print("Top-10:", metrics_cos10)

print("\nEuclidean Distance")
print("Top-1 :", metrics_euc1)
print("Top-5 :", metrics_euc5)
print("Top-10:", metrics_euc10)

Cosine Similarity
Top-1 : {'Precision@1': 0.75, 'Recall@1': 0.75, 'mAP@1': 0.75}
Top-5 : {'Precision@5': 0.46, 'Recall@5': 0.9, 'mAP@5': 0.8156}
Top-10: {'Precision@10': 0.265, 'Recall@10': 0.95, 'mAP@10': 0.7583}

Euclidean Distance
Top-1 : {'Precision@1': 0.7, 'Recall@1': 0.7, 'mAP@1': 0.7}
Top-5 : {'Precision@5': 0.45, 'Recall@5': 0.9, 'mAP@5': 0.7636}
Top-10: {'Precision@10': 0.275, 'Recall@10': 1.0, 'mAP@10': 0.7342}
