The bounding box is used to crop the image: it takes away the unimportant stuff (branches, leaves, ...) and leaves just the bird.

The cropped image then goes through the transform pipeline:
- Resize ==> makes all images the same size.
- ToTensor ==> converts the image into a PyTorch tensor, meaning that the layout goes from [H, W, C] to [C, H, W] (C being the channel dimension) and the pixel values are scaled to lie between 0 and 1 (critical for the calculations).

In [None]:
#Steps 1 and 2
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

# --- Dataset location and metadata tables ---
# Raw string so the backslashes in the Windows path are taken literally.
CUB_ROOT_DIR = r'D:\Images\CUB_200_2011_reduced\CUB_200_2011'
IMAGE_DIR = os.path.join(CUB_ROOT_DIR, 'images')

# Every metadata file is read as a space-separated, header-less two-column table.
images_df, labels_df, split_df, classes_df = (
    pd.read_csv(os.path.join(CUB_ROOT_DIR, table_file), sep=' ', names=column_names)
    for table_file, column_names in (
        ('images.txt', ['img_id', 'filepath']),
        ('image_class_labels.txt', ['img_id', 'class_id']),
        ('train_test_split.txt', ['img_id', 'is_train']),
        ('classes.txt', ['class_id', 'class_name']),
    )
)

# One row per image: filepath, class and split flag, joined on img_id.
all_data_df = images_df.merge(labels_df, on='img_id').merge(split_df, on='img_id')
# Shift class ids down by one so they start at 0, then keep ids 0-9 only.
all_data_df['class_id'] -= 1
all_data_df = all_data_df[all_data_df['class_id'] < 10]

# --- 1. Show an image for each class ---
print("--- Task 1: Sample Image Per Class ---")
fig, axes = plt.subplots(2, 5, figsize=(20, 8))
axes = axes.flatten()

# One panel per 0-indexed class id, showing the first listed image of that class.
for class_idx, ax in zip(range(10), axes):
    first_row = all_data_df[all_data_df['class_id'] == class_idx].iloc[0]
    img_path = os.path.join(IMAGE_DIR, first_row['filepath'])

    # classes_df still carries the original 1-based ids; only the text after
    # the last '.' of the stored name is used for the title.
    stored_name = classes_df[classes_df['class_id'] == (class_idx + 1)]['class_name'].values[0]
    class_name = stored_name.split('.')[-1]

    ax.imshow(Image.open(img_path))
    ax.set_title(f"Class {class_idx + 1}: {class_name}")
    ax.axis('off')

plt.suptitle("Sample Image for Each of the 10 Classes", fontsize=16)
plt.tight_layout()
plt.show()


# --- 2. Show statistics of groups for training and validation ---
print("\n--- Task 2: Train/Validation Statistics per Class ---")

# Images per class, counted separately for is_train == 1 and is_train == 0.
train_rows = all_data_df['is_train'] == 1
test_rows = all_data_df['is_train'] == 0
train_stats = all_data_df.loc[train_rows, 'class_id'].value_counts().sort_index()
test_stats = all_data_df.loc[test_rows, 'class_id'].value_counts().sort_index()

stats_df = pd.DataFrame({'Train': train_stats, 'Test': test_stats})

# Report classes as 1-10 instead of the internal 0-9 ids.
stats_df.index = stats_df.index + 1
print("Statistics (Classes 1-10):")
print(stats_df)

# Grouped bar chart: one Train bar and one Test bar per class.
stats_df.plot.bar(figsize=(12, 6), rot=0)
plt.title("Number of Images per Class (Train vs. Test)")
plt.xlabel("Class ID (1-10)")
plt.ylabel("Number of Images")
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

In [None]:
# --- Load Attribute Data ---

# Directory that contains CUB_ROOT_DIR (one level up).
BASE_DIR = os.path.dirname(CUB_ROOT_DIR)

# 1. Load attribute names.
# attributes.txt is looked for next to CUB_ROOT_DIR first; the other
# candidates cover a CUB_ROOT_DIR that already is the top-level folder.
# (The previous fallback, CUB_ROOT_DIR/../attributes.txt, resolved to the same
# file as the first attempt and therefore could never succeed after it failed.)
attribute_file_candidates = [
    os.path.join(BASE_DIR, 'attributes.txt'),
    os.path.join(CUB_ROOT_DIR, 'attributes.txt'),
    os.path.join(CUB_ROOT_DIR, 'attributes', 'attributes.txt'),
]
attributes_path = next(
    (path for path in attribute_file_candidates if os.path.isfile(path)),
    None,
)
if attributes_path is None:
    raise FileNotFoundError(
        "Error: Could not find 'attributes.txt' at the expected location. "
        "Looked in: " + ", ".join(attribute_file_candidates)
    )

attributes_df = pd.read_csv(
    attributes_path,
    sep=' ',
    names=['attr_id', 'attr_name']
)

# 2. Load which images have which attributes
img_attr_df = pd.read_csv(
    os.path.join(CUB_ROOT_DIR, 'attributes', 'image_attribute_labels.txt'),
    sep=' ',
    names=['img_id', 'attr_id', 'is_present', 'certainty_id', 'time']
)

# --- Pick one attribute to analyze, e.g., "in water" ---
# Use the first attribute whose name contains "water". The lookup result is
# checked explicitly so that unrelated indexing or file errors further down
# are not misreported as "attribute not found".
water_matches = attributes_df[attributes_df['attr_name'].str.contains('water', na=False)]

if water_matches.empty:
    print("Could not find an attribute containing 'water' in your dataset.")
else:
    water_attr = water_matches.iloc[0]
    water_attr_id = water_attr['attr_id']
    print(f"Found attribute: {water_attr['attr_name']} (ID: {water_attr_id})\n")

    # Get all image_ids from our 10-class set
    our_img_ids = set(all_data_df['img_id'])

    # Keep only rows for our images where this attribute is marked present
    water_labels = img_attr_df[
        (img_attr_df['img_id'].isin(our_img_ids)) &
        (img_attr_df['attr_id'] == water_attr_id) &
        (img_attr_df['is_present'] == 1) # 1 = True, 0 = False
    ]

    # Left-merge so images without a "present" row stay in the table; their
    # missing is_present value becomes 0.
    water_data = all_data_df.merge(water_labels, on='img_id', how='left')
    water_data['is_in_water'] = water_data['is_present'].fillna(0).astype(int)

    # --- 1. Show an image for each group (e.g., in_water vs. not_in_water) ---
    print(f"--- Task 1 (Advanced): Sample Images for Attribute '{water_attr['attr_name']}' ---")

    # Only plot when both groups have at least one image
    if 1 in water_data['is_in_water'].values and 0 in water_data['is_in_water'].values:
        img_in_water = water_data[water_data['is_in_water'] == 1].iloc[0]
        img_not_in_water = water_data[water_data['is_in_water'] == 0].iloc[0]

        fig, axes = plt.subplots(1, 2, figsize=(10, 5))

        img_path_1 = os.path.join(IMAGE_DIR, img_in_water['filepath'])
        axes[0].imshow(Image.open(img_path_1))
        axes[0].set_title("Group: 'is_in_water' = TRUE")
        axes[0].axis('off')

        img_path_2 = os.path.join(IMAGE_DIR, img_not_in_water['filepath'])
        axes[1].imshow(Image.open(img_path_2))
        axes[1].set_title("Group: 'is_in_water' = FALSE")
        axes[1].axis('off')

        plt.show()
    else:
        print("Could not find images for both 'in water' and 'not in water' groups.")

    # --- 2. Show statistics of groups for training and validation ---
    print(f"\n--- Task 2 (Advanced): Statistics for Attribute '{water_attr['attr_name']}' ---")
    # reindex guarantees both groups (0 and 1) appear, with 0 for an empty group
    train_water_stats = water_data[water_data['is_train'] == 1]['is_in_water'].value_counts().reindex([0, 1], fill_value=0)
    test_water_stats = water_data[water_data['is_train'] == 0]['is_in_water'].value_counts().reindex([0, 1], fill_value=0)

    stats_df_attr = pd.DataFrame({'Train': train_water_stats, 'Test': test_water_stats})
    stats_df_attr.index = ['Not in Water', 'In Water']
    print(stats_df_attr)

    stats_df_attr.plot(kind='bar', figsize=(8, 5), rot=0)
    plt.title(f"Distribution of Attribute: '{water_attr['attr_name']}'")
    plt.ylabel("Number of Images")
    plt.show()

In [None]:
!pip install umap-learn

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

class EmbeddingNet(nn.Module):
    """
    ResNet18-based encoder that maps images to unit-length embeddings.

    The torchvision ResNet18 is created with its default pretrained weights
    and its final ``fc`` layer is replaced by a new linear layer of width
    ``embedding_dim``.
    """

    def __init__(self, embedding_dim=128):
        """
        Args:
            embedding_dim (int): Size of the embedding produced per image.
        """
        super().__init__()

        resnet = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        # The new head takes the same input width as the stock classifier
        # ('fc') it replaces.
        resnet.fc = nn.Linear(resnet.fc.in_features, embedding_dim)
        self.backbone = resnet

    def forward(self, x):
        """
        Embed a batch of images.

        Args:
            x (torch.Tensor): Input batch of images [B, C, H, W]
        Returns:
            torch.Tensor: Embeddings [B, embedding_dim], each row scaled to
            L2 norm 1 (the usual setup for triplet loss).
        """
        return F.normalize(self.backbone(x), p=2, dim=1)

# Pick the compute device once: CUDA when available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm # Make sure tqdm is imported

from torchvision import transforms

# --- 1. Define the Evaluation Dataset Class ---
# This class returns (image, label) pairs
class CUBEmbeddingDataset(Dataset):
    """
    Dataset of single ``(image, label)`` pairs from one CUB split.

    Used for generating embeddings for an entire split (no triplet sampling).

    Args:
        root_dir (str): Directory holding ``images.txt``,
            ``image_class_labels.txt``, ``train_test_split.txt`` and the
            ``images`` folder.
        split (str): ``'train'`` selects rows with ``is_train == 1``; any
            other value selects rows with ``is_train == 0``.
        transform (callable, optional): Applied to each loaded RGB image.
        num_classes (int, optional): Keep only 0-indexed class ids below this
            value. Defaults to 10, the subset that used to be hard-coded;
            pass ``None`` to keep every class.
    """

    def __init__(self, root_dir, split='test', transform=None, num_classes=10):
        self.root_dir = root_dir
        self.image_dir = os.path.join(self.root_dir, 'images')
        self.transform = transform
        self.split = split
        self.num_classes = num_classes

        # The three metadata files are space-separated and keyed by img_id.
        images_df = self._read_table('images.txt', ['img_id', 'filepath'])
        labels_df = self._read_table('image_class_labels.txt', ['img_id', 'class_id'])
        split_df = self._read_table('train_test_split.txt', ['img_id', 'is_train'])

        data_df = images_df.merge(labels_df, on='img_id').merge(split_df, on='img_id')
        data_df['class_id'] = data_df['class_id'] - 1  # 0-indexed

        # Optional class subset (previously a fixed "first 10 classes" filter).
        if num_classes is not None:
            data_df = data_df[data_df['class_id'] < num_classes].reset_index(drop=True)

        # Filter by split
        target_split = 1 if self.split == 'train' else 0
        self.data_df = data_df[data_df['is_train'] == target_split].reset_index(drop=True)

        # (relative filepath, class_id) per item, in metadata order.
        self.data_list = list(zip(self.data_df['filepath'], self.data_df['class_id']))

    def _read_table(self, filename, columns):
        """Read one header-less, space-separated metadata file from root_dir."""
        return pd.read_csv(os.path.join(self.root_dir, filename), sep=' ', names=columns)

    def __len__(self):
        """Return the number of images in the selected split."""
        return len(self.data_list)

    def _load_image(self, filepath):
        """Load the image at ``filepath`` (relative to the images folder) as RGB."""
        full_path = os.path.join(self.image_dir, filepath)
        # convert() returns a new, fully loaded image, so the file handle can
        # be released as soon as the with-block exits.
        with Image.open(full_path) as img:
            return img.convert('RGB')

    def __getitem__(self, index):
        """Return ``(image, label)`` for item ``index``; the image is transformed if a transform is set."""
        img_path, label = self.data_list[index]
        img = self._load_image(img_path)

        if self.transform:
            img = self.transform(img)

        return img, label

# --- 2. Define the 'get_all_embeddings' function ---
def get_all_embeddings(model, loader, device):
    """
    Run ``model`` over every batch of ``loader`` and collect the outputs.

    The model is put into eval mode and gradients are disabled for the pass.

    Args:
        model: Module mapping an image batch to embeddings.
        loader: Iterable yielding ``(images, labels)`` batches.
        device: Device the image batches are moved to before the forward pass.
    Returns:
        tuple: ``(embeddings, labels)``, both moved to the CPU and
        concatenated along dim 0 in loader order.
    """
    model.eval()
    embedding_chunks, label_chunks = [], []

    with torch.no_grad():
        for batch_images, batch_labels in tqdm(loader, desc="Getting Embeddings"):
            batch_embeddings = model(batch_images.to(device))
            embedding_chunks.append(batch_embeddings.cpu())
            label_chunks.append(batch_labels.cpu())

    # Stitch the per-batch results back into single tensors.
    return torch.cat(embedding_chunks, dim=0), torch.cat(label_chunks, dim=0)

# --- 3. Define the 'eval_loader' and 'data_transform' ---
# Defined here as well so this cell still works after a notebook restart.
# Preprocessing: resize to 224x224, convert to tensor, normalise per channel.
data_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Test-split images, one (image, label) pair per item.
eval_dataset = CUBEmbeddingDataset(root_dir=CUB_ROOT_DIR, split='test', transform=data_transform)

# Fixed order (no shuffling) so embeddings and labels stay aligned with the dataset.
eval_loader = DataLoader(eval_dataset, batch_size=32, shuffle=False, num_workers=0)

In [None]:
#UMAP
from sklearn.manifold import TSNE
from torchvision import transforms
import umap # Needs: pip install umap-learn

# --- 1. Get "baseline" embeddings ---
# The baseline comes from a newly constructed EmbeddingNet that has not been
# through the triplet-loss training, so it shows the feature space beforehand.
print("--- Task 3: Baseline Visualization (Before Training) ---")
print("Initializing a fresh, pre-trained ResNet18...")

fresh_model = EmbeddingNet(embedding_dim=128)
fresh_model.to(device)
fresh_model.eval()

# Same helper and loader as for the trained model, just a different network.
print("Generating baseline embeddings for the test set...")
base_embeddings, base_labels = get_all_embeddings(fresh_model, eval_loader, device)
print(f"Got {base_embeddings.shape[0]} baseline embeddings.")

# --- 2. Run t-SNE ---
print("Running t-SNE... (this may take a moment)")
tsne = TSNE(n_components=2, perplexity=30, max_iter=1000, random_state=42)
tsne_embeddings = tsne.fit_transform(base_embeddings)
print("t-SNE finished.")

# --- 3. Run UMAP ---
print("Running UMAP...")
umap_reducer = umap.UMAP(n_components=2, random_state=42)
umap_embeddings = umap_reducer.fit_transform(base_embeddings)
print("UMAP finished.")

# --- 4. Plot both ---
fig, axes = plt.subplots(1, 2, figsize=(20, 10))

# Left panel: t-SNE, right panel: UMAP; one scatter series per class (0-9).
projections = (('t-SNE', tsne_embeddings), ('UMAP', umap_embeddings))
for ax, (method, coords) in zip(axes, projections):
    ax.set_title(f'{method} of Baseline Embeddings (Before Training)')
    for i in range(10):
        indices = (base_labels == i)
        ax.scatter(
            coords[indices, 0],
            coords[indices, 1],
            alpha=0.6,
            label=f'Class {i+1}'
        )
    ax.set_xlabel(f'{method} Component 1')
    ax.set_ylabel(f'{method} Component 2')
    ax.legend()

plt.suptitle("Visualization of Raw Image Features (from pre-trained ResNet18)", fontsize=16)
plt.show()

In [None]:
import torch
import pandas as pd
import os
import random
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

class TripletCUBDataset(Dataset):
    """
    Custom Dataset for loading CUB-200 for triplet loss.

    It reads the metadata files to build a list of all images in one split
    and groups their indices by class, so that each item can be returned as an
    (anchor, positive, negative) triplet.
    """

    def __init__(self, root_dir, split='train', transform=None):
        """
        Args:
            root_dir (string): Path to the CUB_200_2011 directory (holding
                images.txt, image_class_labels.txt, train_test_split.txt and
                the images folder).
            split (string): 'train' selects rows with is_train == 1; any
                other value selects rows with is_train == 0.
            transform (callable, optional): Optional transform to be applied
                to each of the three images of a triplet.
        """
        self.root_dir = root_dir
        self.image_dir = os.path.join(self.root_dir, 'images')
        self.transform = transform
        self.split = split

        # Load metadata
        self._load_metadata()

    def _read_table(self, filename, columns):
        """Read one header-less, space-separated metadata file from root_dir."""
        return pd.read_csv(os.path.join(self.root_dir, filename), sep=' ', names=columns)

    def _load_metadata(self):
        """Build data_df, data_list, class_to_indices and classes for the split."""
        # images.txt: <image_id> <filepath>
        images_df = self._read_table('images.txt', ['img_id', 'filepath'])
        # image_class_labels.txt: <image_id> <class_id>
        labels_df = self._read_table('image_class_labels.txt', ['img_id', 'class_id'])
        # train_test_split.txt: <image_id> <is_train>
        split_df = self._read_table('train_test_split.txt', ['img_id', 'is_train'])

        data_df = images_df.merge(labels_df, on='img_id').merge(split_df, on='img_id')

        # Class ids in the file start at 1; shift them to start at 0.
        data_df['class_id'] = data_df['class_id'] - 1

        # Filter by split (1 for train, 0 for test)
        target_split = 1 if self.split == 'train' else 0
        self.data_df = data_df[data_df['is_train'] == target_split].reset_index(drop=True)

        # --- Structures used for triplet sampling ---
        # 1. All data points as (filepath, class_id)
        self.data_list = list(zip(self.data_df['filepath'], self.data_df['class_id']))

        # 2. class_id -> indices into self.data_list
        self.class_to_indices = {}
        for idx, (_, class_id) in enumerate(self.data_list):
            self.class_to_indices.setdefault(class_id, []).append(idx)

        # 3. All class ids present in this split
        self.classes = list(self.class_to_indices)

    def __len__(self):
        """Return the number of anchor images in the split."""
        return len(self.data_list)

    def _load_image(self, filepath):
        """Helper to load an image from its relative path as RGB."""
        full_path = os.path.join(self.image_dir, filepath)
        # convert() returns a new, fully loaded image, so the file handle can
        # be released as soon as the with-block exits.
        with Image.open(full_path) as img:
            return img.convert('RGB')

    def __getitem__(self, index):
        """
        Generates one triplet (Anchor, Positive, Negative).

        The positive is a random other image of the anchor's class (the anchor
        itself when its class has a single image); the negative is a random
        image from a randomly chosen different class.

        Raises:
            ValueError: If the split contains fewer than two classes, so no
                negative can be drawn (this used to loop forever).
        """
        # 1. ANCHOR
        anchor_path, anchor_class = self.data_list[index]

        # 2. POSITIVE: same class, different image whenever one exists
        positive_candidates = [
            idx for idx in self.class_to_indices[anchor_class] if idx != index
        ]
        positive_index = random.choice(positive_candidates) if positive_candidates else index
        positive_path, _ = self.data_list[positive_index]

        # 3. NEGATIVE: any image from any other class
        negative_classes = [c for c in self.classes if c != anchor_class]
        if not negative_classes:
            raise ValueError(
                "Cannot sample a negative image: the split contains fewer than two classes."
            )
        negative_class = random.choice(negative_classes)
        negative_index = random.choice(self.class_to_indices[negative_class])
        negative_path, _ = self.data_list[negative_index]

        anchor_img = self._load_image(anchor_path)
        positive_img = self._load_image(positive_path)
        negative_img = self._load_image(negative_path)

        # 4. Apply transforms
        if self.transform:
            anchor_img = self.transform(anchor_img)
            positive_img = self.transform(positive_img)
            negative_img = self.transform(negative_img)

        return anchor_img, positive_img, negative_img

In [None]:
# --- Example Usage ---

# Preprocessing applied to every image: resize to 224x224, convert to tensor,
# normalise per channel.
data_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Your CUB_200_2011 directory path
CUB_ROOT_DIR = 'D:/images/CUB_200_2011_reduced/CUB_200_2011'

# 1. Triplet datasets for both splits (same transform for each)
train_dataset = TripletCUBDataset(root_dir=CUB_ROOT_DIR, split='train', transform=data_transform)
test_dataset = TripletCUBDataset(root_dir=CUB_ROOT_DIR, split='test', transform=data_transform)

# 2. DataLoaders: training batches are shuffled, test batches keep dataset order.
# batch_size and num_workers can be tuned.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=0)

print(f"Train dataset size: {len(train_dataset)}")
print(f"Test dataset size: {len(test_dataset)}")

# 3. Smoke test: pull a single batch and report the tensor shapes
print("\nTesting the train_loader...")
try:
    anchor_batch, positive_batch, negative_batch = next(iter(train_loader))

    for role, batch in (
        ("Anchor", anchor_batch),
        ("Positive", positive_batch),
        ("Negative", negative_batch),
    ):
        print(f"{role} batch shape: {batch.shape}")

except Exception as e:
    print(f"Error loading data: {e}")
    print("Please check your CUB_ROOT_DIR path and dataset integrity.")

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

class EmbeddingNet(nn.Module):
    """
    Image encoder built on torchvision's ResNet18.

    The network is created with its default pretrained weights; its last
    layer (``fc``) is swapped for a linear layer that outputs
    ``embedding_dim`` values, and the output is L2-normalised.
    """

    def __init__(self, embedding_dim=128):
        """
        Args:
            embedding_dim (int): The dimension of the output embedding.
        """
        super().__init__()

        pretrained = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        # Keep the feature width that fed the original classifier and attach
        # a new linear embedding layer in its place.
        feature_width = pretrained.fc.in_features
        pretrained.fc = nn.Linear(feature_width, embedding_dim)
        self.backbone = pretrained

    def forward(self, x):
        """
        Compute normalised embeddings for a batch.

        Args:
            x (torch.Tensor): Input batch of images [B, C, H, W]
        Returns:
            torch.Tensor: Embeddings [B, embedding_dim] with unit L2 norm per
            row, which is standard practice for triplet loss.
        """
        raw_embeddings = self.backbone(x)
        return F.normalize(raw_embeddings, p=2, dim=1)

# --- Example Usage ---

# 1. Choose the device (GPU when available) and build the model on it
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
embedding_dim = 128
model = EmbeddingNet(embedding_dim).to(device)

print(f"Using device: {device}")
print(model)

# 2. Smoke test: embed the anchors of one training batch
try:
    anchor_batch, positive_batch, negative_batch = next(iter(train_loader))

    # The batch has to live on the same device as the model
    anchor_batch = anchor_batch.to(device)

    # Eval mode and no gradient tracking: this is only a shape check
    model.eval()
    with torch.no_grad():
        embeddings = model(anchor_batch)

    print("\nSuccessfully passed one batch through the model.")
    print(f"Input batch shape: {anchor_batch.shape}")
    print(f"Output embedding shape: {embeddings.shape}")

except Exception as e:
    print(f"\nError testing model with data batch: {e}")

In [None]:
import torch.optim as optim
import time
from tqdm import tqdm # A nice library for progress bars

# --- 1. Setup Optimizer and Loss Function ---

# Hyperparameters: the project specifies a triplet margin of 1.0; the learning
# rate is a starting point to experiment with.
margin = 1.0
learning_rate = 1e-3

# Triplet margin loss, optimised with Adam over all model parameters.
loss_fn = nn.TripletMarginLoss(margin=margin)
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)


# --- 2. Training Function ---

def train_one_epoch(model, train_loader, optimizer, loss_fn, device):
    """
    Run one optimisation pass over ``train_loader``.

    Args:
        model: Embedding network; switched to training mode.
        train_loader: Iterable of (anchor, positive, negative) image batches.
        optimizer: Optimizer stepped once per batch.
        loss_fn: Callable taking (anchor, positive, negative) embeddings.
        device: Device the batches are moved to.
    Returns:
        float: Sum of the per-batch losses divided by the number of batches.
    """
    model.train()
    total_loss = 0.0

    for triplet in tqdm(train_loader, desc="Training"):
        anchor, positive, negative = (images.to(device) for images in triplet)

        # Clear gradients left over from the previous batch.
        optimizer.zero_grad()

        # Embed the three roles (anchor first, then positive, then negative)
        # and score the triplet.
        loss = loss_fn(model(anchor), model(positive), model(negative))

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    return total_loss / len(train_loader)

# --- 3. Validation Function ---

def validate_one_epoch(model, test_loader, loss_fn, device):
    """
    Measure the triplet loss over ``test_loader`` without updating the model.

    Args:
        model: Embedding network; switched to evaluation mode.
        test_loader: Iterable of (anchor, positive, negative) image batches.
        loss_fn: Callable taking (anchor, positive, negative) embeddings.
        device: Device the batches are moved to.
    Returns:
        float: Sum of the per-batch losses divided by the number of batches.
    """
    model.eval()
    total_loss = 0.0

    # Gradients are not needed when only measuring the loss.
    with torch.no_grad():
        for triplet in tqdm(test_loader, desc="Validating"):
            anchor, positive, negative = (images.to(device) for images in triplet)
            loss = loss_fn(model(anchor), model(positive), model(negative))
            total_loss += loss.item()

    return total_loss / len(test_loader)

# --- 4. Main Training Loop ---

num_epochs = 10  # A starting point; 10-20 epochs is a reasonable first range
train_losses, val_losses = [], []

print("Starting training...")

for epoch in range(num_epochs):
    start_time = time.time()

    # One pass of training, recorded before validation starts
    train_loss = train_one_epoch(model, train_loader, optimizer, loss_fn, device)
    train_losses.append(train_loss)

    # One pass over the test loader for the validation loss
    val_loss = validate_one_epoch(model, test_loader, loss_fn, device)
    val_losses.append(val_loss)

    end_time = time.time()
    elapsed = end_time - start_time

    print(f"Epoch {epoch+1}/{num_epochs} - "
          f"Time: {elapsed:.2f}s - "
          f"Train Loss: {train_loss:.4f} - "
          f"Val Loss: {val_loss:.4f}")

print("Training finished.")

# `train_losses` and `val_losses` now hold one value per epoch and can be used
# to plot the training curves for the report.

In [None]:
import matplotlib.pyplot as plt

# Plot the per-epoch losses collected by the training loop
# ('train_losses' and 'val_losses'), with epochs numbered from 1.
epochs_range = range(1, num_epochs + 1)

plt.figure(figsize=(10, 5))
for loss_values, line_format, curve_label in (
    (train_losses, 'b-o', 'Training Loss'),
    (val_losses, 'r-o', 'Validation Loss'),
):
    plt.plot(epochs_range, loss_values, line_format, label=curve_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
class CUBEmbeddingDataset(Dataset):
    """
    A dataset that returns just one image and its label.

    Used for generating embeddings for the entire test (or train) split.

    Args:
        root_dir (str): Directory holding ``images.txt``,
            ``image_class_labels.txt``, ``train_test_split.txt`` and the
            ``images`` folder.
        split (str): ``'train'`` selects rows with ``is_train == 1``; any
            other value selects rows with ``is_train == 0``.
        transform (callable, optional): Applied to each loaded RGB image.
        num_classes (int, optional): Keep only 0-indexed class ids below this
            value. Defaults to 10, the subset that used to be hard-coded;
            pass ``None`` to keep every class.
    """

    def __init__(self, root_dir, split='test', transform=None, num_classes=10):
        self.root_dir = root_dir
        self.image_dir = os.path.join(self.root_dir, 'images')
        self.transform = transform
        self.split = split
        self.num_classes = num_classes

        # Load metadata: three space-separated files keyed by img_id.
        images_df = self._read_table('images.txt', ['img_id', 'filepath'])
        labels_df = self._read_table('image_class_labels.txt', ['img_id', 'class_id'])
        split_df = self._read_table('train_test_split.txt', ['img_id', 'is_train'])

        data_df = images_df.merge(labels_df, on='img_id').merge(split_df, on='img_id')
        data_df['class_id'] = data_df['class_id'] - 1  # 0-indexed

        # Optional class subset (previously a fixed "first 10 classes" filter).
        if num_classes is not None:
            data_df = data_df[data_df['class_id'] < num_classes].reset_index(drop=True)

        # Filter by split
        target_split = 1 if self.split == 'train' else 0
        self.data_df = data_df[data_df['is_train'] == target_split].reset_index(drop=True)

        # (relative filepath, class_id) per item, in metadata order.
        self.data_list = list(zip(self.data_df['filepath'], self.data_df['class_id']))

    def _read_table(self, filename, columns):
        """Read one header-less, space-separated metadata file from root_dir."""
        return pd.read_csv(os.path.join(self.root_dir, filename), sep=' ', names=columns)

    def __len__(self):
        """Return the number of images in the selected split."""
        return len(self.data_list)

    def _load_image(self, filepath):
        """Load the image at ``filepath`` (relative to the images folder) as RGB."""
        full_path = os.path.join(self.image_dir, filepath)
        # convert() returns a new, fully loaded image, so the file handle can
        # be released as soon as the with-block exits.
        with Image.open(full_path) as img:
            return img.convert('RGB')

    def __getitem__(self, index):
        """Return ``(image, label)`` for item ``index``; the image is transformed if a transform is set."""
        img_path, label = self.data_list[index]
        img = self._load_image(img_path)

        if self.transform:
            img = self.transform(img)

        return img, label

In [None]:
# Test-split dataset of (image, label) pairs, with the same transforms as before
eval_dataset = CUBEmbeddingDataset(root_dir=CUB_ROOT_DIR, split='test', transform=data_transform)

# Fixed order (no shuffling) so embeddings and labels stay aligned with the dataset
eval_loader = DataLoader(eval_dataset, batch_size=32, shuffle=False, num_workers=0)

# --- Function to get all embeddings ---
def get_all_embeddings(model, loader, device):
    """
    Embed every batch of ``loader`` with ``model`` and gather the results.

    The model is put into eval mode and gradients are disabled for the pass.

    Args:
        model: Module mapping an image batch to embeddings.
        loader: Iterable yielding ``(images, labels)`` batches.
        device: Device the image batches are moved to before the forward pass.
    Returns:
        tuple: ``(embeddings, labels)``, both moved to the CPU and
        concatenated along dim 0 in loader order.
    """
    model.eval()
    collected_embeddings, collected_labels = [], []

    with torch.no_grad():
        for image_batch, label_batch in tqdm(loader, desc="Getting Test Embeddings"):
            output = model(image_batch.to(device))
            collected_embeddings.append(output.cpu())
            collected_labels.append(label_batch.cpu())

    # Join the per-batch pieces into one tensor each.
    return torch.cat(collected_embeddings, dim=0), torch.cat(collected_labels, dim=0)

# Embed the whole test set with the current `model`, then report the shapes.
trained_outputs = get_all_embeddings(model, eval_loader, device)
test_embeddings, test_labels = trained_outputs

print(f"\nShape of test embeddings: {test_embeddings.shape}")
print(f"Shape of test labels: {test_labels.shape}")

In [None]:
from sklearn.manifold import TSNE

# 1. t-SNE model projecting down to 2 dimensions (n_components=2)
tsne = TSNE(n_components=2, perplexity=30, max_iter=1000, random_state=42)

# 2. Fit and transform the test embeddings (this can take a minute)
print("Running t-SNE... (this may take a moment)")
tsne_embeddings = tsne.fit_transform(test_embeddings)

print("t-SNE finished.")

# 3. Scatter plot with one series (and colour) per class id 0-9
tsne_fig, tsne_ax = plt.subplots(figsize=(12, 10))
for i in range(10):
    indices = (test_labels == i)
    tsne_ax.scatter(
        tsne_embeddings[indices, 0],
        tsne_embeddings[indices, 1],
        alpha=0.6,
        label=f'Class {i+1}'
    )

tsne_ax.set_title('t-SNE Visualization of Test Set Embeddings')
tsne_ax.set_xlabel('t-SNE Component 1')
tsne_ax.set_ylabel('t-SNE Component 2')
tsne_ax.legend()
plt.show()

In [None]:
import torch

def calculate_precision_at_k(embeddings, labels, k=5):
    """
    Calculates Precision@k for a given set of embeddings and labels.

    Every embedding is used as a query against all the others: its k nearest
    neighbours (Euclidean distance) are retrieved and the fraction sharing the
    query's label is averaged over all queries.

    Args:
        embeddings (torch.Tensor): The NxD tensor of embeddings.
        labels (torch.Tensor): The N-dimensional tensor of labels.
        k (int): The number of top-k neighbors to consider.

    Returns:
        float: Mean Precision@k over the N queries, in [0, 1].

    Raises:
        ValueError: If ``k`` is smaller than 1 or there are not more than
            ``k`` embeddings (each query needs k neighbours besides itself).
    """
    num_queries = embeddings.shape[0]
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")
    if num_queries <= k:
        raise ValueError(
            f"Need more than k={k} embeddings to retrieve k neighbours, got {num_queries}"
        )

    # 1. NxN matrix of pairwise L2 distances: dist_matrix[i, j] is the
    # distance between embedding i and embedding j.
    dist_matrix = torch.cdist(embeddings, embeddings, p=2)

    # 2. Remove each query from its own candidate list explicitly. Dropping
    # "the first top-k column" instead is only correct if the self-match is
    # guaranteed to sort first, which fails when another embedding is at the
    # same (zero) distance, e.g. for duplicate embeddings.
    dist_matrix.fill_diagonal_(float('inf'))

    # 3. Indices of the k closest remaining embeddings per query (Nxk);
    # `largest=False` selects the smallest distances.
    _, top_k_neighbors = torch.topk(dist_matrix, k, dim=1, largest=False)

    # 4. Labels of the retrieved neighbours (Nxk)
    retrieved_labels = labels[top_k_neighbors]

    # 5. Compare against the query labels, reshaped to (N, 1) for broadcasting
    matches = retrieved_labels == labels.unsqueeze(1)

    # 6. Per-query precision (matching neighbours / k), then the mean over queries
    precision_per_query = matches.sum(dim=1).float() / k
    return precision_per_query.mean().item()

# --- Run the calculation ---
# Precision@k on the test embeddings for several k. k=1 checks whether the
# single closest image has the same label; 5 and 10 look at larger
# neighbourhoods.
precision_by_k = {}
for k in (1, 5, 10):
    precision_by_k[k] = calculate_precision_at_k(test_embeddings, test_labels, k=k)
    print(f"Precision@{k}: {precision_by_k[k] * 100:.2f}%")

# Keep the individual results available under their own names.
p_at_1, p_at_5, p_at_10 = (precision_by_k[k] for k in (1, 5, 10))

In [None]:
import umap # Make sure this is imported

# 1. UMAP model with the same parameters as the baseline run, for a fair comparison
print("Running UMAP on trained embeddings...")
umap_reducer = umap.UMAP(n_components=2, random_state=42)

# 2. Project the embeddings produced by the trained model ('test_embeddings')
umap_embeddings_trained = umap_reducer.fit_transform(test_embeddings)

print("UMAP finished.")

# 3. Scatter plot with one series (and colour) per class, labelled 1-10
umap_fig, umap_ax = plt.subplots(figsize=(12, 10))
for i in range(10):
    indices = (test_labels == i)
    umap_ax.scatter(
        umap_embeddings_trained[indices, 0],
        umap_embeddings_trained[indices, 1],
        alpha=0.6,
        label=f'Class {i + 1}'
    )

umap_ax.set_title('UMAP Visualization of Test Set Embeddings (After Training)')
umap_ax.set_xlabel('UMAP Component 1')
umap_ax.set_ylabel('UMAP Component 2')
umap_ax.legend()
plt.show()