In [134]:
# imports
import os
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm

import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torchvision.transforms as T
import torchvision.models as models
from torchvision import transforms


#Set GPU as device to use.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [135]:
df_train = pd.read_csv("../aml-2025-feathers-in-focus/train_images.csv")
df_train.head()

complete_bird_attributes = pd.read_csv("complete_bird_attributes.csv", index_col='class_key')

In [136]:
num_concepts = complete_bird_attributes.shape[1]

# test
example_label = df_train["label"].iloc[0]
example_concepts = complete_bird_attributes.loc[example_label]
print(example_concepts)


has_bill_shape::curved_(up_or_down)    0
has_bill_shape::dagger                 0
has_bill_shape::hooked                 0
has_bill_shape::needle                 0
has_bill_shape::hooked_seabird         1
                                      ..
has_crown_color::buff                  0
has_wing_pattern::solid                1
has_wing_pattern::spotted              0
has_wing_pattern::striped              0
has_wing_pattern::multi-colored        0
Name: 1, Length: 312, dtype: int64


In [137]:
class BirdConceptDataset(Dataset):
    def __init__(self, csv_df, attributes_df, images_root):
        self.df = csv_df
        self.attributes = attributes_df
        self.images_root = images_root
        
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # Load label
        label = self.df.iloc[idx]["label"]
        
        # Load concept vector
        concept_vec = torch.tensor(self.attributes.loc[label].values, dtype=torch.float32)
        
        # Build full image path
        img_rel_path = self.df.iloc[idx]["image_path"]
        img_path = os.path.join(self.images_root, os.path.basename(img_rel_path))

        # Load image using your method
        image = Image.open(img_path).convert("RGB")
        image = self.transform(image)

        return image, concept_vec


In [None]:
train_images_dir = "../aml-2025-feathers-in-focus/train_images/cropped_train_images/"
train_images = [
    os.path.join(train_images_dir, f)
    for f in os.listdir(train_images_dir)
    if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp'))
]


In [139]:
train_dataset = BirdConceptDataset(
    csv_df=df_train,
    attributes_df=complete_bird_attributes,
    images_root=train_images_dir
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)


In [140]:
import torch
import torch.nn as nn

class BirdCNN(nn.Module):
    def __init__(self):
        super().__init__()

        # -------- BLOCK 1 --------
        # Input: 3 x 224 x 224
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1, bias=False)
        self.bn1   = nn.BatchNorm2d(32)
        self.act1  = nn.LeakyReLU(0.1, inplace=True)
        self.pool1 = nn.MaxPool2d(2)  # -> 32 x 112 x 112

        # -------- BLOCK 2 --------
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False)
        self.bn2   = nn.BatchNorm2d(64)
        self.act2  = nn.LeakyReLU(0.1, inplace=True)
        self.pool2 = nn.MaxPool2d(2)  # -> 64 x 56 x 56

        # -------- BLOCK 3 --------
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=False)
        self.bn3   = nn.BatchNorm2d(128)
        self.act3  = nn.LeakyReLU(0.1, inplace=True)
        self.pool3 = nn.MaxPool2d(2)  # -> 128 x 28 x 28

        # -------- BLOCK 4 --------
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1, bias=False)
        self.bn4   = nn.BatchNorm2d(256)
        self.act4  = nn.LeakyReLU(0.1, inplace=True)
        self.pool4 = nn.MaxPool2d(2)  # -> 256 x 14 x 14

        # Global pool -> 256 features
        self.gap = nn.AdaptiveAvgPool2d((1, 1))

        self.fc1 = nn.Linear(256, 512)
        self.act_fc1 = nn.LeakyReLU(0.1, inplace=True)
        self.dropout = nn.Dropout(0.25)
        self.fc2 = nn.Linear(512, 256)

        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            nn.init.xavier_uniform_(m.weight)
            nn.init.zeros_(m.bias)


    def forward(self, x):
        x = self.pool1(self.act1(self.bn1(self.conv1(x))))
        x = self.pool2(self.act2(self.bn2(self.conv2(x))))
        x = self.pool3(self.act3(self.bn3(self.conv3(x))))
        x = self.pool4(self.act4(self.bn4(self.conv4(x))))

        x = self.gap(x)
        x = torch.flatten(x, 1)  # -> (B, 256)

        #head
        x= self.act_fc1(self.fc1(x))
        x = self.dropout(x)
        x= self.fc2(x)

        return x

In [141]:
class ConceptNet(nn.Module):
    def __init__(self, num_outputs=312):
        super().__init__()

        self.backbone = BirdCNN()

        self.fc1 = nn.Linear(256, 512)
        self.bn_fc1 = nn.BatchNorm1d(512)
        self.act_fc1 = nn.LeakyReLU(0.1, inplace=True)
        self.drop1 = nn.Dropout(0.25)

        self.fc2 = nn.Linear(512, num_outputs)   # logits
        # No sigmoid here → use BCEWithLogitsLoss externally

    def forward(self, x):
        x = self.backbone(x)
        x = self.drop1(self.act_fc1(self.bn_fc1(self.fc1(x))))
        x = self.fc2(x)
        return x

In [145]:
model = ConceptNet(num_outputs=num_concepts)
model = model.to(device)
criterion = nn.BCEWithLogitsLoss()    # output is already sigmoid
criterion = criterion.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)

In [144]:
num_epochs = 10

model.train()
for epoch in range(num_epochs):
    total_loss = 0

    for images, targets in train_loader:
        optimizer.zero_grad()

        images = images.to(device)
        targets = targets.to(device)
        preds = model(images)         # predictions (batch_size × num_concepts)
        loss = criterion(preds, targets)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss:.4f}")

    torch.save(model.state_dict(), "basic_best_model.pth")


Epoch 1/10, Loss: 17367.8976


KeyboardInterrupt: 

In [None]:
torch.save(model.state_dict(), "best_model.pth")

In [None]:
test_images_dir = "../aml-2025-feathers-in-focus/test_images/cropped_test_images/"

test_images = [
    os.path.join(test_images_dir, f)
    for f in os.listdir(test_images_dir)
    if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp'))
]

print(f"Found {len(test_images)} test images")


img_size = 224
test_transform = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor()
])


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# make sure model structure matches training
model = ConceptNet(num_outputs=complete_bird_attributes.shape[1])
model.load_state_dict(torch.load("best_model.pth", map_location=device))
model = model.to(device)
model.eval()


num_concepts = len(complete_bird_attributes.columns)
topk = 2  # number of concepts to keep per image

pred_list = []

with torch.no_grad():
    for img_path in tqdm(test_images, desc="Predicting test images with Top-2"):
        # Load image
        img = Image.open(img_path).convert("RGB")
        x = test_transform(img).unsqueeze(0).to(device)
        
        # Forward pass
        logits = model(x)
        probs = torch.sigmoid(logits).cpu().numpy().squeeze()  # shape (num_concepts,)
        
        # Initialize all zeros
        topk_vec = np.zeros_like(probs)
        
        # Find top-2 indices
        topk_idx = probs.argsort()[-topk:][::-1]
        topk_vec[topk_idx] = 1
        
        pred_list.append(topk_vec)


pred_array = np.stack(pred_list, axis=0)  # shape: (4000, 312)

# Use column names from complete_bird_attributes
pred_df = pd.DataFrame(pred_array, columns=complete_bird_attributes.columns)
pred_df.insert(0, "image_path", [os.path.basename(f) for f in test_images])

print(pred_df.shape)  # should be (4000, 312)

pred_df.head()


pred_df.to_csv("test_predictions.csv", index=False)


In [None]:
#KNN with K=1 on the predicted concept vectors to find the nearest training image
from sklearn.neighbors import NearestNeighbors
import pandas as pd
import numpy as np

# Drop image_path column for NN, only keep concept vectors
test_features = pred_df.drop(columns=["image_path"]).values  # shape (4000, 312)

# Make sure training attribute matrix is aligned (columns in same order)
train_features = complete_bird_attributes.values  # shape (num_birds, 312)
train_index = complete_bird_attributes.index.values  # class_key values

# Fit Nearest Neighbors model
k = 1
nn_model = NearestNeighbors(n_neighbors=k, metric='euclidean')  # can use 'cosine' or 'euclidean'
nn_model.fit(train_features)


distances, indices = nn_model.kneighbors(test_features)  # indices shape (4000, k)

nearest_class_keys = train_index[indices[:, 0]]  # shape (4000,)
output_df = pd.DataFrame({
    "image_path": pred_df["image_path"].values,
    "class_key": nearest_class_keys
})

output_df.head()

output_df.to_csv("test_nearest_neighbors.csv", index=False)



In [None]:
complete_bird_attributes.index.values