In [1]:
# imports
import os
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm

import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import torchvision.models as models
from torchvision import transforms


In [2]:
# Load the training manifest (one row per training image).
df_train = pd.read_csv("../aml-2025-feathers-in-focus/train_images.csv")

# Per-class attribute table. Indexing it by `class_key` lets a training label
# be used directly with `.loc` to fetch that class's concept vector.
complete_bird_attributes = pd.read_csv("complete_bird_attributes.csv", index_col='class_key')

# Preview the manifest. This has to be the last expression of the cell,
# otherwise the notebook does not render it.
df_train.head()

In [3]:
# Every column of the attribute table is one concept.
num_concepts = len(complete_bird_attributes.columns)

# Smoke test: print the concept vector that belongs to the label of the first
# training row.
example_label = df_train["label"].iloc[0]
example_concepts = complete_bird_attributes.loc[example_label]
print(example_concepts)


has_bill_shape::curved_(up_or_down)    0
has_bill_shape::dagger                 0
has_bill_shape::hooked                 0
has_bill_shape::needle                 0
has_bill_shape::hooked_seabird         1
                                      ..
has_crown_color::buff                  0
has_wing_pattern::solid                1
has_wing_pattern::spotted              0
has_wing_pattern::striped              0
has_wing_pattern::multi-colored        0
Name: 1, Length: 312, dtype: int64


In [4]:
class BirdConceptDataset(Dataset):
    """Dataset yielding ``(image_tensor, concept_vector)`` pairs.

    Each row of ``csv_df`` supplies a ``label`` and an ``image_path``. The label
    is looked up in ``attributes_df`` with ``.loc`` and the resulting row is
    returned as a float32 tensor alongside the transformed image.

    Args:
        csv_df: DataFrame with at least the columns ``"label"`` and
            ``"image_path"``.
        attributes_df: DataFrame indexed by label; the values of the selected
            row form the concept vector.
        images_root: Directory that holds the image files. Only the file name
            of ``image_path`` is used; any directory part is discarded.
        transform: Optional callable applied to the loaded RGB image. When
            omitted, images are resized to 224x224 and converted with
            ``ToTensor`` (the previous hard-coded behaviour).
    """

    def __init__(self, csv_df, attributes_df, images_root, transform=None):
        self.df = csv_df
        self.attributes = attributes_df
        self.images_root = images_root

        if transform is None:
            transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor()
            ])
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the manifest."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, concept_vec)`` for the row at position ``idx``."""
        # Fetch the row once instead of running a separate iloc per column.
        row = self.df.iloc[idx]
        label = row["label"]

        # Concept vector of the row's class
        concept_vec = torch.tensor(self.attributes.loc[label].values, dtype=torch.float32)

        # The manifest path may carry a directory prefix; keep the file name only.
        img_path = os.path.join(self.images_root, os.path.basename(row["image_path"]))

        # convert() returns a new, fully loaded image, so the file can be
        # closed as soon as the context manager exits.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        image = self.transform(image)

        return image, concept_vec


In [5]:
# Directory that holds the training images.
train_images_dir = "../aml-2025-feathers-in-focus/train_images/train_images/"

# File-name endings accepted as images (compared case-insensitively).
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')

# Full path of every directory entry whose name ends in one of the endings above.
train_images = [
    os.path.join(train_images_dir, name)
    for name in os.listdir(train_images_dir)
    if name.lower().endswith(IMAGE_EXTENSIONS)
]


In [6]:
# Pair every training image with the concept vector of its class.
train_dataset = BirdConceptDataset(df_train, complete_bird_attributes, train_images_dir)

# Serve the dataset in shuffled mini-batches of 32 samples.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=32)


In [7]:
class ConceptNet(nn.Module):
    """ResNet-18 feature extractor followed by a small multi-label concept head.

    The head ends in ``nn.Sigmoid``, so ``forward`` returns one probability per
    concept rather than logits. Pair the output with ``nn.BCELoss`` and do not
    apply another sigmoid to it.

    Args:
        num_outputs: Number of concepts predicted per image.
    """

    def __init__(self, num_outputs):
        super().__init__()

        # NOTE(review): recent torchvision releases deprecate `pretrained=` in
        # favour of `weights=`; kept as-is for compatibility with the installed
        # version - confirm before upgrading.
        self.backbone = models.resnet18(pretrained=True)
        # Read the feature width from the backbone's own classifier before it is
        # replaced, rather than hard-coding 512.
        feature_dim = self.backbone.fc.in_features
        self.backbone.fc = nn.Identity()   # remove classification head

        self.fc = nn.Sequential(
            nn.Linear(feature_dim, 256),
            nn.ReLU(),
            nn.Linear(256, num_outputs),
            nn.Sigmoid()   # multilabel output
        )

    def forward(self, x):
        """Return per-concept probabilities for a batch of images ``x``."""
        x = self.backbone(x)
        x = self.fc(x)
        return x


In [8]:
# One network output per concept column.
model = ConceptNet(num_concepts)

# The network already ends in a sigmoid, so plain binary cross-entropy is used
# on its output.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)




In [9]:
num_epochs = 10

# Run each batch on whatever device the model currently lives on, so the loop
# works unchanged whether or not the model was moved to a GPU beforehand.
train_device = next(model.parameters()).device

# Lowest epoch loss seen so far in this run. The checkpoint is rewritten only
# when an epoch beats it, so "best_model.pth" holds the weights with the lowest
# training loss instead of simply the most recent ones. No validation set is
# involved; "best" refers to training loss only.
best_loss = float("inf")

model.train()
for epoch in range(num_epochs):
    total_loss = 0

    for images, targets in train_loader:
        images = images.to(train_device)
        targets = targets.to(train_device)

        optimizer.zero_grad()

        preds = model(images)         # predictions (batch_size × num_concepts)
        loss = criterion(preds, targets)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # total_loss is the sum of the per-batch losses, not their mean.
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss:.4f}")

    if total_loss < best_loss:
        best_loss = total_loss
        torch.save(model.state_dict(), "best_model.pth")


Epoch 1/10, Loss: 39.8795
Epoch 2/10, Loss: 23.6783
Epoch 3/10, Loss: 21.1442
Epoch 4/10, Loss: 17.6651


KeyboardInterrupt: 

In [55]:
# Write the model's current parameters to "best_model.pth", replacing any
# existing file of that name.
checkpoint_state = model.state_dict()
torch.save(checkpoint_state, "best_model.pth")

In [10]:
test_images_dir = "../aml-2025-feathers-in-focus/test_images/test_images/"

# os.listdir returns entries in arbitrary order; sort so the prediction rows
# come out in the same order on every run.
test_images = sorted(
    os.path.join(test_images_dir, f)
    for f in os.listdir(test_images_dir)
    if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp'))
)

print(f"Found {len(test_images)} test images")


# Resize to a fixed square and convert to a tensor (224x224, ToTensor).
img_size = 224
test_transform = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor()
])


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# make sure model structure matches training
model = ConceptNet(num_outputs=complete_bird_attributes.shape[1])
model.load_state_dict(torch.load("best_model.pth", map_location=device))
model = model.to(device)
model.eval()


num_concepts = len(complete_bird_attributes.columns)
topk = 2  # number of concepts to keep per image

pred_list = []

with torch.no_grad():
    for img_path in tqdm(test_images, desc=f"Predicting test images with Top-{topk}"):
        # Load image and add a batch dimension of 1
        img = Image.open(img_path).convert("RGB")
        x = test_transform(img).unsqueeze(0).to(device)

        # ConceptNet already ends in nn.Sigmoid, so its output is one
        # probability per concept. Applying torch.sigmoid a second time would
        # squash every value into roughly [0.5, 0.73].
        probs = model(x).cpu().numpy().squeeze(0)  # shape (num_concepts,)

        # Binary vector with ones at the topk most probable concepts
        topk_vec = np.zeros_like(probs)
        topk_idx = probs.argsort()[-topk:][::-1]
        topk_vec[topk_idx] = 1

        pred_list.append(topk_vec)


pred_array = np.stack(pred_list, axis=0)  # shape: (num_test_images, num_concepts)

# Use column names from complete_bird_attributes
pred_df = pd.DataFrame(pred_array, columns=complete_bird_attributes.columns)
pred_df.insert(0, "image_path", [os.path.basename(f) for f in test_images])

print(pred_df.shape)  # (num_test_images, num_concepts + 1): image_path is the extra column


pred_df.to_csv("test_predictions.csv", index=False)

# Preview; kept as the last expression so the notebook renders it.
pred_df.head()


Found 4000 test images


Predicting test images with Top-2: 100%|██████████| 4000/4000 [04:04<00:00, 16.34it/s]


(4000, 313)


In [17]:
# Nearest-neighbour lookup: map each predicted concept vector to the class whose
# attribute vector is closest. k neighbours are retrieved, but only the nearest
# one is written to the output.
from sklearn.neighbors import NearestNeighbors
import pandas as pd
import numpy as np

# Drop image_path column for NN, only keep concept vectors
pred_concepts = pred_df.drop(columns=["image_path"])

# Distances are only meaningful when both matrices list the concepts in the
# same column order, so verify it instead of assuming it.
assert list(pred_concepts.columns) == list(complete_bird_attributes.columns), \
    "prediction columns are not aligned with complete_bird_attributes"

test_features = pred_concepts.values  # shape (num_test_images, num_concepts)

train_features = complete_bird_attributes.values  # one row per class
train_index = complete_bird_attributes.index.values  # class_key values

# Fit Nearest Neighbors model
k = 3
nn_model = NearestNeighbors(n_neighbors=k, metric='euclidean')  # can also use 'cosine'
nn_model.fit(train_features)


distances, indices = nn_model.kneighbors(test_features)  # both have shape (num_test_images, k)

# kneighbors returns neighbours ordered by increasing distance, so column 0 is
# the closest class.
nearest_class_keys = train_index[indices[:, 0]]  # shape (num_test_images,)
output_df = pd.DataFrame({
    "image_path": pred_df["image_path"].values,
    "class_key": nearest_class_keys
})

output_df.to_csv("test_nearest_neighbors.csv", index=False)

# Preview; kept as the last expression so the notebook renders it.
output_df.head()



In [14]:
# Display the index values of the attribute table (the class_key column it was
# loaded with) as an array.
complete_bird_attributes.index.values

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
       105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
       118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130,
       131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
       144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156,
       157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
       170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 18