In [1]:
# imports
import os
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm

import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import torchvision.models as models
from torchvision import transforms


#Set GPU as device to use.
if torch.cuda.is_available():
    device = torch.device("cuda")  # NVIDIA GPUs
elif torch.backends.mps.is_available():
    device = torch.device("mps")   # Apple Silicon GPUs (M1/M2/M3)
else:
    device = torch.device("cpu")   # Fallback

In [None]:
### Data Loading ###

df_train = pd.read_csv("../aml-2025-feathers-in-focus/train_images.csv")
df_train.head()

complete_bird_attributes = pd.read_csv("complete_bird_attributes.csv", index_col='class_key')

In [None]:
### Checking data ###

num_concepts = complete_bird_attributes.shape[1]

# test
example_label = df_train["label"].iloc[0]
example_concepts = complete_bird_attributes.loc[example_label]
print(example_concepts)


0      0.000000
1      0.045745
2      0.038121
3      0.000000
4      0.815794
         ...   
307    0.186233
308    0.817718
309    0.010759
310    0.086076
311    0.236708
Name: 1, Length: 312, dtype: float64


In [None]:
### Bird attribute dataset dataloader ###

class BirdConceptDataset(Dataset):
    def __init__(self, csv_df, attributes_df, images_root):
        self.df = csv_df
        self.attributes = attributes_df
        self.images_root = images_root
        
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor()
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # Load label (1-indexed from CSV)
        label = self.df.iloc[idx]["label"]
        
        # Convert to 0-indexed for DataFrame lookup
        label_idx = label - 1
        
        # Load concept vector
        concept_vec = torch.tensor(self.attributes.loc[label_idx].values, dtype=torch.float32)
        
        # Build full image path
        img_rel_path = self.df.iloc[idx]["image_path"]
        img_path = os.path.join(self.images_root, os.path.basename(img_rel_path))

        # Load image using your method
        image = Image.open(img_path).convert("RGB")
        image = self.transform(image)

        return image, concept_vec

In [None]:
### Training image collection

train_images_dir = "../aml-2025-feathers-in-focus/train_images/train_images/"
train_images = [
    os.path.join(train_images_dir, f)
    for f in os.listdir(train_images_dir)
    if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp'))
]


In [None]:
### Training data setup ###

train_dataset = BirdConceptDataset(
    csv_df=df_train,
    attributes_df=complete_bird_attributes,
    images_root=train_images_dir
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [None]:
### ConceptNet model architecture : proven working model, computationally efficient

class ConceptNet(nn.Module):
    def __init__(self, num_outputs):
        super().__init__()
        
        self.backbone = models.resnet18(pretrained=False)
        self.backbone.fc = nn.Identity()   # remove classification head
        
        self.fc = nn.Sequential(
            nn.Linear(512, num_outputs),
            nn.Dropout(0.4)  
        )
        
    def forward(self, x):
        x = self.backbone(x)
        x = self.fc(x)
        return x


In [None]:
### Training configuration ###

model = ConceptNet(num_outputs=num_concepts)
model = model.to(device)

# Choose one - comment one out:
# criterion = nn.MSE()    #for binary targets # output is already sigmoid
criterion = nn.BCEWithLogitsLoss()  # Better for [0,1] targets

criterion = criterion.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)



In [None]:
### Model training loop ###

num_epochs = 100

model.train()
for epoch in range(num_epochs):
    total_loss = 0
    total_accurate = 0
    total_samples = 0

    for images, targets in train_loader:
        optimizer.zero_grad()

        images = images.to(device)
        targets = targets.to(device)
        preds = model(images)         # predictions (batch_size × num_concepts)
        loss = criterion(preds, targets)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        # Calculate accuracy as 1 - mean absolute error (closer to 1 = better predictions)
        probs = torch.sigmoid(preds)  # Convert logits to probabilities [0,1]
        mae = torch.abs(probs - targets).mean()  # Mean absolute error across all concepts
        accuracy_batch = 1.0 - mae.item()  # Convert error to accuracy (higher = better)
        total_accurate += accuracy_batch * images.size(0)  # Weight by batch size
        total_samples += images.size(0)

    avg_loss = total_loss / len(train_loader)
    avg_accuracy = total_accurate / total_samples

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}, Accuracy: {avg_accuracy:.4f}")

    
    model_save_name = "model1.pth"
    torch.save(model.state_dict(), model_save_name)

Epoch 1/100, Loss: 0.4804, Accuracy: 0.7483
Epoch 2/100, Loss: 0.4469, Accuracy: 0.7768
Epoch 3/100, Loss: 0.4460, Accuracy: 0.7775
Epoch 4/100, Loss: 0.4450, Accuracy: 0.7782
Epoch 5/100, Loss: 0.4434, Accuracy: 0.7792
Epoch 6/100, Loss: 0.4382, Accuracy: 0.7829
Epoch 7/100, Loss: 0.4311, Accuracy: 0.7878
Epoch 8/100, Loss: 0.4261, Accuracy: 0.7918
Epoch 9/100, Loss: 0.4222, Accuracy: 0.7948
Epoch 10/100, Loss: 0.4192, Accuracy: 0.7972
Epoch 11/100, Loss: 0.4160, Accuracy: 0.7999
Epoch 12/100, Loss: 0.4139, Accuracy: 0.8019
Epoch 13/100, Loss: 0.4118, Accuracy: 0.8037
Epoch 14/100, Loss: 0.4103, Accuracy: 0.8052
Epoch 15/100, Loss: 0.4087, Accuracy: 0.8067
Epoch 16/100, Loss: 0.4077, Accuracy: 0.8078
Epoch 17/100, Loss: 0.4066, Accuracy: 0.8090
Epoch 18/100, Loss: 0.4059, Accuracy: 0.8097
Epoch 19/100, Loss: 0.4056, Accuracy: 0.8101
Epoch 20/100, Loss: 0.4047, Accuracy: 0.8111
Epoch 21/100, Loss: 0.4042, Accuracy: 0.8116
Epoch 22/100, Loss: 0.4033, Accuracy: 0.8126
Epoch 23/100, Loss:

In [None]:
### Model inference on test images ###

model_load_name = "model_0_39.pth"
torch.load(model_load_name)

test_images_dir = "../aml-2025-feathers-in-focus/test_images/test_images/"

# Sort images to match test_images_path.csv order (reverse numerical: 999.jpg, 998.jpg, ....)
test_images = [
    os.path.join(test_images_dir, f)
    for f in sorted(os.listdir(test_images_dir), 
                   key=lambda x: int(x.split('.')[0]), reverse=True)  # Sort by number, reverse order
    if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp'))
]

print(f"Found {len(test_images)} test images")

img_size = 224
test_transform = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor()
])


# make sure model structure matches training
model = ConceptNet(num_outputs=complete_bird_attributes.shape[1])
model.load_state_dict(torch.load(model_load_name, map_location=device))
model = model.to(device)
model.eval()

pred_list = []

with torch.no_grad():
    for i, img_path in enumerate(tqdm(test_images, desc="Predicting test images")):
        img = Image.open(img_path).convert("RGB")
        x = test_transform(img).unsqueeze(0).to(device)

        logits = model(x)
        probs = torch.sigmoid(logits).cpu().numpy().squeeze() 

        # Every predicted concept above threshold becomes 1
        pred_vec = probs

        pred_list.append(pred_vec)


pred_array = np.stack(pred_list, axis=0)  # shape: (4000, 312)

# Use column names from complete_bird_attributes
pred_df = pd.DataFrame(pred_array, columns=complete_bird_attributes.columns)
pred_df.insert(0, "image_path", [os.path.basename(f) for f in test_images])

print(pred_df.shape)  # should be (4000, 312)

pred_df.head()
pred_df.to_csv("test_predictions.csv", index=False)

Found 4000 test images


Predicting test images: 100%|██████████| 4000/4000 [00:39<00:00, 101.51it/s]


(4000, 313)


In [None]:
### KNN with K=1 on the predicted concept vectors to find the nearest training image ###

from sklearn.neighbors import NearestNeighbors
import pandas as pd
import numpy as np

k = 1

# Drop image_path column for NN, only keep concept vectors
test_features = pred_df.drop(columns=["image_path"]).values  # shape (4000, 312)

# Make sure training attribute matrix is aligned (columns in same order)
train_features = complete_bird_attributes.values  # shape (num_birds, 312)
train_index = complete_bird_attributes.index.values  # class_key values

# Fit Nearest Neighbors model
nn_model = NearestNeighbors(n_neighbors=k, metric='euclidean')  # can use 'cosine' or 'euclidean'
nn_model.fit(train_features)


distances, indices = nn_model.kneighbors(test_features)  # indices shape (4000, k)

nearest_class_keys = train_index[indices[:, 0]]  # shape (4000,)
output_df = pd.DataFrame({
    "image_path": pred_df["image_path"].values,
    "class_key": nearest_class_keys
})

# Load ID mapping directly from test_images_path.csv
test_path_df = pd.read_csv("../aml-2025-feathers-in-focus/test_images_path.csv")

# Create mapping: filename -> correct_id (e.g., "999.jpg" -> 1)
id_mapping = dict(zip(
    test_path_df['image_path'].str.split('/').str[-1],  # Extract filename from path
    test_path_df['id']
))

id_mapping

# Map filenames to correct IDs
output_df['id'] = output_df['image_path'].map(id_mapping)
output_df = output_df.rename(columns={"class_key": "label"})
# output_df = output_df[['id', 'image_path', 'label']]  # Keep image_path for reference
output_df = output_df[['id', 'label']]  # Remove image_path


output_df.to_csv("test_nearest_neighbors.csv", index=False)
