### Dataset Description
The dataset contains bird images, divided into train and test splits. The images are inside test_images and train_images folders.

The labels of the training images are inside train_images.csv file. In this file, the first column is image_path and the second one is the label (1 - 200). The test_images_samples.csv includes a row id with a dummy label. The final goal of the challenge is to change the label column to the predicted label.

The class_names.npy is a dictionary including the name of each label. Load the file using the following code: np.load("class_names.npy", allow_pickle=True).item()

The structure of the final submission should be exactly the same as the test_images_samples.csv! Otherwise, it will fail.

Files

- train_images - the training images
- test_images - the test images
- test_images_sample.csv - a sample submission file in the correct format
- test_images_path.csv - path to test file images
- train_images.csv - supplemental information about the data
- class_names.npy - this file includes the name of each label
- attributes.npy - this file includes the attributes which are extra information for each class.
- attributes.txt - this file includes the attribute names which are extra information for each class.

In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms, models
from PIL import Image

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
base_data_path = '/content/drive/MyDrive/UvA Data Science/AMS_feathers_in_focus-main'

#change gdrive wd
os.chdir(base_data_path)
print(f"wd changed to: {os.getcwd()}")

wd changed to: /content/drive/MyDrive/UvA Data Science/AMS_feathers_in_focus-main


**Easy to find parameters**

In [None]:
batch_size = 32
learning_rate = 0.0001
max_lr = 0.003
weight_decay = 0.0004
num_epochs = 100
num_classes = 200
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
val_split = 0.2
seed = 42

torch.manual_seed(seed)
np.random.seed(seed)
print(device)

cuda


### Setting up dataset and dataloaders
creating train and validation set (80:20)

In [None]:
class BirdDataset(Dataset):
    def __init__(self, csv_file, root_dir, img_col_idx, label_col_idx, transform=None):
        self.data = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.img_col_idx = img_col_idx
        self.label_col_idx = label_col_idx
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        filename = str(self.data.iloc[idx, self.img_col_idx])
        clean_filename = filename.lstrip('/').lstrip('\\')
        img_path = os.path.join(self.root_dir, clean_filename)

        try:
            image = Image.open(img_path).convert('RGB')
        except (FileNotFoundError, OSError):
            print(f"Could not open {img_path}, using black image.")
            image = Image.new('RGB', (224, 224), (0, 0, 0))

        # Raw CSV is 1-200. We subtract 1 to get 0-199 for PyTorch.
        raw_label = int(self.data.iloc[idx, self.label_col_idx])
        label = raw_label - 1

        # Transform
        if self.transform:
            image = self.transform(image)

        return image, label

In [None]:
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.02),
    transforms.RandomAffine(
        degrees=15,
        translate=(0.1, 0.1),
        scale=(0.8, 1.2),
        shear=10
    ),
    transforms.ToTensor(),
    transforms.Normalize(
        (0.485, 0.456, 0.406),
        (0.229, 0.224, 0.225)
    ),
    transforms.RandomErasing(p=0.15)
])

# 2. Validation
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

**Dataloaders**

In [None]:
from sklearn.model_selection import StratifiedKFold

full_train_dataset = BirdDataset('train_images.csv', 'train_images', 0, 1, transform=train_transform)
full_val_dataset   = BirdDataset('train_images.csv', 'train_images', 0, 1, transform=val_transform)

labels = full_train_dataset.data.iloc[:, 1].values - 1

skf = StratifiedKFold(
    n_splits=int(1 / val_split),
    shuffle=True,
    random_state=seed
)

# Take first fold
for train_idx, val_idx in skf.split(np.zeros(len(labels)), labels):
    break

train_dataset = Subset(full_train_dataset, train_idx)
val_dataset   = Subset(full_val_dataset,   val_idx)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader   = DataLoader(val_dataset,   batch_size=batch_size, shuffle=False)

print(f"Training set: {len(train_dataset)}")
print(f"Validation set: {len(val_dataset)}")

Training set: 3140
Validation set: 786


**Setting up test set**

In [1]:
test_dataset  = BirdDataset('test_images_path.csv', 'test_images', 1, 2, transform=val_transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

print(f"Test set: {len(test_dataset)} images to predict.")

NameError: name 'BirdDataset' is not defined

### Convolutional Neural Network

In [None]:
class Block(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()

        self.dw = nn.Sequential(
            nn.Conv2d(in_channels, in_channels, 3, padding=1, groups=in_channels, bias=False),
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True)
        )

        self.pw = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 1, bias=False),
            nn.BatchNorm2d(out_channels)
        )

        self.shortcut = nn.Conv2d(in_channels, out_channels, 1, bias=False) \
            if in_channels != out_channels else nn.Identity()

        self.activation = nn.ReLU(inplace=True)

    def forward(self, x):
        out = self.dw(x)
        out = self.pw(out)
        out = out + 0.5 * self.shortcut(x)
        return self.activation(out)


class mobile_vgg(nn.Module):
    def __init__(self, num_classes):
        super().__init__()

        self.stage1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            Block(32, 32),
            Block(32, 64)
        )
        self.down1 = nn.MaxPool2d(2)

        self.stage2 = nn.Sequential(
            Block(64, 64),
            Block(64, 128)
        )
        self.down2 = nn.MaxPool2d(2)

        self.stage3 = nn.Sequential(
            Block(128, 128),
            Block(128, 256)
        )
        self.down3 = nn.MaxPool2d(2)

        self.stage4 = nn.Sequential(
            Block(256, 256),
            Block(256, 512)
        )

        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        x = self.stage1(x)
        x = self.down1(x)
        x = self.stage2(x)
        x = self.down2(x)
        x = self.stage3(x)
        x = self.down3(x)
        x = self.stage4(x)
        x = self.pool(x)
        x = torch.flatten(x, 1)
        return self.fc(x)

In [None]:
model = mobile_vgg(num_classes=num_classes).to(device)

optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=1e-3,
    weight_decay=1e-4
)

scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=num_epochs,
    eta_min=1e-6
)

criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

In [None]:
best_acc = 0.0
train_losses = []
val_losses = []
val_accuracies = []
lrs = []

print("Starting training...")

for epoch in range(num_epochs):

    # warmup
    if epoch < 3:  # warmup <3
        warmup_lr = (epoch + 1) / 3 * 1e-3
        optimizer.param_groups[0]['lr'] = warmup_lr

    #training

    model.train()
    running_loss = 0.0

    for batch_idx, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_train_loss = running_loss / len(train_loader)
    train_losses.append(avg_train_loss)


    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    avg_val_loss = val_loss / len(val_loader)
    val_losses.append(avg_val_loss)

    val_acc = 100.0 * correct / total
    val_accuracies.append(val_acc)

    # after warmup, schedular steps in
    if epoch >= 3:
        scheduler.step()

    current_lr = optimizer.param_groups[0]['lr']
    lrs.append(current_lr)

    print(
        f"Epoch {epoch+1}/{num_epochs},"
        f"Train loss: {avg_train_loss:.3f}, "
        f"Val loss: {avg_val_loss:.3f}, "
        f"Val acc: {val_acc:.1f}%, "
        f"LR: {current_lr:.6f}"
    )

    # Save best model
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), "mobile_vgg.pth")
        print(f"Saved new best model ({val_acc:.2f}%)")

print(f"Training complete, best val acc: {best_acc:.2f}%")

Starting training...
Epoch 1/100 | Train loss: 5.271 | Val loss: 5.186 | Val acc: 1.7% | LR: 0.000333
 --> Saved new best model (1.65%)
Epoch 2/100 | Train loss: 5.123 | Val loss: 5.095 | Val acc: 3.2% | LR: 0.000667
 --> Saved new best model (3.18%)
Epoch 3/100 | Train loss: 4.998 | Val loss: 5.054 | Val acc: 3.6% | LR: 0.001000
 --> Saved new best model (3.56%)
Epoch 4/100 | Train loss: 4.901 | Val loss: 4.992 | Val acc: 4.6% | LR: 0.001000
 --> Saved new best model (4.58%)
Epoch 5/100 | Train loss: 4.794 | Val loss: 4.970 | Val acc: 4.2% | LR: 0.000999
Epoch 6/100 | Train loss: 4.694 | Val loss: 4.921 | Val acc: 4.3% | LR: 0.000998
Epoch 7/100 | Train loss: 4.600 | Val loss: 4.936 | Val acc: 4.7% | LR: 0.000996
 --> Saved new best model (4.71%)
Epoch 8/100 | Train loss: 4.481 | Val loss: 4.918 | Val acc: 5.1% | LR: 0.000994
 --> Saved new best model (5.09%)
Epoch 9/100 | Train loss: 4.364 | Val loss: 4.721 | Val acc: 7.4% | LR: 0.000991
 --> Saved new best model (7.38%)
Epoch 10/100

KeyboardInterrupt: 

0

In [None]:
model = FeatherNet(num_classes=num_classes).to(device)
model.load_state_dict(torch.load("feathernet.pth", map_location=device))
model.eval()
print("Model loaded!")

# --- Predict ---
predictions = []

with torch.no_grad():
    for inputs, _ in test_loader:
        inputs = inputs.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        predictions.extend((preds.cpu().numpy() + 1))


# --- Save CSV ---
submission = pd.DataFrame({
    "id": range(1, len(predictions) + 1),
    "label": predictions
})

submission.to_csv("submissions.csv", index=False)
print("Saved submissions.csv!")

Model loaded!
Saved submissions.csv!
