### Dataset Description
The dataset contains bird images, divided into train and test splits. The images are inside test_images and train_images folders.

The labels of the training images are in the train_images.csv file. In this file, the first column is image_path and the second one is the label (1 - 200). The test_images_sample.csv file contains a row id with a dummy label. The final goal of the challenge is to replace the dummy values in the label column with the predicted labels.

The class_names.npy is a dictionary including the name of each label. Load the file using the following code: np.load("class_names.npy", allow_pickle=True).item()

The structure of the final submission must be exactly the same as that of test_images_sample.csv! Otherwise, the submission will fail.

Files

- train_images - the training images
- test_images - the test images
- test_images_sample.csv - a sample submission file in the correct format
- test_images_path.csv - path to test file images
- train_images.csv - supplemental information about the data: the path and label (1 - 200) of each training image
- class_names.npy - this file includes the name of each label
- attributes.npy - this file includes the attributes which are extra information for each class.
- attributes.txt - this file includes the attribute names which are extra information for each class.

In [1]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms, models
from PIL import Image

In [2]:
# Mount Google Drive into the Colab runtime so files stored there can be
# accessed through the local filesystem.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [3]:
# Project folder on the mounted Drive. The relative paths used further down
# (CSV files, image folders, saved checkpoint) are resolved against it.
base_data_path = '/content/drive/MyDrive/UvA Data Science/AMS_feathers_in_focus-main'

# Make the project folder the working directory and report where we ended up.
os.chdir(base_data_path)
current_wd = os.getcwd()
print(f"wd changed to: {current_wd}")

wd changed to: /content/drive/MyDrive/UvA Data Science/AMS_feathers_in_focus-main


**Easy to find parameters**

In [4]:
# --- Reproducibility ---
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

# --- Data ---
num_classes = 200   # labels in the CSV run from 1 to 200
val_split = 0.2     # fraction of the training data held out for validation
batch_size = 32

# --- Optimisation ---
num_epochs = 100
learning_rate = 1e-4
max_lr = 3e-3
weight_decay = 4e-4

# --- Hardware: use the GPU when one is visible, otherwise fall back to CPU ---
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cpu


In [5]:
class BirdDataset(Dataset):
    """Image-classification dataset indexed by a CSV file.

    Every CSV row provides an image path (relative to ``root_dir``) and a
    1-based class label. Indexing returns ``(image, label)`` where the label
    has been shifted to 0-based.

    Args:
        csv_file: Path or file-like object passed to ``pd.read_csv``.
        root_dir: Directory that the image paths are joined onto.
        img_col_idx: Positional index of the image-path column.
        label_col_idx: Positional index of the label column.
        transform: Optional callable applied to the loaded PIL image.
    """

    # Size of the black placeholder used when an image cannot be read.
    _FALLBACK_SIZE = (224, 224)

    def __init__(self, csv_file, root_dir, img_col_idx, label_col_idx, transform=None):
        self.data = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.img_col_idx = img_col_idx
        self.label_col_idx = label_col_idx
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.data)

    def _image_path(self, idx):
        """Return the on-disk path of the image referenced by row ``idx``."""
        filename = str(self.data.iloc[idx, self.img_col_idx])
        # Strip any mix of leading '/' and '\\' in one pass. A name that still
        # starts with a separator would be treated as absolute by
        # os.path.join, which would then discard root_dir.
        return os.path.join(self.root_dir, filename.lstrip('/\\'))

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        An unreadable or missing image is replaced by a black RGB placeholder
        (best effort) and a message is printed; the label is still returned.
        """
        img_path = self._image_path(idx)

        try:
            # The context manager closes the underlying file as soon as the
            # RGB copy has been produced.
            with Image.open(img_path) as img:
                image = img.convert('RGB')
        except OSError:  # includes FileNotFoundError
            print(f"Could not open {img_path}, using black image.")
            image = Image.new('RGB', self._FALLBACK_SIZE, (0, 0, 0))

        # Raw CSV is 1-200. We subtract 1 to get 0-199 for PyTorch.
        raw_label = int(self.data.iloc[idx, self.label_col_idx])
        label = raw_label - 1

        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [6]:
# Settings shared by both pipelines: target image size and the per-channel
# mean/std used for normalisation (these values match the commonly used
# ImageNet statistics).
_input_size = (224, 224)
_norm_mean = (0.485, 0.456, 0.406)
_norm_std = (0.229, 0.224, 0.225)

# 1. Training: resize, then random flip / affine augmentation, then tensor
#    conversion and normalisation.
_train_steps = [
    transforms.Resize(_input_size),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomAffine(degrees=15, translate=(0.1, 0.1), scale=(0.8, 1.2), shear=10),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
]
train_transform = transforms.Compose(_train_steps)

# 2. Validation: the same preprocessing without any random augmentation.
_val_steps = [
    transforms.Resize(_input_size),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
]
val_transform = transforms.Compose(_val_steps)

In [7]:
from sklearn.model_selection import StratifiedKFold

# Two views over the same CSV and image folder: one applies the augmenting
# training transform, the other the plain validation transform.
_train_csv, _train_dir = 'train_images.csv', 'train_images'
full_train_dataset = BirdDataset(_train_csv, _train_dir, 0, 1, transform=train_transform)
full_val_dataset = BirdDataset(_train_csv, _train_dir, 0, 1, transform=val_transform)

# 0-based labels, used only to stratify the split.
labels = full_train_dataset.data.iloc[:, 1].to_numpy() - 1

n_folds = int(1 / val_split)
skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed)

# Only the first fold is used: its held-out part becomes the validation set.
train_idx, val_idx = next(iter(skf.split(np.zeros(len(labels)), labels)))

# Index the matching view so each subset gets the right transform.
train_dataset = Subset(full_train_dataset, train_idx)
val_dataset = Subset(full_val_dataset, val_idx)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

n_train, n_val = len(train_dataset), len(val_dataset)
print(f"Training set: {n_train}")
print(f"Validation set: {n_val}")

Training set: 3140
Validation set: 786


In [8]:
# Test images use the deterministic validation transform. In this CSV the
# image path is read from column 1 and the label from column 2.
test_dataset = BirdDataset(
    csv_file='test_images_path.csv',
    root_dir='test_images',
    img_col_idx=1,
    label_col_idx=2,
    transform=val_transform,
)

# Keep the CSV order (no shuffling) while iterating over the test set.
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

n_test = len(test_dataset)
print(f"Test set: {n_test} images to predict.")

Test set: 4000 images to predict.


In [9]:
class VGG_style(nn.Module):
    """VGG-like CNN: five conv stages, global average pooling, MLP head.

    Each stage is two 3x3 conv + batch-norm + ReLU groups followed by a 2x2
    max-pool, so every stage halves the spatial resolution. The pooled
    512-channel feature vector is classified by a two-layer head with dropout.

    Args:
        num_classes: Number of output logits.
    """

    # Channel width entering the first stage and leaving each of the stages.
    _CHANNEL_PLAN = (3, 64, 128, 256, 512, 512)

    def __init__(self, num_classes):
        super().__init__()

        plan = self._CHANNEL_PLAN
        stages = [
            self._make_stage(c_in, c_out)
            for c_in, c_out in zip(plan[:-1], plan[1:])
        ]
        self.features = nn.Sequential(*stages)

        # Collapse whatever spatial size is left to 1x1.
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))

        head = [
            nn.Flatten(),
            nn.Dropout(0.5),
            nn.Linear(plan[-1], 2048),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(2048, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    @staticmethod
    def _make_stage(in_channels, out_channels):
        """Build one stage: (conv3x3 -> BN -> ReLU) twice, then a 2x2 max-pool."""
        layers = []
        width = in_channels
        for _ in range(2):
            layers.append(nn.Conv2d(width, out_channels, kernel_size=3, padding=1))
            layers.append(nn.BatchNorm2d(out_channels))
            layers.append(nn.ReLU(inplace=True))
            width = out_channels
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits for the image batch ``x``."""
        pooled = self.global_pool(self.features(x))
        return self.classifier(pooled)


In [10]:
# Build the network and move it to the selected device before creating the
# optimizer that holds references to its parameters.
model = VGG_style(num_classes=num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()

# NOTE(review): weight_decay and max_lr are defined with the other
# hyperparameters but are not passed to the optimizer or scheduler here.
# Confirm whether that is intentional.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# mode='max' because the scheduler is stepped with validation accuracy:
# the learning rate is multiplied by `factor` once the metric has stopped
# improving for more than `patience` epochs, never going below `min_lr`.
plateau_cfg = dict(mode='max', factor=0.6, patience=2, min_lr=1e-6)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, **plateau_cfg)

In [11]:
# Training / validation loop.
# Each epoch runs one pass over train_loader, evaluates on val_loader, steps
# the plateau scheduler with the validation accuracy, and saves the weights
# whenever the validation accuracy improves.
best_acc = 0.0

# Per-epoch history.
train_losses = []
val_losses = []
val_accuracies = []
lrs = []

print("Starting training...")

for epoch in range(num_epochs):
    # train
    model.train()
    running_loss = 0.0
    train_seen = 0

    # The batch labels are named `targets` so the module-level `labels` array
    # created for the stratified split is not overwritten.
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        # The criterion returns the batch mean. Weight it by the batch size so
        # a smaller final batch does not count as much as a full one.
        n_batch = targets.size(0)
        running_loss += loss.item() * n_batch
        train_seen += n_batch

    # max(..., 1) avoids a ZeroDivisionError if the loader is empty.
    avg_train_loss = running_loss / max(train_seen, 1)
    train_losses.append(avg_train_loss)

    # val
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, targets)

            n_batch = targets.size(0)
            val_loss += loss.item() * n_batch

            predicted = outputs.argmax(dim=1)
            total += n_batch
            correct += (predicted == targets).sum().item()

    avg_val_loss = val_loss / max(total, 1)
    val_losses.append(avg_val_loss)

    val_acc = 100.0 * correct / max(total, 1)
    val_accuracies.append(val_acc)

    scheduler.step(val_acc)

    # Read after the scheduler step, so this is the learning rate after any
    # reduction triggered by this epoch's validation accuracy.
    current_lr = optimizer.param_groups[0]['lr']
    lrs.append(current_lr)

    print(
        f"Epoch {epoch+1}/{num_epochs}, "
        f"Train Loss: {avg_train_loss:.3f}, "
        f"Val Loss: {avg_val_loss:.3f}, "
        f"Val Acc: {val_acc:.1f}%, "
        f"LR: {current_lr:.5f}"
    )

    # save best model
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), "vgg_style.pth")
        print(f"Saved new best model ({val_acc:.2f}%)")

print(f"Training complete | Best val acc: {best_acc:.2f}%")

Starting training...


KeyboardInterrupt: 