In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [2]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
import timm
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.cuda.amp import GradScaler, autocast


from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.cuda.amp import GradScaler, autocast

device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # checks if we're using cuda
print(f"Using device: {device}")

Using device: cpu


In [None]:
import numpy as np
attributes = np.load('data/attributes.npy')  # Shape:(200, n_attributes)

import torch
attributes_tensor = torch.tensor(attributes, dtype=torch.float32)


In [4]:
import timm
import torch.nn as nn

# pre-trained EfficientNet-B2
base_model = timm.create_model('efficientnet_b2', pretrained=True, num_classes=0)
base_model_output_dim = base_model.num_features  # Output dimension

attribute_dim = attributes_tensor.shape[1]

# The model
class CustomClassifier(nn.Module):
    def __init__(self, base_model, base_output_dim, attribute_dim, num_classes):
        super(CustomClassifier, self).__init__()
        self.base_model = base_model
        self.attribute_layer = nn.Linear(attribute_dim, 128)
        self.fc = nn.Linear(base_output_dim + 128, num_classes)

    def forward(self, x, attributes):
        # EfficientNet image features
        x = self.base_model(x)

        # attribute features
        attr_features = self.attribute_layer(attributes)

        # combine image features and attributes
        combined_features = torch.cat([x, attr_features], dim=1)

        # classify
        output = self.fc(combined_features)
        return output

num_classes = 200 # number of out classes
model = CustomClassifier(base_model, base_model_output_dim, attribute_dim, num_classes)
model = model.to(device)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/36.8M [00:00<?, ?B/s]

In [5]:
class BirdDatasetWithAttributes(Dataset):
    def __init__(self, dataframe, base_dir, attributes, transform=None):
        self.dataframe = dataframe
        self.base_dir = base_dir
        self.attributes = attributes
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_path = os.path.join(self.base_dir, self.dataframe.iloc[idx]['image_path'])
        label = self.dataframe.iloc[idx]['label'] - 1
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        # get label associated attributes
        attribute = self.attributes[label]
        return image, label, attribute


In [None]:
df = pd.read_csv("train_images.csv")
df['image_path'] = df['image_path'].map(lambda x: str(x)[1:])
# augmentation
train_transforms = transforms.Compose([
    transforms.Resize((260, 260)),           # input size
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomResizedCrop(260, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# validationset transformations, don't change too much
val_transforms = transforms.Compose([
    transforms.Resize((260, 260)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

train_df, val_df = train_test_split(df, test_size=0.2, stratify= df['label'], random_state=42)
train_dataset = BirdDatasetWithAttributes(train_df, "/content/drive/MyDrive/UvA IS/AML/train_images", attributes_tensor, transform=train_transforms)
val_dataset = BirdDatasetWithAttributes(val_df, "/content/drive/MyDrive/UvA IS/AML/train_images", attributes_tensor, transform=val_transforms)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)




In [7]:
criterion = nn.CrossEntropyLoss()

optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

scheduler = CosineAnnealingLR(optimizer, T_max=10)

In [8]:
def train_one_epoch(model, loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels, attributes in loader:
        images, labels, attributes = images.to(device), labels.to(device), attributes.to(device)

        optimizer.zero_grad()
        outputs = model(images, attributes)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    return running_loss / len(loader), 100. * correct / total

def validate(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels, attributes in loader:
            images, labels, attributes = images.to(device), labels.to(device), attributes.to(device)
            outputs = model(images, attributes)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    return running_loss / len(loader), 100. * correct / total


In [9]:
num_epochs = 15
best_val_acc = 0.0

for epoch in range(num_epochs):
    train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate(model, val_loader, criterion, device)

    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

    scheduler.step()

    # if step produces new best val_acc, save model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_efficientnet_with_attributes.pth")
        print("Saved Best Model!")



Epoch 1/15
Train Loss: 5.0326, Train Acc: 8.09%
Val Loss: 4.4108, Val Acc: 22.26%
Saved Best Model!
Epoch 2/15
Train Loss: 3.8731, Train Acc: 33.03%
Val Loss: 3.3571, Val Acc: 38.17%
Saved Best Model!
Epoch 3/15
Train Loss: 2.8609, Train Acc: 47.83%
Val Loss: 2.5233, Val Acc: 48.85%
Saved Best Model!
Epoch 4/15
Train Loss: 2.1489, Train Acc: 60.67%
Val Loss: 2.0876, Val Acc: 56.87%
Saved Best Model!
Epoch 5/15
Train Loss: 1.7085, Train Acc: 69.81%
Val Loss: 1.7827, Val Acc: 62.34%
Saved Best Model!
Epoch 6/15
Train Loss: 1.3954, Train Acc: 76.43%
Val Loss: 1.6590, Val Acc: 66.28%
Saved Best Model!
Epoch 7/15
Train Loss: 1.1944, Train Acc: 82.80%
Val Loss: 1.5323, Val Acc: 67.81%
Saved Best Model!
Epoch 8/15
Train Loss: 1.0957, Train Acc: 85.80%
Val Loss: 1.4781, Val Acc: 69.21%
Saved Best Model!
Epoch 9/15
Train Loss: 0.9926, Train Acc: 88.03%
Val Loss: 1.4515, Val Acc: 69.34%
Saved Best Model!
Epoch 10/15
Train Loss: 0.9781, Train Acc: 88.44%
Val Loss: 1.4528, Val Acc: 70.23%
Saved Be

In [60]:
#prediction with giving test data mean attribute
mean_attribute = torch.mean(attributes_tensor, dim=0)

class TestDatasetWithAttributes(Dataset):
    def __init__(self, dataframe, base_dir, mean_attribute, transform=None):
        self.dataframe = dataframe
        self.base_dir = base_dir
        self.mean_attribute = mean_attribute  # smart move right here
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_path = os.path.join(self.base_dir, self.dataframe.iloc[idx]['image_path'])
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        # use mean attribute
        attribute = self.mean_attribute
        return image, attribute

In [78]:
#prediction with two step predictions: first predict attributes, then predict label

class TestDatasetWithPredictedAttributes(Dataset):
    def __init__(self, dataframe, base_dir, transform=None): # this one doesn't have attributes
        self.dataframe = dataframe
        self.base_dir = base_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_path = os.path.join(self.base_dir, self.dataframe.iloc[idx]['image_path'])
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, idx

In [None]:
test_df = pd.read_csv("test_images_path.csv")
test_df['image_path'] = test_df['image_path'].map(lambda x: str(x)[1:])
test_dataset = TestDatasetWithPredictedAttributes(
    dataframe=test_df,
    base_dir="/content/drive/MyDrive/AML/test_images",
    transform=val_transforms
)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)



In [None]:
#two-step predictions
model.load_state_dict(torch.load("best_efficientnet_with_attributes.pth"))
model.eval()
predicted_attributes = []
predicted_indices = []

with torch.no_grad():
    for images, indices in test_loader:
        images = images.to(device)

        # send and get image features only
        outputs = model.base_model(images)
        _, predicted = outputs.max(1)

        # predict attributes based on image features
        batch_attributes = attributes_tensor[predicted.cpu()]  # Shape: [batch_size, attribute_dim]
        predicted_attributes.append(batch_attributes)
        predicted_indices.extend(indices.numpy())

# combine
predicted_attributes = torch.cat(predicted_attributes, dim=0)  # Shape: [num_test_samples, attribute_dim]


# predict label
predictions = []
with torch.no_grad():
    for images, _, indices in test_loader:
        images = images.to(device)
        attributes = predicted_attributes[indices]  # use predicted
        outputs = model(images, attributes)
        _, predicted = outputs.max(1)
        predictions.extend(predicted.cpu().numpy())

submission = pd.DataFrame({
    "id": predictions,
    "label": [p + 1 for p in predictions]
})
submission.to_csv("outputs/submission_0.csv", index=False)
print("Submission saved")


In [None]:
# predict with mean attribute
model.load_state_dict(torch.load("best_efficientnet_with_attributes.pth"))
model.eval()
predictions = []
row_ids = []

with torch.no_grad():
    for images, row_id_batch in test_loader:
        images = images.to(device)
        attributes = mean_attribute.to(device).unsqueeze(0).expand(images.size(0), -1)
        outputs = model(images, attributes)
        _, predicted = outputs.max(1)

        predictions.extend(predicted.cpu().numpy())
        row_ids.extend(row_id_batch)

submission = pd.DataFrame({
    "id": row_ids,
    "label": [p + 1 for p in predictions]
})
submission.to_csv('outputs/submission_0.csv', index=False)
print("Submission saved")

  model.load_state_dict(torch.load("best_efficientnet_with_attributes.pth"))


Submission saved
