In [7]:
import zipfile
import numpy as np
import pandas as pd
import os
import torch
import torchvision.transforms as transforms
from torchvision import models
from torchvision.models import resnet50, ResNet50_Weights
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torch import nn, optim

!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 /root/.kaggle/kaggle.json
!kaggle competitions download -c aml-2024-feather-in-focus
with zipfile.ZipFile("aml-2024-feather-in-focus.zip", "r") as zip_ref:
    zip_ref.extractall("data")
os.listdir("data")

mv: cannot stat 'kaggle.json': No such file or directory
aml-2024-feather-in-focus.zip: Skipping, found more recently modified local copy (use --force to force download)


['train_images',
 'attributes.txt',
 'test_images_sample.csv',
 'test_images',
 'attributes.npy',
 'train_images.csv',
 'test_images_path.csv',
 'class_names.npy']

In [14]:
# Load the attribute matrix (indexed by 0-based class label further down in
# the notebook) and the class-name mapping.
attributes = np.load("data/attributes.npy")
# NOTE(review): allow_pickle=True can execute arbitrary code when loading an
# untrusted file; only keep it for files you trust (here: the competition data).
class_names = np.load("data/class_names.npy", allow_pickle=True).item()

# Load training and test data
train_data = pd.read_csv("data/train_images.csv")
test_data = pd.read_csv("data/test_images_path.csv")

# Image preprocessing: fixed 224x224 resize, tensor conversion, then
# per-channel normalisation (presumably the ImageNet statistics expected by
# the pretrained torchvision backbone - confirm against the weights' docs).
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Attribute table as a float32 tensor. Use the GPU when one is present and
# fall back to the CPU otherwise instead of failing on a hard-coded 'cuda'.
attributes_tensor = torch.tensor(attributes, dtype=torch.float32).to(
    "cuda" if torch.cuda.is_available() else "cpu"
)

In [15]:
# Custom Dataset Class
class BirdDataset(Dataset):
    """Dataset yielding ``(image, label, attributes)`` triples for bird images.

    Args:
        dataframe: Table describing the samples. For training data column 0
            holds the image path and column 1 the 1-based class label; for
            test data column 0 holds the sample id and column 1 the image path.
        img_dir: Directory that the (relative) image paths are joined onto.
        attributes_tensor: 2-D tensor whose row ``k`` is the attribute vector
            of the 0-based class ``k``.
        transform: Optional callable applied to the loaded RGB PIL image.
        is_train: Selects the training or the test column layout.
    """

    def __init__(self, dataframe, img_dir, attributes_tensor, transform=None, is_train=True):
        self.dataframe = dataframe
        self.img_dir = img_dir
        self.attributes_tensor = attributes_tensor
        self.transform = transform
        self.is_train = is_train

    def __len__(self):
        """Return the number of rows in the underlying dataframe."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load the sample at positional index ``idx``.

        Returns:
            ``(image, label, attributes)``. In training mode ``label`` is the
            0-based class label and ``attributes`` the matching row of the
            attribute table. In test mode ``label`` is the sample id as a
            string and ``attributes`` is an all-zero placeholder vector.
        """
        if self.is_train:
            # For training data: image_path is in column 0, label in column 1
            raw_path = self.dataframe.iloc[idx, 0]
            label = self.dataframe.iloc[idx, 1] - 1  # Convert labels to 0-indexed
            attributes = self.attributes_tensor[label]
        else:
            # For testing data: id in column 0, image_path in column 1
            raw_path = self.dataframe.iloc[idx, 1]
            label = str(self.dataframe.iloc[idx, 0])  # Use ID as label for test data
            # Placeholder attributes. new_zeros keeps the dtype and device of
            # the attribute table, so train and test samples are consistent
            # (a bare torch.zeros would always be a default-dtype CPU tensor).
            attributes = self.attributes_tensor.new_zeros(self.attributes_tensor.size(1))

        # Strip leading slashes: os.path.join would otherwise treat the path
        # as absolute and silently discard img_dir.
        img_name = str(raw_path).lstrip('/')
        img_path = os.path.join(self.img_dir, img_name)

        image = Image.open(img_path).convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, label, attributes

In [27]:
# Model
class BirdClassifier(nn.Module):
    """ResNet-50 image features fused with an attribute vector for classification.

    The backbone's final layer is replaced by an identity, so it outputs raw
    features. Those are concatenated with ``attributes`` along dim 1 and fed
    through two linear layers to produce ``num_classes`` logits.

    Args:
        num_classes: Number of output logits.
        attribute_dim: Length of the attribute vector supplied per sample.
    """

    def __init__(self, num_classes=200, attribute_dim=312):
        super().__init__()

        # Pretrained backbone with its classification layer swapped for a no-op.
        backbone = resnet50(weights=ResNet50_Weights.DEFAULT)
        backbone.fc = nn.Identity()
        self.cnn = backbone

        # Classification head; dropout is there to limit overfitting.
        # 2048 is the backbone feature width this head assumes.
        fused_width = 2048 + attribute_dim
        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(fused_width, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x, attributes):
        """Return class logits for image batch ``x`` and matching ``attributes``."""
        image_features = self.cnn(x)
        # Fuse visual features and attributes side by side per sample.
        fused = torch.cat([image_features, attributes], dim=1)
        hidden = self.fc1(fused)
        hidden = self.dropout(hidden)
        hidden = torch.relu(hidden)
        return self.fc2(hidden)


# Initialize model on the GPU when available, otherwise on the CPU
# (a hard-coded 'cuda' fails outright on machines without a GPU).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BirdClassifier().to(device)

# Loss, optimizer and scheduler
criterion = nn.CrossEntropyLoss(label_smoothing=0.1) # Smooth target labels to improve generalisation
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# Multiply the learning rate by 0.1 every 5 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

# Data Loaders
train_dataset = BirdDataset(train_data, "data/train_images", attributes_tensor, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Use the same relative data directory as the training set instead of an
# absolute, Colab-only "/content/..." path, so the notebook runs from any
# working directory that contains the extracted "data" folder.
test_dataset = BirdDataset(test_data, "data/test_images", attributes_tensor, transform=transform, is_train=False)
# Keep test order fixed so predictions line up with the sample ids.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [28]:
# Training loop
num_epochs = 10
# Move batches to wherever the model's parameters live rather than assuming 'cuda'.
train_device = next(model.parameters()).device

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    # Batch variables carry a batch_ prefix so they do not overwrite the
    # notebook-level `attributes` array loaded earlier.
    # NOTE(review): BirdDataset looks the attribute vector up by the
    # ground-truth label, so the attributes fed here may reveal the target to
    # the model - confirm this is intended.
    for batch_images, batch_labels, batch_attributes in train_loader:
        batch_images = batch_images.to(train_device)
        batch_labels = batch_labels.to(train_device)
        batch_attributes = batch_attributes.to(train_device)

        # Forward pass
        outputs = model(batch_images, batch_attributes)
        loss = criterion(outputs, batch_labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    scheduler.step()  # Update learning rate once per epoch

    # Report the mean of the per-batch losses for this epoch.
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

Epoch [1/10], Loss: 5.0615
Epoch [2/10], Loss: 3.8903
Epoch [3/10], Loss: 2.8178
Epoch [4/10], Loss: 2.1762
Epoch [5/10], Loss: 1.7857
Epoch [6/10], Loss: 1.5274
Epoch [7/10], Loss: 1.4529
Epoch [8/10], Loss: 1.4056
Epoch [9/10], Loss: 1.3852
Epoch [10/10], Loss: 1.3464


In [29]:
# Two-pass inference: predict once with all-zero attributes, look up the
# attribute vector of that predicted class, then predict again with it.
# NOTE(review): during training the attributes come from the true label, so
# the zero-attribute first pass sees inputs unlike the training ones - verify
# that this procedure gives the accuracy you expect.
model.eval()
submission = []
# Follow the model's device instead of assuming CUDA is present.
device = next(model.parameters()).device
# Attribute width taken from the attribute table rather than a magic 312.
attribute_dim = attributes_tensor.size(1)

with torch.no_grad():
    # The dataset's dummy attributes for test samples are not needed here.
    for images, ids, _ in test_loader:
        images = images.to(device)

        # First, get the model's predictions without attributes
        blank_attributes = torch.zeros(images.size(0), attribute_dim, device=device)
        initial_outputs = model(images, blank_attributes)
        _, predicted_labels = torch.max(initial_outputs, 1)

        # Then, use these predicted labels to get the corresponding attributes
        predicted_attributes = attributes_tensor[predicted_labels].to(device)

        # Now, make the final prediction using these attributes
        outputs = model(images, predicted_attributes)
        _, final_predictions = torch.max(outputs, 1)

        # Convert ids and predictions to numpy arrays
        ids_np = ids.cpu().numpy() if isinstance(ids, torch.Tensor) else np.array(ids)
        preds_np = final_predictions.cpu().numpy()

        # Shift predictions back to the 1-based labels used in the CSV files.
        submission.extend(zip(ids_np, preds_np + 1))

# Create submission DataFrame
submission_df = pd.DataFrame(submission, columns=['id', 'label'])

# Save submission file
submission_df.to_csv('submission4.csv', index=False)
print("Submission file created successfully!")


Submission file created successfully!
