In [1]:
#!pip install transformers datasets accelerate
#!pip install -q transformers datasets accelerate

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import numpy as np
import pandas as pd
from datasets import load_from_disk, DatasetDict, ClassLabel
from transformers import AutoFeatureExtractor, AutoModelForImageClassification
import os
import shutil

In [4]:
DATA_PATH = "processed_bird_data"

print("Loading data.")
try:
    dataset = load_from_disk(DATA_PATH)
except FileNotFoundError:
    print(f"Error: {DATA_PATH} not found.")
    raise

Loading data.


In [5]:
BATCH_SIZE = 32
EPOCHS = 30
LEARNING_RATE = 2e-4
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
elif torch.backends.mps.is_available():
    DEVICE = torch.device("mps")
else:
    DEVICE = torch.device("cpu")

print(f"Configuration Set:")
print(f"Device: {DEVICE}")
print(f"Batch Size: {BATCH_SIZE}")
print(f"Epochs: {EPOCHS}")
MODEL_NAME = "google/mobilenet_v2_1.0_224"
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME)

Configuration Set:
Device: mps
Batch Size: 32
Epochs: 30




In [6]:
TRAIN_DATA_PATH = "processed_bird_data" 
TEST_DATA_PATH = "processed_bird_test_data"

print(f"Loading dataset from: {TRAIN_DATA_PATH}")

try:
    dataset = load_from_disk(TRAIN_DATA_PATH)
    print("Success! Loaded DatasetDict.")
    print(dataset)
except Exception as e:
    print(f"Error loading data: {e}")
    print("Make sure you have run the data_prep.ipynb script locally first!")

Loading dataset from: processed_bird_data
Success! Loaded DatasetDict.
DatasetDict({
    train: Dataset({
        features: ['label', 'image'],
        num_rows: 3337
    })
    validation: Dataset({
        features: ['label', 'image'],
        num_rows: 589
    })
})


In [7]:
def transform(batch):
    inputs = feature_extractor([x for x in batch["image"]], return_tensors="pt")
    inputs["label"] = batch["label"]
    return inputs

dataset = dataset.with_transform(transform)


def collate_fn(examples):
    pixel_values = torch.stack([example["pixel_values"] for example in examples])
    labels = torch.tensor([example["label"] for example in examples])
    return {"pixel_values": pixel_values, "labels": labels}

train_loader = DataLoader(dataset["train"], batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(dataset["validation"], batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)

print(f"Train batches: {len(train_loader)}")
print(f"Val batches:   {len(val_loader)}")

Train batches: 105
Val batches:   19


In [9]:
print("Initializing Baseline Model (MobileNetV2).")
model = AutoModelForImageClassification.from_pretrained(
    MODEL_NAME,
    num_labels=200,
    ignore_mismatched_sizes=True
)
model.to(DEVICE)

optimizer = optim.AdamW(model.parameters(), lr=2e-4)
criterion = nn.CrossEntropyLoss()

print("Model ready.")

Initializing Baseline Model (MobileNetV2).


Some weights of MobileNetV2ForImageClassification were not initialized from the model checkpoint at google/mobilenet_v2_1.0_224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1001]) in the checkpoint and torch.Size([200]) in the model instantiated
- classifier.weight: found shape torch.Size([1001, 1280]) in the checkpoint and torch.Size([200, 1280]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model ready.


In [8]:
best_val_acc = 0.0
save_path = "baseline_best_model.pth"

print("Starting training loop.\n")

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0

    for i, batch in enumerate(train_loader):
        pixel_values = batch["pixel_values"].to(DEVICE)
        labels = batch["labels"].to(DEVICE)

        optimizer.zero_grad()

        # forward pass
        outputs = model(pixel_values=pixel_values)
        logits = outputs.logits

        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        # stats
        running_loss += loss.item()
        _, predicted = torch.max(logits, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

        if i % 20 == 0:
            print(f"[Epoch {epoch+1}] Batch {i}/{len(train_loader)} | loss={loss.item():.4f}")

    train_epoch_loss = running_loss / len(train_loader)
    train_epoch_acc = correct_train / total_train

    # val
    model.eval()
    val_running_loss = 0.0
    correct_val = 0
    total_val = 0

    with torch.no_grad():
        for batch in val_loader:
            pixel_values = batch["pixel_values"].to(DEVICE)
            labels = batch["labels"].to(DEVICE)

            outputs = model(pixel_values=pixel_values)
            loss = criterion(outputs.logits, labels)

            val_running_loss += loss.item()
            _, predicted = torch.max(outputs.logits, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    val_epoch_loss = val_running_loss / len(val_loader)
    val_epoch_acc = correct_val / total_val

    # final raport (per epoch)
    print(f"Train: loss={train_epoch_loss:.4f}, acc={train_epoch_acc:.4f}")
    print(f"Val:   loss={val_epoch_loss:.4f}, acc={val_epoch_acc:.4f}")

    # saving the best model
    if val_epoch_acc > best_val_acc:
        best_val_acc = val_epoch_acc
        torch.save(model.state_dict(), save_path)
        print("Best model saved!")

    print("-" * 30)

print(f"\nTraining complete. Best Validation Accuracy: {best_val_acc:.4f}")

Starting training loop.

[Epoch 1] Batch 0/105 | loss=5.3962
[Epoch 1] Batch 20/105 | loss=5.0772
[Epoch 1] Batch 40/105 | loss=4.5836
[Epoch 1] Batch 60/105 | loss=4.4015
[Epoch 1] Batch 80/105 | loss=3.9820
[Epoch 1] Batch 100/105 | loss=3.3678
Train: loss=4.4091, acc=0.1471
Val:   loss=3.4175, acc=0.2869
Best model saved!
------------------------------
[Epoch 2] Batch 0/105 | loss=2.5011
[Epoch 2] Batch 20/105 | loss=2.1296
[Epoch 2] Batch 40/105 | loss=2.5208
[Epoch 2] Batch 60/105 | loss=2.0957
[Epoch 2] Batch 80/105 | loss=1.7931
[Epoch 2] Batch 100/105 | loss=1.7414
Train: loss=2.2404, acc=0.5454
Val:   loss=2.4194, acc=0.4363
Best model saved!
------------------------------
[Epoch 3] Batch 0/105 | loss=1.3454
[Epoch 3] Batch 20/105 | loss=1.6203
[Epoch 3] Batch 40/105 | loss=1.0966
[Epoch 3] Batch 60/105 | loss=1.0457
[Epoch 3] Batch 80/105 | loss=1.4614
[Epoch 3] Batch 100/105 | loss=1.1165
Train: loss=1.2187, acc=0.7776
Val:   loss=2.1754, acc=0.4686
Best model saved!
-------

In [11]:
TEST_DATA_PATH = "processed_bird_test_data"
save_path = "baseline_best_model.pth"
all_preds = []

# Loading test data (this notebook hates me)
print(f"Loading test data from {TEST_DATA_PATH}.")
try:
    test_dataset_raw = load_from_disk(TEST_DATA_PATH)
    test_ds = test_dataset_raw["test"] if "test" in test_dataset_raw else test_dataset_raw
    
    test_ds = test_ds.with_transform(transform)
    test_loader = DataLoader(test_ds, batch_size=32, shuffle=False, collate_fn=collate_fn)
    print("Test loader ready.")
except Exception as e:
    print(f"Error loading test data: {e}")
    test_loader = None

if test_loader and os.path.exists(save_path):
    print(f"Loading weights from {save_path}.")
    # Loading model from above
    model.load_state_dict(torch.load(save_path, map_location=DEVICE))
    model.to(DEVICE)
    model.eval()

    print(f"Predicting classes.")
    with torch.no_grad():
        for i, batch in enumerate(test_loader):
            pixel_values = batch["pixel_values"].to(DEVICE)
            
            outputs = model(pixel_values=pixel_values)
            preds = torch.argmax(outputs.logits, dim=1).cpu().numpy()
            all_preds.extend(preds)
            
            if i % 20 == 0: 
                print(f"Processing batch {i}/{len(test_loader)}")

    print("\nGenerating CSV...")
    clean_ds = load_from_disk(TEST_DATA_PATH)
    clean_ds = clean_ds["test"] if "test" in clean_ds else clean_ds
    
    submission_ids = clean_ds["id"] if "id" in clean_ds.column_names else range(len(all_preds))
    
    submission_df = pd.DataFrame({
        "id": submission_ids,
        "label": all_preds
    })

    # Fixes:
    submission_df["label"] = submission_df["label"] + 1
    submission_df = submission_df.sort_values(by="id")
    
    csv_filename = "baseline_submission.csv"
    submission_df.to_csv(csv_filename, index=False)
    
    print(f"Saved {csv_filename} successfully!")
    print("First 5 rows:")
    print(submission_df.head())

else:
    print(f"Error: Could not find '{save_path}' or Test Data folder.")

Loading test data from processed_bird_test_data.
Test loader ready.
Loading weights from baseline_best_model.pth.
Predicting classes.
Processing batch 0/125
Processing batch 20/125
Processing batch 40/125
Processing batch 60/125
Processing batch 80/125
Processing batch 100/125
Processing batch 120/125

Generating CSV...
Saved baseline_submission.csv successfully!
First 5 rows:
   id  label
0   1     69
1   2     37
2   3     74
3   4     12
4   5     74
