## 1. Import Libraries

In [32]:
import os
import sys
import torch
import zipfile
import numpy as np # number python
from torch import nn # neural network (Conv2D - 2D Convolution Layer, Linear, Logistic)
from torch.nn import functional as F # activation function (ReLU, Sigmoid, SoftMax)
import torch.utils.data as td # tools to create & load dataset
import torchvision as tv # torchvision library for computer vision tasks
import pandas as pd # library for data analysis, manipulation -> data augmentation
from PIL import Image # library used for image handling
from matplotlib import pyplot as plt # plotting library to plot & visualize results

In [38]:
# Report the runtime environment and pick the compute device used by the rest
# of the notebook.
print(f'PyTorch Version: {torch.__version__}')

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

print(device)

# The device name can only be queried when a CUDA device is actually usable;
# calling get_device_name() on a CPU-only setup raises instead of returning.
if device == 'cuda':
    print(torch.cuda.get_device_name())

# Best-effort report of the CUDA toolkit version PyTorch was built against.
# Only a missing attribute is tolerated; anything else should surface.
try:
    print(f'CUDA Version: {torch.version.cuda}')
except AttributeError:
    pass

PyTorch Version: 2.5.1+cu121
cuda
NVIDIA GeForce RTX 4070 Laptop GPU
CUDA Version: 12.1


## 2. Loading & Processing Dataset

### Dataset Location

In [31]:
# Root folder of the assignment data (presumably passed as `root_dir` to
# BirdDataset - confirm against the cells that construct the datasets).
# The BIRD_DATASET_ROOT environment variable, when set to a non-empty value,
# overrides the default so the notebook can run on another machine without
# editing this cell.
dataset_root_dir = (
    os.environ.get('BIRD_DATASET_ROOT')
    or 'E:/Year 3 Sem 1/COS30082 Applied Machine Learning/Assignment 1/'
)

### Data Processing 

In [None]:
class BirdDataset(td.Dataset):  # td.Dataset is a class from td (torch.utils.data)
    """Image classification dataset backed by a space-separated annotation file.

    Each annotation line is read as ``<file_path> <class>``.

    * ``mode`` ``"train"`` / ``"val"``: rows come from ``<root_dir>/train.txt``
      and are split deterministically with ``split_seed``; images are loaded
      from ``<root_dir>/Train``.
    * any other ``mode`` (e.g. ``"test"``): rows come from
      ``<root_dir>/<mode>.txt``; images are loaded from ``<root_dir>/Test``
      when ``mode == "test"`` and from ``<root_dir>/Train`` otherwise.

    Args:
        root_dir: Folder containing the annotation files and image folders.
        mode: Which split to expose.
        image_size: Target size handed to ``torchvision.transforms.Resize``.
        val_ratio: Fraction of ``train.txt`` reserved for validation,
            in ``[0.0, 1.0)``.
        split_seed: Seed of the permutation used for the train/val split.

    Raises:
        ValueError: If ``mode`` is ``"train"``/``"val"`` and ``val_ratio`` is
            outside ``[0.0, 1.0)``.
    """

    def __init__(
        self,
        root_dir,
        mode: str = "train",
        image_size=(224, 224),
        val_ratio: float = 0.2,
        split_seed: int = 42,
    ):
        super().__init__()  # Ensure proper inheritance
        self.image_size = image_size
        self.mode = mode
        self.val_ratio = val_ratio
        self.split_seed = split_seed

        if mode in ("train", "val"):
            # Reject a bad ratio before touching the disk.
            if not (0.0 <= val_ratio < 1.0):
                raise ValueError("val_ratio must be in [0.0, 1.0)")

            # Train and val always share train.txt and are split by seed, so
            # two instances built with the same arguments never overlap.
            full_df = self._read_annotations(os.path.join(root_dir, "train.txt"))
            train_indices, val_indices = self._split_indices(
                len(full_df), val_ratio, split_seed
            )
            chosen = train_indices if mode == "train" else val_indices
            self.data = full_df.iloc[chosen].reset_index(drop=True)

            # Images live under Train for both train and val splits
            self.images_dir = os.path.join(root_dir, "Train")

            # Keep the labels of the *whole* file: a small split may not
            # contain the highest class index.
            self._all_labels = full_df["class"]
        else:
            # For other modes (e.g., "test"), fall back to <mode>.txt convention
            self.data = self._read_annotations(os.path.join(root_dir, f"{mode}.txt"))

            # Heuristic: use Test folder for test mode, Train otherwise
            self.images_dir = os.path.join(
                root_dir, "Test" if mode == "test" else "Train"
            )
            self._all_labels = self.data["class"]

    @staticmethod
    def _read_annotations(txt_path):
        """Load a header-less ``<file_path> <class>`` annotation file."""
        print("Text File:", txt_path)
        return pd.read_csv(
            txt_path,
            header=None,  # no header in the file
            sep=" ",
            names=["file_path", "class"],
        )

    @staticmethod
    def _split_indices(num_samples, val_ratio, split_seed):
        """Return ``(train_indices, val_indices)`` for a seeded random split."""
        if num_samples == 0 or val_ratio == 0.0:
            # No split needed (all samples go to train; val will be empty)
            return np.arange(num_samples), np.array([], dtype=int)

        rng = np.random.RandomState(split_seed)
        permuted_indices = rng.permutation(num_samples)
        train_cutoff = int(round((1.0 - val_ratio) * num_samples))
        return permuted_indices[:train_cutoff], permuted_indices[train_cutoff:]

    def _get_transform(self):
        """Return the preprocessing pipeline, rebuilding it only if needed.

        The pipeline is cached together with the ``image_size`` it was built
        for, so changing ``self.image_size`` later is still honoured.
        """
        cached = getattr(self, "_transform_cache", None)
        if cached is None or cached[0] != self.image_size:
            pipeline = tv.transforms.Compose(
                [
                    # Resize the Image
                    tv.transforms.Resize(self.image_size),
                    # Convert it to Tensor
                    tv.transforms.ToTensor(),
                    # Normalize to standard range (-1, 1)
                    tv.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                ]
            )
            cached = (self.image_size, pipeline)
            self._transform_cache = cached
        return cached[1]

    # Return No. of Data Images
    def __len__(self):  # __len__ is used by built-in len()
        # Deliberately silent: len() is evaluated many times by samplers and
        # data loaders, so printing here would flood the output.
        return len(self.data)

    # Return Init Configuration
    def __repr__(self):
        return (
            f"BirdDataset: (mode='{self.mode}', image_size={self.image_size}, "
            f"val_ratio={self.val_ratio}, seed={self.split_seed})"
        )

    # Preparing the Images
    def __getitem__(self, idx):
        """Return ``(image_tensor, class_label)`` for the sample at ``idx``."""
        row = self.data.iloc[idx]  # single row lookup for both path and label
        img_path = os.path.join(self.images_dir, row["file_path"])

        # Force three channels so grayscale / palette / RGBA files match the
        # 3-channel Normalize below, and close the file handle right away.
        with Image.open(img_path) as img:
            rgb_img = img.convert("RGB")

        x = self._get_transform()(rgb_img)

        return x, row["class"]

    def number_of_classes(self):
        """Return the number of classes as a plain ``int``.

        Assumes classes are labeled from 0..C-1, hence max + 1. For the
        train/val modes the count is taken over all of ``train.txt`` so both
        splits report the same value. Returns 0 when there are no labels.
        """
        labels = self._all_labels.dropna()
        if labels.empty:
            return 0
        return int(labels.max()) + 1
        

### View Modified Dataset