## 1. Import Libraries

In [32]:
import os
import sys
import torch
import zipfile
import numpy as np # number python
from torch import nn # neural network (Conv2D - 2D Convolution Layer, Linear, Logistic)
from torch.nn import functional as F # activation function (ReLU, Sigmoid, SoftMax)
import torch.utils.data as td # tools to create & load dataset
import torchvision as tv # torchvision library for computer vision tasks
import pandas as pd # library for data analysis, manipulation -> data augmentation
from PIL import Image # library used for image handling
from matplotlib import pyplot as plt # plotting library to plot & visualize results

In [38]:
# Report the PyTorch build and pick the compute device used by the rest of the notebook.
print(f'PyTorch Version: {torch.__version__}')

# Prefer the GPU when a usable CUDA device is present, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

print(device)

# Only query the GPU name when CUDA is actually usable: calling
# torch.cuda.get_device_name() on a machine without CUDA raises instead of
# returning a placeholder, which would abort this cell on CPU-only setups.
if device == 'cuda':
    print(torch.cuda.get_device_name())

# Reading the attribute defensively replaces the former bare `except: pass`,
# which would also have hidden unrelated errors (e.g. KeyboardInterrupt).
cuda_version = getattr(torch.version, 'cuda', None)
print(f'CUDA Version: {cuda_version}')

PyTorch Version: 2.5.1+cu121
cuda
NVIDIA GeForce RTX 4070 Laptop GPU
CUDA Version: 12.1


## 2. Loading & Processing Dataset

### Dataset Location

In [31]:
# Root folder of the assignment data, given as an absolute path on drive E:.
# NOTE(review): this path is machine-specific; change it when running the
# notebook elsewhere. Presumably passed as `root_dir` to BirdDataset - confirm.
dataset_root_dir = 'E:/Year 3 Sem 1/COS30082 Applied Machine Learning/Assignment 1/'

### Data Processing 

In [None]:
class BirdDataset(td.Dataset):  # td.Dataset is the base dataset class from torch.utils.data
    """Image dataset described by a space-separated annotation text file.

    The annotation file ``<root_dir>/<mode>.txt`` is read without a header as
    two columns: ``file_path`` (image path relative to the images folder) and
    ``class`` (the label). Indexing the dataset yields ``(image_tensor, label)``.
    """

    def __init__(self, root_dir, mode="train", image_size=(224, 224)):
        """Read the annotation file and prepare the image transform.

        Args:
            root_dir: Folder that contains ``<mode>.txt`` and the ``Train``
                image folder.
            mode: Name of the annotation file (without ``.txt``) to load.
            image_size: Target size handed to ``Resize``. It is fixed at
                construction time; changing ``self.image_size`` afterwards
                does not affect the transform.
        """
        super().__init__()  # Keep the parent class initialisation intact.
        self.image_size = image_size
        self.mode = mode

        # Annotation file, e.g. <root_dir>/train.txt
        txt_path = os.path.join(root_dir, f'{mode}.txt')
        print("Text File:", txt_path)

        self.data = pd.read_csv(
            txt_path,
            header=None,  # The file has no header row.
            sep=" ",
            names=["file_path", "class"],
        )

        # NOTE(review): images are always read from "Train", whatever `mode`
        # is - confirm this is intended for non-training annotation files.
        self.images_dir = os.path.join(root_dir, "Train")

        # Build the pipeline once instead of on every __getitem__ call.
        self.transform = tv.transforms.Compose([
            # Resize the image to the requested size.
            tv.transforms.Resize(self.image_size),

            # Convert to a (C, H, W) float tensor scaled to [0, 1].
            tv.transforms.ToTensor(),

            # Per-channel (x - 0.5) / 0.5 maps [0, 1] onto [-1, 1].
            # 1st tuple -> mean for (R, G, B), 2nd tuple -> std for (R, G, B).
            tv.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

    def __len__(self):
        """Return the number of annotated samples.

        Deliberately silent: len() is called repeatedly by samplers and data
        loaders, so printing here would flood the output.
        """
        return len(self.data)

    def __repr__(self):
        """Return a short description of the configuration (mode, image size)."""
        return f"BirdDataset: (mode='{self.mode}', image_size={self.image_size})"

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image_tensor, label)``.

        Args:
            idx: Positional row index into the annotation table.

        Returns:
            A tuple of the transformed image tensor and the value stored in
            the ``class`` column for that row.
        """
        row = self.data.iloc[idx]
        img_path = os.path.join(self.images_dir, row['file_path'])

        # The context manager closes the file handle promptly. Converting to
        # RGB guarantees three channels, so grayscale / palette / RGBA files
        # do not break the 3-channel Normalize step.
        with Image.open(img_path) as img:
            rgb_img = img.convert("RGB")

        x = self.transform(rgb_img)

        # Class label taken from the annotation file.
        d = row['class']

        return x, d

    def number_of_classes(self):
        """Return the largest class label plus one.

        Assumes labels are integers starting at 0, so ``max + 1`` equals the
        number of classes - TODO confirm against the annotation files.
        """
        return self.data['class'].max() + 1
        

### View Modified Dataset