## Hyperparameter Tuning

The hyperparameters in deep learning to tune are the number of neurons, activation function, optimiser, learning rate, batch size, and epochs. 
The second step is to tune the number of layers

References:
- https://docs.ray.io/en/latest/tune/examples/includes/async_hyperband_example.html
- https://docs.ray.io/en/latest/tune/examples/tune-pytorch-cifar.html 
- https://docs.ray.io/en/latest/tune/examples/includes/mnist_pytorch.html

In [1]:
from typing import Dict, Any

import os
from filelock import FileLock
from tqdm import tqdm
import gdown
import zipfile

import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import random_split
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchvision.models import efficientnet_b5, EfficientNet_B5_Weights

from ray import train, tune
from ray.tune.schedulers import ASHAScheduler

print("Libraries imported. Using device:", "cuda" if torch.cuda.is_available() else "cpu")

Libraries imported. Using device: cpu


## Data Load

In [2]:
if not os.path.exists("../input/train_images_5_class"):
    os.makedirs("../input/train_images_5_class")
    output_path = "../input/train_images_5_class.zip"
    gdown.download(f"https://drive.google.com/uc?id=1EfxHr8S4llUwu8P36Frq3h6XSBzrsHvp", output_path, quiet=False)
    with zipfile.ZipFile(output_path, 'r') as zip_ref:
        zip_ref.extractall("../input")

In [9]:
def download_dataset(data_dir: str, zip_url: str, zip_filename: str, extract_dir: str) -> None:
    """
    Download and extract the dataset from Google Drive if it doesn't exist.

    Args:
        data_dir (str): Directory where the dataset zip file will be stored.
        zip_url (str): URL of the dataset zip file.
        zip_filename (str): Name for the downloaded zip file.
        extract_dir (str): Directory where the dataset will be extracted.
    """
    # Create the data directory if it doesn't exist.
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
        print(f"Created directory: {data_dir}")

    # Check if the dataset is already extracted.
    if not os.path.exists(extract_dir):
        zip_path = os.path.join(data_dir, zip_filename)
        print(f"Downloading dataset from {zip_url} to {zip_path}")
        gdown.download(zip_url, zip_path, quiet=False)
        print("Extracting dataset...")
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(data_dir)
        print(f"Extraction complete. Dataset available at {extract_dir}")
    else:
        print(f"Dataset already exists at {extract_dir}")

def get_data_loaders(
    data_dir: str,
    transform: transforms.Compose,
    train_split: float = 0.7,
    val_split: float = 0.15,
    batch_size: int = 32,
    random_seed: int = 42
) -> (DataLoader, DataLoader):
    """
    Create training and validation DataLoaders from an ImageFolder dataset.

    Args:
        data_dir (str): Root directory of the dataset.
        transform (transforms.Compose): Transformations to apply.
        train_split (float): Proportion for training.
        val_split (float): Proportion for validation.
        batch_size (int): Batch size.
        random_seed (int): Random seed for splitting.

    Returns:
        Tuple[DataLoader, DataLoader]: Training and validation DataLoader objects.
    """
    dataset = datasets.ImageFolder(root=data_dir, transform=transform)
    total_len = len(dataset)
    train_len = int(train_split * total_len)
    val_len = int(val_split * total_len)
    # Optionally, the remainder can be used for test if needed.
    # test_len = int(total_len - train_len - val_len)

    # Split the dataset
    train_dataset, val_dataset = random_split(
        dataset,
        [train_len, val_len],
        generator=torch.Generator().manual_seed(random_seed)
    )

    # Create DataLoaders.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    
    return train_loader, val_loader

In [10]:
DATA_DIR = "../input/train_images_5_class"
ZIP_URL = "https://drive.google.com/uc?id=1EfxHr8S4llUwu8P36Frq3h6XSBzrsHvp"
ZIP_FILENAME = "train_images_5_class.zip"
EXTRACT_DIR = DATA_DIR

weights = EfficientNet_B5_Weights.DEFAULT
transform = transforms.Compose([
    transforms.Resize((456, 456)),
    transforms.ToTensor(),
    weights.transforms()  # Normalization as expected by EfficientNetB5.
])

download_dataset(DATA_DIR, ZIP_URL, ZIP_FILENAME, EXTRACT_DIR)

train_loader, val_loader = get_data_loaders(
    data_dir=EXTRACT_DIR,
    transform=transform,
    train_split=0.7,
    val_split=0.15,
    batch_size=32,
    random_seed=42
)

print("Train and Validation DataLoaders are ready.")

Created directory: ../input/train_images_5_class
Dataset already exists at ../input/train_images_5_class


FileNotFoundError: Couldn't find any class folder in ../input/train_images_5_class.