# Hypertune CIFAR-10

In [None]:
# Where the CIFAR10 data is stored and where Ray Tune writes its results
# (both relative to the current working directory).
DATADIR = "data/raw/cifar10/"
TUNEDIR = "hypertune"
from loguru import logger
import ray


In [None]:
# from torchvision import datasets, transforms
# from loguru import logger
from pathlib import Path

# Make sure the dataset directory and the tuning-results directory exist
data_dir = Path(DATADIR).resolve()
tune_dir = Path(TUNEDIR).resolve()
for _directory in (data_dir, tune_dir):
    if _directory.exists():
        continue
    _directory.mkdir(parents=True)
    logger.info(f"Created {_directory}")

# # Create transformer to convert images to tensors
# transformer = transforms.Compose([transforms.ToTensor()])

# # Download CIFAR10 dataset
# train_dataset = datasets.CIFAR10(root=data_dir, train=True, download=True, transform=transformer)
# test_dataset = datasets.CIFAR10(root=data_dir, train=False, download=True, transform=transformer)

# logger.info(
#     f"Dataset is now available:\n"
#     f"TRAIN: {train_dataset}\n"
#     f"TEST: {test_dataset}"
# )

In [None]:
# train_dataset.classes

In [None]:
# from torch.utils.data import DataLoader

# # Create data loaders for training and testing
# train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
# test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# # Inspect the shape of a batch of training data
# for images, labels in train_loader:
#     logger.info(f"Image batch dimensions: {images.shape}")
#     logger.info(f"Image label dimensions: {labels.shape}")
#     break

In [None]:
def get_data_loaders(batch_size, data_dir, num_workers=4):
    """
    Download CIFAR10 (if not already present) and build train/test data loaders.

    Args:
        batch_size (int): Number of samples per batch, used for both loaders.
        data_dir (str | Path): Directory the dataset is downloaded to / read from.
        num_workers (int): Worker processes per DataLoader. Defaults to 4, the
            value that used to be hard-coded.

    Returns:
        tuple: (train_loader, test_loader). Only the training loader shuffles.
    """
    from pathlib import Path

    from filelock import FileLock
    from torch.utils.data import DataLoader
    from torchvision import datasets, transforms

    # Accept plain strings as well as Path objects
    data_dir = Path(data_dir)

    # Create transformer to convert images to tensors
    transformer = transforms.Compose([transforms.ToTensor()])

    # Hold a file lock while touching the dataset on disk so that concurrent
    # callers sharing data_dir do not download/extract at the same time.
    with FileLock(data_dir / ".lock"):
        train_dataset = datasets.CIFAR10(root=data_dir, train=True, download=True, transform=transformer)
        test_dataset = datasets.CIFAR10(root=data_dir, train=False, download=True, transform=transformer)

    # Building the loaders does not touch the download, so it happens outside the lock.
    # The requested batch_size is honoured (it was previously ignored in favour of 64).
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_loader, test_loader

In [None]:
import torch
from pathlib import Path

# Settings for the fully connected SimpleNN experiments
config = {
    # Fixed parameters
    "epochs": 5,
    "data_dir": Path(DATADIR).resolve(),
    "batch_size": 64,
    "input_size": 3,  # number of image channels (RGB)
    "output_size": 10,  # CIFAR-10 has 10 classes (was 20, leaving 10 unused logits)
    "hidden_size": 128,
    "dropout": 0,
    "num_layers": 5,
    "learning_rate": 0.001,
    "loss_fn": torch.nn.CrossEntropyLoss(), # suitable for multi-class classification
    "optimizer": torch.optim.Adam,  # optimizer class; instantiated with the model parameters later
    # "scheduler": torch.optim.lr_scheduler.LRScheduler,
    "scheduler": torch.optim.lr_scheduler.ReduceLROnPlateau,
    "metrics": "accuracy",
    "device": "cuda" if torch.cuda.is_available() else "cpu",
}

In [None]:
# Setup for simple neural network with learning curve plotting

import torch.nn as nn
from torchsummary import summary
import matplotlib.pyplot as plt

class SimpleNN(nn.Module):
    """
    A simple feedforward (fully connected) neural network for 32x32 image classification.

    Args:
        input_size (int): Number of input channels (e.g., 3 for RGB images).
        hidden_size (int): Number of units in each hidden layer.
        output_size (int): Number of output classes.
        dropout (float): Dropout probability applied after every hidden layer.
        num_layers (int): Number of hidden layers (the first one included).
        use_residual (bool): If True, every hidden layer after the first is
            wrapped in a ResidualBlock (skip connection).
        use_batchnorm (bool): If True, a BatchNorm1d follows every hidden
            linear layer.

    Attributes:
        train_losses, val_losses, train_accuracies, val_accuracies (list):
            Per-epoch history filled by the training loop and read by
            plot_learning_curve().

    Methods:
        forward(x): Forward pass through the network.
        summary(): Prints a summary of the network architecture.
        plot_learning_curve(): Plots the learning curve (loss and accuracy).
    """
    def __init__(self, input_size, hidden_size, output_size, dropout, num_layers, use_residual=False, use_batchnorm=False):
        super(SimpleNN, self).__init__()
        self.input_size = input_size
        self.use_residual = use_residual
        self.use_batchnorm = use_batchnorm
        # For learning curve
        self.train_losses = []
        self.val_losses = []
        self.train_accuracies = []
        self.val_accuracies = []

        layers = []
        # Flatten input image
        layers.append(nn.Flatten())
        # First linear layer from input to hidden (images are input_size x 32 x 32)
        layers.append(nn.Linear(input_size * 32 * 32, hidden_size))
        if self.use_batchnorm:
            layers.append(nn.BatchNorm1d(hidden_size))
        layers.append(nn.ReLU())
        layers.append(nn.Dropout(dropout))

        # Additional hidden layers with optional residual and batchnorm
        for _ in range(num_layers - 1):
            block = [nn.Linear(hidden_size, hidden_size)]
            if self.use_batchnorm:
                block.append(nn.BatchNorm1d(hidden_size))
            block.append(nn.ReLU())
            block.append(nn.Dropout(dropout))
            if self.use_residual:
                # Residual block as a custom nn.Module (resolved at call time,
                # so ResidualBlock may be defined later in the file)
                block = [ResidualBlock(hidden_size, block)]
            layers.extend(block)

        # Output layer
        layers.append(nn.Linear(hidden_size, output_size))
        self.network = nn.Sequential(*layers)


    def forward(self, x):
        """
        Forward pass of the network.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, input_size, 32, 32).

        Returns:
            torch.Tensor: Output logits.
        """
        return self.network(x)

    def summary(self):
        """
        Prints a summary of the network architecture using torchsummary.
        """
        # torchsummary defaults to device="cuda" and then feeds a CUDA tensor
        # whenever CUDA is available, which fails for a model still on the CPU.
        # Tell it where the weights actually live.
        device = "cuda" if next(self.parameters()).is_cuda else "cpu"
        summary(self.network, (self.input_size, 32, 32), device=device)

    def plot_learning_curve(self):
        """
        Plots the learning curve (loss on the left axis, accuracy on the right axis).
        """
        epochs = range(1, len(self.train_losses) + 1)
        fig, ax1 = plt.subplots()
        ax1.plot(epochs, self.train_losses, 'b-', label='Train Loss')
        ax1.plot(epochs, self.val_losses, 'r-', label='Val Loss')
        ax1.set_xlabel('Epoch')
        ax1.set_ylabel('Loss')
        ax1.legend(loc='upper left')
        # Second y-axis sharing the same x-axis for the accuracy curves
        ax2 = ax1.twinx()
        ax2.plot(epochs, self.train_accuracies, 'b--', label='Train Acc')
        ax2.plot(epochs, self.val_accuracies, 'r--', label='Val Acc')
        ax2.set_ylabel('Accuracy')
        ax2.legend(loc='upper right')
        plt.title('Learning Curve')
        plt.show()

class ResidualBlock(nn.Module):
    """
    Skip-connection wrapper: returns the input plus the output of the wrapped layers.

    Args:
        hidden_size (int): Feature width; only stored on the instance.
        block_layers (list): Modules that are chained into one nn.Sequential.
    """

    def __init__(self, hidden_size, block_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.block = nn.Sequential(*block_layers)

    def forward(self, x):
        """Apply the wrapped layers to x and add the result back onto x."""
        transformed = self.block(x)
        return x + transformed


# Constructor arguments shared by all three dense-network variants
_nn_kwargs = {
    key: config[key]
    for key in ("input_size", "hidden_size", "output_size", "dropout", "num_layers")
}

# Plain network, residual variant and batch-norm variant
model_NN = SimpleNN(**_nn_kwargs)
model_NN_with_residual = SimpleNN(**_nn_kwargs, use_residual=True)
model_NN_with_batchnorm = SimpleNN(**_nn_kwargs, use_batchnorm=True)

# Show a summary of each model architecture
for _model in (model_NN, model_NN_with_residual, model_NN_with_batchnorm):
    _model.summary()

In [None]:
from tqdm import tqdm

def train_one_epoch(model, train_loader, optimizer, loss_fn, device):
    """
    Train ``model`` for one pass over ``train_loader``.

    Args:
        model (torch.nn.Module): Model to optimise; switched to train mode.
        train_loader (Iterable): Yields ``(images, labels)`` batches.
        optimizer (torch.optim.Optimizer): Optimizer bound to the model parameters.
        loss_fn (Callable): Maps ``(outputs, labels)`` to a scalar loss tensor.
        device (str | torch.device): Device the batches are moved to.

    Returns:
        tuple: ``(avg_loss, accuracy)`` where ``avg_loss`` is the mean of the
        per-batch losses and ``accuracy`` is a percentage in [0, 100].

    Raises:
        ValueError: If ``train_loader`` yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    # Count batches while iterating so loaders without __len__ work too
    num_batches = 0
    for images, labels in tqdm(train_loader, desc="Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # Predicted class = index of the largest logit; detach() replaces the
        # discouraged `.data` access
        predicted = outputs.detach().argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        num_batches += 1
    if total == 0:
        # Previously surfaced as an opaque ZeroDivisionError
        raise ValueError("train_loader yielded no samples; cannot compute epoch metrics")
    avg_loss = running_loss / num_batches
    accuracy = 100 * correct / total
    return avg_loss, accuracy

def evaluate(model, test_loader, loss_fn, device):
    """
    Evaluate ``model`` on ``test_loader`` without computing gradients.

    Args:
        model (torch.nn.Module): Model to evaluate; switched to eval mode.
        test_loader (Iterable): Yields ``(images, labels)`` batches.
        loss_fn (Callable): Maps ``(outputs, labels)`` to a scalar loss tensor.
        device (str | torch.device): Device the batches are moved to.

    Returns:
        tuple: ``(avg_loss, accuracy)`` where ``avg_loss`` is the mean of the
        per-batch losses and ``accuracy`` is a percentage in [0, 100].

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    # Count batches while iterating so loaders without __len__ work too
    num_batches = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            test_loss += loss_fn(outputs, labels).item()
            # Predicted class = index of the largest logit
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1
    if total == 0:
        # Previously surfaced as an opaque ZeroDivisionError
        raise ValueError("test_loader yielded no samples; cannot compute evaluation metrics")
    avg_loss = test_loss / num_batches
    accuracy = 100 * correct / total
    return avg_loss, accuracy

def train_and_evaluate(model, config, logger):
    """
    Train ``model`` for ``config["epochs"]`` epochs, evaluating after each one.

    The per-epoch losses and accuracies are appended to the model's history
    lists (``train_losses``, ``val_losses``, ``train_accuracies``,
    ``val_accuracies``) and logged through ``logger``.

    Args:
        model: Network exposing the four history lists named above.
        config (dict): Reads "batch_size", "data_dir", "device", "optimizer",
            "learning_rate", "loss_fn" and "epochs".
        logger: Object with an ``info(str)`` method.

    Returns:
        tuple: (last train loss, last validation loss, last validation accuracy).
    """
    loaders = get_data_loaders(config["batch_size"], config["data_dir"])
    train_loader, test_loader = loaders

    device = config["device"]
    model.to(device)

    # config["optimizer"] is an optimizer class, instantiated here
    make_optimizer = config["optimizer"]
    optimizer = make_optimizer(model.parameters(), lr=config["learning_rate"])
    loss_fn = config["loss_fn"]
    num_epochs = config["epochs"]

    epoch = 0
    while epoch < num_epochs:
        train_loss, train_accuracy = train_one_epoch(model, train_loader, optimizer, loss_fn, device)
        val_loss, val_accuracy = evaluate(model, test_loader, loss_fn, device)

        # Record this epoch in the model's learning-curve history
        for history, value in (
            (model.train_losses, train_loss),
            (model.val_losses, val_loss),
            (model.train_accuracies, train_accuracy),
            (model.val_accuracies, val_accuracy),
        ):
            history.append(value)

        logger.info(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Test Loss: {val_loss:.4f}, Train Acc: {train_accuracy:.2f}%, Test Acc: {val_accuracy:.2f}%")
        epoch += 1

    final_metrics = (model.train_losses[-1], model.val_losses[-1], model.val_accuracies[-1])
    return final_metrics


In [None]:
# train_and_evaluate(model_NN, config, logger)
# model_NN.plot_learning_curve()

In [None]:
# train_and_evaluate(model_NN_with_residual, config, logger)
# model_NN_with_residual.plot_learning_curve()

In [None]:
# train_and_evaluate(model_NN_with_batchnorm, config, logger)

In [None]:
# from mltrainer import Trainer, ReportTypes, TrainerSettings

# model_NN2 = SimpleNN(
#     input_size=config["input_size"],
#     hidden_size=config["hidden_size"],
#     output_size=config["output_size"],
#     dropout=config["dropout"],
#     num_layers=config["num_layers"]
# )

# model_NN2.to(device)

# trainer = Trainer(
#     model=model_NN2,
#     settings=TrainerSettings(
#         epochs=config["epochs"],
#         metrics=[config["metrics"]],
#         logdir=Path("./logs"),
#         train_steps=len(train_loader),
#         valid_steps=len(test_loader),
#         reporttypes=[ReportTypes.TOML],
#         scheduler_kwargs={"patience": 5},
#         earlystop_kwargs={"patience": 5},
#     ),
#     loss_fn=config["loss_fn"],
#     optimizer=torch.optim.Adam,
#     traindataloader=train_loader,
#     validdataloader=test_loader,
#     scheduler=config["scheduler"],
#     device=device,
# )
# trainer.loop()

In [None]:
# Settings for the SimpleCNN experiments
config2 = {
    # Fixed parameters
    "epochs": 5,
    "data_dir": Path(DATADIR).resolve(),
    "batch_size": 64,
    "input_size": 3,  # number of image channels (RGB)
    "output_size": 10,  # CIFAR-10 has 10 classes (was 20, leaving 10 unused logits)
    "hidden_size": 128,
    "dropout": 0,
    "num_fully_connected_layers": 2,
    "learning_rate": 0.001,
    "loss_fn": torch.nn.CrossEntropyLoss(), # suitable for multi-class classification
    "optimizer": torch.optim.Adam,  # optimizer class; instantiated with the model parameters later
    # "scheduler": torch.optim.lr_scheduler.LRScheduler,
    "scheduler": torch.optim.lr_scheduler.ReduceLROnPlateau,
    "metrics": "accuracy",
    "device": "cuda" if torch.cuda.is_available() else "cpu",

    # convolutional layer parameters
    "num_conv_layers": 3,
    "filters": 64,
    "kernel_size": 3,
    "stride": 1,
    "padding": 1,
}

In [None]:
# Setup for simple neural network
import torch.nn as nn
from torchsummary import summary

class SimpleCNN(nn.Module):
    """
    A simple Convolutional Neural Network (CNN) for 32x32 image classification.

    Each convolutional stage is Conv2d -> ReLU -> MaxPool2d(2); the flattened
    features then pass through one or more fully connected layers.

    Args:
        input_size (int): Number of input channels (e.g., 3 for RGB images).
        hidden_size (int): Number of units in each fully connected hidden layer.
        output_size (int): Number of output classes.
        dropout (float): Dropout probability applied after each fully connected hidden layer.
        num_conv_layers (int): Number of convolutional stages.
        filters (int): Number of filters in each convolutional layer.
        kernel_size (int): Size of the convolutional kernels.
        stride (int): Stride for the convolutional layers.
        padding (int): Padding for the convolutional layers.
        num_fully_connected_layers (int): Number of fully connected hidden layers.

    Attributes:
        train_losses, val_losses, train_accuracies, val_accuracies (list):
            Per-epoch history filled by the training loop and read by
            plot_learning_curve().

    Methods:
        forward(x): Forward pass through the network.
        summary(): Prints a summary of the network architecture.
        plot_learning_curve(): Plots the training/validation loss and accuracy curves.
    """
    def __init__(self, input_size, hidden_size, output_size, dropout, num_conv_layers, filters, kernel_size, stride, padding, num_fully_connected_layers):
        super(SimpleCNN, self).__init__()
        self.input_size = input_size
        # For learning curve
        self.train_losses = []
        self.val_losses = []
        self.train_accuracies = []
        self.val_accuracies = []

        layers = []
        in_channels = input_size

        # Add convolutional stages; only the first one sees the raw image channels
        for _ in range(num_conv_layers):
            layers.append(nn.Conv2d(in_channels, filters, kernel_size, stride, padding))
            layers.append(nn.ReLU())
            layers.append(nn.MaxPool2d(2))
            in_channels = filters

        layers.append(nn.Flatten())

        # Dynamically compute feature size after conv/pool layers by pushing a
        # dummy 32x32 image through them. This raises if the spatial size
        # shrinks below the kernel size for the chosen settings.
        with torch.no_grad():
            dummy = torch.zeros(1, input_size, 32, 32)
            for layer in layers:
                dummy = layer(dummy)
            feature_size = dummy.shape[1]

        # Add the first fully connected layer
        layers.append(nn.Linear(feature_size, hidden_size))
        layers.append(nn.ReLU())
        layers.append(nn.Dropout(dropout))

        # Add additional fully connected layers if num_fully_connected_layers > 1
        for _ in range(num_fully_connected_layers - 1):
            layers.append(nn.Linear(hidden_size, hidden_size))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))

        # Output layer
        layers.append(nn.Linear(hidden_size, output_size))

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """
        Forward pass of the network.
        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, input_size, 32, 32).
        Returns:
            torch.Tensor: Output logits.
        """
        return self.network(x)

    def summary(self):
        """
        Prints a summary of the network architecture using torchsummary.
        """
        # torchsummary defaults to device="cuda" and then feeds a CUDA tensor
        # whenever CUDA is available, which fails for a model still on the CPU.
        # Tell it where the weights actually live.
        device = "cuda" if next(self.parameters()).is_cuda else "cpu"
        summary(self.network, (self.input_size, 32, 32), device=device)

    def plot_learning_curve(self):
        """
        Plots the learning curve (loss on the left axis, accuracy on the right axis).
        """
        import matplotlib.pyplot as plt
        epochs = range(1, len(self.train_losses) + 1)
        fig, ax1 = plt.subplots()
        ax1.plot(epochs, self.train_losses, 'b-', label='Train Loss')
        ax1.plot(epochs, self.val_losses, 'r-', label='Val Loss')
        ax1.set_xlabel('Epoch')
        ax1.set_ylabel('Loss')
        ax1.legend(loc='upper left')
        # Second y-axis sharing the same x-axis for the accuracy curves
        ax2 = ax1.twinx()
        ax2.plot(epochs, self.train_accuracies, 'b--', label='Train Acc')
        ax2.plot(epochs, self.val_accuracies, 'r--', label='Val Acc')
        ax2.set_ylabel('Accuracy')
        ax2.legend(loc='upper right')
        plt.title('Learning Curve')
        plt.show()

# Build the CNN from the settings in config2
model_CNN = SimpleCNN(
    input_size=config2["input_size"],
    hidden_size=config2["hidden_size"],
    output_size=config2["output_size"],
    dropout=config2["dropout"],
    num_conv_layers=config2["num_conv_layers"],
    filters=config2["filters"],
    kernel_size=config2["kernel_size"],
    stride=config2["stride"],
    padding=config2["padding"],
    num_fully_connected_layers=config2["num_fully_connected_layers"],
)

# Show a summary of the model architecture
# (the stray `model_CNN.summary` without parentheses was a no-op reference and is removed)
model_CNN.summary()

In [None]:
# train_and_evaluate(model_CNN, config2, logger)
# model_CNN.plot_learning_curve()

In [None]:
# from mltrainer import Trainer, ReportTypes, TrainerSettings, metrics
# model_CNN = SimpleNN(
#     input_size=config2["input_size"],
#     hidden_size=config2["hidden_size"],
#     output_size=config2["output_size"],
#     dropout=config2["dropout"],
#     num_layers=config2["num_layers"]
# )

# model_CNN.to(config2["device"])

# trainer = Trainer(
#     model=model_CNN,
#     settings=TrainerSettings(
#         epochs=config2["epochs"],
#         metrics=[
#             metrics.Accuracy()
#         ],
#         logdir=Path("./logs"),
#         train_steps=len(train_loader),
#         valid_steps=len(test_loader),
#         reporttypes=[ReportTypes.TOML],
#         scheduler_kwargs={"patience": 5},
#         earlystop_kwargs={"patience": 5},
#     ),
#     loss_fn=config2["loss_fn"],
#     optimizer=torch.optim.Adam,
#     traindataloader=train_loader,
#     validdataloader=test_loader,
#     scheduler=config2["scheduler"],
#     device=config2["device"],
# )
# trainer.loop()

In [None]:
from ray import tune

def train_and_evaluate_ray(config: dict):
    """
    Trainable for Ray Tune: build a SimpleCNN from ``config``, train it and
    report the metrics of every epoch via ``ray.train.report``.

    Args:
        config (dict): One sampled configuration. Reads the SimpleCNN
            constructor arguments plus "batch_size", "data_dir", "device",
            "optimizer", "learning_rate", "loss_fn" and "epochs".

    Returns:
        tuple: (last train loss, last validation loss, last validation accuracy).
    """
    cnn_keys = (
        "input_size",
        "hidden_size",
        "output_size",
        "dropout",
        "num_conv_layers",
        "filters",
        "kernel_size",
        "stride",
        "padding",
        "num_fully_connected_layers",
    )
    model = SimpleCNN(**{key: config[key] for key in cnn_keys})

    loaders = get_data_loaders(config["batch_size"], config["data_dir"])
    train_loader, test_loader = loaders

    device = config["device"]
    model.to(device)

    # config["optimizer"] is an optimizer class, instantiated here
    make_optimizer = config["optimizer"]
    optimizer = make_optimizer(model.parameters(), lr=config["learning_rate"])
    loss_fn = config["loss_fn"]
    num_epochs = config["epochs"]

    for _ in range(num_epochs):
        train_loss, train_accuracy = train_one_epoch(model, train_loader, optimizer, loss_fn, device)
        val_loss, val_accuracy = evaluate(model, test_loader, loss_fn, device)

        # Keep the history on the model as well
        model.train_losses.append(train_loss)
        model.val_losses.append(val_loss)
        model.train_accuracies.append(train_accuracy)
        model.val_accuracies.append(val_accuracy)

        # One report per epoch; each report counts as one training iteration in Tune
        epoch_metrics = {
            "train_loss": train_loss,
            "val_loss": val_loss,
            "train_accuracy": train_accuracy,
            "val_accuracy": val_accuracy
        }
        ray.train.report(epoch_metrics)

    # NOTE(review): Ray Tune documents a number or a dict as the return value of
    # a function trainable - confirm this tuple is acceptable for the installed
    # Ray version (trials may be stopped by `stop=` before reaching this line).
    final_metrics = (model.train_losses[-1], model.val_losses[-1], model.val_accuracies[-1])
    return final_metrics

# Grid search over the network structure (depth of conv / fully connected parts,
# number of filters, kernel size); everything else stays fixed.
# NOTE(review): with padding=0, kernel_size=3 and num_conv_layers=4 the feature
# map shrinks 32 -> 30 -> 15 -> 13 -> 6 -> 4 -> 2, which is smaller than the
# kernel of the 4th convolution, so those trials are expected to fail while
# building the model. Consider padding=1 or dropping that combination.
config_structure = {

    # Fixed parameters
    "epochs": 3,
    "data_dir": Path(DATADIR).resolve(),
    "tune_dir": Path(TUNEDIR).resolve(),
    "batch_size": 64,
    "input_size": 3,
    "output_size": 10,  # CIFAR-10 has 10 classes (was 20, leaving 10 unused logits)
    "hidden_size": 350,
    "dropout": 0,
    "learning_rate": 0.001,
    "loss_fn": torch.nn.CrossEntropyLoss(), # suitable for multi-class classification
    "optimizer": torch.optim.Adam,
    # "scheduler": torch.optim.lr_scheduler.LRScheduler,
    "scheduler": torch.optim.lr_scheduler.ReduceLROnPlateau,
    "metrics": "accuracy",
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "stride": 1,
    "padding": 0,

    # Tuned parameters (full grid: 2 * 3 * 3 * 2 = 36 trials)
    "num_fully_connected_layers": tune.grid_search([2,4]),
    "num_conv_layers": tune.grid_search([2,3,4]),
    "filters": tune.grid_search([64,128,152]),
    "kernel_size": tune.grid_search([2,3]),
}

analysis = tune.run(
    train_and_evaluate_ray,
    config=config_structure,
    name="cnn_hyperparameter_gridsearch",
    metric="val_loss",
    mode="min",
    storage_path=str(config_structure["tune_dir"]),  # ensure path is string
    # One report per epoch, so this stops each trial after all epochs
    stop={"training_iteration": config_structure["epochs"]},
    verbose=1,
    # "AUTO" resumes an existing experiment of this name and otherwise starts a
    # new one; resume=True can fail on a first run in some Ray versions.
    resume="AUTO",
)

In [None]:
from plotly import graph_objects as go

def plot_contour(df, x, y, z, start=None, end=None, size=None, colorscale="plasma", colorbar_title=None):
    """
    Build a Plotly contour plot of column ``z`` over columns ``x`` and ``y``,
    with the individual data points overlaid as black markers.

    Args:
        df (pandas.DataFrame): Table holding the three columns.
        x (str): Column name for the x-axis.
        y (str): Column name for the y-axis.
        z (str): Column name for the contoured value.
        start (float, optional): Lowest contour level. Defaults to the minimum of ``z``.
        end (float, optional): Highest contour level. Defaults to the maximum of ``z``.
        size (float, optional): Step between contour levels. Defaults to 1/20 of
            the range, or 0.01 when the range is empty.
        colorscale (str): Plotly colorscale name for the contour.
        colorbar_title (str, optional): Title of the colorbar. Defaults to ``z``.

    Returns:
        plotly.graph_objects.Figure: The assembled figure (not shown yet).
    """
    # Set defaults for contour range if not provided
    z_min, z_max = df[z].min(), df[z].max()
    if start is None:
        start = z_min
    if end is None:
        end = z_max
    if size is None:
        size = (end - start) / 20 if end > start else 0.01
    if colorbar_title is None:
        colorbar_title = z

    fig = go.Figure()

    fig.add_trace(
        go.Contour(
            z=df[z],
            x=df[x],
            y=df[y],
            contours=dict(
                coloring='heatmap',
                showlabels=True,  # show labels on contours
                start=start,       # start of the contour range
                end=end,          # end of the contour range
                size=size,
            ),
            # Honour the caller's choices (both were previously hard-coded)
            colorscale=colorscale,
            colorbar=dict(
                title=colorbar_title
            )
        )
    )

    fig.add_trace(
        go.Scatter(
            x=df[x],
            y=df[y],
            mode='markers',
            marker=dict(
                color='black',
                size=8,
                symbol='circle'
            ),
            # customdata carries z so the hover text can show it for each point
            customdata=df[z],
            hovertemplate=(
                f'{x}: %{{x}}<br>'
                f'{y}: %{{y}}<br>'
                f'{z}: %{{customdata:.4f}}<extra></extra>'
            ),
            name='Data Points'
        )
    )

    fig.update_layout(
        title=f"Contour Plot of {z} by {x} and {y}",
        xaxis_title=x,
        yaxis_title=y,
        xaxis=dict(showgrid=False),
        yaxis=dict(showgrid=False),
        plot_bgcolor='white',
        paper_bgcolor='white'
    )

    return fig

In [None]:
# One row per trial from the tuning run above; info() lists the available columns
results_df = analysis.results_df
results_df.info()  

In [None]:
# Validation accuracy as a function of kernel size and number of filters
img_kernels = plot_contour(
    analysis.results_df,
    x="config/kernel_size",
    y="config/filters",
    z="val_accuracy",
)
img_kernels.show()

In [None]:
# Validation accuracy as a function of the number of conv and fully connected layers
img_layers = plot_contour(
    analysis.results_df,
    x="config/num_conv_layers",
    y="config/num_fully_connected_layers",
    z="val_accuracy",
)
img_layers.show()

In [None]:
# Validation accuracy as a function of hidden size and number of fully connected layers
# NOTE(review): hidden_size is fixed at 350 in config_structure, so the x-axis
# has a single value here - confirm this plot is still meaningful.
img_fc_units = plot_contour(
    analysis.results_df,
    x="config/hidden_size",
    y="config/num_fully_connected_layers",
    z="val_accuracy",
)
img_fc_units.show()

In [None]:
# Trials ordered from highest to lowest validation accuracy
results_df.sort_values("val_accuracy", ascending=False)

In [None]:
from ray.tune.schedulers import AsyncHyperBandScheduler

# Random search over layer widths combined with early stopping (ASHA)
config_hyperband = {
    # Fixed parameters
    "epochs": 10,
    "data_dir": Path(DATADIR).resolve(),
    "tune_dir": Path(TUNEDIR).resolve(),
    "batch_size": 64,
    "input_size": 3,
    "output_size": 10,  # CIFAR-10 has 10 classes (was 20, leaving 10 unused logits)
    "hidden_size": tune.randint(254, 512),
    "dropout": 0,
    "num_fully_connected_layers": 2,
    "learning_rate": 0.001,
    "loss_fn": torch.nn.CrossEntropyLoss(), # suitable for multi-class classification
    "optimizer": torch.optim.Adam,
    "scheduler": torch.optim.lr_scheduler.ReduceLROnPlateau,
    "metrics": "accuracy",
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    # convolutional layer parameters
    "num_conv_layers": tune.grid_search([3]),
    "filters": tune.randint(100, 200),
    # tune.randint excludes the upper bound: randint(2, 3) could only ever
    # return 2, so use 4 to sample kernel sizes 2 and 3.
    "kernel_size": tune.randint(2, 4),
    "stride": 1,
    "padding": tune.grid_search([0, 1]),  # typical options for padding
}

# Create an AsyncHyperBandScheduler for efficient hyperparameter search
scheduler_hyperband = AsyncHyperBandScheduler(
    time_attr="training_iteration",  # attribute that tracks training progress
    grace_period=3,                  # minimum number of iterations before stopping trials
    reduction_factor=3,              # controls how aggressively to cut underperforming trials
    max_t=config_hyperband["epochs"] # maximum number of training iterations
)

analysis = tune.run(
    train_and_evaluate_ray,
    config=config_hyperband,
    # A fixed experiment name gives resuming a stable directory to look in
    name="cnn_hyperparameter_hyperband",
    metric="val_loss",
    mode="min",
    # Read the settings from this experiment's own config. They were taken from
    # config_structure, whose 3 epochs equal the grace period, so the scheduler
    # could never stop a trial early.
    storage_path=str(config_hyperband["tune_dir"]),  # ensure path is string
    num_samples=20,  # repeated for every grid_search combination (2 padding values)
    stop={"training_iteration": config_hyperband["epochs"]},
    verbose=1,
    scheduler=scheduler_hyperband,
    # "AUTO" resumes an existing experiment of this name and otherwise starts a
    # new one; resume=True can fail on a first run in some Ray versions.
    resume="AUTO",
)


In [None]:
# Hyperparameter configuration (a dict) of the best trial; no metric/mode is
# passed here, so the ones given to tune.run (val_loss / min) presumably apply
best = analysis.get_best_config()
best

In [None]:
# `best` is the winning configuration dict, which has no plot_learning_curve()
# method (the original call raised AttributeError). Plot the per-epoch metrics
# that Ray Tune recorded for the best trial instead.
best_history = analysis.best_dataframe
best_epochs = best_history["training_iteration"]

fig, ax_loss = plt.subplots()
ax_loss.plot(best_epochs, best_history["train_loss"], 'b-', label='Train Loss')
ax_loss.plot(best_epochs, best_history["val_loss"], 'r-', label='Val Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend(loc='upper left')

# Second y-axis sharing the same x-axis for the accuracy curves
ax_acc = ax_loss.twinx()
ax_acc.plot(best_epochs, best_history["train_accuracy"], 'b--', label='Train Acc')
ax_acc.plot(best_epochs, best_history["val_accuracy"], 'r--', label='Val Acc')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend(loc='upper right')

plt.title('Learning Curve (best trial)')
plt.show()