# Hyperparameter tuning

## Dataset

In [None]:
# Use the helpers below to run your model (which you will adapt step by step later on).
# The code below provides everything needed to train a model. Loss and accuracy are returned after each epoch.

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import tqdm


def load_mnist_data(root_path='./data', batch_size=4):
    """
    Build the MNIST training and test data loaders.

    The dataset is downloaded into ``root_path`` when it is not already there.
    Point ``root_path`` at an existing copy if you want to save a little bit
    of disk space and download time.

    Args:
        root_path: Directory in which the MNIST files are stored.
        batch_size: Number of samples per batch, used for both loaders.

    Returns:
        A ``(trainloader, testloader)`` tuple. Only the training loader
        shuffles its samples.
    """
    # Normalize takes one mean/std entry per channel; MNIST has a single channel.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])

    # Both splits must honour root_path, otherwise the parameter has no effect.
    trainset = torchvision.datasets.MNIST(root=root_path, train=True, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

    testset = torchvision.datasets.MNIST(root=root_path, train=False, download=True, transform=transform)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

    return trainloader, testloader

## Training loops

In [None]:
def train_model(model, optimizer, loss_fn, dataloader):
    """
    Train ``model`` for one pass over ``dataloader``.

    Each batch is flattened to ``(batch, features)`` before it is fed to the
    model, so the model must accept 2-D input.

    Args:
        model: The network to train; it is moved to CUDA when available.
        optimizer: Optimizer that updates the parameters of ``model``.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar loss.
        dataloader: Sized iterable yielding ``(imgs, targets)`` batches.

    Returns:
        ``(avg_loss, avg_acc)`` where ``avg_loss`` is the mean of the per-batch
        losses (a float) and ``avg_acc`` is the fraction of all samples that
        were classified correctly (a 0-dim float32 tensor).
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    model = model.to(device)
    model.train()

    running_loss = 0.0
    n_correct = 0
    n_samples = 0
    for imgs, targets in dataloader:
        imgs, targets = imgs.to(device=device), targets.to(device=device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(imgs.reshape(imgs.shape[0], -1))

        loss = loss_fn(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Count correct predictions per sample rather than averaging batch
        # accuracies: a smaller final batch would otherwise be over-weighted.
        predictions = torch.max(outputs.detach(), dim=1).indices
        n_correct += (predictions == targets).sum().item()
        n_samples += targets.shape[0]

    avg_loss = running_loss / len(dataloader)
    avg_acc = torch.tensor(n_correct / n_samples, dtype=torch.float32)

    return avg_loss, avg_acc


def eval_model(model, loss_fn, dataloader):
    """
    Evaluate ``model`` on every batch of ``dataloader`` without updating it.

    Each batch is flattened to ``(batch, features)`` before it is fed to the
    model, so the model must accept 2-D input.

    Args:
        model: The network to evaluate; it is moved to CUDA when available
            and switched to evaluation mode.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar loss.
        dataloader: Sized iterable yielding ``(imgs, targets)`` batches.

    Returns:
        ``(avg_loss, avg_acc)`` where ``avg_loss`` is the mean of the per-batch
        losses (a float) and ``avg_acc`` is the fraction of all samples that
        were classified correctly (a 0-dim float32 tensor).
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    model = model.to(device)
    model.eval()

    running_loss = 0.0
    n_correct = 0
    n_samples = 0
    with torch.no_grad():
        for imgs, targets in dataloader:
            imgs, targets = imgs.to(device=device), targets.to(device=device)

            # forward pass only: no gradients are computed during evaluation
            outputs = model(imgs.reshape(imgs.shape[0], -1))

            loss = loss_fn(outputs, targets)
            running_loss += loss.item()

            # Count correct predictions per sample rather than averaging batch
            # accuracies: a smaller final batch would otherwise be over-weighted.
            predictions = torch.max(outputs, dim=1).indices
            n_correct += (predictions == targets).sum().item()
            n_samples += targets.shape[0]

    avg_loss = running_loss / len(dataloader)
    avg_acc = torch.tensor(n_correct / n_samples, dtype=torch.float32)

    return avg_loss, avg_acc


def operate(model, optimizer, loss_fn, train_dataloader, test_dataloader, epochs):
    """
    Alternate one training pass and one evaluation pass for ``epochs`` epochs.

    Args:
        model: The network to train and evaluate.
        optimizer: Optimizer that updates the parameters of ``model``.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar loss.
        train_dataloader: Batches used by ``train_model``.
        test_dataloader: Batches used by ``eval_model``.
        epochs: Number of train/evaluate rounds to run.

    Returns:
        Four 1-D tensors with one entry per epoch, in this order: training
        losses, training accuracies, evaluation losses, evaluation accuracies.
    """
    t_losses, t_accs = [], []
    e_losses, e_accs = [], []
    for _ in range(epochs):
        t_avg_loss, t_avg_acc = train_model(
            model, optimizer, loss_fn, train_dataloader
        )
        t_losses.append(t_avg_loss)
        # Store the epoch accuracy itself (as a plain float), not the list.
        t_accs.append(float(t_avg_acc))

        e_avg_loss, e_avg_acc = eval_model(
            model, loss_fn, test_dataloader
        )
        e_losses.append(e_avg_loss)
        e_accs.append(float(e_avg_acc))

    return torch.as_tensor(t_losses), torch.as_tensor(t_accs), torch.as_tensor(e_losses), torch.as_tensor(e_accs)

## Define your model

In [None]:
# Use two parameters to create your model:
# 1) the number of hidden layers
# 2) the number of neurons per hidden layer

# We tune these two parameters with Optuna.

In [None]:
from torch.nn import Linear

def build_model(trial):
    """
    Build a fully connected classifier whose shape is chosen by Optuna.

    Two hyperparameters are sampled from ``trial``: the number of hidden
    layers (1 to 3) and the number of neurons per hidden layer (16 to 256).
    Every hidden layer is a ``Linear`` followed by a ``ReLU``; the final
    ``Linear`` emits one raw score (logit) per digit class.

    Args:
        trial: Optuna trial (or any object with a compatible ``suggest_int``).

    Returns:
        An ``nn.Sequential`` mapping ``(batch, 784)`` inputs to ``(batch, 10)``
        outputs.
    """
    n_layers = trial.suggest_int("n_layers", 1, 3)
    n_neurons_per_layer = trial.suggest_int("n_neurons_per_layer", 16, 256)

    # The training loop flattens each 28x28 MNIST image into 784 features.
    in_features = 28 * 28
    n_classes = 10

    layers = []
    for _ in range(n_layers):
        layers.append(Linear(in_features, n_neurons_per_layer))
        layers.append(nn.ReLU())
        in_features = n_neurons_per_layer
    layers.append(Linear(in_features, n_classes))

    return nn.Sequential(*layers)

## Define your objective with optuna hyperparameter tuning

In [None]:
# Find the best hyperparameters for:
# 1) the number of hidden layers
# 2) the number of neurons per hidden layer
# 3) the batch size
# 4) the learning rate

# Module-level settings for the Optuna objective.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# Cross-entropy on raw logits: the standard loss for multi-class classification.
loss_fn = nn.CrossEntropyLoss()
# Number of train/evaluate rounds per trial.
EPOCHS = 10

In [None]:
import optuna
from torch.optim import Adam
import tqdm


def objective(trial):
    """
    Optuna objective: train one sampled configuration and score it.

    Besides the architecture sampled inside ``build_model``, this samples the
    learning rate (log-uniform in [1e-4, 1e-1]) and the batch size (one of
    32, 64, 128). The model is then trained for ``EPOCHS`` epochs and
    evaluated on the test loader after every epoch.

    Args:
        trial: The Optuna trial that supplies the hyperparameter values.

    Returns:
        The evaluation accuracy after the last epoch, as a float (the value
        the study maximizes).

    Raises:
        optuna.TrialPruned: When the study's pruner decides, based on the
            reported intermediate accuracies, that the trial should stop early.
    """
    model = build_model(trial).to(DEVICE)

    lr = trial.suggest_float("lr", 1e-4, 1e-1, log=True)
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])

    optimizer = Adam(model.parameters(), lr=lr)

    train_loader, test_loader = load_mnist_data(batch_size=batch_size)

    eval_acc = 0.0
    for epoch in tqdm.tqdm(range(EPOCHS), desc='Iterating epoch'):
        train_model(model, optimizer, loss_fn, train_loader)
        _, epoch_acc = eval_model(model, loss_fn, test_loader)
        eval_acc = float(epoch_acc)

        # Report the intermediate score so that unpromising trials can be pruned.
        trial.report(eval_acc, epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()

    return eval_acc


## Run the optuna study

In [None]:
from optuna.trial import TrialState

# Persist the study in a local SQLite file so that its trials survive this session.
study_name = "example-study"  # Unique identifier of the study.
storage_name = "sqlite:///{}.db".format(study_name)
# load_if_exists=True resumes the stored study when this cell is re-run;
# without it a second run raises DuplicatedStudyError for the same name.
study = optuna.create_study(
    study_name=study_name,
    storage=storage_name,
    direction='maximize',
    load_if_exists=True,
)

study.optimize(objective, n_trials=15)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

## Open the Optuna study with optuna-dashboard