# Hypertuning experiment

## Importing libraries

In [1]:
from pathlib import Path
import gin
import numpy as np
import torch
from typing import List
from torch.nn.utils.rnn import pad_sequence
from mltrainer import rnn_models, Trainer
from torch import optim
from mads_datasets import datatools

  from .autonotebook import tqdm as notebook_tqdm
2024-06-04 11:17:27,001	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


## Loading gesture data

In [2]:
from mads_datasets import DatasetFactoryProvider, DatasetType
from mltrainer.preprocessors import PaddedPreprocessor

# Preprocessor instance that is handed to the datastreamers created below.
preprocessor = PaddedPreprocessor()

# Obtain the factory for the GESTURES dataset and let it build the streamers,
# using a batch size of 32.
gesturesdatasetfactory = DatasetFactoryProvider.create_factory(DatasetType.GESTURES)
streamers = gesturesdatasetfactory.create_datastreamer(
    preprocessor=preprocessor,
    batchsize=32,
)

# Select the two splits by name ...
train, valid = streamers["train"], streamers["valid"]
# ... and open a stream on each of them (train first, then valid).
trainstreamer, validstreamer = train.stream(), valid.stream()

[32m2024-06-04 11:21:05.244[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at C:\Users\jelle\.cache\mads_datasets\gestures[0m
100%|[38;2;30;71;6m██████████[0m| 2600/2600 [00:02<00:00, 1205.39it/s]
100%|[38;2;30;71;6m██████████[0m| 651/651 [00:00<00:00, 1291.06it/s]


## Creating model logging folder

In [3]:
import mlflow
from datetime import datetime

# Point MLflow at the SQLite database "mlflow.db" and select the "gestures"
# experiment for the runs logged from this notebook.
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("gestures")

# Absolute path of the folder that trained models are saved into.
modeldir = Path("gestures_experiments/").resolve()
# Create the folder up front (missing parents included, no error when it is
# already there) so that saving a model into it cannot fail on a fresh
# checkout. The final expression displays whether the folder exists.
modeldir.mkdir(parents=True, exist_ok=True)
modeldir.exists()

True

## Experiment 1: Optimizers & Learning rates

In [4]:
# Search grid for experiment 1: three learning rates and three optimizer classes.
learning_rates = [1e-2, 1e-3, 1e-4]
optimizers = [optim.Adam, optim.SGD, optim.RMSprop]

# Loss function shared by the training runs.
loss_fn = torch.nn.CrossEntropyLoss()

# Parse the gin configuration before constructing the model: GRUmodel() is
# called without explicit arguments, so its settings presumably come from
# "gestures_gru.gin" -- TODO confirm against the gin file.
gin.parse_config_file("gestures_gru.gin")
model = rnn_models.GRUmodel()

In [8]:
# NOTE(review): this whole cell is commented out, so running the notebook skips
# the experiment 1 training loop. Points to confirm before re-enabling it:
#   * `model` is not created inside the loop; the same `model` object is passed
#     to every Trainer, so each (learning rate, optimizer) run presumably
#     continues from the weights left by the previous run -- TODO confirm, or
#     build a new model per combination.
#   * The file tag has minute resolution ("%Y%m%d-%H%M"), so two runs that
#     finish within the same minute resolve to the same `modelpath`.

# from mltrainer import TrainerSettings, ReportTypes
# from mltrainer.metrics import Accuracy

# for current_learning_rate in learning_rates:
    
#     for current_optimizer in optimizers:
#         accuracy = Accuracy()
#         settings = TrainerSettings(
#             optimizer_kwargs={'lr': current_learning_rate, 'weight_decay': 1e-05},
#             epochs=50,
#             metrics=[accuracy],
#             logdir=Path("gestures_experiments"),
#             train_steps=len(train),
#             valid_steps=len(valid),
#             reporttypes=[ReportTypes.GIN, ReportTypes.TENSORBOARD, ReportTypes.MLFLOW],
#             earlystop_kwargs={'save': True, 'verbose': True, 'patience': 10}
#         )
    
#         trainer = Trainer(
#             model=model,
#             settings=settings,
#             loss_fn=loss_fn,
#             optimizer=current_optimizer,
#             traindataloader=trainstreamer,
#             validdataloader=validstreamer,
#             scheduler=optim.lr_scheduler.ReduceLROnPlateau,
#             device='cpu',
#         )

#         trainer.loop()

#         tag = datetime.now().strftime("%Y%m%d-%H%M")
#         modelpath = modeldir / (tag + "model.pt")
#         torch.save(model, modelpath)

## Experiment 2: Hidden Sizes & Dropout rates

In [6]:
# Number of gin configuration files used by experiment 2; the files are
# numbered starting at 1.
NUM_EXPERIMENT_2_CONFIGS = 9

# Build the list of config paths in one step. A comprehension keeps loop
# temporaries out of the notebook namespace, so no stale helper variables can
# be picked up by later cells.
gin_files = [
    f'configs/experiment_2_{file_number}.gin'
    for file_number in range(1, NUM_EXPERIMENT_2_CONFIGS + 1)
]

# Last expression of the cell: display the generated paths.
gin_files

['configs/experiment_2_1.gin',
 'configs/experiment_2_2.gin',
 'configs/experiment_2_3.gin',
 'configs/experiment_2_4.gin',
 'configs/experiment_2_5.gin',
 'configs/experiment_2_6.gin',
 'configs/experiment_2_7.gin',
 'configs/experiment_2_8.gin',
 'configs/experiment_2_9.gin']

In [9]:
# NOTE(review): this whole cell is commented out, so running the notebook skips
# the experiment 2 training loop. Points to confirm before re-enabling it:
#   * Each gin file is parsed, but `model` is never re-created inside the loop;
#     the Trainer receives the `model` object built earlier in the notebook, so
#     the settings from the parsed file (hidden size / dropout, per the section
#     title) presumably never reach the network being trained -- TODO confirm,
#     or construct a new model after each gin.parse_config_file call.
#   * The file tag has minute resolution ("%Y%m%d-%H%M"), so two runs that
#     finish within the same minute resolve to the same `modelpath`.

# from mltrainer import TrainerSettings, ReportTypes
# from mltrainer.metrics import Accuracy

# accuracy = Accuracy()

# for gin_file in gin_files:
#     gin.parse_config_file(gin_file)
#     print(f"Reading gin-file {gin_file}")
    
#     settings = TrainerSettings(
#         optimizer_kwargs={'lr': 0.01, 'weight_decay': 1e-05},
#         epochs=50,
#         metrics=[accuracy],
#         logdir=Path("gestures_experiments"),
#         train_steps=len(train),
#         valid_steps=len(valid),
#         reporttypes=[ReportTypes.GIN, ReportTypes.TENSORBOARD, ReportTypes.MLFLOW],
#         earlystop_kwargs={'save': True, 'verbose': True, 'patience': 10}
#     )
    
#     trainer = Trainer(
#         model=model,
#         settings=settings,
#         loss_fn=loss_fn,
#         optimizer=optim.Adam,
#         traindataloader=trainstreamer,
#         validdataloader=validstreamer,
#         scheduler=optim.lr_scheduler.ReduceLROnPlateau,
#         device='cpu',
#     )

#     trainer.loop()

#     tag = datetime.now().strftime("%Y%m%d-%H%M")
#     modelpath = modeldir / (tag + "model.pt")
#     torch.save(model, modelpath)
#     print("==========================================================")