In [1]:
from pathlib import Path
import gin
import numpy as np
import torch
from typing import List
from torch.nn.utils.rnn import pad_sequence
from mltrainer import rnn_models, Trainer
from torch import optim

from mads_datasets import datatools

# 1 Iterators
We will be using an interesting dataset. [link](https://tev.fbk.eu/resources/smartwatch)

From the site:
> The SmartWatch Gestures Dataset has been collected to evaluate several gesture recognition algorithms for interacting with mobile applications using arm gestures. Eight different users performed twenty repetitions of twenty different gestures, for a total of 3200 sequences. Each sequence contains acceleration data from the 3-axis accelerometer of a first generation Sony SmartWatch™, as well as timestamps from the different clock sources available on an Android device. The smartwatch was worn on the user's right wrist. 


In [79]:
from mads_datasets import DatasetFactoryProvider, DatasetType
from mltrainer.preprocessors import PaddedPreprocessor
preprocessor = PaddedPreprocessor()

gesturesdatasetfactory = DatasetFactoryProvider.create_factory(DatasetType.GESTURES)
streamers = gesturesdatasetfactory.create_datastreamer(batchsize=32, preprocessor=preprocessor)
train = streamers["train"]
valid = streamers["valid"]

[32m2024-12-08 14:43:06.681[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at C:\Users\Francesca\.cache\mads_datasets\gestures[0m
100%|[38;2;30;71;6m██████████████████████████████████████████████████████████████████████████████████████████████[0m| 2600/2600 [00:01<00:00, 1452.10it/s][0m
100%|[38;2;30;71;6m████████████████████████████████████████████████████████████████████████████████████████████████[0m| 651/651 [00:00<00:00, 1558.64it/s][0m


In [80]:
trainstreamer = train.stream()
validstreamer = valid.stream()
x, y = next(iter(trainstreamer))
x.shape, y

(torch.Size([32, 32, 3]),
 tensor([18,  2, 17,  6,  9, 15, 13, 18,  9,  2, 11,  8, 14, 18,  8,  9,  6, 15,
          6, 17,  8, 18,  8,  1, 11, 15, 17, 12,  8, 19, 14, 19]))

In [81]:
from mltrainer import TrainerSettings, ReportTypes
from mltrainer.metrics import Accuracy

accuracy = Accuracy()

settings = TrainerSettings(
    epochs=5,
    metrics=[accuracy],
    logdir=Path("gestures"),
    train_steps=len(train),
    valid_steps=len(valid),
    reporttypes=[ReportTypes.GIN, ReportTypes.TENSORBOARD, ReportTypes.MLFLOW],
    scheduler_kwargs={"factor": 0.5, "patience": 5},
    earlystop_kwargs=None
)
settings

epochs: 5
metrics: [Accuracy]
logdir: gestures
train_steps: 81
valid_steps: 20
reporttypes: [<ReportTypes.GIN: 1>, <ReportTypes.TENSORBOARD: 2>, <ReportTypes.MLFLOW: 3>]
optimizer_kwargs: {'lr': 0.001, 'weight_decay': 1e-05}
scheduler_kwargs: {'factor': 0.5, 'patience': 5}
earlystop_kwargs: None

In [82]:
gin.parse_config_file("gestures_gru.gin")

ParsedConfigFileIncludesAndImports(filename='gestures_gru.gin', imports=['gin.torch.external_configurables'], includes=[])

In [83]:
import torch
if torch.backends.mps.is_available() and torch.backends.mps.is_built():
    device = torch.device("mps")
    print("Using MPS")
elif torch.cuda.is_available():
    device = "cuda:0"
    print("using cuda")
else:
    device = "cpu"
    print("using cpu")

# on my mac, at least for the BaseRNN model, mps does not speed up training
# probably because the overhead of copying the data to the GPU is too high
# however, it might speed up training for larger models, with more parameters
device = "cpu"

using cuda


In [84]:
gin.parse_config_file('gestures_gru.gin')
model = rnn_models.GRUmodel()
gin.get_bindings("GRUmodel")

loss_fn = torch.nn.CrossEntropyLoss()

In [85]:
import mlflow
from datetime import datetime

mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")
mlflow.set_experiment("gestures")
modeldir = Path("../../models/gestures/").resolve()
if not modeldir.exists():
    modeldir.mkdir(parents=True)

gin.parse_config_file("gestures_gru.gin")

with mlflow.start_run():
    mlflow.set_tag("model", "GRUmodel")
    mlflow.set_tag("dev", "francesca")
    mlflow.log_params(gin.get_bindings("GRUmodel"))

    model = rnn_models.GRUmodel()

    trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=loss_fn,
        optimizer=optim.Adam,
        traindataloader=trainstreamer,
        validdataloader=validstreamer,
        scheduler=optim.lr_scheduler.ReduceLROnPlateau,
        device=device,
    )
    trainer.loop()

    tag = datetime.now().strftime("%Y%m%d-%H%M")
    modelpath = modeldir / (tag + "model.pt")
    torch.save(model, modelpath)

[32m2024-12-08 14:43:29.738[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to gestures\20241208-144329[0m
  0%|[38;2;30;71;6m                                                                                                              [0m| 0/5 [00:00<?, ?it/s][0m
  0%|[38;2;30;71;6m                                                                                                             [0m| 0/81 [00:00<?, ?it/s][0m[A
  4%|[38;2;30;71;6m███▋                                                                                                 [0m| 3/81 [00:00<00:03, 21.41it/s][0m[A
  7%|[38;2;30;71;6m███████▍                                                                                             [0m| 6/81 [00:00<00:03, 21.55it/s][0m[A
 11%|[38;2;30;71;6m███████████▏                                                                                         [0m| 9/81 [00:00<00:03, 20.88it/s][0m[A
 15%|[38;2;30;71

🏃 View run nosy-lamb-812 at: http://127.0.0.1:5000/#/experiments/2/runs/fca2b746019c45eebb0a4a590fd4730d
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/2





settings --> class TrainerSettings bepaalt epochs, scheduler args, early stopping, report type
```
settings = TrainerSettings(
    epochs=5,
    metrics=[accuracy],
    logdir=Path("gestures"),
    train_steps=len(train),
    valid_steps=len(valid),
    reporttypes=[ReportTypes.GIN, ReportTypes.TENSORBOARD, ReportTypes.MLFLOW],
    scheduler_kwargs={"factor": 0.5, "patience": 5},
    earlystop_kwargs=None
)
```
trainer: class Trainer bepaalt mode, optimization, loss function, scheduler, device
```
trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=loss_fn,
        optimizer=optim.Adam,
        traindataloader=trainstreamer,
        validdataloader=validstreamer,
        scheduler=optim.lr_scheduler.ReduceLROnPlateau,
        device=device,
    )
```

In [86]:
settings = TrainerSettings(
    epochs=5,
    metrics=[accuracy],
    logdir=Path("gestures"),
    train_steps=len(train),
    valid_steps=len(valid),
    reporttypes=[ReportTypes.GIN, ReportTypes.TENSORBOARD, ReportTypes.MLFLOW],
    scheduler_kwargs={"factor": 0.5, "patience": 5},
    earlystop_kwargs=None
)

In [87]:
gin.parse_config_file('gestures_gru.gin')
model = rnn_models.GRUmodel()
gin.get_bindings("GRUmodel")
loss_fn = torch.nn.CrossEntropyLoss()

In [88]:
print(gin.parse_config_file("gestures_gru.gin"))

ParsedConfigFileIncludesAndImports(filename='gestures_gru.gin', imports=['gin.torch.external_configurables'], includes=[])


In [106]:
num_runs= 3
i = 0
dropout_rate = 0.2 - (i / (num_runs - 1)) * (0.2 - 0.01)
print(f"Run: Using dropout rate = {dropout_rate:.4f}")

Run: Using dropout rate = 0.2000


In [107]:
import mlflow
from datetime import datetime

#mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")
mlflow.set_experiment("gestures")
modeldir = Path("../../models/gestures/").resolve()
if not modeldir.exists():
    modeldir.mkdir(parents=True)

gin.parse_config_file("gestures_gru.gin")

num_runs= 3

for i in range(num_runs):
    with mlflow.start_run():
            # Define the model hyperparameters
            dropout_rate = 0.2 - (i / (num_runs - 1)) * (0.2 - 0.01)
            print(f"Run {i+1}: Using dropout rate = {dropout_rate:.4f}")
            params = {
                "dropout_rate": dropout_rate
            }

            mlflow.set_tag("model", "GRUmodel")
            mlflow.set_tag("dev", "francesca")
            mlflow.log_params(gin.get_bindings("GRUmodel"))
            # Log the hyperparameters
            mlflow.log_params(params)
        
            # Log the loss metric
            #mlflow.log_metric("accuracy", accuracy)
        
            # Set a tag that we can use to remind ourselves what this run was for
            mlflow.set_tag("Training Info", "Basic GRU model with dropout decending")
            
            model = rnn_models.GRUmodel()

            # Calculate the dropout rate for this run range 0.5 naar 0.01
            
            model.dropout = param['dropout_rate']
            
    
            trainer = Trainer(
            model=model,
            settings=settings,
            loss_fn=loss_fn,
            optimizer=optim.Adam,
            traindataloader=trainstreamer,
            validdataloader=validstreamer,
            scheduler=optim.lr_scheduler.ReduceLROnPlateau,
            device=device,
        )
            trainer.loop()
        
            tag = datetime.now().strftime("%Y%m%d-%H%M")
            modelpath = modeldir / (tag + "model.pt")
            torch.save(model, modelpath)
            # Log the saved model as an artifact in MLflow
            mlflow.log_artifact(local_path=modelpath, artifact_path="pytorch_models")

[32m2024-12-08 15:09:39.555[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to gestures\20241208-150939[0m


Run 1: Using dropout rate = 0.2000


  0%|[38;2;30;71;6m                                                                                                              [0m| 0/5 [00:00<?, ?it/s][0m
  0%|[38;2;30;71;6m                                                                                                             [0m| 0/81 [00:00<?, ?it/s][0m[A
  4%|[38;2;30;71;6m███▋                                                                                                 [0m| 3/81 [00:00<00:03, 21.80it/s][0m[A
  7%|[38;2;30;71;6m███████▍                                                                                             [0m| 6/81 [00:00<00:03, 23.52it/s][0m[A
 11%|[38;2;30;71;6m███████████▏                                                                                         [0m| 9/81 [00:00<00:03, 23.44it/s][0m[A
 15%|[38;2;30;71;6m██████████████▊                                                                                     [0m| 12/81 [00:00<00:03, 22.97it/s][0m[A
 19%|[38;2;30;71;6

🏃 View run charming-shrimp-67 at: http://127.0.0.1:5000/#/experiments/2/runs/dce3b7b3041b4c8c832bbf620a10a02b
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/2
Run 2: Using dropout rate = 0.1050


  0%|[38;2;30;71;6m                                                                                                              [0m| 0/5 [00:00<?, ?it/s][0m
  0%|[38;2;30;71;6m                                                                                                             [0m| 0/81 [00:00<?, ?it/s][0m[A
  4%|[38;2;30;71;6m███▋                                                                                                 [0m| 3/81 [00:00<00:03, 25.53it/s][0m[A
  7%|[38;2;30;71;6m███████▍                                                                                             [0m| 6/81 [00:00<00:02, 26.04it/s][0m[A
 11%|[38;2;30;71;6m███████████▏                                                                                         [0m| 9/81 [00:00<00:03, 23.93it/s][0m[A
 15%|[38;2;30;71;6m██████████████▊                                                                                     [0m| 12/81 [00:00<00:02, 24.54it/s][0m[A
 19%|[38;2;30;71;6

🏃 View run shivering-loon-659 at: http://127.0.0.1:5000/#/experiments/2/runs/f37d130c47ed4675b1fd54405633b5d8
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/2
Run 3: Using dropout rate = 0.0100


  0%|[38;2;30;71;6m                                                                                                              [0m| 0/5 [00:00<?, ?it/s][0m
  0%|[38;2;30;71;6m                                                                                                             [0m| 0/81 [00:00<?, ?it/s][0m[A
  4%|[38;2;30;71;6m███▋                                                                                                 [0m| 3/81 [00:00<00:03, 23.34it/s][0m[A
  7%|[38;2;30;71;6m███████▍                                                                                             [0m| 6/81 [00:00<00:03, 23.34it/s][0m[A
 11%|[38;2;30;71;6m███████████▏                                                                                         [0m| 9/81 [00:00<00:03, 23.34it/s][0m[A
 15%|[38;2;30;71;6m██████████████▊                                                                                     [0m| 12/81 [00:00<00:02, 23.60it/s][0m[A
 19%|[38;2;30;71;6

🏃 View run peaceful-cod-668 at: http://127.0.0.1:5000/#/experiments/2/runs/cf283f93fe78433aac00448995f3e6a9
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/2





Try to update the code above with the following two commands.
    
```python
gin.parse_config_file('gestures_gru.gin')
model = rnn_model.GRUmodel()
```

To discern between the changes, also modify the tag mlflow.set_tag("model", "new-tag-here") where you add
a new tag of your choice. This way you can keep the models apart.

Excercises:

- improve the RNN model
- test different things. What works? What does not?
- experiment with either GRU or LSTM layers, create your own models + ginfiles. 
- experiment with adding Conv1D layers.

You should be able to get above 90% accuracy with the dataset.

In [46]:
mlflow.end_run()

## EXAMPLE MLFLOW WITH CNNs

In [69]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope
import mlflow
import torch.optim as optim
from mltrainer import metrics, Trainer, TrainerSettings, ReportTypes
from datetime import datetime
experiment_path = "mlflow_test"
from mads_datasets import DatasetFactoryProvider, DatasetType
from mltrainer.preprocessors import BasePreprocessor
from loguru import logger

In [70]:
from torch import nn
print(f"Using {device} device")

# Define model
class CNN(nn.Module):
    def __init__(self, filters, units1, units2, input_size=(32, 1, 28, 28)):
        super().__init__()

        self.convolutions = nn.Sequential(
            nn.Conv2d(1, filters, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(filters, filters, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(filters, filters, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

        activation_map_size = self._conv_test(input_size)
        logger.info(f"Aggregating activationmap with size {activation_map_size}")
        self.agg = nn.AvgPool2d(activation_map_size)

        self.dense = nn.Sequential(
            nn.Flatten(),
            nn.Linear(filters, units1),
            nn.ReLU(),
            nn.Linear(units1, units2),
            nn.ReLU(),
            nn.Linear(units2, 10)
        )

    def _conv_test(self, input_size = (32, 1, 28, 28)):
        x = torch.ones(input_size)
        x = self.convolutions(x)
        return x.shape[-2:]

    def forward(self, x):
        x = self.convolutions(x)
        x = self.agg(x)
        logits = self.dense(x)
        return logits

model = CNN(filters=32, units1=128, units2=64).to("cuda")


[32m2024-12-08 14:04:10.261[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m22[0m - [1mAggregating activationmap with size torch.Size([2, 2])[0m


Using cpu device


In [71]:
fashionfactory = DatasetFactoryProvider.create_factory(DatasetType.FASHION)
batchsize = 64
preprocessor = BasePreprocessor()
streamers = fashionfactory.create_datastreamer(batchsize=batchsize, preprocessor=preprocessor)
train = streamers["train"]
valid = streamers["valid"]
trainstreamer = train.stream()
validstreamer = valid.stream()
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment(experiment_path)

[32m2024-12-08 14:04:14.912[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at C:\Users\Francesca\.cache\mads_datasets\fashionmnist[0m
[32m2024-12-08 14:04:14.913[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m124[0m - [1mFile already exists at C:\Users\Francesca\.cache\mads_datasets\fashionmnist\fashionmnist.pt[0m


<Experiment: artifact_location='file:///C:/Users/Francesca/Documents/osint/code_repo/AI/MADS-MachineLearning-FP/dev/notebooks/3_recurrent_networks/mlruns/3', creation_time=1733662735510, experiment_id='3', last_update_time=1733662735510, lifecycle_stage='active', name='mlflow_test', tags={}>

In [72]:


# Define the hyperparameter search space
settings = TrainerSettings(
    epochs=3,
    metrics=[accuracy],
    logdir="modellog",
    train_steps=100,
    valid_steps=100,
    reporttypes=[ReportTypes.MLFLOW],
)


# Define the objective function for hyperparameter optimization
def objective(params):
    # Start a new MLflow run for tracking the experiment
    with mlflow.start_run():
        # Set MLflow tags to record metadata about the model and developer
        mlflow.set_tag("model", "convnet")
        mlflow.set_tag("dev", "fp")
        # Log hyperparameters to MLflow
        mlflow.log_params(params)
        mlflow.log_param("batchsize", f"{batchsize}")


        # Initialize the optimizer, loss function, and accuracy metric
        optimizer = optim.Adam
        loss_fn = torch.nn.CrossEntropyLoss()
        accuracy = metrics.Accuracy()

        # Instantiate the CNN model with the given hyperparameters
        model = CNN(**params)
        # Train the model using a custom train loop
        trainer = Trainer(
            model=model,
            settings=settings,
            loss_fn=loss_fn,
            optimizer=optimizer,
            traindataloader=trainstreamer,
            validdataloader=validstreamer,
            scheduler=optim.lr_scheduler.ReduceLROnPlateau,
            device=device,
        )
        trainer.loop()

        # Save the trained model with a timestamp
        tag = datetime.now().strftime("%Y%m%d-%H%M")
        modelpath = modeldir / (tag + "model.pt")
        torch.save(model, modelpath)

        # Log the saved model as an artifact in MLflow
        mlflow.log_artifact(local_path=modelpath, artifact_path="pytorch_models")
        return {'loss' : trainer.test_loss, 'status': STATUS_OK}

In [73]:
search_space = {
    'filters' : scope.int(hp.quniform('filters', 16, 128, 8)),
    'units1' : scope.int(hp.quniform('units1', 32, 128, 8)),
    'units2' : scope.int(hp.quniform('units2', 32, 128, 8)),
}

In [None]:
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=3,
    trials=Trials()
)

In [75]:
best_result

{'filters': np.float64(96.0),
 'units1': np.float64(40.0),
 'units2': np.float64(80.0)}