# 1. Hypertuning 1D CNN
Study the pytorch documentation for:
- Dropout https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html
- normalization layers https://pytorch.org/docs/stable/nn.html#normalization-layers

Experiment with adding dropout and normalization layers to your model. Some rough guidelines where to add them relative to Linear or Conv2d layers:
- Dropout: after Linear or Conv2d layers. Often added after the last Linear layer *before* the output layer, but could occur more often.
- Normalization layers: right after (blocks of) Linear or Conv2d layers, but before activation functions.

In [1]:
from pathlib import Path
import torch
import torch.nn as nn
from loguru import logger
import warnings
warnings.simplefilter("ignore", UserWarning)

In [2]:
from mads_datasets import DatasetFactoryProvider, DatasetType
from mltrainer.preprocessors import BasePreprocessor

for dataset in DatasetType:
    print(dataset)

DatasetType.FLOWERS
DatasetType.IMDB
DatasetType.GESTURES
DatasetType.FASHION
DatasetType.SUNSPOTS
DatasetType.IRIS
DatasetType.PENGUINS
DatasetType.FAVORITA
DatasetType.SECURE


In [3]:
from mads_datasets import DatasetFactoryProvider, DatasetType
from mltrainer.preprocessors import PaddedPreprocessor
preprocessor = PaddedPreprocessor()

gesturesfactory = DatasetFactoryProvider.create_factory(DatasetType.GESTURES)
streamers = gesturesfactory.create_datastreamer(batchsize=32, preprocessor=preprocessor)
train = streamers["train"]
valid = streamers["valid"]

[32m2024-12-13 13:32:01.338[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at C:\Users\Francesca\.cache\mads_datasets\gestures[0m
100%|[38;2;30;71;6m██████████████████████████████████████████████████████████████████████████████████████████████[0m| 2600/2600 [00:01<00:00, 1372.77it/s][0m
100%|[38;2;30;71;6m████████████████████████████████████████████████████████████████████████████████████████████████[0m| 651/651 [00:00<00:00, 1439.42it/s][0m


In [223]:
from mads_datasets import DatasetFactoryProvider, DatasetType
from mltrainer.preprocessors import PaddedPreprocessor
preprocessor = PaddedPreprocessor()

gesturesfactory = DatasetFactoryProvider.create_factory(DatasetType.FLOWERS)
streamers = gesturesfactory.create_datastreamer(batchsize=32, preprocessor=preprocessor)
train = streamers["train"]
valid = streamers["valid"]

[32m2024-12-13 16:22:29.252[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at C:\Users\Francesca\.cache\mads_datasets\flowers[0m


In [224]:
len(train), len(valid)

(91, 22)

In [225]:
trainstreamer = train.stream()
validstreamer = valid.stream()
x, y = next(iter(trainstreamer))
x.shape, y.shape

(torch.Size([32, 3, 224, 224]), torch.Size([32]))

In [226]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the 1D CNN model with one convolutional layer
class Gesture1DCNN(nn.Module):
    def __init__(self, input_channels=3, filters=64, units1=128, units2=64, num_classes=20):
        super(Gesture1DCNN, self).__init__()

        self.convolutions = nn.Sequential(
            # Firs convolutional layer
            nn.Conv1d(in_channels=input_channels, out_channels=filters, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            # Second convolutional layer
            nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(in_channels=filters, out_channels=filters*2, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),
            # Third convolutional layer (optional)
            nn.Conv1d(in_channels=filters*2, out_channels=filters*4, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),
        )

        self.agg =  nn.AdaptiveMaxPool1d(1)  # Global max pooling reduces each feature map to a single value

        # Fully connected layer
        self.dense = nn.Sequential(
            nn.Linear(filters*4, units1),
            nn.ReLU(),
            nn.Linear(units1, units2),
            nn.ReLU(),
            nn.Linear(units2, num_classes)
        )

    def forward(self, x):
        x = x.permute(0, 2, 1)  # Now the shape is (batch_size, 3, 30)
        x = self.convolutions(x) 
        x = self.agg(x)
        # Remove the last dimension (sequence length is 1) for fully connected layer
        x = x.view(x.size(0), -1)  # Flatten to (batch_size, 64)
        logits = self.dense(x)
        return logits



In [227]:

input_channels = 3  # Number of input channels (e.g., x, y, z accelerometer data)
num_classes = 20     # Number of gesture classes

# Instantiate the model
model = Gesture1DCNN()

# Print model summary
print(model)

output = model(x)
print(output.shape)  # Should be (32, num_classes) — batch size x number of classes


Gesture1DCNN(
  (convolutions): Sequential(
    (0): Conv1d(3, 64, kernel_size=(5,), stride=(1,), padding=(2,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=(1,))
    (4): ReLU()
    (5): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=(1,))
    (7): ReLU()
    (8): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (agg): AdaptiveMaxPool1d(output_size=1)
  (dense): Sequential(
    (0): Linear(in_features=256, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=20, bias=True)
  )
)


RuntimeError: permute(sparse_coo): number of dimensions in the tensor input does not match the length of the desired ordering of dimensions i.e. input.dim() = 4 is not equal to len(dims) = 3

## Naive Model

In [208]:
import torch
import torch.nn as nn

# Define the Naive 1D CNN model
class Naive1DCNN(nn.Module):
    def __init__(self, input_channels=3, filters=64, units1=128, num_classes=20):
        super(Naive1DCNN, self).__init__()

        self.convolutions = nn.Sequential(
            # Firs convolutional layer
            nn.Conv1d(in_channels=input_channels, out_channels=filters, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            # Second convolutional layer
            nn.MaxPool1d(kernel_size=2, stride=2),
            
        )

        self.agg =  nn.AdaptiveMaxPool1d(1)  # Global max pooling reduces each feature map to a single value

        # Fully connected layer
        self.dense = nn.Sequential(
            nn.Linear(filters, units1),
            nn.ReLU(),
            nn.Linear(units1, num_classes)
        )

    def forward(self, x):
        x = x.permute(0, 2, 1)  # Now the shape is (batch_size, 3, 30)
        x = self.convolutions(x) 
        x = self.agg(x)
        # Remove the last dimension (sequence length is 1) for fully connected layer
        x = x.view(x.size(0), -1)  # Flatten to (batch_size, 64)
        logits = self.dense(x)
        return logits




In [209]:
# Instantiate the model
model_naive = Naive1DCNN()

# Print model summary
print(model_naive)

output = model_naive(x)
print(output.shape)  # Should be (32, num_classes) — batch size x number of classes

Naive1DCNN(
  (convolutions): Sequential(
    (0): Conv1d(3, 64, kernel_size=(5,), stride=(1,), padding=(2,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (agg): AdaptiveMaxPool1d(output_size=1)
  (dense): Sequential(
    (0): Linear(in_features=64, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=20, bias=True)
  )
)
torch.Size([32, 20])


In [210]:
import torch.optim as optim
from mltrainer import metrics, Trainer
optimizer = optim.Adam
loss_fn = torch.nn.CrossEntropyLoss()
accuracy = metrics.Accuracy()

In [211]:
log_dir = Path("../../models/cnn").resolve()
if not log_dir.exists():
    log_dir.mkdir(parents=True)

In [212]:
from mltrainer import TrainerSettings, ReportTypes

settings = TrainerSettings(
    epochs=5,
    metrics=[accuracy],
    logdir=log_dir,
    train_steps=len(train),
    valid_steps=len(valid),
    reporttypes=[ReportTypes.TENSORBOARD, ReportTypes.MLFLOW],
)
settings

epochs: 5
metrics: [Accuracy]
logdir: C:\Users\Francesca\Documents\osint\code_repo\AI\MADS-MachineLearning-FP\dev\models\cnn
train_steps: 81
valid_steps: 20
reporttypes: [<ReportTypes.TENSORBOARD: 2>, <ReportTypes.MLFLOW: 3>]
optimizer_kwargs: {'lr': 0.001, 'weight_decay': 1e-05}
scheduler_kwargs: {'factor': 0.1, 'patience': 10}
earlystop_kwargs: {'save': False, 'verbose': True, 'patience': 10}

In [213]:
experiment_path = "mlflow_cnn1D-naive"

In [166]:
if torch.backends.mps.is_available() and torch.backends.mps.is_built():
    device = torch.device("mps")
    print("Using MPS")
elif torch.cuda.is_available():
    device = "cuda:0"
    print("using cuda")
else:
    device = "cpu"
    print("using cpu")

using cuda


In [214]:
trainer = Trainer(
    model=model_naive,
    settings=settings,
    loss_fn=loss_fn,
    optimizer=optimizer,
    traindataloader=trainstreamer,
    validdataloader=validstreamer,
    scheduler=optim.lr_scheduler.ReduceLROnPlateau,
    device=device,
    )

[32m2024-12-13 16:18:50.276[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to C:\Users\Francesca\Documents\osint\code_repo\AI\MADS-MachineLearning-FP\dev\models\cnn\20241213-161850[0m
[32m2024-12-13 16:18:50.281[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m72[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m


In [215]:
trainer.loop()

  0%|[38;2;30;71;6m                                                                                                              [0m| 0/5 [00:00<?, ?it/s][0m
  0%|[38;2;30;71;6m                                                                                                             [0m| 0/81 [00:00<?, ?it/s][0m[A
 12%|[38;2;30;71;6m████████████▎                                                                                       [0m| 10/81 [00:00<00:01, 56.94it/s][0m[A
 46%|[38;2;30;71;6m█████████████████████████████████████████████▏                                                     [0m| 37/81 [00:00<00:00, 151.03it/s][0m[A
100%|[38;2;30;71;6m███████████████████████████████████████████████████████████████████████████████████████████████████[0m| 81/81 [00:00<00:00, 194.54it/s][0m[A
[32m2024-12-13 16:18:52.954[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 0 train 2.4680 test 1.6984 metric ['0.6031'][0m
 20%|[38;2;

# Use MLFLOW
Start mlflow with:

```
mlflow server     --backend-store-uri sqlite:///mlflow.db     --default-artifact-root ./mlruns     --host 127.0.0.1:5000
```

In [216]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope
import mlflow
import torch.optim as optim
from mltrainer import metrics, Trainer, TrainerSettings, ReportTypes
from datetime import datetime
experiment_path = "mlflow_test"
from mads_datasets import DatasetFactoryProvider, DatasetType
from mltrainer.preprocessors import BasePreprocessor
from loguru import logger
from datetime import datetime

In [217]:
#end previous run
mlflow.end_run()


🏃 View run intrigued-worm-231 at: http://127.0.0.1:5000/#/experiments/551708861503849523/runs/606d86c097bc481488e91bde12a12e15
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/551708861503849523


In [218]:
experiment_path = "mlflow_gestures1Dconv"
gesturesfactory = DatasetFactoryProvider.create_factory(DatasetType.GESTURES)
batchsize = 32
preprocessor = PaddedPreprocessor()
streamers = gesturesfactory.create_datastreamer(batchsize=batchsize, preprocessor=preprocessor)

train = streamers["train"]
valid = streamers["valid"]
trainstreamer = train.stream()
validstreamer = valid.stream()
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")
mlflow.set_experiment(experiment_path)

[32m2024-12-13 16:19:01.677[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at C:\Users\Francesca\.cache\mads_datasets\gestures[0m
100%|[38;2;30;71;6m██████████████████████████████████████████████████████████████████████████████████████████████[0m| 2600/2600 [00:01<00:00, 2159.44it/s][0m
100%|[38;2;30;71;6m████████████████████████████████████████████████████████████████████████████████████████████████[0m| 651/651 [00:00<00:00, 2229.31it/s][0m


<Experiment: artifact_location='mlflow-artifacts:/551708861503849523', creation_time=1734100231912, experiment_id='551708861503849523', last_update_time=1734100231912, lifecycle_stage='active', name='mlflow_gestures1Dconv', tags={}>

In [219]:

# Define the hyperparameter search space
settings = TrainerSettings(
    epochs=3,
    metrics=[accuracy],
    logdir="modellog",
    train_steps=100,
    valid_steps=100,
    reporttypes=[ReportTypes.MLFLOW],
)


# Define the objective function for hyperparameter optimization
def objective(params):
    # Start a new MLflow run for tracking the experiment
    with mlflow.start_run():
        # Set MLflow tags to record metadata about the model and developer
        mlflow.set_tag("model", "convnet")
        mlflow.set_tag("dev", "fp")
        # Log hyperparameters to MLflow
        mlflow.log_params(params)
        print(params)
        mlflow.log_param("batchsize", f"{batchsize}")


        # Initialize the optimizer, loss function, and accuracy metric
        optimizer = optim.Adam
        loss_fn = torch.nn.CrossEntropyLoss()
        accuracy = metrics.Accuracy()

        # Instantiate the CNN model with the given hyperparameters
        model = Gesture1DCNN(**params)
        # Train the model using a custom train loop
        trainer = Trainer(
            model=model,
            settings=settings,
            loss_fn=loss_fn,
            optimizer=optimizer,
            traindataloader=trainstreamer,
            validdataloader=validstreamer,
            scheduler=optim.lr_scheduler.ReduceLROnPlateau,
            device=device,
        )
        trainer.loop()

        # Save the trained model with a timestamp
        tag = datetime.now().strftime("%Y%m%d-%H%M")
        modelpath = modeldir / (tag + "model.pt")
        torch.save(model, modelpath)

        # Log the saved model as an artifact in MLflow
        mlflow.log_artifact(local_path=modelpath, artifact_path="pytorch_models")
        return {'loss' : trainer.test_loss, 'status': STATUS_OK}

In [220]:
# search_space = {
#     'filters' : scope.int(hp.quniform('filters', 16, 128, 8)),
#     'units1' : scope.int(hp.quniform('units1', 32, 128, 8)),
#     'units2' : scope.int(hp.quniform('units2', 32, 128, 8)),
# }
search_space = {
     'filters' : scope.int(hp.quniform('filters', 16, 128, 8)),
     'units1' : scope.int(hp.quniform('units1', 32, 256, 8)),
     'units2' : scope.int(hp.quniform('units2', 32, 256, 8)),
}

In [221]:
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=5,
    trials=Trials()
)

{'filters': 88, 'units1': 232, 'units2': 176}                                                                                            
  0%|                                                                                              | 0/5 [00:00<?, ?trial/s, best loss=?]

[32m2024-12-13 16:19:13.205[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to modellog\20241213-161913[0m
[32m2024-12-13 16:19:13.224[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m72[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
  0%|[38;2;30;71;6m                                                                                                              [0m| 0/3 [00:00<?, ?it/s][0m
[A
  0%|[38;2;30;71;6m                                                                                                            [0m| 0/100 [00:00<?, ?it/s][0m
[A[A
  1%|[38;2;30;71;6m#                                                                                                   [0m| 1/100 [00:00<00:10,  9.30it/s][0m
[A[A
  2%|[38;2;30;71;6m##                                                                                                  [0m| 2/100 [

🏃 View run glamorous-worm-441 at: http://127.0.0.1:5000/#/experiments/551708861503849523/runs/2ee1a3f446dd45e4811283a0060bb405          

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/551708861503849523                                                            

{'filters': 64, 'units1': 128, 'units2': 144}                                                                                            
 20%|█████████████▊                                                       | 1/5 [00:02<00:10,  2.59s/trial, best loss: 0.155273899435997]

[32m2024-12-13 16:19:15.767[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to modellog\20241213-161915[0m
[32m2024-12-13 16:19:15.770[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m72[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
  0%|[38;2;30;71;6m                                                                                                              [0m| 0/3 [00:00<?, ?it/s][0m
[A
  0%|[38;2;30;71;6m                                                                                                            [0m| 0/100 [00:00<?, ?it/s][0m
[A[A
 24%|[38;2;30;71;6m#######################5                                                                          [0m| 24/100 [00:00<00:00, 237.57it/s][0m
[A[A
 48%|[38;2;30;71;6m###############################################                                                   [0m| 48/100 [0

🏃 View run unique-turtle-57 at: http://127.0.0.1:5000/#/experiments/551708861503849523/runs/a2351b2fec5c45d897886afa009458be            

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/551708861503849523                                                            

{'filters': 120, 'units1': 192, 'units2': 40}                                                                                            
 40%|██████████████████████████▊                                        | 2/5 [00:04<00:07,  2.35s/trial, best loss: 0.11422119289636612]

[32m2024-12-13 16:19:17.952[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to modellog\20241213-161917[0m
[32m2024-12-13 16:19:17.955[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m72[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
  0%|[38;2;30;71;6m                                                                                                              [0m| 0/3 [00:00<?, ?it/s][0m
[A
  0%|[38;2;30;71;6m                                                                                                            [0m| 0/100 [00:00<?, ?it/s][0m
[A[A
 20%|[38;2;30;71;6m###################6                                                                              [0m| 20/100 [00:00<00:00, 193.01it/s][0m
[A[A
 42%|[38;2;30;71;6m#########################################1                                                        [0m| 42/100 [0

🏃 View run beautiful-dove-171 at: http://127.0.0.1:5000/#/experiments/551708861503849523/runs/c79e0967233e4b749f8cf09b144f232d          

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/551708861503849523                                                            

{'filters': 88, 'units1': 224, 'units2': 168}                                                                                            
 60%|████████████████████████████████████████▏                          | 3/5 [00:06<00:04,  2.24s/trial, best loss: 0.11422119289636612]

[32m2024-12-13 16:19:20.063[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to modellog\20241213-161920[0m
[32m2024-12-13 16:19:20.066[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m72[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
  0%|[38;2;30;71;6m                                                                                                              [0m| 0/3 [00:00<?, ?it/s][0m
[A
  0%|[38;2;30;71;6m                                                                                                            [0m| 0/100 [00:00<?, ?it/s][0m
[A[A
 25%|[38;2;30;71;6m########################5                                                                         [0m| 25/100 [00:00<00:00, 248.38it/s][0m
[A[A
 51%|[38;2;30;71;6m#################################################9                                                [0m| 51/100 [0

🏃 View run fortunate-cod-256 at: http://127.0.0.1:5000/#/experiments/551708861503849523/runs/b44d87305c8048d397988b5f76b1d6f7           

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/551708861503849523                                                            

{'filters': 80, 'units1': 176, 'units2': 248}                                                                                            
 80%|█████████████████████████████████████████████████████▌             | 4/5 [00:09<00:02,  2.16s/trial, best loss: 0.06940709054470062]

[32m2024-12-13 16:19:22.103[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to modellog\20241213-161922[0m
[32m2024-12-13 16:19:22.106[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m72[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
  0%|[38;2;30;71;6m                                                                                                              [0m| 0/3 [00:00<?, ?it/s][0m
[A
  0%|[38;2;30;71;6m                                                                                                            [0m| 0/100 [00:00<?, ?it/s][0m
[A[A
 25%|[38;2;30;71;6m########################5                                                                         [0m| 25/100 [00:00<00:00, 243.00it/s][0m
[A[A
 52%|[38;2;30;71;6m##################################################9                                               [0m| 52/100 [0

🏃 View run treasured-bat-808 at: http://127.0.0.1:5000/#/experiments/551708861503849523/runs/1084437040304a3cb4d203a17193a5b3           

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/551708861503849523                                                            

100%|███████████████████████████████████████████████████████████████████| 5/5 [00:10<00:00,  2.19s/trial, best loss: 0.06940709054470062]


In [222]:
best_result

{'filters': np.float64(88.0),
 'units1': np.float64(224.0),
 'units2': np.float64(168.0)}

In [129]:
best_result

{'filters': np.float64(112.0),
 'units1': np.float64(72.0),
 'units2': np.float64(120.0)}

In [132]:
best_result

{'filters': np.float64(104.0),
 'units1': np.float64(120.0),
 'units2': np.float64(120.0)}

In [141]:
## Naive model