In [1]:
# Set the MLFLOW_TRACKING_URI environment variable so that MLflow tracking
# uses a local SQLite database file (mlruns.db).
%env MLFLOW_TRACKING_URI=sqlite:///mlruns.db

env: MLFLOW_TRACKING_URI=sqlite:///mlruns.db


# 1. Adding dropout and normalization layers
Study the pytorch documentation for:
- Dropout https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html
- normalization layers https://pytorch.org/docs/stable/nn.html#normalization-layers

Experiment with adding dropout and normalization layers to your model. Some rough guidelines on where to add them relative to Linear or Conv2d layers:
- Dropout: after Linear or Conv2d layers. Often added after the last Linear layer *before* the output layer, but could occur more often.
- Normalization layers: right after (blocks of) Linear or Conv2d layers, but before activation functions.

In [2]:
from pathlib import Path
import torch
import torch.nn as nn
from loguru import logger
import warnings
warnings.simplefilter("ignore", UserWarning)
from mads_datasets import DatasetFactoryProvider, DatasetType
from mltrainer.preprocessors import BasePreprocessor

# Build the data streamers. Despite its name, `fashionfactory` is created for
# the FLOWERS dataset type.
fashionfactory = DatasetFactoryProvider.create_factory(DatasetType.FLOWERS)
batchsize = 64
preprocessor = BasePreprocessor()
streamers = fashionfactory.create_datastreamer(
    batchsize=batchsize,
    preprocessor=preprocessor,
)
train, valid = streamers["train"], streamers["valid"]
trainstreamer, validstreamer = train.stream(), valid.stream()
import torch

# Device selection: MPS when it is both available and built, otherwise the
# first CUDA device, otherwise the CPU.
if not torch.backends.mps.is_available() or not torch.backends.mps.is_built():
    if torch.cuda.is_available():
        device = "cuda:0"
        print("using cuda")
    else:
        device = "cpu"
        print("using cpu")
else:
    device = torch.device("mps")
    print("Using MPS")

[32m2024-09-23 17:01:03.964[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at C:\Users\leons\.cache\mads_datasets\flowers[0m


using cuda


In [17]:
import torch
from torch import nn
from loguru import logger
from torchsummary import summary

# Define the CNN model
class CNN(nn.Module):
    """Two-block convolutional classifier with batch normalization and dropout.

    Each convolutional block is Conv2d -> BatchNorm2d -> ReLU -> Dropout ->
    MaxPool2d. The feature maps are then flattened and passed through two
    hidden Linear layers (each followed by BatchNorm1d, ReLU and Dropout) and
    a final Linear output layer.

    Args:
        filters: Number of output channels of the first convolution; the
            second convolution produces ``filters * 2`` channels.
        units1: Width of the first hidden Linear layer.
        units2: Width of the second hidden Linear layer.
        input_size: Input shape as ``(batch, channels, height, width)``. The
            batch entry is ignored; the remaining entries are used to infer
            the input width of the first Linear layer.
        num_classes: Number of output logits. Defaults to 10, the value that
            was previously hard-coded.
    """

    def __init__(
        self,
        filters: int,
        units1: int,
        units2: int,
        input_size: tuple,
        num_classes: int = 10,
    ):
        super().__init__()
        self.in_channels = input_size[1]
        self.input_size = input_size

        self.convolutions = nn.Sequential(
            # Stride 2 with padding 1 halves the spatial size (e.g. 224 -> 112).
            nn.Conv2d(self.in_channels, filters, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(filters),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            # Kernel size 4 divides the spatial size by 4 (e.g. 112 -> 28).
            nn.MaxPool2d(kernel_size=4),
            # Stride 2 without padding (e.g. 28 -> 13).
            nn.Conv2d(filters, filters * 2, kernel_size=3, stride=2, padding=0),
            nn.BatchNorm2d(filters * 2),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            # Kernel size 4 again (e.g. 13 -> 3).
            nn.MaxPool2d(kernel_size=4),
        )

        # Infer the input width of the first Linear layer from the actual
        # output shape of the convolutional stack.
        flattened_size = self._get_flattened_size(input_size)
        logger.info(f"Flattened size for the first Linear layer: {flattened_size}")

        self.dense = nn.Sequential(
            nn.Flatten(),  # (batch, C, H, W) -> (batch, C*H*W)
            nn.Linear(flattened_size, units1),
            nn.BatchNorm1d(units1),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(units1, units2),
            nn.BatchNorm1d(units2),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(units2, num_classes),  # one logit per class
        )

    def _get_flattened_size(self, input_size):
        """Return the number of features per sample after the convolutions.

        A single dummy sample is pushed through ``self.convolutions``. The
        probe runs in eval mode and without gradient tracking, so it neither
        updates the BatchNorm running statistics nor builds an autograd
        graph. The previous training mode is restored afterwards.
        """
        was_training = self.convolutions.training
        self.convolutions.eval()
        try:
            with torch.no_grad():
                # Replace the batch dimension by 1 for the probe.
                x = torch.ones(1, *input_size[1:], dtype=torch.float32)
                x = self.convolutions(x)
        finally:
            self.convolutions.train(was_training)
        logger.info(f"Output shape after convolutions: {x.shape}")
        # With a batch of one, the element count is the per-sample size.
        return x.numel()

    def forward(self, x):
        """Apply the convolutional stack followed by the dense head."""
        x = self.convolutions(x)
        x = self.dense(x)
        return x

# Instantiate the network for inputs shaped (32, 3, 224, 224).
input_size = (32, 3, 224, 224)
model = CNN(filters=128, units1=128, units2=64, input_size=input_size)

# Print the layer-by-layer summary on the CPU; the summary is given the
# per-sample shape, i.e. the input size without its batch entry.
summary(model, input_size=input_size[1:], device='cpu')


[32m2024-09-23 17:04:52.483[0m | [1mINFO    [0m | [36m__main__[0m:[36m_get_flattened_size[0m:[36m49[0m - [1mOutput shape after convolutions: torch.Size([1, 256, 3, 3])[0m
[32m2024-09-23 17:04:52.489[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m29[0m - [1mFlattened size for the first Linear layer: 2304[0m


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1        [-1, 128, 112, 112]           3,584
       BatchNorm2d-2        [-1, 128, 112, 112]             256
              ReLU-3        [-1, 128, 112, 112]               0
           Dropout-4        [-1, 128, 112, 112]               0
         MaxPool2d-5          [-1, 128, 28, 28]               0
            Conv2d-6          [-1, 256, 13, 13]         295,168
       BatchNorm2d-7          [-1, 256, 13, 13]             512
              ReLU-8          [-1, 256, 13, 13]               0
           Dropout-9          [-1, 256, 13, 13]               0
        MaxPool2d-10            [-1, 256, 3, 3]               0
          Flatten-11                 [-1, 2304]               0
           Linear-12                  [-1, 128]         295,040
      BatchNorm1d-13                  [-1, 128]             256
             ReLU-14                  [

In [13]:
from mltrainer.trainer import TrainerSettings, ReportTypes
from mltrainer import metrics
# Directory that receives the training logs. Missing parents are created, and
# exist_ok=True makes re-running this cell safe without a separate (racy)
# existence check.
log_dir = Path("../../models/cnn").resolve()
log_dir.mkdir(parents=True, exist_ok=True)

accuracy = metrics.Accuracy()

# Train for 10 epochs, track accuracy and report to TensorBoard.
# NOTE(review): len(train) / len(valid) are presumably the number of batches
# per pass over each split; confirm against the streamer implementation.
settings = TrainerSettings(
    epochs=10,
    metrics=[accuracy],
    logdir=log_dir,
    train_steps=len(train),
    valid_steps=len(valid),
    reporttypes=[ReportTypes.TENSORBOARD],
)

In [14]:
import torch.optim as optim
from mltrainer import metrics
from mltrainer.trainer import Trainer
# The optimizer and the scheduler are handed over as classes (they are not
# instantiated in this cell); the loss and the metric are instances.
optimizer = optim.Adam
loss_fn = torch.nn.CrossEntropyLoss()
accuracy = metrics.Accuracy()

# Wire the model, settings, data streams and device into the trainer.
trainer = Trainer(
    model=model, settings=settings,
    loss_fn=loss_fn, optimizer=optimizer,
    traindataloader=trainstreamer, validdataloader=validstreamer,
    scheduler=optim.lr_scheduler.ReduceLROnPlateau,
    device=device,
)

[32m2024-09-23 17:03:57.117[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to C:\Users\leons\OneDrive\Bureaublad\School\models\cnn\20240923-170357[0m
[32m2024-09-23 17:03:57.166[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m72[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m


In [15]:
# Run the trainer's loop; progress bars are shown while it runs.
trainer.loop()

  2%|[38;2;30;71;6m▏         [0m| 1/45 [00:15<11:11, 15.26s/it]
  0%|[38;2;30;71;6m          [0m| 0/10 [00:15<?, ?it/s]


KeyboardInterrupt: 

----------
Personal notes!

- Dropout is typically used after the activation function and before the pooling function. This is because after the pooling function there are fewer activations to drop out, so fewer weights are influenced by the dropout. Dropout is designed to randomly set a fraction of the activations to zero. Applying it after the activation function ensures that the dropout is applied to the non-linearly transformed features, which helps in regularizing the network more effectively.
It also helps to maintain the structure of the network.

- Normalization layers are placed before the activation function for several reasons:
    - Stabilizing activations: by normalizing the activations before applying the non-linearity, you ensure that the inputs to the activation functions have a consistent distribution, which helps in stabilizing the training process.
    - Improving gradient flow: normalization helps in maintaining a stable gradient flow through the network, which can prevent issues like vanishing or exploding gradients.
    - Accelerating training: normalized activations can lead to faster convergence during training, as the network can learn more efficiently.
    - Why not after the activation? Placing normalization layers after the activation functions can still work, but it might not be as effective. The primary goal of normalization is to control the distribution of the inputs to the activation functions, ensuring they are within a range that the activation functions can handle well.

- After adding only batch normalization layers to the model, the accuracy improved to 92.3%! Re-adding the dropout layers decreased performance slightly, but should improve generalization.


# 2. Adding convolutional and pooling layers
In previous lessons, you started to experiment with your model.
You might have tested the impact of the number of units, the depth of the layers and different learning rates.

This lesson, we have added some new types of layers: convolutional and pooling layers.
Experiment with adding these new layers.

Also, have a look at the `ModuleList`: https://pytorch.org/docs/stable/generated/torch.nn.ModuleList.html#modulelist
It can be really useful to create a list of layers from a configfile, and then use that list to create your model.
Instead of just adding a single layer, you could also add a block of layers (e.g. a Conv2d layer, followed by a ReLU layer, followed by a BatchNorm2d layer, followed by a MaxPool2d layer) and repeat that in a loop, adding it to the `ModuleList`.

# 3. Improve your pipeline
In addition to new layers, we have expanded our logging tools with MLFlow, so we currently can choose between gin-config, tensorboard and MLFlow.

Expand your training pipeline you started in the previous lesson such that:

- you can switch between models by changing a config file
- you can test different hyperparameters by changing a config file
- you automatically log settings: model picked, hyperparameters, metrics, etc. : use either gin-config, tensorboard or MLFlow to log that, or a combination, whatever you prefer.
- Important: doing a master means you don't just start engineering a pipeline, but you need to reflect. Why do you see the results you see? What does this mean, considering the theory? Write down lessons learned and reflections, based on experimental results.
- continuously improve your code: 
    - clean up your experimental environment, such that it doesn't get too messy
    - automate the boring stuff: use a Makefile, use configfiles, automate logging, etc.
    - use git: commit your changes often and with descriptive messages
    - separate code for pipelines, configs, models, modeltraining and results.