Code taken from and changed for my purpose:

https://github.com/albumentations-team/albumentations_examples/blob/master/notebooks/

https://colab.research.google.com/github/wandb/examples/blob/master/colabs/pytorch/Simple_PyTorch_Integration.ipynb#scrollTo=ciNgNihpqlCR

In [1]:
#!pip install wandb onnx -Uq

In [2]:
import os
import random

import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from tqdm.auto import tqdm

# Ensure deterministic behavior
torch.backends.cudnn.deterministic = True
random.seed(hash("setting random seeds") % 2**32 - 1)
np.random.seed(hash("improves reproducibility") % 2**32 - 1)
torch.manual_seed(hash("by removing stochasticity") % 2**32 - 1)
torch.cuda.manual_seed_all(hash("so runs are repeatable") % 2**32 - 1)

# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# remove slow mirror from list of MNIST mirrors
torchvision.datasets.MNIST.mirrors = [mirror for mirror in torchvision.datasets.MNIST.mirrors
                                      if not mirror.startswith("http://yann.lecun.com")]

In [3]:
import wandb
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mbjno005[0m ([33mdeep_learning_2023[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [4]:
# Config settings for the model.
config = dict(
    epochs=20,
    classes=10,
    kernels=[16, 32],
    batch_size=128,
    learning_rate=0.003,
    dataset="MNIST-digit",
    architecture="CNN")

In [5]:
def model_pipeline(hyperparameters):

    # tell wandb to get started
    with wandb.init(project="MNIST-digit-classification-pgr207", config=hyperparameters):
      # access all HPs through wandb.config, so logging matches execution!
      config = wandb.config

      # make the model, data, and optimization problem
      model, train_loader, test_loader, criterion, optimizer = make(config)
      print(model)

      # and use them to train the model
      train(model, train_loader, criterion, optimizer, config)

      # and test its final performance
      test(model, test_loader)

    return model

In [6]:
def make(config):
    # Make the data
    train, test = get_data(train=True), get_data(train=False)
    train_loader = make_loader(train, batch_size=config.batch_size)
    test_loader = make_loader(test, batch_size=config.batch_size)
    
    ###################
    # Model functions #
    ###################
    
    # Functions to make the different kinds of models
    #model = ConvNet(config.kernels, config.classes).to(device) #conv2d model
    #model = Conv1DNet(config.kernels, config.classes).to(device) # conv1d model
    #model = MLP(config.kernels, config.classes).to(device) # first mlp model
    #model = MLPv2(config.kernels, config.classes).to(device) # second mlp model
    model = MLPv3(config.kernels, config.classes).to(device) # second mlp model

    # Make the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    
    #######################
    # Optimizer functions #
    #######################
    
    # 1st optimizer ADAM
    #optimizer = torch.optim.Adam(
    #    model.parameters(), lr=config.learning_rate)
    
    # 2nd optimizer Stochastic gradient descent
    #optimizer = torch.optim.SGD(
    #    model.parameters(), lr=config.learning_rate)
    
    # 3rd optimizer RAdam
    optimizer = torch.optim.RAdam(
        model.parameters(), lr=config.learning_rate)
    
    return model, train_loader, test_loader, criterion, optimizer

In [7]:
# Getting the data and doing data augmentation.
def get_data(slice=5, train=True):
    full_dataset = torchvision.datasets.MNIST(root=".",
                                              train=train, 
                                              transform=transforms.Compose([
                                                  transforms.RandomRotation(15), # First data augmentation.
                                                  transforms.ColorJitter(brightness=0.25, contrast=0.25), # Second data augmentation
                                                  #transforms.RandomResizedCrop((28,28), scale=(0.9, 1.0)), # Third data augmentaton
                                                  transforms.ToTensor(),
                                              ]),
                                              download=True)
    #  equiv to slicing with [::slice] 
    sub_dataset = torch.utils.data.Subset(
        full_dataset, indices=range(0, len(full_dataset), slice)
    )
    
    return sub_dataset


def make_loader(dataset, batch_size):
    loader = torch.utils.data.DataLoader(dataset=dataset,
                                         batch_size=batch_size, 
                                         shuffle=True,
                                         pin_memory=True, num_workers=2)
    return loader

# First MLP model
This code defines a multi-layer perceptron (MLP) model for classification. The model consists of three hidden layers and an output layer. Each hidden layer applies a linear transformation followed by Sigmoid activation. The output layer is a simple linear layer that reduces the dimensionality of the output to the number of classes.

In [8]:
# This model got accuracy of 87% after 5 epochs decided to make another model to try to get a better accuracy.
# 94,5% at 20 epochs
class MLP(nn.Module):
    def __init__(self, kernels, classes = 10):
        super(MLP, self).__init__()
        
        self.layer1 = nn.Sequential(
            nn.Flatten(start_dim=1, end_dim=-1),
            nn.Linear(in_features=784, out_features=64, bias=True),
            nn.Sigmoid()
        )
        self.layer2 = nn.Sequential(
            nn.Linear(in_features=64, out_features=32, bias=True),
            nn.Sigmoid(),
        )
        self.layer3 = nn.Sequential(
            nn.Linear(in_features=32, out_features=16, bias=True),
            nn.Sigmoid(),
        )
        self.layer4 = nn.Sequential(
            nn.Linear(in_features=16, out_features=10, bias=True),
        )
    
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        return out
        

# Second MLP model

This code defines a multi-layer perceptron (MLP) model for classification. The model consists of three hidden layers and an output layer. Each hidden layer applies a linear transformation followed by batch normalization, ReLU activation and dropout. The output layer is a simple linear layer that reduces the dimensionality of the output to the number of classes.

In [9]:
# Added dropout klayers and batch normalization.
class MLPv2(nn.Module):
    def __init__(self, kernels, classes = 10):
        super(MLPv2, self).__init__()
        
        # First layer: input is flattened, then passed through a linear layer,
        # batch normalization, ReLU activation and dropout
        self.layer1 = nn.Sequential(
            nn.Flatten(start_dim=1, end_dim=-1),  # Flatten the input
            nn.Linear(in_features=784, out_features=64),  # Linear layer
            nn.BatchNorm1d(64),  # Batch normalization
            nn.ReLU(),  # ReLU activation
            nn.Dropout(0.5)  # Dropout for regularization
        )
        
        # Second layer: similar to the first layer but with different input/output sizes
        self.layer2 = nn.Sequential(
            nn.Linear(in_features=64, out_features=32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Dropout(0.5)
        )
        
        # Third layer: similar to the first layer but with different input/output sizes
        self.layer3 = nn.Sequential(
            nn.Linear(in_features=32, out_features=16),
            nn.BatchNorm1d(16),
            nn.ReLU(),
            nn.Dropout(0.5)
        )
        
        # Fourth (output) layer: a linear layer that reduces the output to the number of classes
        self.layer4 = nn.Sequential(
            nn.Linear(in_features=16, out_features=10),
        )
    
    def forward(self, x):
        # Pass the input through each layer in turn
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        
        return out


# Third MLP

This code defines a multi-layer perceptron (MLP) model for classification. The model consists of the same layers as the second MLP model except I have removed the dropout and added another hidden layer in the middle of the network.

In [10]:
# Latest MLP model. Removed dropout layers.
class MLPv3(nn.Module):
    def __init__(self, kernels, classes = 10):
        super(MLPv3, self).__init__()
        
        # First layer: input is flattened, then passed through a linear layer,
        # batch normalization, ReLU activation
        self.layer1 = nn.Sequential(
            nn.Flatten(start_dim=1, end_dim=-1),  # Flatten the input
            nn.Linear(in_features=784, out_features=128),  # Linear layer
            nn.BatchNorm1d(128),  # Batch normalization
            nn.ReLU()  # ReLU activation
        )
        
        # Second layer: similar to the first layer but with different input/output sizes
        self.layer2 = nn.Sequential(
            nn.Linear(in_features=128, out_features=64),
            nn.BatchNorm1d(64),
            nn.ReLU()
        )
        
        # Third layer: similar to the first layer but with different input/output sizes
        self.layer3 = nn.Sequential(
            nn.Linear(in_features=64, out_features=32),
            nn.BatchNorm1d(32),
            nn.ReLU()
        )
        
        # Fourth layer: similar to the first layer but with different input/output sizes
        self.layer4 = nn.Sequential(
            nn.Linear(in_features=32, out_features=16),
            nn.BatchNorm1d(16),
            #nn.LeakyReLU()  # comment in and out as needed to change the last actication funciton
            nn.Sigmoid()   # comment in and out as needed to change the last actication funciton 
            #nn.Tanh()      # comment in and out as needed to change the last actication funciton
        )
        
        # Fifth (output) layer: a linear layer that reduces the output to the number of classes
        self.layer5 = nn.Sequential(
            nn.Linear(in_features=16, out_features=10),
        )
    
    def forward(self, x):
        # Pass the input through each layer in turn
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        
        return out


# Conv2DNet

This code defines a Convolutional Neural Network (CNN) model using 2D convolutions for classification. The model consists of two convolutional layers and a fully connected output layer. Each convolutional layer applies a 2D convolution followed by ReLU activation and max pooling. The output of the last convolutional layer is flattened and then passed through the fully connected layer to produce class scores.

In [11]:
# Define a Convolutional Neural Network (CNN) model Like the model in the book.
class ConvNet(nn.Module):
    def __init__(self, kernels, classes=10):
        super(ConvNet, self).__init__()
        
        # First convolutional layer: applies a 2D convolution, followed by ReLU activation and max pooling
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, kernels[0], kernel_size=5, stride=1, padding=2),  # 2D convolution
            nn.ReLU(),  # ReLU activation
            nn.MaxPool2d(kernel_size=2, stride=2)  # Max pooling
        )
        
        # Second convolutional layer: takes the output of the first layer as input, followed by ReLU and max pooling
        self.layer2 = nn.Sequential(
            nn.Conv2d(kernels[0], kernels[1], kernel_size=5, stride=1, padding=2),  # 2D convolution
            nn.ReLU(),  # ReLU activation
            nn.MaxPool2d(kernel_size=2, stride=2)  # Max pooling
        )
        
        # Fully connected (linear) layer: takes the flattened output of the second convolutional layer and outputs class scores
        self.fc = nn.Linear(7 * 7 * kernels[-1], classes)
        
    def forward(self, x):
        out = self.layer1(x)  # Pass through the first layer
        out = self.layer2(out)  # Pass through the second layer
        out = out.reshape(out.size(0), -1)  # Flatten the output for the fully connected layer
        out = self.fc(out)  # Pass through the fully connected layer to get class scores
        return out


# Conv1DNet

This code defines a Convolutional Neural Network (CNN) model using 1D convolutions for classification. The model consists of two convolutional layers and a fully connected output layer. Each convolutional layer applies a 1D convolution followed by ReLU activation and max pooling. The output of the last convolutional layer is flattened and then passed through the fully connected layer to produce class scores.

In [12]:
class Conv1DNet(nn.Module):
    def __init__(self, kernels, classes=10):
        super(Conv1DNet, self).__init__()
        
        # First convolutional layer: applies a 1D convolution, followed by ReLU activation and max pooling
        self.layer1 = nn.Sequential(
            nn.Conv1d(1, kernels[0], kernel_size=5, stride=1, padding=2),  # 1D convolution
            nn.ReLU(),  # ReLU activation
            nn.MaxPool1d(kernel_size=2, stride=2)  # Max pooling
        )
        
        # Second convolutional layer: similar to the first but takes the output of the first layer as input
        self.layer2 = nn.Sequential(
            nn.Conv1d(kernels[0], kernels[1], kernel_size=5, stride=1, padding=2),  # 1D convolution
            nn.ReLU(),  # ReLU activation
            nn.MaxPool1d(kernel_size=2, stride=2)  # Max pooling
        )
        
        # Fully connected (linear) layer: takes the flattened output of the second convolutional layer and outputs class scores
        self.fc = nn.Linear(6272, classes)
        
    def forward(self, x):
        x = x.view(x.size(0), 1, -1)  # Flatten the 2D image to 1D for Conv1D
        out = self.layer1(x)  # Pass through the first layer
        out = self.layer2(out)  # Pass through the second layer
        out = out.view(out.size(0), -1)  # Flatten the output for the fully connected layer
        out = self.fc(out)  # Pass through the fully connected layer to get class scores
        return out

In [13]:
# Code to get the total amount of featers for the conv1d model
# Initialize the model
model = Conv1DNet(config['kernels'], config['classes']).to(device)

# Create a dummy input with the same size as your actual input data
dummy_input = torch.randn(config['batch_size'], 1, 28*28)

# Pass the dummy input through all layers except the Linear layer
out = model.layer1(dummy_input)
out = model.layer2(out)

# Calculate the total number of features
total_features = out.shape[1] * out.shape[2]

print(f'The total number of features is: {total_features}')

The total number of features is: 6272


# Training models

In [14]:
def train(model, loader, criterion, optimizer, config):
    # Tell wandb to watch what the model gets up to: gradients, weights, and more!
    wandb.watch(model, criterion, log="all", log_freq=10)

    # Run training and track with wandb
    total_batches = len(loader) * config.epochs
    example_ct = 0  # number of examples seen
    batch_ct = 0
    for epoch in tqdm(range(config.epochs)):
        for _, (images, labels) in enumerate(loader):

            loss = train_batch(images, labels, model, optimizer, criterion)
            example_ct +=  len(images)
            batch_ct += 1

            # Report metrics every 25th batch
            if ((batch_ct + 1) % 25) == 0:
                train_log(loss, example_ct, epoch)


def train_batch(images, labels, model, optimizer, criterion):
    images, labels = images.to(device), labels.to(device)
    
    # Forward pass 
    outputs = model(images)
    loss = criterion(outputs, labels)
    
    # Backward pass
    optimizer.zero_grad()
    loss.backward()

    # Step with optimizer
    optimizer.step()

    return loss

In [15]:
def train_log(loss, example_ct, epoch):
    # Where the magic happens
    wandb.log({"epoch": epoch, "loss": loss}, step=example_ct)
    print(f"Loss after {str(example_ct).zfill(5)} examples: {loss:.8f}")

In [16]:
def test(model, test_loader):
    model.eval()

    # Run the model on some test examples
    with torch.no_grad():
        correct, total = 0, 0
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print(f"Accuracy of the model on the {total} " +
              f"test images: {correct / total:%}")
        
        wandb.log({"test_accuracy": correct / total})

    # Save the model in the exchangeable ONNX format
    torch.onnx.export(model, images, "model.onnx")
    wandb.save("model.onnx")
    

In [17]:
# Build, train and analyze the model with the pipeline
model = model_pipeline(config)

MLPv3(
  (layer1): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=128, bias=True)
    (2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): LeakyReLU(negative_slope=0.01)
  )
  (layer2): Sequential(
    (0): Linear(in_features=128, out_features=64, bias=True)
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01)
  )
  (layer3): Sequential(
    (0): Linear(in_features=64, out_features=32, bias=True)
    (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01)
  )
  (layer4): Sequential(
    (0): Linear(in_features=32, out_features=16, bias=True)
    (1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Sigmoid()
  )
  (layer5): Sequential(
    (0): Linear(in_features=16, out_features=10, bias=True)
  )
)


  0%|          | 0/20 [00:00<?, ?it/s]

Loss after 03072 examples: 2.28107810
Loss after 06272 examples: 2.11455154
Loss after 09472 examples: 2.03084850
Loss after 12640 examples: 1.96760666
Loss after 15840 examples: 1.89694870
Loss after 19040 examples: 1.86847055
Loss after 22240 examples: 1.80164826
Loss after 25408 examples: 1.72789240
Loss after 28608 examples: 1.64718163
Loss after 31808 examples: 1.55817318
Loss after 35008 examples: 1.48586202
Loss after 38176 examples: 1.41408932
Loss after 41376 examples: 1.30658150
Loss after 44576 examples: 1.21463144
Loss after 47776 examples: 1.12426674
Loss after 50944 examples: 1.06831110
Loss after 54144 examples: 1.00182259
Loss after 57344 examples: 0.90050596
Loss after 60512 examples: 0.78170323
Loss after 63712 examples: 0.76844156
Loss after 66912 examples: 0.64471918
Loss after 70112 examples: 0.61719620
Loss after 73280 examples: 0.52422452
Loss after 76480 examples: 0.53971839
Loss after 79680 examples: 0.48502189
Loss after 82880 examples: 0.43508470
Loss after 8

VBox(children=(Label(value='0.434 MB of 0.434 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
loss,█▇▇▇▆▆▅▅▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁

0,1
epoch,19.0
loss,0.12905
test_accuracy,0.966
