# LAB 3.1 - CNS (Sequential MNIST)

Import of libraries, fix of random seed and device.


In [2]:
import json
import os
import random
import numpy as np
import itertools
from typing import Callable
from tqdm.notebook import tqdm
import torch
from torchvision import datasets
import pandas as pd

TABLE_FILE_PATH = f'sequential_mnist/variables/accuracy_table.json'

seed = 0
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

device = 'cuda'

# Bonus track 2 & 4 - Sequential MNIST classification task & benchmarking RNN models on the sequential MNIST task

Function able to download and get tensors related to MNIST data and labels of train and test set.

In [3]:
def download_mnist() -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
    """
    Function able to download MNIST dataset and return it.

    returns:
        tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: Training data and labels and test data and labels of MNIST dataset.
    """
    mnist_dir = 'MNIST/'
    if not os.path.exists(mnist_dir):
        os.mkdir(mnist_dir)
    TR_MNIST = datasets.MNIST(root=f'{mnist_dir}', train=True, download=True, transform=None)
    TS_MNIST = datasets.MNIST(root=f'{mnist_dir}', train=False, download=True, transform=None)
    preprocess_x = lambda x: x.reshape(-1, 28 * 28, 1).transpose(0, 1).contiguous().type(torch.float32).to(device) / 255
    return \
      preprocess_x(TR_MNIST.train_data).to(device), \
      TR_MNIST.train_labels.to(device), \
      preprocess_x(TS_MNIST.test_data).to(device), \
      TS_MNIST.test_labels.to(device)


TR_DATA_MNIST, TR_LABELS_MNIST, TS_DATA_MNIST, TS_LABELS_MNIST = download_mnist()

TR_DATA_MNIST.shape, TR_LABELS_MNIST.shape, TS_DATA_MNIST.shape, TS_LABELS_MNIST.shape

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to MNIST/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 90149356.43it/s]


Extracting MNIST/MNIST/raw/train-images-idx3-ubyte.gz to MNIST/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to MNIST/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 112579641.10it/s]

Extracting MNIST/MNIST/raw/train-labels-idx1-ubyte.gz to MNIST/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to MNIST/MNIST/raw/t10k-images-idx3-ubyte.gz



100%|██████████| 1648877/1648877 [00:00<00:00, 25887380.63it/s]


Extracting MNIST/MNIST/raw/t10k-images-idx3-ubyte.gz to MNIST/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to MNIST/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 22151777.64it/s]

Extracting MNIST/MNIST/raw/t10k-labels-idx1-ubyte.gz to MNIST/MNIST/raw






(torch.Size([784, 60000, 1]),
 torch.Size([60000]),
 torch.Size([784, 10000, 1]),
 torch.Size([10000]))

Function able to compute the accuracy metric.

In [4]:
def accuracy(out: torch.Tensor, pred: torch.Tensor) -> float:
    """
    Function that compute accuracy given an output and prediction tensor.

    out: Output tensor.
    pred: Prediction tensor.

    returns:
        float: Computed accuracy value.
    """
    return (sum(pred.argmax(-1) - out == 0) / len(out)).item()

Train function able to fit a model given in input.

In [5]:
def train(
        model: torch.nn.Module,
        TR: tuple[torch.Tensor, torch.Tensor],
        TS: tuple[torch.Tensor, torch.Tensor],
        epochs: int = 10,
        batch_size: int = 64,
        sgd_config: dict = {},
        tqdm=None,
        verbose: bool = False,
) -> tuple:
    """
    Function able to train a given model.

    model: Model to train.
    TR: Tuple composed by X train and Y train torch tensors.
    TS: Tuple composed by X test and Y test torch tensors.
    epochs: Number of epochs of training.
    batch_size: Dimension of batch.
    sgd_config: Dictionary containing sgd configurations (lr and momentum).
    tqdm: TQDM object to show the progressbar. It is None when progressbar is not shown.

    returns:
        tuple: Results of training. In particular the tuple is composed by 2 variables:
            - train_accuracy: List of accuracy of training set computed for each epoch.
            - test_accuracy: List of accuracy of test set computed for each epoch.
    """
    optimizer = torch.optim.Adam(model.parameters(), **sgd_config)
    criterion = torch.nn.CrossEntropyLoss()
    train_accuracy, test_accuracy = None, None
    X_TR, Y_TR = TR
    X_TS, Y_TS = TS
    model.eval()

    iterable = range(epochs)
    if tqdm is not None:
        iterable = tqdm(iterable)
    
    n_batches_tr = int(X_TR.shape[1] / batch_size)
    n_batches_ts = int(X_TS.shape[1] / batch_size)

    for epoch in iterable:
        model.train()
        train_batch_accuracy = 0
        for i in range(n_batches_tr):
            start_batch = i * batch_size
            end_batch = (i + 1) * batch_size
            optimizer.zero_grad()
            pred_tr = model(X_TR[:, start_batch: end_batch])
            TR_LABEL_BATCH = Y_TR[start_batch: end_batch]
            loss_tr = criterion(pred_tr, TR_LABEL_BATCH)
            loss_tr.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
            optimizer.step()
            train_batch_accuracy += accuracy(TR_LABEL_BATCH, pred_tr)
        train_accuracy = train_batch_accuracy / n_batches_tr

        model.eval()
        test_batch_accuracy = 0
        for i in range(n_batches_ts):
            start_batch = i * batch_size
            end_batch = (i + 1) * batch_size
            pred_vl = model(X_TS[:, start_batch: end_batch])
            test_batch_accuracy += accuracy(Y_TS[start_batch: end_batch], pred_vl)
        test_accuracy = test_batch_accuracy / n_batches_ts

        if verbose:
          print(f'epoch: {epoch + 1}/{epochs}', f'train_accuracy: {train_accuracy}', f'test_accuracy: {test_accuracy}', sep='\t')

    return train_accuracy, test_accuracy

Gridsearch function able to find the best configuration for a model created in a `train_func` function callback, train the model with the best configuration and test it on test set.

In [6]:
def gridsearch(
        train_func: Callable,
        configs: dict,
        TR: tuple[torch.Tensor, torch.Tensor],
        TS: tuple[torch.Tensor, torch.Tensor],
        epochs: int = 100,
        vl_portion: float = 0.2,
        attempts_for_config: int = 1,
        Ng: int = 1,
        verbose: bool = False,
) -> tuple:
    """
    Gridsearch function able to find the best hyperparameters configuration, train the model with the best config and test it.

    train_func: Function able to create a model and train it given a config, a train and validation set and a number of epochs.
    configs: Hyperparameters configurations to investigate to find the best one that minimizes the loss on validation set. In particular this is a dictionary of lists for each hyperparam to investigate that is transformed by this function in a list of dictionaries.
    TR: Training set data (X, Y).
    TS: test set data (X, Y).
    epochs: Number of epochs of training both for model selection and model evaluation.
    vl_portion: Portion of example to use in validation set of model selection phase. It is useful to split training set in training and validation set.
    attempts_for_config: Number of attempts to do for each configuration. The loss that it's minimized is the mean of each loss of each attempt.
    Ng: Number of attempts in model assessment.

    returns: A tuple of 4 variables related to the result of training function during the model evaluation phase (mean and std of training and ts accuracy).
    """
    if isinstance(configs, dict):
        configs = [dict(zip(configs.keys(), t)) for t in itertools.product(*configs.values())]
    best_config = {}
    best_accuracy = None
    X_TR, Y_TR = TR
    vl_size = int(X_TR.shape[1] * vl_portion)
    for i, config in enumerate(tqdm(configs, desc='model evaluation')):
        vl_accuracy = 0
        for j in range(attempts_for_config):
            _, eval_accuracy = train_func(
                config,
                (X_TR[:, :-vl_size], Y_TR[:-vl_size]),
                (X_TR[:, -vl_size:], Y_TR[-vl_size:]),
                epochs=epochs,
                verbose=verbose,
            )
            vl_accuracy += eval_accuracy
        vl_accuracy /= attempts_for_config
        print(f'{i + 1}/{len(configs)} - Tried config {config} with accuracy {vl_accuracy}')
        if best_accuracy is None or vl_accuracy > best_accuracy:
            best_config = config
            best_accuracy = vl_accuracy
    print(f'Best config: {best_config} with accuracy {best_accuracy}')

    print('Retraining...')
    train_accuracies, test_accuracies = [], []
    for i in tqdm(range(Ng), desc='model assessment'):
        tr_accuracy, ts_accuracy = train_func(
            best_config,
            TR,
            TS,
            epochs=epochs,
            verbose=verbose,
        )
        train_accuracies.append(tr_accuracy)
        test_accuracies.append(ts_accuracy)
    train_accuracy_mean = np.mean(train_accuracies)
    train_accuracy_std = np.std(train_accuracies)
    test_accuracy_mean = np.mean(test_accuracies)
    test_accuracy_std = np.std(test_accuracies)

    return train_accuracy_mean, train_accuracy_std, test_accuracy_mean, test_accuracy_std

### RNN Model


Antisymmetric rnn layer built as a torch module used to construct an antisymmetric recurrent neural network.

In [7]:
class AntisymmetricRNNLayer(torch.nn.Module):
    """
    Antisymmetric rnn layer class.
    """

    def __init__(
            self,
            input_size: int,
            hidden_size: int,
            eps: float = 0.01,
            diffusion_coef: float = 0.01,
            num_layers: int = 1,
            bidirectional: bool = False,
            nonlinearity: str = 'tanh'
    ) -> None:
        """
        Antisymmetric rnn layer constructor.

        input_size: Input size.
        hidden_size: Hidden size.
        eps: Step size parameter.
        diffusion_coef: Diffusion coefficient of antisymmetric layer.
        num_layers: Number of layers to have a deep version of the model.
        bidirectional: Flag to create the bidirectional version of the model.
        nonlinearity: Non linearity function.
        """
        super(AntisymmetricRNNLayer, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.D = 2 if self.bidirectional else 1
        self.nonlinearity = self.__map_nonlinearity(nonlinearity)
        self.diffusion = diffusion_coef * torch.eye(hidden_size).to(device)
        self.eps = eps
        self.weight_in, self.weight_hh, self.bias = self.__init_weights()

    def __init_weights(self) -> tuple[list, list, list]:
        """
        Private method able to initialize model layers.

        returns:
          tuple[list, list, list]: Lists of layers:
            - weight_in: Input weights.
            - weight_hh: Hidden weights.
            - bias: Bias.
        """
        weight_in = [
            torch.nn.Parameter(torch.randn(self.input_size if i < self.D else self.hidden_size, self.hidden_size)).to(device)
            for i in range(self.num_layers * self.D)
        ]
        weight_hh = [
            torch.nn.Parameter(torch.randn(self.hidden_size, self.hidden_size)).to(device)
            for _ in range(self.num_layers * self.D)
        ]
        bias = [
            torch.nn.Parameter(torch.randn(1, self.hidden_size)).to(device)
            for _ in range(self.num_layers * self.D)
        ]
        return weight_in, weight_hh, bias
    
    def __map_nonlinearity(self, nonlinearity_name: str) -> Callable:
        """
        Function able to map the nonlinearity_name in its function.

        nonlinearity_name: Name of nonlinearity to map.

        returns:
            Nonlinearity function.
        """
        if nonlinearity_name == 'tanh':
            return torch.nn.functional.tanh
        elif nonlinearity_name == 'relu':
            return torch.nn.functional.relu

    def forward(
            self,
            ts: torch.Tensor,
            H: torch.Tensor = None
    ) -> tuple[torch.Tensor, torch.Tensor]:
        """
        Forward function used to the forward phase of pytorch module.

        ts: Time series input data.
        H: Previous hidden state.

        returns:
          tuple[torch.Tensor, torch.Tensor]: Output data:
            - output: Output states of last layer.
            - hidden: Hidden states of last time steps of each layer.
        """
        layer_states = None
        if H is None:
            H = torch.zeros(self.num_layers * self.D, ts.shape[1], self.hidden_size).to(device)
        if self.bidirectional:
            ts = torch.cat((ts, ts), dim=-1)
        for l in range(0, self.num_layers, self.D):
            dim_split = int(ts.shape[-1] / self.D)
            layer_states = self.__forward_layer(ts[:, :, :dim_split], H, l)
            if self.bidirectional:
                layer_states_2 = self.__forward_layer(ts[:, :, dim_split:].flip(0), H, l + 1)
                layer_states = torch.cat((layer_states, layer_states_2.flip(0)), dim=-1)
            ts = layer_states
        return layer_states, H

    def __forward_layer(
            self,
            ts: torch.Tensor,
            H: torch.Tensor,
            l: int
    ) -> torch.Tensor:
        """
        Private method able to run. the forward for a single layer.

        ts: Time series in input.
        H: hidden states.
        l: number of layer.

        returns:
          torch.Tensor: Hidden states computed.
        """
        layer_states = []
        for x in ts:
            H_old = H[l].clone()
            H[l] = H_old + self.eps * self.nonlinearity(
                x @ self.weight_in[l] + H_old @ (self.weight_hh[l] - self.weight_hh[l].T - self.diffusion) + self.bias[l]
            )
            layer_states.append(H[l])
        return torch.stack(layer_states)

RNN pytorch model. This is able to take in input the parameter `recurrent_layer` that defines the type of recurrent layer. In this notebook are used:
- torch.nn.RNN
- torch.nn.LSTM
- torch.nn.GRU
- AntisymmetricRNNLayer (custom layer)

In [8]:
class RNN(torch.nn.Module):
    """
    Class of RNN model.
    """

    def __init__(
            self,
            input_size: int,
            hidden_size: int,
            output_size: int,
            recurrent_layer: torch.nn.RNN = torch.nn.RNN,
            n_layers: int = 1,
            bidirectional= False,
            device: str = 'cpu',
    ) -> None:
        """
        RNN constructor method.

        input_size: Size of input value.
        hidden_size: Size of hidden state.
        output_size: Size of output value.
        n_layers: Number of hidden layers. Default this is 1.
        device: Name of device to use for computation.
        """
        super(RNN, self).__init__()
        self.recoursive_layer = recurrent_layer(
            input_size,
            hidden_size,
            num_layers=n_layers,
            bidirectional=bidirectional,
        ).to(device)
        D = 2 if bidirectional else 1
        self.output_layer = torch.nn.Linear(hidden_size * D, output_size).to(device)
        self.recurrent_states = None

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """
        Forward function used to the forward phase of pytorch module.

        X: Input data.

        returns:
            torch.Tensor: Output data.
        """
        out_state, recurrent_states = self.recoursive_layer(X, self.recurrent_states)
        return self.output_layer(out_state[-1])


RNN(1, 100, 1)

RNN(
  (recoursive_layer): RNN(1, 100)
  (output_layer): Linear(in_features=100, out_features=1, bias=True)
)

RNN train function.

In [9]:
def train_rnn(
        config: dict,
        TR: tuple[torch.Tensor, torch.Tensor],
        TS: tuple[torch.Tensor, torch.Tensor],
        epochs: int = 10,
        batch_size: int = 64,
        tqdm=None,
        verbose=False,
) -> tuple:
    """
    Function used to train the RNN model. It wraps the general train function.

    config: Dictionary of hyperparameters.
    TR: Training set.
    TS: Test set.
    epochs: Number of epochs.
    batch_size: Dimension of a batch.
    tqdm: Object used to show the progressbar.

    returns:
        tuple: Train results.
    """
    model = RNN(TR[0].shape[-1], config['hidden_size'], TR[1].shape[-1], recurrent_layer=config['recurrent_layer'],
                n_layers=config['n_layers'], bidirectional=config['bidirectional'], device=device)
    return train(model, TR, TS, epochs=epochs, tqdm=tqdm, batch_size=config['batch_size'], sgd_config={
        'lr': config['lr'],
    }, verbose=verbose)

Function able to perform RNN gridsearch.

In [10]:
def perform_rnn_gs(
        TR: tuple[torch.Tensor, torch.Tensor],
        TS: tuple[torch.Tensor, torch.Tensor],
        recurrent_layer: torch.nn.Module = None,
        lr: float = None,
        epochs: int = None,
        save_name: str = None,
        verbose: bool = False,
) -> None:
    """
    Function able to perform RNN gridsearch.

    TR: Training set.
    TS: Test set.
    recurrent_layer: Recurrent layer module.
    lr: Gridsearch learning rate.
    epochs: Gridsearch epochs.
    save_name: Name given to the file where is saved the model.
    """
    tr_acc_mean, tr_acc_std, ts_acc_mean, ts_acc_std = gridsearch(
        train_func=train_rnn,
        configs=dict(
            n_layers=[1, 2],
            bidirectional=[False, True],
            lr=[0.001 if lr is None else lr],
            hidden_size=[10],
            recurrent_layer=[recurrent_layer],
            batch_size=[512],
        ),
        TR=TR,
        TS=TS,
        epochs=500 if epochs is None else epochs,
        vl_portion=0.2,
        attempts_for_config=1,
        Ng=5,
        verbose=verbose,
    )
    accuracy_results = dict(
        training_accuracy_mean=tr_acc_mean,
        training_accuracy_std=tr_acc_std,
        test_accuracy_mean=ts_acc_mean,
        test_accuracy_std=ts_acc_std,
    )
    print('Accuracy results')
    print(json.dumps(accuracy_results, indent=2))
    if save_name is not None:
      if os.path.exists(TABLE_FILE_PATH):
          with open(TABLE_FILE_PATH) as file:
              accuracy_table = json.load(file)
      else:
          accuracy_table = {}
      accuracy_table[save_name] = accuracy_results
      with open(TABLE_FILE_PATH, 'w') as file:
          json.dump(accuracy_table, file)

## Sequential MNIST model selection and model evaluation results

### Vanilla RNN

In [None]:
perform_rnn_gs(
    (TR_DATA_MNIST, TR_LABELS_MNIST),
    (TS_DATA_MNIST, TS_LABELS_MNIST),
    recurrent_layer=torch.nn.RNN,
    save_name='rnn_seq_mnist'
)

model evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

1/4 - Tried config {'n_layers': 1, 'bidirectional': False, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.RNN'>, 'batch_size': 512} with accuracy 0.35886548913043476
2/4 - Tried config {'n_layers': 1, 'bidirectional': True, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.RNN'>, 'batch_size': 512} with accuracy 0.4129585597826087
3/4 - Tried config {'n_layers': 2, 'bidirectional': False, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.RNN'>, 'batch_size': 512} with accuracy 0.41618546195652173
4/4 - Tried config {'n_layers': 2, 'bidirectional': True, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.RNN'>, 'batch_size': 512} with accuracy 0.3784816576086957
Best config: {'n_layers': 2, 'bidirectional': False, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.RNN'>, 'batch_size': 512} with accuracy 0.41618546195652173
Retraining...


model assessment:   0%|          | 0/5 [00:00<?, ?it/s]

Accuracy results
{
  "training_accuracy_mean": 0.39903178418803414,
  "training_accuracy_std": 0.04037131894211016,
  "test_accuracy_mean": 0.40133634868421053,
  "test_accuracy_std": 0.03460252556727745
}


### LSTM

In [None]:
perform_rnn_gs(
    (TR_DATA_MNIST, TR_LABELS_MNIST),
    (TS_DATA_MNIST, TS_LABELS_MNIST),
    recurrent_layer=torch.nn.LSTM,
    save_name='lstm_seq_mnist'
)

model evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

1/4 - Tried config {'n_layers': 1, 'bidirectional': False, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.LSTM'>, 'batch_size': 512} with accuracy 0.5734544836956522
2/4 - Tried config {'n_layers': 1, 'bidirectional': True, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.LSTM'>, 'batch_size': 512} with accuracy 0.44344429347826086
3/4 - Tried config {'n_layers': 2, 'bidirectional': False, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.LSTM'>, 'batch_size': 512} with accuracy 0.8840013586956522
4/4 - Tried config {'n_layers': 2, 'bidirectional': True, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.LSTM'>, 'batch_size': 512} with accuracy 0.9245074728260869
Best config: {'n_layers': 2, 'bidirectional': True, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.LSTM'>, 'batch_size': 512} with accuracy 0.9245074728260869
Retraining...

model assessment:   0%|          | 0/5 [00:00<?, ?it/s]

Accuracy results
{
  "training_accuracy_mean": 0.9259915865384615,
  "training_accuracy_std": 0.041518066858795596,
  "test_accuracy_mean": 0.9243832236842107,
  "test_accuracy_std": 0.04214340289538302
}


### GRU

In [None]:
perform_rnn_gs(
    (TR_DATA_MNIST, TR_LABELS_MNIST),
    (TS_DATA_MNIST, TS_LABELS_MNIST),
    recurrent_layer=torch.nn.GRU,
    save_name='gru_seq_mnist'
)

model evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

1/4 - Tried config {'n_layers': 1, 'bidirectional': False, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.GRU'>, 'batch_size': 512} with accuracy 0.6815557065217391
2/4 - Tried config {'n_layers': 1, 'bidirectional': True, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.GRU'>, 'batch_size': 512} with accuracy 0.802734375
3/4 - Tried config {'n_layers': 2, 'bidirectional': False, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.GRU'>, 'batch_size': 512} with accuracy 0.9352921195652174
4/4 - Tried config {'n_layers': 2, 'bidirectional': True, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.GRU'>, 'batch_size': 512} with accuracy 0.9546535326086957
Best config: {'n_layers': 2, 'bidirectional': True, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.GRU'>, 'batch_size': 512} with accuracy 0.9546535326086957
Retraining...


model assessment:   0%|          | 0/5 [00:00<?, ?it/s]

Accuracy results
{
  "training_accuracy_mean": 0.9425647702991453,
  "training_accuracy_std": 0.03019879549669112,
  "test_accuracy_mean": 0.9409128289473685,
  "test_accuracy_std": 0.027277307854995908
}


### Antisymmetric RNN

In [None]:
perform_rnn_gs(
    (TR_DATA_MNIST, TR_LABELS_MNIST),
    (TS_DATA_MNIST, TS_LABELS_MNIST),
    recurrent_layer=AntisymmetricRNNLayer,
    lr=0.1,
    epochs=10,
    save_name='antisymmetric_rnn_seq_mnist'
)

model evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

1/4 - Tried config {'n_layers': 1, 'bidirectional': False, 'lr': 0.1, 'hidden_size': 10, 'recurrent_layer': <class '__main__.AntisymmetricRNNLayer'>, 'batch_size': 512} with accuracy 0.559952445652174
2/4 - Tried config {'n_layers': 1, 'bidirectional': True, 'lr': 0.1, 'hidden_size': 10, 'recurrent_layer': <class '__main__.AntisymmetricRNNLayer'>, 'batch_size': 512} with accuracy 0.7094938858695652
3/4 - Tried config {'n_layers': 2, 'bidirectional': False, 'lr': 0.1, 'hidden_size': 10, 'recurrent_layer': <class '__main__.AntisymmetricRNNLayer'>, 'batch_size': 512} with accuracy 0.3997961956521739
4/4 - Tried config {'n_layers': 2, 'bidirectional': True, 'lr': 0.1, 'hidden_size': 10, 'recurrent_layer': <class '__main__.AntisymmetricRNNLayer'>, 'batch_size': 512} with accuracy 0.7060971467391305
Best config: {'n_layers': 1, 'bidirectional': True, 'lr': 0.1, 'hidden_size': 10, 'recurrent_layer': <class '__main__.AntisymmetricRNNLayer'>, 'batch_size': 512} with accuracy 0.7094938858695652


model assessment:   0%|          | 0/5 [00:00<?, ?it/s]

Accuracy results
{
  "training_accuracy_mean": 0.6714910523504274,
  "training_accuracy_std": 0.01752997165844483,
  "test_accuracy_mean": 0.6799753289473685,
  "test_accuracy_std": 0.01585305779459319
}


## Permuted sequential MNIST model selection and model evaluation results

MNIST dataset permutation of data to perform permuted sequential MNSIT.

In [11]:
perm = np.random.permutation(28 * 28)

TR_DATA_PMNIST = TR_DATA_MNIST[perm]
TS_DATA_PMNIST = TS_DATA_MNIST[perm]

TR_DATA_PMNIST.shape, TS_DATA_PMNIST.shape

(torch.Size([784, 60000, 1]), torch.Size([784, 10000, 1]))

### Vanilla RNN

In [None]:
perform_rnn_gs(
    (TR_DATA_PMNIST, TR_LABELS_MNIST),
    (TS_DATA_PMNIST, TS_LABELS_MNIST),
    recurrent_layer=torch.nn.RNN,
    save_name='rnn_permuted_mnist'
)

model evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

1/4 - Tried config {'n_layers': 1, 'bidirectional': False, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.RNN'>, 'batch_size': 512} with accuracy 0.5868716032608695
2/4 - Tried config {'n_layers': 1, 'bidirectional': True, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.RNN'>, 'batch_size': 512} with accuracy 0.5333729619565217
3/4 - Tried config {'n_layers': 2, 'bidirectional': False, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.RNN'>, 'batch_size': 512} with accuracy 0.6244904891304348
4/4 - Tried config {'n_layers': 2, 'bidirectional': True, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.RNN'>, 'batch_size': 512} with accuracy 0.5946841032608695
Best config: {'n_layers': 2, 'bidirectional': False, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.RNN'>, 'batch_size': 512} with accuracy 0.6244904891304348
Retraining...


model assessment:   0%|          | 0/5 [00:00<?, ?it/s]

Accuracy results
{
  "training_accuracy_mean": 0.6098724626068376,
  "training_accuracy_std": 0.015637687521806356,
  "test_accuracy_mean": 0.61484375,
  "test_accuracy_std": 0.021497444321879677
}


### LSTM

In [12]:
perform_rnn_gs(
    (TR_DATA_PMNIST, TR_LABELS_MNIST),
    (TS_DATA_PMNIST, TS_LABELS_MNIST),
    recurrent_layer=torch.nn.LSTM,
    save_name='lstm_permuted_mnist'
)

model evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

1/4 - Tried config {'n_layers': 1, 'bidirectional': False, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.LSTM'>, 'batch_size': 512} with accuracy 0.4832710597826087
2/4 - Tried config {'n_layers': 1, 'bidirectional': True, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.LSTM'>, 'batch_size': 512} with accuracy 0.4779211956521739
3/4 - Tried config {'n_layers': 2, 'bidirectional': False, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.LSTM'>, 'batch_size': 512} with accuracy 0.6274626358695652
4/4 - Tried config {'n_layers': 2, 'bidirectional': True, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.LSTM'>, 'batch_size': 512} with accuracy 0.7498301630434783
Best config: {'n_layers': 2, 'bidirectional': True, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.LSTM'>, 'batch_size': 512} with accuracy 0.7498301630434783
Retraining...


model assessment:   0%|          | 0/5 [00:00<?, ?it/s]

Accuracy results
{
  "training_accuracy_mean": 0.7112179487179487,
  "training_accuracy_std": 0.07035245873901629,
  "test_accuracy_mean": 0.7126644736842105,
  "test_accuracy_std": 0.07001669681200388
}


### GRU

In [None]:
perform_rnn_gs(
    (TR_DATA_PMNIST, TR_LABELS_MNIST),
    (TS_DATA_PMNIST, TS_LABELS_MNIST),
    recurrent_layer=torch.nn.GRU,
    save_name='gru_permuted_mnist'
)

model evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

1/4 - Tried config {'n_layers': 1, 'bidirectional': False, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.GRU'>, 'batch_size': 512} with accuracy 0.595703125
2/4 - Tried config {'n_layers': 1, 'bidirectional': True, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.GRU'>, 'batch_size': 512} with accuracy 0.5215692934782609
3/4 - Tried config {'n_layers': 2, 'bidirectional': False, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.GRU'>, 'batch_size': 512} with accuracy 0.6786684782608695
4/4 - Tried config {'n_layers': 2, 'bidirectional': True, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.GRU'>, 'batch_size': 512} with accuracy 0.7548403532608695
Best config: {'n_layers': 2, 'bidirectional': True, 'lr': 0.001, 'hidden_size': 10, 'recurrent_layer': <class 'torch.nn.modules.rnn.GRU'>, 'batch_size': 512} with accuracy 0.7548403532608695
Retraining...


model assessment:   0%|          | 0/5 [00:00<?, ?it/s]

Accuracy results
{
  "training_accuracy_mean": 0.7817307692307692,
  "training_accuracy_std": 0.032459793770602756,
  "test_accuracy_mean": 0.7890419407894737,
  "test_accuracy_std": 0.027907218072774208
}


### Antisymmetric RNN

In [None]:
perform_rnn_gs(
    (TR_DATA_PMNIST, TR_LABELS_MNIST),
    (TS_DATA_PMNIST, TS_LABELS_MNIST),
    recurrent_layer=AntisymmetricRNNLayer,
    lr=0.1,
    epochs=10,
    save_name='antisymmetric_rnn_permuted_mnist'
)

model evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

1/4 - Tried config {'n_layers': 1, 'bidirectional': False, 'lr': 0.1, 'hidden_size': 10, 'recurrent_layer': <class '__main__.AntisymmetricRNNLayer'>, 'batch_size': 512} with accuracy 0.43605638586956524
2/4 - Tried config {'n_layers': 1, 'bidirectional': True, 'lr': 0.1, 'hidden_size': 10, 'recurrent_layer': <class '__main__.AntisymmetricRNNLayer'>, 'batch_size': 512} with accuracy 0.5307404891304348
3/4 - Tried config {'n_layers': 2, 'bidirectional': False, 'lr': 0.1, 'hidden_size': 10, 'recurrent_layer': <class '__main__.AntisymmetricRNNLayer'>, 'batch_size': 512} with accuracy 0.3168308423913043
4/4 - Tried config {'n_layers': 2, 'bidirectional': True, 'lr': 0.1, 'hidden_size': 10, 'recurrent_layer': <class '__main__.AntisymmetricRNNLayer'>, 'batch_size': 512} with accuracy 0.5481487771739131
Best config: {'n_layers': 2, 'bidirectional': True, 'lr': 0.1, 'hidden_size': 10, 'recurrent_layer': <class '__main__.AntisymmetricRNNLayer'>, 'batch_size': 512} with accuracy 0.548148777173913

model assessment:   0%|          | 0/5 [00:00<?, ?it/s]

Accuracy results
{
  "training_accuracy_mean": 0.4996227297008547,
  "training_accuracy_std": 0.01264944952827538,
  "test_accuracy_mean": 0.500719572368421,
  "test_accuracy_std": 0.008603728703551222
}


## Table of results

In [9]:
pd.read_json(TABLE_FILE_PATH).T

Unnamed: 0,training_accuracy_mean,training_accuracy_std,test_accuracy_mean,test_accuracy_std
rnn_seq_mnist,0.399032,0.040371,0.401336,0.034603
lstm_seq_mnist,0.925992,0.041518,0.924383,0.042143
gru_seq_mnist,0.942565,0.030199,0.940913,0.027277
antisymmetric_rnn_seq_mnist,0.671491,0.01753,0.679975,0.015853
rnn_permuted_mnist,0.609872,0.015638,0.614844,0.021497
lstm_permuted_mnist,0.711218,0.070352,0.712664,0.070017
gru_permuted_mnist,0.781731,0.03246,0.789042,0.027907
antisymmetric_rnn_permuted_mnist,0.499623,0.012649,0.50072,0.008604
