# Coursework 1

This notebook is intended to be used as a starting point for your experiments. The instructions can be found in the instructions file located under spec/coursework1.pdf. The methods provided here are just helper functions. If you want more complex graphs such as side by side comparisons of different experiments you should learn more about matplotlib and implement them. Before each experiment remember to re-initialize neural network weights and reset the data providers so you get a properly initialized experiment. For each experiment try to keep most hyperparameters the same except the one under investigation so you can understand what the effects of each are.

## Imports

### External Imports

In [None]:
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Callable

import logging
from operator import itemgetter

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

if TYPE_CHECKING:
    from matplotlib.axes._subplots import AxesSubplot
    from matplotlib.lines import Line2D

    _1DArray = np.ndarray[tuple[int], np.dtype[np.float64]]
    _2DArray = np.ndarray[tuple[int, int], np.dtype[np.float64]]
    _NDArray = np.ndarray[tuple[int, ...], np.dtype[np.float64]]
    _TrainResult = tuple[_2DArray, dict[str, int], float]


### Internal Imports

In [None]:
from mlp.data_providers import DataProvider, EMNISTDataProvider, MNISTDataProvider
from mlp.errors import CrossEntropySoftmaxError
from mlp.initialisers import ConstantInit, GlorotUniformInit
from mlp.layers import AffineLayer, DropoutLayer, ReluLayer, SigmoidLayer, SoftmaxLayer
from mlp.learning_rules import AdamLearningRule, GradientDescentLearningRule
from mlp.models import MultipleLayerModel
from mlp.optimisers import Optimiser
from mlp.penalties import L1Penalty, L2Penalty


## Initialize Variables

### Set up RNG, Logger, and Data

In [None]:
# The code below sets up the data providers, random number
# generator and logger objects needed for training runs. As
# loading the data from file takes a little while, you generally
# will not want to reload the data providers on every training
# run. If you wish to reset their state you should instead use
# the .reset() method of the data providers.

# Seed a random number generator
seed = 11102019
rng = np.random.RandomState(seed)
batch_size = 100
# Set up the root logger to print info about the training run to the console.
# A StreamHandler created without arguments writes to sys.stderr. Assigning
# `handlers` replaces whatever handlers were attached before, so re-running
# this cell does not stack up duplicate handlers.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.handlers = [logging.StreamHandler()]

# Create data provider objects for the EMNIST data set. Both providers are
# given the same `rng` instance.
train_data = EMNISTDataProvider("train", batch_size=batch_size, rng=rng)
valid_data = EMNISTDataProvider("valid", batch_size=batch_size, rng=rng)


### Initialize Metavariables

In [None]:
# The model set up code below is provided as a starting point.
# You will probably want to add further code cells for the
# different experiments you run.

# Set up hyperparameters.
# NOTE: run_model() has its own `learning_rate` keyword (default 1e-3) and
# does not read this module-level `learning_rate`. `num_epochs` and
# `stats_interval` are read as globals by run_model() and the plot helpers.
learning_rate = 0.1
num_epochs = 100
stats_interval = 1
# Reference template, kept commented out (not executed):
# input_dim, output_dim, hidden_dim = 784, 47, 100

# weights_init = GlorotUniformInit(rng=rng)
# biases_init = ConstantInit(0.)
# model = MultipleLayerModel([
#     AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
#     ReluLayer(),
#     AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
#     ReluLayer(),
#     AffineLayer(hidden_dim, output_dim, weights_init, biases_init)
# ])

# error = CrossEntropySoftmaxError()
# # Use the Adam learning rule
# learning_rule = AdamLearningRule()

# # Remember to use notebook=False when you write a script to be run in a terminal
# _ = train_model_and_plot_stats(
#     model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)


### Restore Heavy Data

In [None]:
# Placeholders for cached experiment results. Every experiment cell further
# down only trains when its placeholder is still None, and saves the fresh
# result with `%store` afterwards.
data_hidden_dim = None
data_hidden_layer = None

data_dropout = None
data_l1 = None
data_l2 = None

data_combined_1 = None
data_combined_2 = None
data_combined_3 = None
data_combined_4 = None
data_combined_5 = None
data_combined_6 = None
data_combined_7 = None
data_combined_8 = None

# Results of the runs that use a 1e-4 learning rate.
data_dropout_slow = None
data_l1_slow = None
data_l2_slow = None

data_combined_slow_1 = None
data_combined_slow_2 = None
data_combined_slow_3 = None
data_combined_slow_4 = None
data_combined_slow_5 = None
data_combined_slow_6 = None

# Reload everything previously saved with `%store`; stored values overwrite
# the None placeholders assigned above.
%store -r


## Helper Functions

### Create and Run Models

In [None]:
def run_model(
    hidden_dim: int,
    hidden_layer_count: int,
    *,
    learning_rate: float = 1e-3,
    dropout: None | Callable[[], DropoutLayer] = None,
    penalty: Callable[[], None | L1Penalty | L2Penalty] = lambda: None,
) -> _TrainResult:
    """
    Build a ReLU multi-layer model and train it with the Adam learning rule.

    The network has `hidden_layer_count` blocks of affine layer + ReLU
    (each optionally followed by a dropout layer) and a final affine output
    layer. Input and output sizes are fixed at 784 and 47. The global `rng`,
    `train_data`, `valid_data`, `num_epochs` and `stats_interval` are used
    for initialisation and training.

    Args:
        hidden_dim (int):
            Width of every hidden layer
        hidden_layer_count (int):
            Number of hidden layers, at least 1
        learning_rate (float):
            Learning rate passed to the Adam learning rule. Default 1e-3
        dropout (None | (() -> DropoutLayer)):
            Factory called once per hidden layer to create the dropout layer
            placed after its ReLU. `None` means no dropout layers
        penalty (() -> (None | L1Penalty | L2Penalty)):
            Factory called twice per affine layer, first for the weights
            penalty and then for the biases penalty. Returning `None` means
            no penalty

    Returns:
        (np.ndarray):
            2D array of statistics
        (dict[str, int]):
            Key-index mapping of all the indices
        (float):
            Run time for this model

    Raises:
        ValueError:
            If `hidden_layer_count` is smaller than 1
    """
    if hidden_layer_count < 1:
        raise ValueError("There must be at least one hidden layer")

    input_dim, output_dim = 784, 47

    weights_init = GlorotUniformInit(rng=rng)
    biases_init = ConstantInit(0.0)

    def make_affine(fan_in: int, fan_out: int) -> AffineLayer:
        # Each affine layer gets its own freshly built pair of penalties.
        return AffineLayer(
            fan_in,
            fan_out,
            weights_init,
            biases_init,
            weights_penalty=penalty(),
            biases_penalty=penalty(),
        )

    layers: list[object] = []
    fan_in = input_dim
    for _ in range(hidden_layer_count):
        # Hidden block: affine -> ReLU -> optional dropout.
        layers.append(make_affine(fan_in, hidden_dim))
        layers.append(ReluLayer())
        if dropout is not None:
            layers.append(dropout())
        fan_in = hidden_dim
    # Output layer; no activation is appended after it.
    layers.append(make_affine(hidden_dim, output_dim))

    # Remember to use notebook=False when you write a script to be run in a terminal
    return train_model_and_plot_stats(
        MultipleLayerModel(layers),
        CrossEntropySoftmaxError(),
        AdamLearningRule(learning_rate=learning_rate),
        train_data,
        valid_data,
        num_epochs,
        stats_interval,
        notebook=True,
    )


### Train Models and Plot Stats

In [None]:
def train_model_and_plot_stats(
    model: MultipleLayerModel,
    error: CrossEntropySoftmaxError,
    learning_rule: GradientDescentLearningRule,
    train_data: DataProvider,
    valid_data: DataProvider,
    num_epochs: int,
    stats_interval: int,
    notebook: bool = True,
) -> _TrainResult:
    """
    Train a model and return the statistics collected by the optimiser.

    Despite its name this function draws no figures; it only returns the
    recorded statistics.

    Args:
        model (MultipleLayerModel):
            Model to train
        error (CrossEntropySoftmaxError):
            Error calculator
        learning_rule (GradientDescentLearningRule):
            Learning rule
        train_data (DataProvider):
            Data to be used for training
        valid_data (DataProvider):
            Data to be used for validation
        num_epochs (int):
            Number of epochs to train for
        stats_interval (int):
            Stats are recorded every `stats_interval` epochs
        notebook (bool):
            Whether the code is run in a Jupyter notebook. Default `True`

    Returns:
        (np.ndarray):
            2D array of statistics
        (dict[str, int]):
            Key-index mapping of all the indices
        (float):
            Run time for this model
    """

    def accuracy(outputs: _NDArray, targets: _NDArray) -> float:
        # Proportion of rows whose most-probable predicted class equals
        # the target class.
        return (outputs.argmax(-1) == targets.argmax(-1)).mean()

    # Monitor classification accuracy in addition to the error.
    data_monitors: dict[str, Callable[[_NDArray, _NDArray], float]] = {
        "acc": accuracy
    }

    optimiser = Optimiser(
        model,
        error,
        learning_rule,
        train_data,
        valid_data,
        data_monitors,
        notebook=notebook,
    )

    # Run the optimiser for `num_epochs` epochs, recording statistics every
    # `stats_interval` epochs.
    stats, keys, run_time = optimiser.train(
        num_epochs=num_epochs, stats_interval=stats_interval
    )
    return stats, keys, run_time


### Generate Pretty Strings

#### Stats String

In [None]:
def gen_stat_str(data: _TrainResult) -> str:
    """
    Generate stat string.

    Formats the last row of the statistics array as comma-separated
    `name=value` pairs, ordered by each name's column index.

    Args:
        data (_TrainResult):
            Training result; only the statistics array and the key-index
            mapping are used

    Returns:
        (str):
            String such as `"error(train)=1.00e+00, acc(train)=5.00e-01"`
    """
    stats, keys, *_ = data
    final_row = stats[-1]
    # Look every value up through its own column index instead of pairing
    # names and values positionally, so a mapping that skips columns or does
    # not start at 0 is still labelled correctly.
    return ", ".join(
        f"{name}={final_row[index]:.2e}"
        for name, index in sorted(keys.items(), key=itemgetter(1))
    )


#### Accuracy Gap String

In [None]:
def gen_acc_gap_str(data: _TrainResult) -> str:
    """
    Generate accuracy and gap string.

    Reads the last row of the statistics array and reports the validation
    accuracy together with the gap, i.e. validation error minus training
    error.
    """
    stats, keys, *_ = data
    final_row: _1DArray = stats[-1]
    valid_acc = final_row[keys["acc(valid)"]]
    error_gap = final_row[keys["error(valid)"]] - final_row[keys["error(train)"]]
    return f"Accuracy {valid_acc:.2e}, Gap {error_gap:.2e}"


### Plot Figures

#### Plot Figure 2 & 3

In [None]:
def plot_fig_2_3(data: dict[int, _TrainResult], prefix: str) -> None:
    """
    Plot utility function for Fig. 2 and 3.

    Creates one figure for the error curves and one for the accuracy curves.
    Training curves are solid, validation curves are dashed. The final
    statistics of every run are printed once. The first recorded row of
    statistics is not plotted, and the x axis is scaled by the global
    `stats_interval`.
    """
    # First figure: error over training.
    error_fig = plt.figure(figsize=(8, 4))
    error_ax = error_fig.add_subplot(111)
    # Second figure: accuracy over training.
    acc_fig = plt.figure(figsize=(8, 4))
    acc_ax = acc_fig.add_subplot(111)
    panels = (("error", error_ax), ("acc", acc_ax))

    for split, style in (("(train)", "-"), ("(valid)", "--")):
        for label, result in data.items():
            stats, keys, *_ = result
            if split == "(train)":
                # Print the summary on the first pass only.
                print(f"{label}: {gen_stat_str(result)}")
            epochs = np.arange(1, stats.shape[0]) * stats_interval
            for metric, axis in panels:
                axis.plot(
                    epochs,
                    stats[1:, keys[f"{metric}{split}"]],
                    label=f"{prefix} {label}{split}",
                    linestyle=style,
                )

    for axis in (error_ax, acc_ax):
        axis.legend(loc=0)
        axis.set_xlabel("Epoch number")


#### Plot Figure 4 for Dropout

In [None]:
def plot_fig_4_dropout(data: dict[float, _TrainResult], learning_rate: float) -> None:
    """
    Plot utility function for Fig. 4 dropout.

    Prints the final validation accuracy and gap of every run, then plots
    both against the dropout value on one figure with two y axes (accuracy
    on the left in red, gap on the right in blue).

    Args:
        data (dict[float, _TrainResult]):
            Training results keyed by dropout value. The keys are the x
            coordinates, in the dictionary's iteration order
        learning_rate (float):
            Learning rate of the runs; only used in the printed summary
    """
    fig = plt.figure(figsize=(8, 4))
    ax1 = fig.add_subplot(111)
    ax2 = ax1.twinx()

    # Materialise the keys: `Axes.plot` expects array-like x data, and a
    # `dict_keys` view is not converted element-wise by NumPy.
    dropouts = list(data)
    accs: list[float] = []
    gaps: list[float] = []
    for dropout, datum in data.items():
        print(
            f"Dropout {dropout} with learning rate {learning_rate}: "
            f"{gen_acc_gap_str(datum)}"
        )
        stat: _1DArray = datum[0][-1]
        keys = datum[1]
        accs.append(stat[keys["acc(valid)"]])
        gaps.append(stat[keys["error(valid)"]] - stat[keys["error(train)"]])

    acc_lines = ax1.plot(dropouts, accs, "r", label="Val. Acc.")
    gap_lines = ax2.plot(dropouts, gaps, "b", label="Gap")
    # The two lines live on different axes, so the shared legend is built
    # from their handles explicitly.
    lines = acc_lines + gap_lines
    labels = [line.get_label() for line in lines]

    ax1.legend(lines, labels, loc=0)
    ax1.set_xlabel("Dropout value")
    ax1.set_ylabel("Accuracy")
    ax2.set_ylabel("Generalization gap")


#### Plot Figure 4 for Penalty

In [None]:
def plot_fig_4_penalty_l(
    data: dict[float, _TrainResult],
    type_: str,
    learning_rate: float,
    axes: tuple[AxesSubplot, AxesSubplot],
) -> tuple[list[Line2D], list[Line2D]]:
    """
    Plot utility function for Fig.4 penalty for one of L1/L2.

    Prints the final validation accuracy and gap of every run and draws both
    against the penalty value.

    Args:
        data (dict[float, _TrainResult]):
            Training results keyed by penalty value. The keys are the x
            coordinates, in the dictionary's iteration order
        type_ (str):
            Penalty name used in the printed summary and the line labels
        learning_rate (float):
            Learning rate of the runs; only used in the printed summary
        axes (tuple[AxesSubplot, AxesSubplot]):
            Axes for the accuracy line and for the gap line, in that order

    Returns:
        (list[Line2D]):
            Lines drawn for the validation accuracy
        (list[Line2D]):
            Lines drawn for the gap
    """
    # Materialise the keys: `Axes.plot` expects array-like x data, and a
    # `dict_keys` view is not converted element-wise by NumPy.
    values = list(data)
    accs: list[float] = []
    gaps: list[float] = []
    for value, datum in data.items():
        print(
            f"{type_} {value} with learning rate {learning_rate}: "
            f"{gen_acc_gap_str(datum)}"
        )
        stat: _1DArray = datum[0][-1]
        keys = datum[1]
        accs.append(stat[keys["acc(valid)"]])
        gaps.append(stat[keys["error(valid)"]] - stat[keys["error(train)"]])

    ax1, ax2 = axes
    acc = ax1.plot(values, accs, label=f"{type_} Val. Acc.")
    # Each Axes advances its own colour cycle, so without a distinct line
    # style the gap line would look exactly like the accuracy line of the
    # same penalty in a shared legend.
    gap = ax2.plot(values, gaps, linestyle="--", label=f"{type_} Gap")
    return acc, gap


def plot_fig_4_penalty(
    data_l1: dict[float, _TrainResult],
    data_l2: dict[float, _TrainResult],
    learning_rate: float,
) -> None:
    """
    Plot utility function for Fig. 4 penalty.

    Draws the L1 and then the L2 results on one figure with a logarithmic
    x axis: validation accuracy on the left y axis and generalization gap on
    a twin right y axis, with a single legend covering all lines.
    """
    acc_ax = plt.figure(figsize=(8, 4)).add_subplot(111)
    gap_ax = acc_ax.twinx()

    # Collect the handles in the order L1 acc, L1 gap, L2 acc, L2 gap.
    handles: list[Line2D] = []
    for tag, results in (("L1", data_l1), ("L2", data_l2)):
        acc_lines, gap_lines = plot_fig_4_penalty_l(
            results, tag, learning_rate, (acc_ax, gap_ax)
        )
        handles.extend(acc_lines)
        handles.extend(gap_lines)

    acc_ax.legend(handles, [handle.get_label() for handle in handles], loc=0)
    acc_ax.set_xscale("log")
    acc_ax.set_xlabel("Penalty value")
    acc_ax.set_ylabel("Accuracy")
    gap_ax.set_ylabel("Generalization gap")


#### Plot Validation Data Stats

In [None]:
def plot_fig_valid(data: dict[float, _TrainResult], prefix: str) -> None:
    """
    Plot utility function accuracy and error stats for validation data.

    Prints the final statistics of every run and draws its validation error
    and validation accuracy curves on two separate figures. The first
    recorded row of statistics is not plotted, and the x axis is scaled by
    the global `stats_interval`.
    """
    # First figure: validation error over training.
    error_fig = plt.figure(figsize=(8, 4))
    error_ax = error_fig.add_subplot(111)
    # Second figure: validation accuracy over training.
    acc_fig = plt.figure(figsize=(8, 4))
    acc_ax = acc_fig.add_subplot(111)

    for label, result in data.items():
        stats, keys, *_ = result
        print(f"{label}: {gen_stat_str(result)}")
        curve_label = f"{prefix} {label}(valid)"
        epochs = np.arange(1, stats.shape[0]) * stats_interval
        error_ax.plot(epochs, stats[1:, keys["error(valid)"]], label=curve_label)
        acc_ax.plot(epochs, stats[1:, keys["acc(valid)"]], label=curve_label)

    for axis in (error_ax, acc_ax):
        axis.legend(loc=0)
        axis.set_xlabel("Epoch number")


## Run Models

### 1e-3 Learning Rate

#### Varying Hidden Layer Dimensions

In [None]:
if data_hidden_dim is None:
    data_hidden_dim = {
        32: run_model(32, 1),
        64: run_model(64, 1),
        128: run_model(128, 1),
    }
    %store data_hidden_dim
for dim, data in data_hidden_dim.items():
    print(f"Dimension {dim} run time: {data[2]:.3f} seconds")
plot_fig_2_3(data_hidden_dim, "width")


#### Varying the Number of Hidden Layers

In [None]:
if data_hidden_layer is None:
    data_hidden_layer = {
        1: data_hidden_dim[128],
        2: run_model(128, 2),
        3: run_model(128, 3),
    }
    %store data_hidden_layer
for layer, data in data_hidden_layer.items():
    print(f"Layer {layer} run time: {data[2]:.3f} seconds")
plot_fig_2_3(data_hidden_layer, "depth")


#### Add Dropout

In [None]:
if data_dropout is None:
    data_dropout = {
        prob: run_model(128, 3, dropout=lambda: DropoutLayer(incl_prob=prob))
        for prob in [0.1, 0.3, 0.5, 0.7, 0.9]
    }
    %store data_dropout
for prob, data in data_dropout.items():
    print(
        f"Dropout probability {prob} with 1e-3 learning rate run time: "
        f"{data[2]:.3f} seconds"
    )
plot_fig_4_dropout(data_dropout, 1e-3)
plot_fig_valid(data_dropout, "dropout")


#### Add Penalties

In [None]:
if data_l1 is None:
    data_l1 = {
        pen: run_model(128, 3, penalty=lambda: L1Penalty(pen))
        for pen in [1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
    }
    %store data_l1
for pen, data in data_l1.items():
    print(f"L1 penalty {pen} run time: {data[2]:.3f} seconds")

if data_l2 is None:
    data_l2 = {
        pen: run_model(128, 3, penalty=lambda: L2Penalty(pen))
        for pen in [1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
    }
    %store data_l2
for pen, data in data_l2.items():
    print(f"L2 penalty {pen} run time: {data[2]:.3f} seconds")

plot_fig_4_penalty(data_l1, data_l2, 1e-3)
plot_fig_valid(data_l1, "l1")
plot_fig_valid(data_l2, "l2")


#### Custom Models

##### Custom Model 1

In [None]:
if data_combined_1 is None:
    data_combined_1 = run_model(
        128,
        3,
        dropout=lambda: DropoutLayer(incl_prob=0.5),
        penalty=lambda: L1Penalty(1e-4),
    )
    %store data_combined_1
data_combined_1_name = "Dropout 0.5 & L1 1e-4"
print(f"{data_combined_1_name}: Run time {data_combined_1[2]:.3f} seconds")
print(f"{data_combined_1_name}: {gen_acc_gap_str(data_combined_1)}")
print(f"{data_combined_1_name}: {gen_stat_str(data_combined_1)}")


##### Custom Model 2

In [None]:
if data_combined_2 is None:
    data_combined_2 = run_model(
        128,
        3,
        dropout=lambda: DropoutLayer(incl_prob=0.5),
        penalty=lambda: L2Penalty(1e-3),
    )
    %store data_combined_2
data_combined_2_name = "Dropout 0.5 & L2 1e-3"
print(f"{data_combined_2_name}: Run time {data_combined_2[2]:.3f} seconds")
print(f"{data_combined_2_name}: {gen_acc_gap_str(data_combined_2)}")
print(f"{data_combined_2_name}: {gen_stat_str(data_combined_2)}")


##### Custom Model 3

In [None]:
if data_combined_3 is None:
    data_combined_3 = run_model(
        128,
        3,
        dropout=lambda: DropoutLayer(incl_prob=0.7),
        penalty=lambda: L1Penalty(1e-4),
    )
    %store data_combined_3
data_combined_3_name = "Dropout 0.7 & L1 1e-4"
print(f"{data_combined_3_name}: Run time {data_combined_3[2]:.3f} seconds")
print(f"{data_combined_3_name}: {gen_acc_gap_str(data_combined_3)}")
print(f"{data_combined_3_name}: {gen_stat_str(data_combined_3)}")


##### Custom Model 4

In [None]:
if data_combined_4 is None:
    data_combined_4 = run_model(
        128,
        3,
        dropout=lambda: DropoutLayer(incl_prob=0.7),
        penalty=lambda: L2Penalty(1e-3),
    )
    %store data_combined_4
data_combined_4_name = "Dropout 0.7 & L2 1e-3"
print(f"{data_combined_4_name}: Run time {data_combined_4[2]:.3f} seconds")
print(f"{data_combined_4_name}: {gen_acc_gap_str(data_combined_4)}")
print(f"{data_combined_4_name}: {gen_stat_str(data_combined_4)}")


##### Custom Model 5

In [None]:
if data_combined_5 is None:
    data_combined_5 = run_model(
        128,
        3,
        dropout=lambda: DropoutLayer(incl_prob=0.5),
        penalty=lambda: L1Penalty(1e-5),
    )
    %store data_combined_5
data_combined_5_name = "Dropout 0.5 & L1 1e-5"
print(f"{data_combined_5_name}: Run time {data_combined_5[2]:.3f} seconds")
print(f"{data_combined_5_name}: {gen_acc_gap_str(data_combined_5)}")
print(f"{data_combined_5_name}: {gen_stat_str(data_combined_5)}")


##### Custom Model 6

In [None]:
if data_combined_6 is None:
    data_combined_6 = run_model(
        128,
        3,
        dropout=lambda: DropoutLayer(incl_prob=0.5),
        penalty=lambda: L2Penalty(1e-4),
    )
    %store data_combined_6
data_combined_6_name = "Dropout 0.5 & L2 1e-4"
print(f"{data_combined_6_name}: Run time {data_combined_6[2]:.3f} seconds")
print(f"{data_combined_6_name}: {gen_acc_gap_str(data_combined_6)}")
print(f"{data_combined_6_name}: {gen_stat_str(data_combined_6)}")


##### Custom Model 7

In [None]:
if data_combined_7 is None:
    data_combined_7 = run_model(
        128,
        3,
        dropout=lambda: DropoutLayer(incl_prob=0.7),
        penalty=lambda: L1Penalty(1e-5),
    )
    %store data_combined_7
data_combined_7_name = "Dropout 0.7 & L1 1e-5"
print(f"{data_combined_7_name}: Run time {data_combined_7[2]:.3f} seconds")
print(f"{data_combined_7_name}: {gen_acc_gap_str(data_combined_7)}")
print(f"{data_combined_7_name}: {gen_stat_str(data_combined_7)}")


##### Custom Model 8

In [None]:
if data_combined_8 is None:
    data_combined_8 = run_model(
        128,
        3,
        dropout=lambda: DropoutLayer(incl_prob=0.7),
        penalty=lambda: L2Penalty(1e-4),
    )
    %store data_combined_8
data_combined_8_name = "Dropout 0.5 & L2 1e-4"
print(f"{data_combined_8_name}: Run time {data_combined_8[2]:.3f} seconds")
print(f"{data_combined_8_name}: {gen_acc_gap_str(data_combined_8)}")
print(f"{data_combined_8_name}: {gen_stat_str(data_combined_8)}")


### 1e-4 Learning Rate

#### Add Dropout

In [None]:
if data_dropout_slow is None:
    data_dropout_slow = {
        prob: run_model(
            128, 3, learning_rate=1e-4, dropout=lambda: DropoutLayer(incl_prob=prob)
        )
        for prob in [0.7, 0.9, 0.95]
    }
    %store data_dropout_slow
for prob, data in data_dropout_slow.items():
    print(
        "Dropout probability {prob} with 1e-4 learning rate run time: "
        f"{data[2]:.3f} seconds"
    )
plot_fig_4_dropout(data_dropout_slow, 1e-4)
plot_fig_valid(data_dropout_slow, "dropout")


#### Add Penalties

In [None]:
if data_l1_slow is None:
    data_l1_slow = {
        pen: run_model(128, 3, learning_rate=1e-4, penalty=lambda: L1Penalty(pen))
        for pen in [1e-4, 1e-3, 1e-1]
    }
    %store data_l1_slow
for pen, data in data_l1_slow.items():
    print(f"L1 penalty {pen} with 1e-4 learning rate run time: {data[2]:.3f} seconds")

if data_l2_slow is None:
    data_l2_slow = {
        pen: run_model(128, 3, learning_rate=1e-4, penalty=lambda: L2Penalty(pen))
        for pen in [1e-4, 1e-3, 1e-1]
    }
    %store data_l2_slow
for pen, data in data_l2_slow.items():
    print(f"L2 penalty {pen} with 1e-4 learning rate run time: {data[2]:.3f} seconds")

plot_fig_4_penalty(data_l1_slow, data_l2_slow, 1e-4)
plot_fig_valid(data_l1_slow, "l1")
plot_fig_valid(data_l2_slow, "l2")


#### Custom Models

##### Custom Model 1

In [None]:
if data_combined_slow_1 is None:
    data_combined_slow_1 = run_model(
        128,
        3,
        learning_rate=1e-4,
        dropout=lambda: DropoutLayer(incl_prob=0.7),
        penalty=lambda: L1Penalty(1e-4),
    )
    %store data_combined_slow_1
data_combined_slow_1_name = "Dropout 0.7 & L1 1e-4 with 1e-4 learning rate"
print(f"{data_combined_slow_1_name}: Run time {data_combined_slow_1[2]:.3f} seconds")
print(f"{data_combined_slow_1_name}: {gen_acc_gap_str(data_combined_slow_1)}")
print(f"{data_combined_slow_1_name}: {gen_stat_str(data_combined_slow_1)}")


##### Custom Model 2

In [None]:
if data_combined_slow_2 is None:
    data_combined_slow_2 = run_model(
        128,
        3,
        learning_rate=1e-4,
        dropout=lambda: DropoutLayer(incl_prob=0.7),
        penalty=lambda: L1Penalty(1e-3),
    )
    %store data_combined_slow_2
data_combined_slow_2_name = "Dropout 0.7 & L1 1e-3 with 1e-4 learning rate"
print(f"{data_combined_slow_2_name}: Run time {data_combined_slow_2[2]:.3f} seconds")
print(f"{data_combined_slow_2_name}: {gen_acc_gap_str(data_combined_slow_2)}")
print(f"{data_combined_slow_2_name}: {gen_stat_str(data_combined_slow_2)}")


##### Custom Model 3

In [None]:
if data_combined_slow_3 is None:
    data_combined_slow_3 = run_model(
        128,
        3,
        learning_rate=1e-4,
        dropout=lambda: DropoutLayer(incl_prob=0.7),
        penalty=lambda: L2Penalty(1e-3),
    )
    %store data_combined_slow_3
data_combined_slow_3_name = "Dropout 0.7 & L2 1e-3 with 1e-4 learning rate"
print(f"{data_combined_slow_3_name}: Run time {data_combined_slow_3[2]:.3f} seconds")
print(f"{data_combined_slow_3_name}: {gen_acc_gap_str(data_combined_slow_3)}")
print(f"{data_combined_slow_3_name}: {gen_stat_str(data_combined_slow_3)}")


##### Custom Model 4

In [None]:
if data_combined_slow_4 is None:
    data_combined_slow_4 = run_model(
        128,
        3,
        learning_rate=1e-4,
        dropout=lambda: DropoutLayer(incl_prob=0.9),
        penalty=lambda: L1Penalty(1e-4),
    )
    %store data_combined_slow_4
data_combined_slow_4_name = "Dropout 0.9 & L1 1e-4 with 1e-4 learning rate"
print(f"{data_combined_slow_4_name}: Run time {data_combined_slow_4[2]:.3f} seconds")
print(f"{data_combined_slow_4_name}: {gen_acc_gap_str(data_combined_slow_4)}")
print(f"{data_combined_slow_4_name}: {gen_stat_str(data_combined_slow_4)}")


##### Custom Model 5

In [None]:
if data_combined_slow_5 is None:
    data_combined_slow_5 = run_model(
        128,
        3,
        learning_rate=1e-4,
        dropout=lambda: DropoutLayer(incl_prob=0.9),
        penalty=lambda: L1Penalty(1e-3),
    )
    %store data_combined_slow_5
data_combined_slow_5_name = "Dropout 0.9 & L1 1e-3 with 1e-4 learning rate"
print(f"{data_combined_slow_5_name}: Run time {data_combined_slow_5[2]:.3f} seconds")
print(f"{data_combined_slow_5_name}: {gen_acc_gap_str(data_combined_slow_5)}")
print(f"{data_combined_slow_5_name}: {gen_stat_str(data_combined_slow_5)}")


##### Custom Model 6

In [None]:
if data_combined_slow_6 is None:
    data_combined_slow_6 = run_model(
        128,
        3,
        learning_rate=1e-4,
        dropout=lambda: DropoutLayer(incl_prob=0.9),
        penalty=lambda: L2Penalty(1e-3),
    )
    %store data_combined_slow_6
data_combined_slow_6_name = "Dropout 0.9 & L2 1e-3 with 1e-4 learning rate"
print(f"{data_combined_slow_6_name}: Run time {data_combined_slow_6[2]:.3f} seconds")
print(f"{data_combined_slow_6_name}: {gen_acc_gap_str(data_combined_slow_6)}")
print(f"{data_combined_slow_6_name}: {gen_stat_str(data_combined_slow_6)}")
