This notebook contains experiments for:

* Loss functions
* Learning rate decay
* Optimizers
* Weight initialization

# `dlfs_kuoste` imports

In [1]:
import numpy as np

# from lincoln import activations
# from lincoln import layers
# from lincoln import losses
# from lincoln import optimizers
# from lincoln import network
# from lincoln import train
# from lincoln.utils import mnist

from dlfs_kuoste import operations
from dlfs_kuoste import layers
from dlfs_kuoste import losses
from dlfs_kuoste import optimizers
from dlfs_kuoste import helpers
from dlfs_kuoste import NeuralNetwork
from dlfs_kuoste import Trainer

RANDOM_SEED = 190119

In [2]:
# Load the dataset through the project helper and unpack it into the
# train/test inputs (X_*) and their labels (y_*) used by every cell below.
X_train, y_train, X_test, y_test = helpers.mnist_load()

In [3]:
# Count the entries in the training labels and display the count as a
# quick sanity check of the loaded data.
num_labels = len(y_train)
num_labels

60000

In [4]:
# one-hot encode
def one_hot_encode(labels, num_classes=10):
    """Return a one-hot encoding of integer class labels.

    Args:
        labels: 1-D sequence/array of integer class indices, one per example.
        num_classes: width of the encoding (number of columns). Defaults to
            10, the value this notebook uses for its digit labels.

    Returns:
        A float array of shape ``(len(labels), num_classes)`` that is zero
        everywhere except for a 1 at ``[i, labels[i]]`` in every row ``i``.

    Raises:
        IndexError: if a label is outside the valid index range of a row.
    """
    labels = np.asarray(labels)
    encoded = np.zeros((labels.shape[0], num_classes))
    # Integer-array indexing pairs row i with column labels[i], so a single
    # assignment replaces the per-example Python loop.
    encoded[np.arange(labels.shape[0]), labels] = 1
    return encoded


train_labels = one_hot_encode(y_train)
test_labels = one_hot_encode(y_test)

# MNIST Demos

# Scale data to mean 0, variance 1

In [5]:
# Center both splits on the mean of the (not yet centered) training set;
# the test split reuses the training statistic rather than its own.
train_mean = np.mean(X_train)
X_train = X_train - train_mean
X_test = X_test - train_mean

In [6]:
# Value range (min, max) of the train split followed by the test split.
tuple(stat(split) for split in (X_train, X_test) for stat in (np.min, np.max))

(np.float64(-33.318421449829934),
 np.float64(221.68157855017006),
 np.float64(-33.318421449829934),
 np.float64(221.68157855017006))

In [7]:
# Divide both splits by the standard deviation of the training set as it
# stands before this cell; the test split reuses the training statistic.
train_std = np.std(X_train)
X_train = X_train / train_std
X_test = X_test / train_std

In [8]:
# Value range (min, max) of the train split followed by the test split.
tuple(stat(split) for split in (X_train, X_test) for stat in (np.min, np.max))

(np.float64(-0.424073894391566),
 np.float64(2.821543345689335),
 np.float64(-0.424073894391566),
 np.float64(2.821543345689335))

In [9]:
def calc_accuracy_model(model, test_set, labels=None):
    """Print the classification accuracy of ``model`` on ``test_set``.

    The predicted class of each example is the argmax along axis 1 of
    ``model.forward(test_set)``; predictions are compared element-wise with
    ``labels`` and the share of matches is printed as a percentage.

    Args:
        model: object exposing ``forward(test_set)`` that returns a 2-D
            array of per-class scores, one row per example.
        test_set: input handed to ``model.forward``; its first dimension is
            used as the number of examples.
        labels: integer class label for every row of ``test_set``. When
            omitted, the notebook-level ``y_test`` is used, which is only
            correct if ``test_set`` is the matching test inputs.

    Returns:
        None. The accuracy is printed, not returned.

    Raises:
        ValueError: if ``labels`` does not have one entry per prediction.
    """
    if labels is None:
        # Backward-compatible default: the original always compared against
        # the global test labels.
        labels = y_test
    labels = np.asarray(labels)
    predictions = np.argmax(model.forward(test_set), axis=1)
    if labels.shape != predictions.shape:
        # Guard against silent broadcasting (e.g. column-vector labels),
        # which would produce a meaningless accuracy.
        raise ValueError(
            "labels has shape {0}, expected {1}".format(
                labels.shape, predictions.shape
            )
        )
    accuracy = np.equal(predictions, labels).sum() * 100.0 / test_set.shape[0]
    print('''The model validation accuracy is: {0:.2f}%'''.format(accuracy))

## Softmax cross entropy

### Trying sigmoid activation

In [10]:
# Two dense layers, both with sigmoid activations, trained against a mean
# squared error loss built with normalize=False.
sigmoid_layers = [
    layers.Dense(neurons=89, activation=operations.Sigmoid()),
    layers.Dense(neurons=10, activation=operations.Sigmoid()),
]
mse_loss = losses.MeanSquaredError(normalize=False)
model = NeuralNetwork(layers=sigmoid_layers, loss=mse_loss, seed=RANDOM_SEED)

# Sgd(0.1): the positional argument is presumably the learning rate.
trainer = Trainer(model, optimizers.Sgd(0.1))
fit_options = dict(epochs=50, eval_every=5, seed=RANDOM_SEED, batch_size=60)
trainer.fit(X_train, train_labels, X_test, test_labels, **fit_options)

calc_accuracy_model(model, X_test)

Validation loss after 5 epochs is 0.836
Validation loss after 10 epochs is 0.718
Validation loss after 15 epochs is 0.659
Validation loss after 20 epochs is 0.638
Validation loss after 25 epochs is 0.627
Validation loss after 30 epochs is 0.619
Validation loss after 35 epochs is 0.558
Validation loss after 40 epochs is 0.506
Validation loss after 45 epochs is 0.499
Validation loss after 50 epochs is 0.495
The model validation accuracy is: 57.16%


Note: even if we normalize the outputs of a classification model trained with mean squared error loss, it barely helps:

In [11]:
# Same two-layer sigmoid network as before; the only change is that the
# mean squared error loss is built with normalize=True.
hidden_layer = layers.Dense(neurons=89, activation=operations.Sigmoid())
output_layer = layers.Dense(neurons=10, activation=operations.Sigmoid())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=losses.MeanSquaredError(normalize=True),
    seed=RANDOM_SEED,
)

sgd = optimizers.Sgd(0.1)
trainer = Trainer(model, sgd)
trainer.fit(
    X_train, train_labels,
    X_test, test_labels,
    epochs=50, eval_every=5, seed=RANDOM_SEED, batch_size=60,
)

calc_accuracy_model(model, X_test)

Validation loss after 5 epochs is 0.573

Loss increased after epoch 10, final loss was 0.573, 
using the model from epoch 5
The model validation accuracy is: 62.54%


The reason is that we should be using softmax cross entropy loss!

In [12]:
# Sigmoid hidden layer, linear output layer, softmax cross entropy loss.
network_layers = [
    layers.Dense(neurons=89, activation=operations.Sigmoid()),
    layers.Dense(neurons=10, activation=operations.Linear()),
]
model = NeuralNetwork(
    layers=network_layers, loss=losses.SoftmaxCrossEntropy(), seed=RANDOM_SEED
)

trainer = Trainer(model, optimizers.Sgd(0.1))
training_schedule = {
    "epochs": 50,
    "eval_every": 5,
    "seed": RANDOM_SEED,
    "batch_size": 60,
}
trainer.fit(X_train, train_labels, X_test, test_labels, **training_schedule)
# Blank line to separate the training log from the accuracy report.
print()
calc_accuracy_model(model, X_test)

Validation loss after 5 epochs is 0.719
Validation loss after 10 epochs is 0.611
Validation loss after 15 epochs is 0.559
Validation loss after 20 epochs is 0.530
Validation loss after 25 epochs is 0.505
Validation loss after 30 epochs is 0.488
Validation loss after 35 epochs is 0.475
Validation loss after 40 epochs is 0.467
Validation loss after 45 epochs is 0.459
Validation loss after 50 epochs is 0.453

The model validation accuracy is: 92.61%


## SGD Momentum

In [10]:
# Softmax cross entropy network again, now optimized with SgdMomentum
# (momentum=0.9) instead of plain Sgd.
hidden_layer = layers.Dense(neurons=89, activation=operations.Sigmoid())
output_layer = layers.Dense(neurons=10, activation=operations.Linear())
softmax_loss = losses.SoftmaxCrossEntropy()
model = NeuralNetwork(
    layers=[hidden_layer, output_layer], loss=softmax_loss, seed=RANDOM_SEED
)

optim = optimizers.SgdMomentum(0.1, momentum=0.9)

trainer = Trainer(model, optim)
trainer.fit(
    X_train, train_labels,
    X_test, test_labels,
    epochs=50, eval_every=5, seed=RANDOM_SEED, batch_size=60,
)

calc_accuracy_model(model, X_test)

Validation loss after 5 epochs is 0.412
Validation loss after 10 epochs is 0.348

Loss increased after epoch 15, final loss was 0.348, 
using the model from epoch 10
The model validation accuracy is: 94.26%


## Learning rate decay

In [11]:
# Momentum SGD configured with a LINEAR decay type and final_lr=0.05; the
# leading 0.15 is presumably the starting learning rate.
network_layers = [
    layers.Dense(neurons=89, activation=operations.Sigmoid()),
    layers.Dense(neurons=10, activation=operations.Linear()),
]
model = NeuralNetwork(
    layers=network_layers, loss=losses.SoftmaxCrossEntropy(), seed=RANDOM_SEED
)

optimizer = optimizers.SgdMomentum(
    0.15,
    momentum=0.9,
    final_lr=0.05,
    decay_type=optimizers.DecayType.LINEAR,
)

trainer = Trainer(model, optimizer)
fit_options = dict(epochs=25, eval_every=5, seed=RANDOM_SEED, batch_size=60)
trainer.fit(X_train, train_labels, X_test, test_labels, **fit_options)

calc_accuracy_model(model, X_test)

Validation loss after 5 epochs is 0.375
Validation loss after 10 epochs is 0.333

Loss increased after epoch 15, final loss was 0.333, 
using the model from epoch 10
The model validation accuracy is: 94.96%


In [12]:
# Momentum SGD configured with an EXPONENTIAL decay type and final_lr=0.05;
# the leading 0.2 is presumably the starting learning rate.
hidden_layer = layers.Dense(neurons=89, activation=operations.Sigmoid())
output_layer = layers.Dense(neurons=10, activation=operations.Linear())
model = NeuralNetwork(
    layers=[hidden_layer, output_layer],
    loss=losses.SoftmaxCrossEntropy(),
    seed=RANDOM_SEED,
)

optimizer = optimizers.SgdMomentum(
    0.2,
    momentum=0.9,
    final_lr=0.05,
    decay_type=optimizers.DecayType.EXPONENTIAL,
)

trainer = Trainer(model, optimizer)
trainer.fit(
    X_train, train_labels,
    X_test, test_labels,
    epochs=25, eval_every=5, seed=RANDOM_SEED, batch_size=60,
)

calc_accuracy_model(model, X_test)

Validation loss after 5 epochs is 0.376
Validation loss after 10 epochs is 0.301

Loss increased after epoch 15, final loss was 0.301, 
using the model from epoch 10
The model validation accuracy is: 95.29%


## Changing weight init

In [None]:
# Same architecture and optimizer settings as the exponential-decay
# experiment; the only intended change is weight_init="glorot" on both
# dense layers.
model = NeuralNetwork(
    layers=[
        layers.Dense(neurons=89, activation=operations.Sigmoid(), weight_init="glorot"),
        layers.Dense(neurons=10, activation=operations.Linear(), weight_init="glorot"),
    ],
    loss=losses.SoftmaxCrossEntropy(),
    seed=RANDOM_SEED,
)

# Use the DecayType enum member, as the other decay experiments do, rather
# than the raw string 'exponential'. A plain string is not guaranteed to
# compare equal to an enum member, which could silently disable the decay.
optimizer = optimizers.SgdMomentum(
    0.2,
    momentum=0.9,
    final_lr=0.05,
    decay_type=optimizers.DecayType.EXPONENTIAL,
)

trainer = Trainer(model, optimizer)
trainer.fit(
    X_train,
    train_labels,
    X_test,
    test_labels,
    epochs=25,
    eval_every=5,
    seed=RANDOM_SEED,
    batch_size=60,
)

calc_accuracy_model(model, X_test)

Validation loss after 5 epochs is 0.169
Validation loss after 10 epochs is 0.160

Loss increased after epoch 15, final loss was 0.160, 
using the model from epoch 10
The model validation accuracy is: 97.46%
