In [2]:
from loop import TrainingLoop
import tensorflow as tf

length = 10

@tf.function
def calc_loss(x_train, y_train):
    with tf.GradientTape() as tape:
        logits = model(x_train, training=False)
        loss_value = loss_function(y_train, logits)
    return loss_value


def batch_selector(data, idx):
    largest_loss = 0
    largest_loss_idx = idx

    if idx < len(data) - length:
        for i in range(idx, idx+length):
            x_batch_train = data[i][0]
            y_batch_train = data[i][1]
            loss = calc_loss(x_batch_train, y_batch_train)
            if loss > largest_loss:
                largest_loss = loss
                largest_loss_idx = i
        return largest_loss_idx
    else:
        loss = calc_loss(data[idx][0], data[idx][1])
        return idx

In [3]:
import os
import numpy as np

# Function to load dataset from file. This is needed so we can easily load the two datasets without copy pasteing.
def load_data( name ):
    X_train = np.load(os.path.join('data', name, name + '_train_vectors.npy'))
    X_test = np.load(os.path.join('data', name, name + '_test_vectors.npy'))
    Y_train = np.load(os.path.join('data', name, name + '_train_labels.npy'))
    Y_test = np.load(os.path.join('data', name, name + '_test_labels.npy'))

    # The images need to have shape (28, 28, 1), we didn't take care of this in preprocessing.
    X_train = np.expand_dims(X_train, -1)
    X_test = np.expand_dims(X_test, -1)

    return X_train, Y_train, X_test, Y_test

# The same model is used for both datasets so it is more convenient to make them in a funtion.
def make_model(X_train, Y_train):

    # This is a simple convolutional neural network. It isn't the best possible network for MNIST
    # but the point here is to test how much batch selection methods will speed up a CNN, not the CNN itself.
    model = Sequential()
    model.add(layers.Input(shape = (28, 28, 1,)))
    model.add(layers.Conv2D(64, kernel_size = (3, 3), activation = "relu"))
    model.add(layers.MaxPooling2D( pool_size = (2, 2)))
    model.add(layers.Conv2D(64, kernel_size = (3, 3), activation = "relu"))
    model.add(layers.MaxPooling2D(pool_size = (2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(10, activation = "softmax"))
    
    # Put the model in our custom training loop.
    training = TrainingLoop(
        model = model,
        X = X_train,
        y = Y_train,
        optimizer = keras.optimizers.Adam(),
        loss_function = keras.losses.CategoricalCrossentropy(from_logits=True),
        batch_size = 64,
        train_metrics = tf.keras.metrics.CategoricalAccuracy(),
        val_metrics = tf.keras.metrics.CategoricalAccuracy(),
        validation_split = 0.2,
        batch_selection = batch_selector,
    )

    # We still have to compile the model for the test evaluation.
    model.compile(loss = "categorical_crossentropy", optimizer = "adam", metrics=["accuracy"])

    return model, training


In [4]:
from loop import TrainingLoop
import tensorflow as tf

# These lines will make the gpu not give errors.
gpus= tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpus[0], True)

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

# Set random seed so the comparison of different solutions won't be affected by it.
tf.random.set_seed(42)
np.random.seed(42)


In [6]:
# Load and train the MNIST dataset.
X_train, Y_train, X_test, Y_test = load_data( "mnist" )
model, training = make_model( X_train, Y_train )

loss_function = training.LossFunction

training.train(epochs = 20)

Epoch 1/20	Loss: 1.5052	Metrics: 0.8748: 	Validation metrics: 0.9642: 	100% | 750/750 [00:09<00:00, 82.65it/s] 
Epoch 2/20	Loss: 1.4952	Metrics: 0.9712: 	Validation metrics: 0.9777: 	100% | 750/750 [00:03<00:00, 222.78it/s]
Epoch 3/20	Loss: 1.4818	Metrics: 0.9780: 	Validation metrics: 0.9822: 	100% | 750/750 [00:03<00:00, 216.87it/s]
Epoch 4/20	Loss: 1.4821	Metrics: 0.9828: 	Validation metrics: 0.9826: 	100% | 750/750 [00:03<00:00, 213.20it/s]
Epoch 5/20	Loss: 1.4797	Metrics: 0.9852: 	Validation metrics: 0.9811: 	100% | 750/750 [00:03<00:00, 218.22it/s]
Epoch 6/20	Loss: 1.4629	Metrics: 0.9869: 	Validation metrics: 0.9835: 	100% | 750/750 [00:03<00:00, 215.39it/s]
Epoch 7/20	Loss: 1.4672	Metrics: 0.9882: 	Validation metrics: 0.9848: 	100% | 750/750 [00:03<00:00, 226.16it/s]
Epoch 8/20	Loss: 1.4625	Metrics: 0.9892: 	Validation metrics: 0.9843: 	100% | 750/750 [00:03<00:00, 217.44it/s]
Epoch 9/20	Loss: 1.4679	Metrics: 0.9898: 	Validation metrics: 0.9818: 	100% | 750/750 [00:03<00:00, 209.

In [7]:
# Evaluate the MNIST model.
model.evaluate( X_test, Y_test )



[0.0780782550573349, 0.9886000156402588]