In [2]:
from loop import TrainingLoop
import os
import numpy as np
import tensorflow as tf

# These lines will make the gpu not give errors.
gpus= tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpus[0], True)

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

# Set random seed so the comparison of different solutions won't be affected by it.
tf.random.set_seed(42)
np.random.seed(42)


In [None]:
@tf.function
def calc_loss(x_train, y_train):
    with tf.GradientTape() as tape:
        logits = model(x_train, training=False)
        loss_value = keras.losses.CategoricalCrossentropy(from_logits=True)(y_train, logits)
    return loss_value


def batch_selector(data, idx):
    largest_loss = 0
    largest_loss_idx = idx

    if idx < len(data) - length:
        for i in range(idx, idx+length):
            x_batch_train = data[i][0]
            y_batch_train = data[i][1]
            loss = calc_loss(x_batch_train, y_batch_train)
            if loss > largest_loss:
                largest_loss = loss
                largest_loss_idx = i
        return largest_loss_idx
    else:
        loss = calc_loss(data[idx][0], data[idx][1])
        return idx

In [3]:
length = 75
log_path = 'logs/original/mnist.csv'


# Function to load dataset from file. This is needed so we can easily load the two datasets without copy pasteing.
def load_data( name ):
    X_train = np.load(os.path.join('data', name, name + '_train_vectors.npy'))
    X_test = np.load(os.path.join('data', name, name + '_test_vectors.npy'))
    Y_train = np.load(os.path.join('data', name, name + '_train_labels.npy'))
    Y_test = np.load(os.path.join('data', name, name + '_test_labels.npy'))

    # The images need to have shape (28, 28, 1), we didn't take care of this in preprocessing.
    X_train = np.expand_dims(X_train, -1)
    X_test = np.expand_dims(X_test, -1)

    return X_train, Y_train, X_test, Y_test

# The same model is used for both datasets so it is more convenient to make them in a funtion.
def make_model(X_train, Y_train):

    # This is a simple convolutional neural network. It isn't the best possible network for MNIST
    # but the point here is to test how much batch selection methods will speed up a CNN, not the CNN itself.
    model = Sequential()
    model.add(layers.Input(shape = (28, 28, 1,)))
    model.add(layers.Conv2D(64, kernel_size = (3, 3), activation = "relu"))
    model.add(layers.MaxPooling2D( pool_size = (2, 2)))
    model.add(layers.Conv2D(64, kernel_size = (3, 3), activation = "relu"))
    model.add(layers.MaxPooling2D(pool_size = (2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(10, activation = "softmax"))
    
    # Put the model in our custom training loop.
    training = TrainingLoop(
        model = model,
        X = X_train,
        y = Y_train,
        optimizer = keras.optimizers.Adam(),
        loss_function = keras.losses.CategoricalCrossentropy(from_logits=True),
        batch_size = 64,
        train_metrics = tf.keras.metrics.CategoricalAccuracy(),
        val_metrics = tf.keras.metrics.CategoricalAccuracy(),
        validation_split = 0.2,
        batch_selection = batch_selector,
        length=length
    )

    # We still have to compile the model for the test evaluation.
    model.compile(loss = "categorical_crossentropy", optimizer = "adam", metrics=["accuracy"], log_file=log_path)

    return model, training



In [4]:
# Load and train the MNIST dataset.
X_train, Y_train, X_test, Y_test = load_data( "mnist" )
model, training = make_model( X_train, Y_train )

training.train(epochs = 20)

TypeError: __init__() got an unexpected keyword argument 'length'

In [19]:
# Evaluate the MNIST model.
model.evaluate( X_test, Y_test )



[0.08575702458620071, 0.9886000156402588]

In [22]:
# Load and train the Fashion-MNIST dataset.
X_train_f, Y_train_f, X_test_f, Y_test_f = load_data( "fashion_mnist" )
model_f, training_f = make_model( X_train_f, Y_train_f )

training_f.train(epochs = 20)

Epoch 1/20	Loss: 1.7991	Metrics: 0.7199: 	Validation metrics: 0.8159: 	100% | 750/750 [00:03<00:00, 212.10it/s]
Epoch 2/20	Loss: 1.7035	Metrics: 0.8386: 	Validation metrics: 0.8521: 	100% | 750/750 [00:03<00:00, 236.91it/s]
Epoch 3/20	Loss: 1.6982	Metrics: 0.8594: 	Validation metrics: 0.8611: 	100% | 750/750 [00:03<00:00, 241.20it/s]
Epoch 4/20	Loss: 1.6852	Metrics: 0.8693: 	Validation metrics: 0.8684: 	100% | 750/750 [00:03<00:00, 248.74it/s]
Epoch 5/20	Loss: 1.6428	Metrics: 0.8760: 	Validation metrics: 0.8741: 	100% | 750/750 [00:02<00:00, 259.69it/s]
Epoch 6/20	Loss: 1.6612	Metrics: 0.8815: 	Validation metrics: 0.8759: 	100% | 750/750 [00:02<00:00, 266.28it/s]
Epoch 7/20	Loss: 1.6398	Metrics: 0.8863: 	Validation metrics: 0.8815: 	100% | 750/750 [00:02<00:00, 270.67it/s]
Epoch 8/20	Loss: 1.6129	Metrics: 0.8909: 	Validation metrics: 0.8857: 	100% | 750/750 [00:02<00:00, 266.67it/s]
Epoch 9/20	Loss: 1.6226	Metrics: 0.8959: 	Validation metrics: 0.8871: 	100% | 750/750 [00:02<00:00, 268.

In [28]:
# Evaluate the Fashion-MNIST dataset.
model_f.evaluate( X_test_f, Y_test_f )



[1.0972380638122559, 0.8967999815940857]