In [1]:
import os
import numpy as np
import tensorflow as tf
from keras import models, layers

from loop import TrainingLoop
from batch_selection import windowed_batch_selector, sorting_batch_selector

In [2]:
# Read the four arrays written by the preprocessing notebook. All of them live in
# `path` and share the same file-name `prefix`.
path = 'data/wine'
prefix = 'wine_'
# The order of the file names matches the order of the unpacked variables.
X_train, y_train, X_test, y_test = (
    np.load(os.path.join(path, prefix + file_name))
    for file_name in (
        'train_vectors.npy',
        'train_labels.npy',
        'test_vectors.npy',
        'test_labels.npy',
    )
)

In [3]:
def build_model(input_dim=None, num_classes=None, hidden_units=(15, 8)):
    """Build the (uncompiled) dense classifier used in every experiment.

    The network is a stack of ReLU `Dense` layers followed by a softmax
    `Dense` output layer.

    Args:
        input_dim: Number of input features. Defaults to `X_train.shape[1]`
            of the notebook-level training array, which is what the original
            zero-argument version always used.
        num_classes: Number of output units. Defaults to `y_train.shape[1]`
            of the notebook-level label array.
        hidden_units: Width of each hidden layer, in order. The default
            `(15, 8)` reproduces the original architecture. Must contain at
            least one entry.

    Returns:
        A `keras.models.Sequential` model that has not been compiled.

    Raises:
        ValueError: If `hidden_units` is empty.
    """
    # Fall back to the globally loaded dataset only when the caller gives no sizes,
    # so existing `build_model()` calls keep working unchanged.
    if input_dim is None:
        input_dim = X_train.shape[1]
    if num_classes is None:
        num_classes = y_train.shape[1]

    hidden_units = tuple(hidden_units)
    if not hidden_units:
        raise ValueError('hidden_units must contain at least one layer width')

    model = models.Sequential()
    # Only the first layer is told the input shape.
    model.add(layers.Dense(hidden_units[0], activation='relu', input_shape=(input_dim,)))
    for units in hidden_units[1:]:
        model.add(layers.Dense(units, activation='relu'))
    model.add(layers.Dense(num_classes, activation='softmax'))
    return model

In [4]:
def train(model, X_train, y_train, batch_selection, epochs):
    """Train `model` with the project's custom `TrainingLoop`.

    The loop is configured with a 0.1 validation split, batch size 8, an Adam
    optimizer, categorical cross-entropy loss and separate categorical-accuracy
    metric objects for training and validation. The log is written to
    `logs/<strategy>/wine_quality.csv`.

    Args:
        model: Model to train (as returned by `build_model`).
        X_train: Training inputs, forwarded to `TrainingLoop`.
        y_train: Training targets, forwarded to `TrainingLoop`.
        batch_selection: Batch selection strategy forwarded to `TrainingLoop`;
            `windowed_batch_selector`, `sorting_batch_selector`, or `None`
            for the default behaviour. Other callables are accepted and are
            logged under a folder named after the callable.
        epochs: Number of epochs passed to `TrainingLoop.train`.

    Returns:
        None.
    """
    # Sub-folder of `logs/` used for each known batch selection strategy.
    log_dir = {windowed_batch_selector: 'windowed', sorting_batch_selector: 'sorting', None: 'original'}
    subdir = log_dir.get(batch_selection)
    if subdir is None:
        # Unknown selector: name the folder after the callable instead of
        # failing with a bare KeyError.
        subdir = getattr(batch_selection, '__name__', 'custom')

    log_file = os.path.join('logs', subdir, 'wine_quality.csv')
    # Ensure the target folder exists on a fresh checkout.
    # NOTE(review): TrainingLoop may already create it; this call is a no-op then.
    os.makedirs(os.path.dirname(log_file), exist_ok=True)

    # Put the model in our custom training loop.
    TrainingLoop(
        model,
        X_train,
        y_train,
        validation_split = 0.1,
        batch_size = 8,
        optimizer = tf.keras.optimizers.Adam(),
        loss_function = tf.keras.losses.CategoricalCrossentropy(),
        # Two separate metric objects are passed for training and validation.
        train_metrics = tf.keras.metrics.CategoricalAccuracy(),
        val_metrics = tf.keras.metrics.CategoricalAccuracy(),
        batch_selection = batch_selection,
        log_file = log_file
    ).train(epochs)  # Training the model.

In [5]:
# Reset both RNGs to the same fixed seed before the run, so that differences
# between the batch selection strategies are not caused by the random seed.
np.random.seed(42)
tf.random.set_seed(42)

# Baseline run: no batch selector is passed (batch_selection=None).
model = build_model()
train(model=model, X_train=X_train, y_train=y_train, batch_selection=None, epochs=20)

# The model still has to be compiled before it can be evaluated on the test set.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# Last expression of the cell, so the evaluation result is displayed as its output.
model.evaluate(x=X_test, y=y_test)

Epoch 1/20	Loss: 1.2288	Metrics: 0.3083: 	Validation metrics: 0.5: 	100% | 15/15 [00:08<00:00,  1.77it/s]
Epoch 2/20	Loss: 1.0411	Metrics: 0.3333: 	Validation metrics: 0.5: 	100% | 15/15 [00:00<00:00, 20.49it/s]
Epoch 3/20	Loss: 0.9005	Metrics: 0.4000: 	Validation metrics: 0.625: 	100% | 15/15 [00:00<00:00, 33.71it/s]
Epoch 4/20	Loss: 0.7932	Metrics: 0.5416: 	Validation metrics: 0.625: 	100% | 15/15 [00:00<00:00, 21.19it/s]
Epoch 5/20	Loss: 0.6862	Metrics: 0.6000: 	Validation metrics: 0.75: 	100% | 15/15 [00:00<00:00, 18.29it/s]
Epoch 6/20	Loss: 0.5969	Metrics: 0.6333: 	Validation metrics: 0.75: 	100% | 15/15 [00:00<00:00, 33.63it/s]
Epoch 7/20	Loss: 0.5225	Metrics: 0.6499: 	Validation metrics: 0.875: 	100% | 15/15 [00:00<00:00, 27.73it/s]
Epoch 8/20	Loss: 0.4646	Metrics: 0.6750: 	Validation metrics: 0.875: 	100% | 15/15 [00:00<00:00, 25.73it/s]
Epoch 9/20	Loss: 0.4231	Metrics: 0.7166: 	Validation metrics: 1.0: 	100% | 15/15 [00:00<00:00, 33.26it/s]
Epoch 10/20	Loss: 0.3914	Metrics: 0.

[0.24152790009975433, 0.9722222089767456]

In [6]:
# Reset both RNGs to the same fixed seed before the run, so that differences
# between the batch selection strategies are not caused by the random seed.
np.random.seed(42)
tf.random.set_seed(42)

# Fresh model trained with the windowed batch selection algorithm.
model = build_model()
train(
    model=model,
    X_train=X_train,
    y_train=y_train,
    batch_selection=windowed_batch_selector,
    epochs=20,
)

# Compile for the test evaluation, then display the evaluation result.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.evaluate(x=X_test, y=y_test)

Epoch 1/20	Loss: 1.1952	Metrics: 0.3083: 	Validation metrics: 0.5: 	100% | 15/15 [00:03<00:00,  3.77it/s]
Epoch 2/20	Loss: 1.0161	Metrics: 0.3083: 	Validation metrics: 0.625: 	100% | 15/15 [00:00<00:00, 20.03it/s]
Epoch 3/20	Loss: 0.9030	Metrics: 0.3083: 	Validation metrics: 0.625: 	100% | 15/15 [00:00<00:00, 30.67it/s]
Epoch 4/20	Loss: 0.8162	Metrics: 0.375: 	Validation metrics: 0.75: 	100% | 15/15 [00:00<00:00, 37.60it/s]
Epoch 5/20	Loss: 0.7363	Metrics: 0.4583: 	Validation metrics: 0.875: 	100% | 15/15 [00:00<00:00, 46.15it/s]
Epoch 6/20	Loss: 0.6665	Metrics: 0.4833: 	Validation metrics: 1.0: 	100% | 15/15 [00:00<00:00, 48.23it/s]
Epoch 7/20	Loss: 0.6088	Metrics: 0.5416: 	Validation metrics: 1.0: 	100% | 15/15 [00:00<00:00, 49.02it/s]
Epoch 8/20	Loss: 0.5553	Metrics: 0.6166: 	Validation metrics: 1.0: 	100% | 15/15 [00:00<00:00, 39.27it/s]
Epoch 9/20	Loss: 0.5043	Metrics: 0.6916: 	Validation metrics: 1.0: 	100% | 15/15 [00:00<00:00, 36.50it/s]
Epoch 10/20	Loss: 0.4605	Metrics: 0.7749

[0.2500136196613312, 0.9722222089767456]

In [7]:
# Reset both RNGs to the same fixed seed before the run, so that differences
# between the batch selection strategies are not caused by the random seed.
np.random.seed(42)
tf.random.set_seed(42)

# Fresh model trained with the sorting batch selection algorithm.
# NOTE(review): the saved output of this cell reports "Epoch n/50" although 20
# epochs are requested here, so it appears to come from an earlier run. Re-run
# the cell before comparing its test score with the other two strategies.
model = build_model()
train(
    model=model,
    X_train=X_train,
    y_train=y_train,
    batch_selection=sorting_batch_selector,
    epochs=20,
)

# Compile for the test evaluation, then display the evaluation result.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.evaluate(x=X_test, y=y_test)

Epoch 1/50	Loss: 0.8694	Metrics: 0.3083: 	Validation metrics: 0.5: 	100% | 15/15 [00:05<00:00,  2.67it/s]
Epoch 2/50	Loss: 0.8017	Metrics: 0.3249: 	Validation metrics: 0.5: 	100% | 15/15 [00:00<00:00, 43.23it/s]
Epoch 3/50	Loss: 0.7428	Metrics: 0.4000: 	Validation metrics: 0.625: 	100% | 15/15 [00:00<00:00, 53.96it/s]
Epoch 4/50	Loss: 0.6960	Metrics: 0.5083: 	Validation metrics: 0.75: 	100% | 15/15 [00:00<00:00, 51.37it/s]
Epoch 5/50	Loss: 0.6533	Metrics: 0.5916: 	Validation metrics: 0.75: 	100% | 15/15 [00:00<00:00, 61.23it/s]
Epoch 6/50	Loss: 0.6109	Metrics: 0.6416: 	Validation metrics: 0.75: 	100% | 15/15 [00:00<00:00, 34.56it/s]
Epoch 7/50	Loss: 0.5723	Metrics: 0.6583: 	Validation metrics: 0.875: 	100% | 15/15 [00:00<00:00, 47.62it/s]
Epoch 8/50	Loss: 0.4817	Metrics: 0.6750: 	Validation metrics: 0.875: 	100% | 15/15 [00:00<00:00, 39.27it/s]
Epoch 9/50	Loss: 0.4356	Metrics: 0.7083: 	Validation metrics: 1.0: 	100% | 15/15 [00:00<00:00, 36.59it/s]
Epoch 10/50	Loss: 0.4018	Metrics: 0.7

[0.02011900767683983, 1.0]