In [1]:
import numpy as np
import tensorflow as tf

In [2]:
### Load the .npz data and convert to tensors ###

npz_data = np.load('data/audiobooks_data_split.npz')

# Feature matrices are materialised as float64 tensors, one per split,
# in train / validation / test order.
X_train, X_val, X_test = (
    tf.convert_to_tensor(npz_data[split_key], dtype=tf.float64)
    for split_key in ('X_train', 'X_val', 'X_test')
)

# Target vectors are materialised as int8 tensors, in the same split order.
y_train, y_val, y_test = (
    tf.convert_to_tensor(npz_data[split_key], dtype=tf.int8)
    for split_key in ('y_train', 'y_val', 'y_test')
)

# Sanity check: show the shape of every split.
print(f'{X_train.shape=}, {X_val.shape=}, {X_test.shape=}')
print(f'{y_train.shape=}, {y_val.shape=}, {y_test.shape=}')

X_train.shape=TensorShape([3579, 10]), X_val.shape=TensorShape([447, 10]), X_test.shape=TensorShape([448, 10])
y_train.shape=TensorShape([3579]), y_val.shape=TensorShape([447]), y_test.shape=TensorShape([448])


In [3]:
### Train the baseline model ###

# Seed Python's `random`, NumPy and TensorFlow in one call so that weight
# initialisation and batch shuffling repeat on every "Restart & Run All".
# (Some GPU kernels may still be non-deterministic.) This is global state,
# so any later cell that draws random numbers is seeded by it as well.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

MAX_EPOCHS = 100

input_size = 10
output_size = 2
hidden_layer_size = 64
batch_size = 128
activation='relu'

model = tf.keras.Sequential(
    [
        # Declaring the input shape builds the weights up front and makes a
        # feature-count mismatch fail here instead of on the first batch.
        tf.keras.Input(shape=(input_size,)),
        tf.keras.layers.Dense(hidden_layer_size, activation=activation),
        tf.keras.layers.Dense(hidden_layer_size, activation=activation),
        # One softmax unit per class.
        tf.keras.layers.Dense(output_size, activation='softmax')
    ]
)

# Note: sparse_categorical_crossentropy consumes integer class indices
# directly, so the targets need no one-hot encoding to be compared against
# the `output_size`-unit softmax output.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Stop once val_loss has not improved for 5 consecutive epochs and roll the
# model back to the weights of the best epoch seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# MAX_EPOCHS is only an upper bound; early stopping normally ends training sooner.
history = model.fit(
    X_train,
    y_train,
    epochs=MAX_EPOCHS,
    batch_size=batch_size,
    validation_data=(X_val, y_val),
    verbose=2,  # one summary line per epoch
    callbacks=[early_stopping]
)

# Testing baseline model (only to see how much better the model is after hyperparameter tuning)
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print('-----------------------------------------------------------')
print('Note: Below metrics on test dataset are only used here to see how much better the hyperparameter-tuned model performs.')
print(f'Test loss: {test_loss:.4f}. Test accuracy: {test_accuracy*100:.2f}%.')


Epoch 1/100
28/28 - 1s - 34ms/step - accuracy: 0.6818 - loss: 0.6165 - val_accuracy: 0.6980 - val_loss: 0.5666
Epoch 2/100
28/28 - 0s - 2ms/step - accuracy: 0.7605 - loss: 0.4984 - val_accuracy: 0.7226 - val_loss: 0.4954
Epoch 3/100
28/28 - 0s - 2ms/step - accuracy: 0.7762 - loss: 0.4443 - val_accuracy: 0.7383 - val_loss: 0.4645
Epoch 4/100
28/28 - 0s - 2ms/step - accuracy: 0.7865 - loss: 0.4186 - val_accuracy: 0.7494 - val_loss: 0.4497
Epoch 5/100
28/28 - 0s - 2ms/step - accuracy: 0.7910 - loss: 0.4030 - val_accuracy: 0.7562 - val_loss: 0.4398
Epoch 6/100
28/28 - 0s - 2ms/step - accuracy: 0.8002 - loss: 0.3883 - val_accuracy: 0.7606 - val_loss: 0.4240
Epoch 7/100
28/28 - 0s - 3ms/step - accuracy: 0.8072 - loss: 0.3783 - val_accuracy: 0.7673 - val_loss: 0.4369
Epoch 8/100
28/28 - 0s - 2ms/step - accuracy: 0.8033 - loss: 0.3738 - val_accuracy: 0.7629 - val_loss: 0.4165
Epoch 9/100
28/28 - 0s - 2ms/step - accuracy: 0.8108 - loss: 0.3654 - val_accuracy: 0.7740 - val_loss: 0.4092
Epoch 10/