In [1]:
import numpy as np
import tensorflow as tf

In [2]:
def _load_split(path):
    """Load one dataset split from an .npz archive.

    Parameters
    ----------
    path : str
        Path of the .npz file; it must contain the arrays 'inputs' and 'targets'.

    Returns
    -------
    tuple of np.ndarray
        (inputs, targets), with inputs cast to float and targets cast to int.
    """
    # The context manager closes the archive's file handle once the arrays are read.
    with np.load(path) as npz:
        # astype() returns a copy of the array cast to the requested type.
        # The builtins float/int are what the np.float/np.int aliases referred to;
        # those aliases were deprecated in NumPy 1.20 and removed in NumPy 1.24.
        return npz['inputs'].astype(float), npz['targets'].astype(int)


train_inputs, train_targets = _load_split('Audiobooks_data_train.npz')
validation_inputs, validation_targets = _load_split('Audiobooks_data_validation.npz')
test_inputs, test_targets = _load_split('Audiobooks_data_test.npz')

### Model
##### Outline optimizers, loss, early stopping and training

In [3]:
# --- Network dimensions ---
input_size = 10         # 10 predictors
output_size = 2         # 2 output classes (0s and 1s)
hidden_layer_size = 50  # units in each hidden layer

# --- Training settings ---
batch_size = 100
max_epochs = 100

# Two ReLU hidden layers followed by a softmax output layer.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
model.add(tf.keras.layers.Dense(output_size, activation='softmax'))

# 'sparse_categorical_crossentropy' takes the targets as integer class labels,
# so they do not have to be one-hot encoded beforehand.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Early stopping monitors the validation loss and halts training once it stops improving.
# 'patience' is the number of epochs without improvement that are tolerated; with the
# default of 0 (spelled out here) training stops at the first epoch that fails to improve.
# A larger patience would guard against a single increase that is merely due to chance.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=0)

# validation_data is given as an (inputs, targets) 2-tuple.
model.fit(
    x=train_inputs,
    y=train_targets,
    batch_size=batch_size,
    epochs=max_epochs,
    callbacks=[early_stopping],
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

Epoch 1/100
36/36 - 2s - loss: 0.5274 - accuracy: 0.7175 - val_loss: 0.4609 - val_accuracy: 0.7494
Epoch 2/100
36/36 - 0s - loss: 0.4360 - accuracy: 0.7723 - val_loss: 0.4174 - val_accuracy: 0.7919
Epoch 3/100
36/36 - 0s - loss: 0.4026 - accuracy: 0.7846 - val_loss: 0.3939 - val_accuracy: 0.8143
Epoch 4/100
36/36 - 0s - loss: 0.3816 - accuracy: 0.7927 - val_loss: 0.3772 - val_accuracy: 0.8143
Epoch 5/100
36/36 - 0s - loss: 0.3713 - accuracy: 0.8013 - val_loss: 0.3649 - val_accuracy: 0.8009
Epoch 6/100
36/36 - 0s - loss: 0.3597 - accuracy: 0.8094 - val_loss: 0.3659 - val_accuracy: 0.8143


<tensorflow.python.keras.callbacks.History at 0x161c0656c40>

##### Given 10 customers and their audiobook activity, we can correctly predict the future behaviour of around 8 of them,
###### as our validation accuracy is around 80%
###### We have leveraged AI to reach a business insight!

### Test the Model

In [4]:
# model.evaluate() runs the model in 'test mode' and returns the loss value followed by
# the metric values requested at compile time (here: accuracy).
test_loss, test_accuracy = model.evaluate(x=test_inputs, y=test_targets)



In [5]:
# Format the results to two decimals so they are neater and easier to read;
# the accuracy is shown as a percentage.
report_template = 'Test loss: {0:.2f}. Test accuracy: {1:.2f}%'
test_accuracy_percent = test_accuracy * 100.
print(report_template.format(test_loss, test_accuracy_percent))

Test loss: 0.35. Test accuracy: 79.91%


In [6]:
# The test accuracy is close to the validation accuracy because we tuned the hyperparameters very little, so the model did not overfit the validation set