## Audiobooks - Machine Learning Model

### 1. Load libraries and data 

In [1]:
# import libraries
import numpy as np
import tensorflow as tf

def _load_split(path):
    """Load one dataset split from an ``.npz`` archive.

    Parameters
    ----------
    path : str
        Path to an archive holding an ``'inputs'`` and a ``'targets'`` array.

    Returns
    -------
    tuple of numpy.ndarray
        ``(inputs, targets)`` with inputs cast to ``float`` and targets cast
        to ``int``.
    """
    # np.float / np.int were plain aliases of the builtins; they were deprecated
    # in NumPy 1.20 and removed in 1.24. The builtins yield the same dtypes.
    # The context manager closes the archive's file handle once both arrays
    # have been read into memory.
    with np.load(path) as npz:
        return npz['inputs'].astype(float), npz['targets'].astype(int)


# load train data
train_inputs, train_targets = _load_split('Audiobooks_data_train.npz')

# load validation data
validation_inputs, validation_targets = _load_split('Audiobooks_data_validation.npz')

# load test data
test_inputs, test_targets = _load_split('Audiobooks_data_test.npz')

### 2. First Model

Outline, optimizers, loss and training

In [None]:
# Network dimensions. input_size is recorded here but is not passed to the
# model below (no input shape is given to any layer).
input_size = 10
output_size = 2
hidden_layer_size = 50

# Two ReLU hidden layers followed by a softmax output layer
model_1 = tf.keras.Sequential()
model_1.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
model_1.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
model_1.add(tf.keras.layers.Dense(output_size, activation='softmax'))

# Integer class targets -> sparse categorical cross-entropy
model_1.compile(
    optimizer='Adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Training hyperparameters
batch_size = 100
max_epochs = 100

# Train for the full number of epochs, reporting validation metrics each epoch
model_1.fit(
    x=train_inputs,
    y=train_targets,
    validation_data=(validation_inputs, validation_targets),
    epochs=max_epochs,
    batch_size=batch_size,
    verbose=2,
)

Train on 3579 samples, validate on 447 samples
Epoch 1/100
3579/3579 - 1s - loss: 0.5230 - accuracy: 0.8449 - val_loss: 0.4137 - val_accuracy: 0.8501
Epoch 2/100
3579/3579 - 0s - loss: 0.3592 - accuracy: 0.8737 - val_loss: 0.3542 - val_accuracy: 0.8635
Epoch 3/100
3579/3579 - 0s - loss: 0.3155 - accuracy: 0.8838 - val_loss: 0.3285 - val_accuracy: 0.8770
Epoch 4/100
3579/3579 - 0s - loss: 0.2982 - accuracy: 0.8882 - val_loss: 0.3140 - val_accuracy: 0.8837
Epoch 5/100
3579/3579 - 0s - loss: 0.2835 - accuracy: 0.8938 - val_loss: 0.2903 - val_accuracy: 0.8904
Epoch 6/100
3579/3579 - 0s - loss: 0.2758 - accuracy: 0.8952 - val_loss: 0.2788 - val_accuracy: 0.8949
Epoch 7/100
3579/3579 - 0s - loss: 0.2656 - accuracy: 0.9011 - val_loss: 0.2701 - val_accuracy: 0.8971
Epoch 8/100
3579/3579 - 0s - loss: 0.2601 - accuracy: 0.9030 - val_loss: 0.2691 - val_accuracy: 0.8971
Epoch 9/100
3579/3579 - 0s - loss: 0.2544 - accuracy: 0.9050 - val_loss: 0.2577 - val_accuracy: 0.9038
Epoch 10/100
3579/3579 - 0

Epoch 80/100
3579/3579 - 0s - loss: 0.2115 - accuracy: 0.9201 - val_loss: 0.2537 - val_accuracy: 0.9083
Epoch 81/100
3579/3579 - 0s - loss: 0.2089 - accuracy: 0.9226 - val_loss: 0.2405 - val_accuracy: 0.9060
Epoch 82/100
3579/3579 - 0s - loss: 0.2083 - accuracy: 0.9206 - val_loss: 0.2418 - val_accuracy: 0.9016
Epoch 83/100
3579/3579 - 0s - loss: 0.2087 - accuracy: 0.9232 - val_loss: 0.2454 - val_accuracy: 0.9060
Epoch 84/100
3579/3579 - 0s - loss: 0.2073 - accuracy: 0.9232 - val_loss: 0.2432 - val_accuracy: 0.9038
Epoch 85/100
3579/3579 - 0s - loss: 0.2102 - accuracy: 0.9204 - val_loss: 0.2477 - val_accuracy: 0.9060
Epoch 86/100
3579/3579 - 0s - loss: 0.2107 - accuracy: 0.9215 - val_loss: 0.2487 - val_accuracy: 0.9038
Epoch 87/100
3579/3579 - 0s - loss: 0.2064 - accuracy: 0.9223 - val_loss: 0.2434 - val_accuracy: 0.9038
Epoch 88/100
3579/3579 - 0s - loss: 0.2077 - accuracy: 0.9218 - val_loss: 0.2496 - val_accuracy: 0.9083
Epoch 89/100
3579/3579 - 0s - loss: 0.2079 - accuracy: 0.9220 - 

Notice how the training loss keeps decreasing, while the validation loss stops improving and only fluctuates (roughly 0.24–0.25 in the last epochs shown). This is a sign that the model overfits.

### 3. Second Model with early stopping

Outline, optimizers, loss, early stopping and training

In [None]:
# Network dimensions. input_size is recorded here but is not passed to the
# model below (no input shape is given to any layer).
input_size = 10
output_size = 2
hidden_layer_size = 50

# Same architecture as the first model: two ReLU hidden layers + softmax output
model_2 = tf.keras.Sequential()
model_2.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
model_2.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
model_2.add(tf.keras.layers.Dense(output_size, activation='softmax'))

# Integer class targets -> sparse categorical cross-entropy
model_2.compile(
    optimizer='Adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Training hyperparameters
batch_size = 100
max_epochs = 100

# Stop training once the monitored quantity has failed to improve for 2 epochs
# (Keras monitors val_loss unless told otherwise)
early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)

# max_epochs is only an upper bound here; the callback may end training earlier
model_2.fit(
    x=train_inputs,
    y=train_targets,
    validation_data=(validation_inputs, validation_targets),
    callbacks=[early_stopping],
    epochs=max_epochs,
    batch_size=batch_size,
    verbose=2,
)

### 4. Third Model

Outline, optimizers, loss, early stopping and training

In [None]:
# Network dimensions. input_size is recorded here but is not passed to the
# model below (no input shape is given to any layer).
# Compared with the earlier models, the hidden layers are wider (200 vs 50).
input_size = 10
output_size = 2
hidden_layer_size = 200

# Two ReLU hidden layers followed by a softmax output layer
model_3 = tf.keras.Sequential()
model_3.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
model_3.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
model_3.add(tf.keras.layers.Dense(output_size, activation='softmax'))

# Integer class targets -> sparse categorical cross-entropy
model_3.compile(
    optimizer='Adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Training hyperparameters (smaller batches than the earlier models: 50 vs 100)
batch_size = 50
max_epochs = 100

# Stop training once the monitored quantity has failed to improve for 2 epochs
# (Keras monitors val_loss unless told otherwise)
early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)

# max_epochs is only an upper bound here; the callback may end training earlier
model_3.fit(
    x=train_inputs,
    y=train_targets,
    validation_data=(validation_inputs, validation_targets),
    callbacks=[early_stopping],
    epochs=max_epochs,
    batch_size=batch_size,
    verbose=2,
)

### 5. Test the model

In [None]:
# Score the first model on the test split; evaluate() yields loss then accuracy
test_loss_1, test_accuracy_1 = model_1.evaluate(x=test_inputs, y=test_targets, verbose=2)
print(
    'Model 1. Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(
        test_loss_1,
        100. * test_accuracy_1,  # accuracy is a fraction; show it as a percentage
    )
)

In [None]:
# Score the second model on the test split; evaluate() yields loss then accuracy
test_loss_2, test_accuracy_2 = model_2.evaluate(x=test_inputs, y=test_targets, verbose=2)
print(
    'Model 2. Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(
        test_loss_2,
        100. * test_accuracy_2,  # accuracy is a fraction; show it as a percentage
    )
)

In [None]:
# Score the third model on the test split; evaluate() yields loss then accuracy
test_loss_3, test_accuracy_3 = model_3.evaluate(x=test_inputs, y=test_targets, verbose=2)
print(
    'Model 3. Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(
        test_loss_3,
        100. * test_accuracy_3,  # accuracy is a fraction; show it as a percentage
    )
)