## Audiobooks - Machine Learning Model

### 1. Load libraries and data 

In [1]:
# import libraries
import numpy as np
import tensorflow as tf

# Load the train / validation / test splits from their .npz archives.
#
# NOTE: the `np.float` / `np.int` aliases were deprecated in NumPy 1.20 and removed
# in NumPy 1.24, where they raise AttributeError. Explicit fixed-width dtypes are
# used instead so the cell runs on current NumPy and yields the same dtypes on
# every platform.
def _load_split(path):
    """Read one dataset split.

    Parameters
    ----------
    path : str
        Path of an .npz archive holding an 'inputs' and a 'targets' array.

    Returns
    -------
    tuple of np.ndarray
        (inputs cast to float64, targets cast to int64).
    """
    # The context manager closes the archive's file handle once both arrays
    # have been read and copied by astype().
    with np.load(path) as npz:
        return npz['inputs'].astype(np.float64), npz['targets'].astype(np.int64)


# load train data
train_inputs, train_targets = _load_split('Audiobooks_data_train.npz')

# load validation data
validation_inputs, validation_targets = _load_split('Audiobooks_data_validation.npz')

# load test data
test_inputs, test_targets = _load_split('Audiobooks_data_test.npz')

### 2. First Model

Outline, optimizers, loss

In [2]:
# Network dimensions
input_size = 10
output_size = 2
hidden_layer_size = 50

# Training schedule
batch_size = 100
max_epochs = 100

# Stack the layers one at a time: two ReLU hidden layers, then a softmax output layer.
model_1 = tf.keras.Sequential()
model_1.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
model_1.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
model_1.add(tf.keras.layers.Dense(output_size, activation='softmax'))

model_1.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)

# Train for the full max_epochs (no early stopping here); validation metrics are
# reported after every epoch. The call stays the last expression so the History
# object is still displayed as the cell output.
model_1.fit(
    x=train_inputs,
    y=train_targets,
    validation_data=(validation_inputs, validation_targets),
    epochs=max_epochs,
    batch_size=batch_size,
    verbose=2,
)

Train on 3579 samples, validate on 447 samples
Epoch 1/100
3579/3579 - 1s - loss: 0.5424 - accuracy: 0.7611 - val_loss: 0.4210 - val_accuracy: 0.8568
Epoch 2/100
3579/3579 - 0s - loss: 0.3591 - accuracy: 0.8776 - val_loss: 0.3456 - val_accuracy: 0.8725
Epoch 3/100
3579/3579 - 0s - loss: 0.3109 - accuracy: 0.8857 - val_loss: 0.3213 - val_accuracy: 0.8814
Epoch 4/100
3579/3579 - 0s - loss: 0.2934 - accuracy: 0.8913 - val_loss: 0.3032 - val_accuracy: 0.8814
Epoch 5/100
3579/3579 - 0s - loss: 0.2813 - accuracy: 0.8952 - val_loss: 0.2924 - val_accuracy: 0.8926
Epoch 6/100
3579/3579 - 0s - loss: 0.2720 - accuracy: 0.8997 - val_loss: 0.2874 - val_accuracy: 0.8926
Epoch 7/100
3579/3579 - 0s - loss: 0.2652 - accuracy: 0.9005 - val_loss: 0.2806 - val_accuracy: 0.8926
Epoch 8/100
3579/3579 - 0s - loss: 0.2593 - accuracy: 0.9025 - val_loss: 0.2700 - val_accuracy: 0.9016
Epoch 9/100
3579/3579 - 0s - loss: 0.2550 - accuracy: 0.9058 - val_loss: 0.2783 - val_accuracy: 0.9016
Epoch 10/100
3579/3579 - 0

Epoch 80/100
3579/3579 - 0s - loss: 0.2087 - accuracy: 0.9223 - val_loss: 0.2506 - val_accuracy: 0.9060
Epoch 81/100
3579/3579 - 0s - loss: 0.2085 - accuracy: 0.9232 - val_loss: 0.2794 - val_accuracy: 0.8949
Epoch 82/100
3579/3579 - 0s - loss: 0.2121 - accuracy: 0.9223 - val_loss: 0.2520 - val_accuracy: 0.9016
Epoch 83/100
3579/3579 - 0s - loss: 0.2122 - accuracy: 0.9195 - val_loss: 0.2499 - val_accuracy: 0.9016
Epoch 84/100
3579/3579 - 0s - loss: 0.2070 - accuracy: 0.9223 - val_loss: 0.2460 - val_accuracy: 0.8993
Epoch 85/100
3579/3579 - 0s - loss: 0.2081 - accuracy: 0.9218 - val_loss: 0.2531 - val_accuracy: 0.9038
Epoch 86/100
3579/3579 - 0s - loss: 0.2064 - accuracy: 0.9234 - val_loss: 0.2504 - val_accuracy: 0.9038
Epoch 87/100
3579/3579 - 0s - loss: 0.2089 - accuracy: 0.9193 - val_loss: 0.2498 - val_accuracy: 0.9060
Epoch 88/100
3579/3579 - 0s - loss: 0.2059 - accuracy: 0.9237 - val_loss: 0.2575 - val_accuracy: 0.9016
Epoch 89/100
3579/3579 - 0s - loss: 0.2099 - accuracy: 0.9223 - 

<tensorflow.python.keras.callbacks.History at 0x1cde0d5c608>

Notice how the training loss keeps decreasing while the validation loss stops improving and merely fluctuates. This is a sign that the model is overfitting the training data.

### 3. Second Model with early stopping

Outline, optimizers, loss, early stopping and training

In [3]:
# Same architecture and schedule as the first model; the only addition is the
# early-stopping callback passed to fit().
input_size = 10
output_size = 2
hidden_layer_size = 50

batch_size = 100
max_epochs = 100

# Two identical ReLU hidden layers followed by the softmax output layer.
model_2 = tf.keras.Sequential(
    [tf.keras.layers.Dense(hidden_layer_size, activation='relu') for _ in range(2)]
    + [tf.keras.layers.Dense(output_size, activation='softmax')]
)

model_2.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)

# Halt training once the callback's monitored quantity has gone `patience`
# epochs without improving (all other callback settings are library defaults).
early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)

# fit() stays the last expression so the History object is displayed.
model_2.fit(
    x=train_inputs,
    y=train_targets,
    validation_data=(validation_inputs, validation_targets),
    callbacks=[early_stopping],
    epochs=max_epochs,
    batch_size=batch_size,
    verbose=2,
)

Train on 3579 samples, validate on 447 samples
Epoch 1/100
3579/3579 - 1s - loss: 0.6226 - accuracy: 0.6943 - val_loss: 0.4703 - val_accuracy: 0.8434
Epoch 2/100
3579/3579 - 0s - loss: 0.3932 - accuracy: 0.8690 - val_loss: 0.3561 - val_accuracy: 0.8658
Epoch 3/100
3579/3579 - 0s - loss: 0.3282 - accuracy: 0.8801 - val_loss: 0.3245 - val_accuracy: 0.8725
Epoch 4/100
3579/3579 - 0s - loss: 0.3032 - accuracy: 0.8888 - val_loss: 0.3037 - val_accuracy: 0.8837
Epoch 5/100
3579/3579 - 0s - loss: 0.2876 - accuracy: 0.8941 - val_loss: 0.2972 - val_accuracy: 0.8859
Epoch 6/100
3579/3579 - 0s - loss: 0.2785 - accuracy: 0.8947 - val_loss: 0.2789 - val_accuracy: 0.8926
Epoch 7/100
3579/3579 - 0s - loss: 0.2708 - accuracy: 0.8975 - val_loss: 0.2742 - val_accuracy: 0.8904
Epoch 8/100
3579/3579 - 0s - loss: 0.2665 - accuracy: 0.8994 - val_loss: 0.2741 - val_accuracy: 0.8949
Epoch 9/100
3579/3579 - 0s - loss: 0.2604 - accuracy: 0.9005 - val_loss: 0.2742 - val_accuracy: 0.8971
Epoch 10/100
3579/3579 - 0

<tensorflow.python.keras.callbacks.History at 0x1cde252d808>

### 4. Third Model

Outline, optimizers, loss, early stopping and training

In [4]:
# Third variant: wider hidden layers (200 units) and a smaller batch (50),
# still trained with early stopping.
input_size, output_size, hidden_layer_size = 10, 2, 200
batch_size, max_epochs = 50, 100

# (units, activation) for each Dense layer, in stacking order.
layer_specs = (
    (hidden_layer_size, 'relu'),
    (hidden_layer_size, 'relu'),
    (output_size, 'softmax'),
)

model_3 = tf.keras.Sequential()
for layer_units, layer_activation in layer_specs:
    model_3.add(tf.keras.layers.Dense(layer_units, activation=layer_activation))

model_3.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)

# Stop once the monitored quantity has failed to improve for 2 consecutive epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)

# Keyword arguments gathered in one place, then unpacked into fit().
fit_options = dict(
    batch_size=batch_size,
    epochs=max_epochs,
    callbacks=[early_stopping],
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

# fit() stays the last expression so the History object is displayed.
model_3.fit(train_inputs, train_targets, **fit_options)

Train on 3579 samples, validate on 447 samples
Epoch 1/100
3579/3579 - 1s - loss: 0.3596 - accuracy: 0.8678 - val_loss: 0.2920 - val_accuracy: 0.8904
Epoch 2/100
3579/3579 - 0s - loss: 0.2767 - accuracy: 0.8975 - val_loss: 0.2727 - val_accuracy: 0.8926
Epoch 3/100
3579/3579 - 0s - loss: 0.2597 - accuracy: 0.9030 - val_loss: 0.2550 - val_accuracy: 0.9038
Epoch 4/100
3579/3579 - 0s - loss: 0.2519 - accuracy: 0.9067 - val_loss: 0.2543 - val_accuracy: 0.9016
Epoch 5/100
3579/3579 - 0s - loss: 0.2470 - accuracy: 0.9070 - val_loss: 0.2439 - val_accuracy: 0.8993
Epoch 6/100
3579/3579 - 0s - loss: 0.2428 - accuracy: 0.9128 - val_loss: 0.2492 - val_accuracy: 0.8971
Epoch 7/100
3579/3579 - 0s - loss: 0.2372 - accuracy: 0.9120 - val_loss: 0.2791 - val_accuracy: 0.9016


<tensorflow.python.keras.callbacks.History at 0x1cddfe1dc48>

### 5. Test the models

In [5]:
# Score the first model on the test split; the result unpacks into the loss
# and the accuracy metric.
test_loss_1, test_accuracy_1 = model_1.evaluate(
    x=test_inputs,
    y=test_targets,
    verbose=2,
)

# Accuracy is scaled by 100 so it prints as a percentage.
report_1 = 'Model 1. Test loss: {0:.2f}. Test accuracy: {1:.2f}%'
print(report_1.format(test_loss_1, test_accuracy_1 * 100.))

448/1 - 0s - loss: 0.2281 - accuracy: 0.9152
Model 1. Test loss: 0.23. Test accuracy: 91.52%


In [6]:
# Score the second (early-stopped) model on the test split; the result unpacks
# into the loss and the accuracy metric.
test_loss_2, test_accuracy_2 = model_2.evaluate(
    x=test_inputs,
    y=test_targets,
    verbose=2,
)

# Accuracy is scaled by 100 so it prints as a percentage.
report_2 = 'Model 2. Test loss: {0:.2f}. Test accuracy: {1:.2f}%'
print(report_2.format(test_loss_2, test_accuracy_2 * 100.))

448/1 - 0s - loss: 0.2552 - accuracy: 0.9040
Model 2. Test loss: 0.25. Test accuracy: 90.40%


In [7]:
# Score the third (wider) model on the test split; the result unpacks into the
# loss and the accuracy metric.
test_loss_3, test_accuracy_3 = model_3.evaluate(
    x=test_inputs,
    y=test_targets,
    verbose=2,
)

# Accuracy is scaled by 100 so it prints as a percentage.
report_3 = 'Model 3. Test loss: {0:.2f}. Test accuracy: {1:.2f}%'
print(report_3.format(test_loss_3, test_accuracy_3 * 100.))

448/1 - 0s - loss: 0.2531 - accuracy: 0.9040
Model 3. Test loss: 0.26. Test accuracy: 90.40%
