In [1]:
import numpy as np
import pickle
import tensorflow as tf

# Creatin a machine learning algorithm

### Data

In [2]:
npz = np.load('audiobooks_data_train.npz')

train_inputs = npz['inputs'].astype(np.float64)
train_targets = npz['targets'].astype(np.int64)

npz = np.load('audiobooks_data_validation.npz')
validation_inputs, validation_targets = npz['inputs'].astype(np.float64), npz['targets'].astype(np.int64)

npz = np.load('audiobooks_data_test.npz')
test_inputs, test_targets = npz['inputs'].astype(np.float64), npz['targets'].astype(np.int64)

### Model

In [8]:
# input_size =10 è automaticamente identificato. rappresenta la dimensionalità dei dati
output_size = 2
hidden_layer_size = 50

model = tf.keras.Sequential([
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(output_size, activation = 'softmax')
])

model.compile(optimizer = 'adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

batch_size = 100

max_epochs = 100

early_stopping = tf.keras.callbacks.EarlyStopping(patience = 5)

model.fit(train_inputs,
          train_targets,
          batch_size = batch_size,
          epochs = max_epochs,
          callbacks=[early_stopping],
          validation_data=(validation_inputs, validation_targets),
          verbose=2
)

Epoch 1/100
36/36 - 1s - loss: 0.6501 - accuracy: 0.6133 - val_loss: 0.5162 - val_accuracy: 0.8277 - 697ms/epoch - 19ms/step
Epoch 2/100
36/36 - 0s - loss: 0.4286 - accuracy: 0.8586 - val_loss: 0.3502 - val_accuracy: 0.8747 - 56ms/epoch - 2ms/step
Epoch 3/100
36/36 - 0s - loss: 0.3306 - accuracy: 0.8829 - val_loss: 0.3099 - val_accuracy: 0.8859 - 56ms/epoch - 2ms/step
Epoch 4/100
36/36 - 0s - loss: 0.3002 - accuracy: 0.8921 - val_loss: 0.2919 - val_accuracy: 0.8881 - 57ms/epoch - 2ms/step
Epoch 5/100
36/36 - 0s - loss: 0.2834 - accuracy: 0.8955 - val_loss: 0.2771 - val_accuracy: 0.8993 - 56ms/epoch - 2ms/step
Epoch 6/100
36/36 - 0s - loss: 0.2729 - accuracy: 0.8983 - val_loss: 0.2704 - val_accuracy: 0.9016 - 57ms/epoch - 2ms/step
Epoch 7/100
36/36 - 0s - loss: 0.2641 - accuracy: 0.9014 - val_loss: 0.2735 - val_accuracy: 0.8971 - 59ms/epoch - 2ms/step
Epoch 8/100
36/36 - 0s - loss: 0.2590 - accuracy: 0.9016 - val_loss: 0.2616 - val_accuracy: 0.9038 - 58ms/epoch - 2ms/step
Epoch 9/100
36

<keras.callbacks.History at 0x19b11a37c70>

### Test the model

E' fondamentale usare il test come prova finale delle capacità predittive del modello. Aggiustare il modello dopo il test causerà senz'altro overfitting

In [9]:
test_loss, test_accuracy = model.evaluate(test_inputs, test_targets)



In [13]:
print('\nTest loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100.))


Test loss: 0.27. Test accuracy: 89.73%


### Obtain the probability for a customer to convert

In [15]:
model.predict(test_inputs)[:,1].round(0)



array([0., 1., 1., 1., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 0., 1., 1.,
       1., 0., 1., 1., 0., 1., 1., 1., 1., 1., 0., 0., 1., 0., 1., 1., 0.,
       1., 0., 1., 0., 1., 1., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 1.,
       1., 0., 0., 0., 1., 0., 1., 1., 1., 0., 1., 1., 0., 0., 1., 1., 1.,
       0., 0., 1., 1., 0., 0., 1., 0., 0., 0., 0., 1., 1., 1., 1., 0., 0.,
       0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 1., 1., 0., 1., 1., 0., 0.,
       1., 1., 0., 0., 1., 0., 1., 1., 1., 0., 0., 1., 1., 1., 1., 0., 0.,
       1., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 1.,
       0., 0., 1., 1., 0., 0., 0., 1., 1., 0., 0., 1., 0., 0., 0., 0., 1.,
       0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1.,
       0., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 1., 1., 0., 0., 1., 1.,
       0., 1., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0.,
       0., 0., 1., 0., 1., 1., 0., 1., 1., 0., 0., 0., 0., 0., 0., 1., 0.,
       1., 0., 0., 1., 1.

Se volessi il massimo tra le probabilità di classificare in una certa categoria il singolo record:

In [17]:
np.argmax(model.predict(test_inputs), axis=1)



array([0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0,
       1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0,
       1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1,
       0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0,
       1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0,

### Saving the model

pickle non è molto adotto quando si trattano diverse versioni dei modelli. Il metodo built-in di tf va bene

In [18]:
model.save('audiobooks_model.h5')