In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import tensorflow as tf

from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler

In [2]:
raw_csv_data = np.loadtxt('Audiobooks_data.csv', delimiter = ',')
unscaled_inputs_all = raw_csv_data[:,1:-1]
targets_all = raw_csv_data[:,-1]

In [3]:
num_one_targets = int(np.sum(targets_all))
zero_targets_counter = 0
indices_to_remove = []

for i in range(targets_all.shape[0]):
    if targets_all[i] == 0:
        zero_targets_counter += 1
        if zero_targets_counter > num_one_targets:
            indices_to_remove.append(i)

unscaled_inputs_equal_priors = np.delete(unscaled_inputs_all, indices_to_remove, axis= 0)
targets_equal_priors = np.delete(targets_all, indices_to_remove, axis = 0)

### Standardise the inputs

In [4]:
scaler_deep_learning = StandardScaler()
scaled_inputs = scaler_deep_learning.fit_transform(unscaled_inputs_equal_priors)

### Shuffling the data

In [5]:
shuffled_indices = np.arange(scaled_inputs.shape[0])
np.random.shuffle(shuffled_indices)

shuffled_inputs = scaled_inputs[shuffled_indices]
shuffled_targets = targets_equal_priors[shuffled_indices]

### Split the dataset into train, validation and test sets

In [6]:
samples_count = shuffled_inputs.shape[0]

train_samples_count = int(0.8 * samples_count)
validation_samples_count = int(0.1 * samples_count)
test_samples_count = samples_count - train_samples_count - validation_samples_count

train_inputs = shuffled_inputs[:train_samples_count]
train_targets = shuffled_targets[:train_samples_count]

validation_inputs = shuffled_inputs[train_samples_count: train_samples_count + validation_samples_count]
validation_targets = shuffled_targets[train_samples_count: train_samples_count + validation_samples_count]

test_inputs = shuffled_inputs[train_samples_count + validation_samples_count:]
test_targets = shuffled_targets[train_samples_count + validation_samples_count:]

print(np.sum(train_targets), train_samples_count, np.sum(train_targets) / train_samples_count)
print(np.sum(validation_targets), validation_samples_count, np.sum(validation_targets) / validation_samples_count)
print(np.sum(test_targets), test_samples_count, np.sum(test_targets) / test_samples_count)

1757.0 3579 0.49091925118748253
244.0 447 0.5458612975391499
236.0 448 0.5267857142857143


### Save the three datasets in .npz

In [7]:
np.savez('Audiobooks_data_train', inputs = train_inputs, targets = train_targets)
np.savez('Audiobooks_data_validation', inputs = validation_inputs, targets = validation_targets)
np.savez('Audiobooks_data_test', inputs = test_inputs, targets = test_targets)

### Save the Scaler

In [8]:
pickle.dump(scaler_deep_learning, open('scaler_deep_learning.pickle', 'wb'))

---

# Model

In [9]:
#activation = 'sigmoid' & 'tanh'
#input_size = 10
output_size = 2
hidden_layer_size = 50

model = tf.keras.Sequential([
                            tf.keras.layers.Dense(hidden_layer_size, activation ='relu'),
                            tf.keras.layers.Dense(hidden_layer_size, activation ='relu'),
                            tf.keras.layers.Dense(output_size, activation = 'softmax')
                            ])

model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

batch_size = 100

max_epochs = 100

early_stopping = tf.keras.callbacks.EarlyStopping(patience = 2)

model.fit(train_inputs,
         train_targets,
         batch_size = batch_size,
         epochs = max_epochs,
         callbacks = [early_stopping],
         validation_data = (validation_inputs, validation_targets),
         verbose = 2
         )

Epoch 1/100
36/36 - 0s - loss: 0.5205 - accuracy: 0.8276 - val_loss: 0.4051 - val_accuracy: 0.8814
Epoch 2/100
36/36 - 0s - loss: 0.3643 - accuracy: 0.8757 - val_loss: 0.3201 - val_accuracy: 0.8926
Epoch 3/100
36/36 - 0s - loss: 0.3193 - accuracy: 0.8843 - val_loss: 0.2938 - val_accuracy: 0.8971
Epoch 4/100
36/36 - 0s - loss: 0.2999 - accuracy: 0.8905 - val_loss: 0.2820 - val_accuracy: 0.8971
Epoch 5/100
36/36 - 0s - loss: 0.2866 - accuracy: 0.8938 - val_loss: 0.2702 - val_accuracy: 0.9016
Epoch 6/100
36/36 - 0s - loss: 0.2757 - accuracy: 0.8969 - val_loss: 0.2606 - val_accuracy: 0.9060
Epoch 7/100
36/36 - 0s - loss: 0.2668 - accuracy: 0.8986 - val_loss: 0.2529 - val_accuracy: 0.9060
Epoch 8/100
36/36 - 0s - loss: 0.2611 - accuracy: 0.9011 - val_loss: 0.2532 - val_accuracy: 0.9060
Epoch 9/100
36/36 - 0s - loss: 0.2562 - accuracy: 0.9036 - val_loss: 0.2578 - val_accuracy: 0.9060


<tensorflow.python.keras.callbacks.History at 0x27e04e94ee0>

## Test the model

In [10]:
test_loss, test_accuracy = model.evaluate(test_inputs, test_targets)



In [11]:
print('Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100.))

Test loss: 0.28. Test accuracy: 89.06%


## Obtaining probability for a customer to convert

In [12]:
model.predict(test_inputs).round(2)

array([[0.23, 0.77],
       [0.96, 0.04],
       [0.93, 0.07],
       [0.98, 0.02],
       [0.81, 0.19],
       [0.49, 0.51],
       [0.  , 1.  ],
       [0.05, 0.95],
       [0.55, 0.45],
       [0.16, 0.84],
       [0.  , 1.  ],
       [0.  , 1.  ],
       [0.95, 0.05],
       [0.22, 0.78],
       [0.13, 0.87],
       [0.  , 1.  ],
       [0.22, 0.78],
       [0.23, 0.77],
       [1.  , 0.  ],
       [0.96, 0.04],
       [0.58, 0.42],
       [0.  , 1.  ],
       [0.01, 0.99],
       [0.36, 0.64],
       [0.36, 0.64],
       [0.  , 1.  ],
       [0.02, 0.98],
       [0.3 , 0.7 ],
       [0.19, 0.81],
       [0.87, 0.13],
       [1.  , 0.  ],
       [0.09, 0.91],
       [0.15, 0.85],
       [0.22, 0.78],
       [0.32, 0.68],
       [1.  , 0.  ],
       [1.  , 0.  ],
       [0.93, 0.07],
       [0.4 , 0.6 ],
       [0.23, 0.77],
       [0.  , 1.  ],
       [0.96, 0.04],
       [0.01, 0.99],
       [0.15, 0.85],
       [0.99, 0.01],
       [0.12, 0.88],
       [0.91, 0.09],
       [1.  ,

In [13]:
model.predict(test_inputs)[:,1].round(0)

array([1., 0., 0., 0., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1., 1., 1.,
       1., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1.,
       1., 0., 0., 0., 1., 1., 1., 0., 1., 1., 0., 1., 0., 0., 0., 1., 0.,
       1., 1., 0., 0., 0., 1., 0., 1., 0., 0., 1., 1., 0., 1., 0., 1., 0.,
       1., 1., 0., 1., 1., 1., 0., 0., 1., 1., 0., 1., 0., 1., 1., 1., 1.,
       0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 1., 1.,
       0., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 0., 0., 1., 1., 1.,
       1., 0., 1., 1., 1., 0., 0., 1., 0., 0., 0., 1., 1., 0., 1., 0., 0.,
       0., 1., 1., 1., 0., 0., 0., 1., 1., 1., 0., 1., 0., 0., 0., 1., 1.,
       0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1.,
       0., 1., 1., 0., 1., 1., 1., 1., 0., 0., 1., 0., 1., 0., 0., 0., 1.,
       0., 0., 1., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1.,
       0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 0.,
       1., 1., 1., 0., 0.

In [14]:
#For multiclass problems
pm =np.argmax(model.predict(test_inputs), axis = 1)
pm

array([1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0,
       1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1,
       0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0,
       0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1,
       0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1,
       1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1,

In [15]:
np.unique(pm, return_counts = True)

(array([0, 1], dtype=int64), array([213, 235], dtype=int64))

## Save the model

In [16]:
model.save('audiobooks_model.h5')