## Importing relevant libraries

In [159]:
import numpy as np
from sklearn import preprocessing
import tensorflow as tf
import warnings
warnings.filterwarnings('ignore', category=UserWarning)

In [149]:
path = r"Audiobooks_data.csv"
raw_data = np.loadtxt(path, delimiter=',')
raw_data.shape

(14084, 12)

# Preprocessing

In [17]:
unscaled_inputs = raw_data[:, 1:-1]
targets = raw_data[:, -1]
unscaled_inputs

array([[2.160e+03, 2.160e+03, 1.013e+01, ..., 0.000e+00, 0.000e+00,
        0.000e+00],
       [1.404e+03, 2.808e+03, 6.660e+00, ..., 0.000e+00, 0.000e+00,
        1.820e+02],
       [3.240e+02, 3.240e+02, 1.013e+01, ..., 0.000e+00, 1.000e+00,
        3.340e+02],
       ...,
       [1.080e+03, 1.080e+03, 6.550e+00, ..., 0.000e+00, 0.000e+00,
        2.900e+01],
       [2.160e+03, 2.160e+03, 6.140e+00, ..., 0.000e+00, 0.000e+00,
        0.000e+00],
       [1.620e+03, 1.620e+03, 5.330e+00, ..., 0.000e+00, 0.000e+00,
        9.000e+01]])

In [39]:
tot_ones = int(targets.sum())
tot_ones

2237

In [27]:
zero_count = 0
indices_remove = []
for i in range(len(targets)):
    if targets[i] == 0:
        zero_count += 1
        if zero_count > tot_ones:
            indices_remove.append(i)

In [53]:
balanced_input = np.delete(unscaled_inputs, indices_remove, 0)
targets = np.delete(targets, indices_remove, 0)

In [61]:
inputs = preprocessing.scale(balanced_input)

In [67]:
shuffle_indices = np.arange(len(inputs))
np.random.shuffle(shuffle_indices)
inputs = inputs[shuffle_indices]
targets = targets[shuffle_indices]

In [75]:
count = len(inputs)
train_count = int(0.8 * count)
valid_count = int(0.1 * count)
test_count = count - train_count - valid_count

In [93]:
train_inputs = inputs[:train_count]
train_targets = targets[:train_count]

valid_inputs = inputs[train_count : train_count + valid_count]
valid_targets = targets[train_count : train_count + valid_count]

test_inputs = inputs[train_count + valid_count : ]
test_targets = targets[train_count + valid_count : ]

In [95]:
np.savez('Audiobooks_data_train', inputs=train_inputs, targets=train_targets)
np.savez('Audiobooks_data_validation', inputs=valid_inputs, targets=valid_targets)
np.savez('Audiobooks_data_test', inputs=test_inputs, targets=test_targets)

In [103]:
npz = np.load('Audiobooks_data_train.npz')

train_inputs = npz['inputs'].astype(float)
train_targets = npz['targets'].astype(int)

npz = np.load('Audiobooks_data_validation.npz')
validation_inputs, validation_targets = npz['inputs'].astype(float), npz['targets'].astype(int)

npz = np.load('Audiobooks_data_test.npz')
test_inputs, test_targets = npz['inputs'].astype(float), npz['targets'].astype(int)

# Building model

In [123]:
input_size = 10
output_size = 2
hidden_size = 200

model = tf.keras.Sequential([
    tf.keras.layers.Dense(hidden_size, activation= 'relu'),
    tf.keras.layers.Dense(hidden_size, activation= 'tanh'),
    tf.keras.layers.Dense(hidden_size, activation= 'relu'),
    tf.keras.layers.Dense(output_size, activation= 'softmax'),
])

model.compile(optimizer='adam', loss = 'sparse_categorical_crossentropy', metrics= ['accuracy'])

In [125]:
early_stop = tf.keras.callbacks.EarlyStopping(patience = 2, restore_best_weights=True)
model.fit(train_inputs,
          train_targets,
          batch_size=100,
          epochs=100,
          validation_data=(validation_inputs, validation_targets),
          callbacks=[early_stop],
          verbose=2)

Epoch 1/100
36/36 - 3s - 70ms/step - accuracy: 0.8642 - loss: 0.3654 - val_accuracy: 0.9016 - val_loss: 0.2661
Epoch 2/100
36/36 - 0s - 13ms/step - accuracy: 0.8969 - loss: 0.2760 - val_accuracy: 0.9128 - val_loss: 0.2421
Epoch 3/100
36/36 - 0s - 7ms/step - accuracy: 0.8908 - loss: 0.2694 - val_accuracy: 0.9060 - val_loss: 0.2667
Epoch 4/100
36/36 - 0s - 10ms/step - accuracy: 0.9008 - loss: 0.2538 - val_accuracy: 0.9083 - val_loss: 0.2375
Epoch 5/100
36/36 - 0s - 8ms/step - accuracy: 0.9058 - loss: 0.2506 - val_accuracy: 0.9150 - val_loss: 0.2356
Epoch 6/100
36/36 - 0s - 7ms/step - accuracy: 0.9098 - loss: 0.2443 - val_accuracy: 0.9172 - val_loss: 0.2443
Epoch 7/100
36/36 - 0s - 7ms/step - accuracy: 0.9058 - loss: 0.2456 - val_accuracy: 0.9172 - val_loss: 0.2479


<keras.src.callbacks.history.History at 0x1bd69422c30>

## Saving & Testing the Model

In [157]:
model.save('audiobook_prediction_model.keras')

In [163]:
load_model = tf.keras.models.load_model('audiobook_prediction_model.keras')
predictions = load_model.predict(test_inputs)
predicted_classes = np.argmax(predictions, axis=1)

for i, (prediction, actual) in enumerate(zip(predicted_classes, test_targets)):
    status_prediction = "Will Buy Again!" if prediction == 1 else "Wont't Buy Again"
    status_actual = "Will Buy Again!!" if actual == 1 else "Won't Buy Again"
 
    if prediction == actual:
        result = "Correct!"
    else:
        result = "Wrong"
 
    print(f"Customer {i+1}: Prediction = {status_prediction}, Actual = {status_actual} -> {result}")
print('\n')
test_loss, test_accuracy = model.evaluate(test_inputs, test_targets)
print('\nTest loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100.))

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Customer 1: Prediction = Wont't Buy Again, Actual = Won't Buy Again -> Correct!
Customer 2: Prediction = Wont't Buy Again, Actual = Won't Buy Again -> Correct!
Customer 3: Prediction = Will Buy Again!, Actual = Will Buy Again!! -> Correct!
Customer 4: Prediction = Wont't Buy Again, Actual = Won't Buy Again -> Correct!
Customer 5: Prediction = Will Buy Again!, Actual = Won't Buy Again -> Wrong
Customer 6: Prediction = Will Buy Again!, Actual = Will Buy Again!! -> Correct!
Customer 7: Prediction = Will Buy Again!, Actual = Will Buy Again!! -> Correct!
Customer 8: Prediction = Will Buy Again!, Actual = Will Buy Again!! -> Correct!
Customer 9: Prediction = Wont't Buy Again, Actual = Won't Buy Again -> Correct!
Customer 10: Prediction = Will Buy Again!, Actual = Will Buy Again!! -> Correct!
Customer 11: Prediction = Wont't Buy Again, Actual = Won't Buy Again -> Correct!
Customer 12: Prediction = Wont't Buy Again, Actu