In [1]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras

# Load the data
data = pd.read_csv('Audiobooks_data.csv')

# Rename the columns for clarity
data.columns = [
    'Customer ID',
    'Book length overall',
    'Book length avg',
    'Price paid overall',
    'Price paid avg',
    'Review',
    'Review out of 10',
    'Total minutes listened',
    'Completion',
    'Support requests',
    'Last visited minus purchase date',
    'Target'
]

# Drop the Customer ID (not useful for training)
inputs_all = data.iloc[:, 1:-1].values
targets_all = data.iloc[:, -1].values

# Balance the dataset: equal number of 0s and 1s
num_one_targets = int(np.sum(targets_all))
zero_targets_counter = 0
indices_to_remove = []

for i in range(targets_all.shape[0]):
    if targets_all[i] == 0:
        zero_targets_counter += 1
        if zero_targets_counter > num_one_targets:
            indices_to_remove.append(i)

inputs_balanced = np.delete(inputs_all, indices_to_remove, axis=0)
targets_balanced = np.delete(targets_all, indices_to_remove, axis=0)

# Scale the inputs
scaled_inputs = preprocessing.scale(inputs_balanced)

# Shuffle the data
shuffled_indices = np.arange(scaled_inputs.shape[0])
np.random.shuffle(shuffled_indices)

inputs_shuffled = scaled_inputs[shuffled_indices]
targets_shuffled = targets_balanced[shuffled_indices]

# Split into train (80%), validation (10%), and test (10%)
train_inputs, temp_inputs, train_targets, temp_targets = train_test_split(
    inputs_shuffled, targets_shuffled, test_size=0.2, random_state=42)

validation_inputs, test_inputs, validation_targets, test_targets = train_test_split(
    temp_inputs, temp_targets, test_size=0.5, random_state=42)

# Build the model
model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(train_inputs.shape[1],)),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')  # Output: probability (0-1)
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model
history = model.fit(train_inputs, train_targets,
                    validation_data=(validation_inputs, validation_targets),
                    batch_size=32,
                    epochs=50,
                    verbose=1)

# Evaluate the model
test_loss, test_accuracy = model.evaluate(test_inputs, test_targets)
print(f'\nTest Accuracy: {test_accuracy:.2f}')
print(f'Test Loss: {test_loss:.2f}')


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.6840 - loss: 0.6037 - val_accuracy: 0.7248 - val_loss: 0.4429
Epoch 2/50
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7561 - loss: 0.4313 - val_accuracy: 0.7763 - val_loss: 0.3790
Epoch 3/50
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7895 - loss: 0.3824 - val_accuracy: 0.7763 - val_loss: 0.3697
Epoch 4/50
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7981 - loss: 0.3622 - val_accuracy: 0.8009 - val_loss: 0.3496
Epoch 5/50
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8031 - loss: 0.3529 - val_accuracy: 0.8009 - val_loss: 0.3542
Epoch 6/50
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8047 - loss: 0.3554 - val_accuracy: 0.7987 - val_loss: 0.3483
Epoch 7/50
[1m112/112[0m [32m━━━━━━