In [1]:
import numpy as np
import tensorflow as tf

In [2]:
from sklearn import preprocessing

raw_csv_data = np.loadtxt('./Audiobooks_data.csv', delimiter=',')

unscaled_inputs_all = raw_csv_data[:, 1:-1]
targets_all = raw_csv_data[:, -1]

In [3]:
num_one_targets = int(np.sum(targets_all))

zero_targets_counter = 0

indices_to_remove = []
for i in range(targets_all.shape[0]):
    if targets_all[i] == 0:
        zero_targets_counter += 1
        if zero_targets_counter > num_one_targets:
            indices_to_remove.append(i)

unscaled_inputs_equal_priors = np.delete(unscaled_inputs_all, indices_to_remove, axis=0)
targets_equal_priors = np.delete(targets_all, indices_to_remove, axis=0)
scaled_inputs = preprocessing.scale(unscaled_inputs_equal_priors)

In [4]:
shuffled_indices = np.arange(scaled_inputs.shape[0])
np.random.shuffle(shuffled_indices)

shuffled_inputs = scaled_inputs[shuffled_indices]
shuffled_targets = targets_equal_priors[shuffled_indices]

In [5]:
samples_count = shuffled_inputs.shape[0]

train_samples_count = int(0.8*samples_count)
validation_samples_count = int(0.1*samples_count)
test_samples_count = samples_count - train_samples_count - validation_samples_count

train_inputs = shuffled_inputs[:train_samples_count]
train_targets = shuffled_targets[:train_samples_count]

validation_inputs = shuffled_inputs[train_samples_count:train_samples_count+validation_samples_count]
validation_targets = shuffled_targets[train_samples_count:train_samples_count+validation_samples_count]

test_inputs = shuffled_inputs[train_samples_count+validation_samples_count:]
test_targets = shuffled_targets[train_samples_count+validation_samples_count:]

print(np.sum(train_targets), train_samples_count, np.sum(train_targets) / train_samples_count)
print(np.sum(validation_targets), validation_samples_count, np.sum(validation_targets) / validation_samples_count)
print(np.sum(test_targets), test_samples_count, np.sum(test_targets) / test_samples_count)

1788.0 3579 0.49958088851634536
232.0 447 0.5190156599552572
217.0 448 0.484375


In [6]:
np.savez('train', inputs=train_inputs, targets=train_targets)
np.savez('valid', inputs=validation_inputs, targets=validation_targets)
np.savez('test', inputs=test_inputs, targets=test_targets)

In [7]:
npz = np.load('train.npz')

train_input = npz['inputs'].astype(np.float32)
train_traget = npz['targets'].astype(np.float32)

npz = np.load('valid.npz')
valid_input = npz['inputs'].astype(np.float32)
valid_target = npz['targets'].astype(np.float32)

npz = np.load('test.npz')
test_input = npz['inputs'].astype(np.float32)
test_target = npz['targets'].astype(np.float32)

In [8]:
input_size = 10
output_size = 2

model = tf.keras.Sequential([
    tf.keras.layers.Dense(20, activation='relu'),
    tf.keras.layers.Dense(20, activation='relu'),
    tf.keras.layers.Dense(output_size, activation='softmax')    
])

In [9]:
model.compile(optimizer='adam',loss="sparse_categorical_crossentropy", metrics=['accuracy'])

batch_size = 100
max_epochs = 100

earlt_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=5,
    verbose=1,
    mode='auto',
    baseline=None,
    restore_best_weights=True
)

model.fit(train_input,train_targets,
          batch_size=batch_size,
          epochs=max_epochs,
          validation_data=(valid_input, valid_target),
          callbacks=[earlt_stopping],
          verbose=2)

Epoch 1/100
36/36 - 1s - 34ms/step - accuracy: 0.4979 - loss: 0.7523 - val_accuracy: 0.6130 - val_loss: 0.6831
Epoch 2/100
36/36 - 0s - 4ms/step - accuracy: 0.7279 - loss: 0.6273 - val_accuracy: 0.7830 - val_loss: 0.5980
Epoch 3/100
36/36 - 0s - 4ms/step - accuracy: 0.8145 - loss: 0.5326 - val_accuracy: 0.8523 - val_loss: 0.4935
Epoch 4/100
36/36 - 0s - 4ms/step - accuracy: 0.8497 - loss: 0.4381 - val_accuracy: 0.8747 - val_loss: 0.3967
Epoch 5/100
36/36 - 0s - 4ms/step - accuracy: 0.8653 - loss: 0.3718 - val_accuracy: 0.8837 - val_loss: 0.3367
Epoch 6/100
36/36 - 0s - 5ms/step - accuracy: 0.8757 - loss: 0.3356 - val_accuracy: 0.8881 - val_loss: 0.3068
Epoch 7/100
36/36 - 0s - 5ms/step - accuracy: 0.8801 - loss: 0.3160 - val_accuracy: 0.8926 - val_loss: 0.2908
Epoch 8/100
36/36 - 0s - 4ms/step - accuracy: 0.8854 - loss: 0.3028 - val_accuracy: 0.8971 - val_loss: 0.2823
Epoch 9/100
36/36 - 0s - 4ms/step - accuracy: 0.8888 - loss: 0.2933 - val_accuracy: 0.8971 - val_loss: 0.2725
Epoch 10/

<keras.src.callbacks.history.History at 0x21948606480>

In [10]:
test_loss,test_accuracy = model.evaluate(test_input, test_target)

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9257 - loss: 0.2312 


In [1]:
import numpy as np

# Set the customer ID you want to predict
customer_id_to_predict = 25164 # Replace with the actual customer ID

# Load the original data
raw_csv_data = np.loadtxt('./Audiobooks_data.csv', delimiter=',')

# Find the row with the matching customer ID
customer_row = raw_csv_data[raw_csv_data[:, 0] == customer_id_to_predict]

if customer_row.shape[0] == 0:
    print("Customer ID not found.")
else:
    # Extract features (columns 1 to -1, as in your training)
    customer_features = customer_row[:, 1:-1]

    # Scale features (using the same method as training)
    customer_features_scaled = preprocessing.scale(customer_features)

    # Predict
    prediction = model.predict(customer_features_scaled)
    predicted_class = np.argmax(prediction, axis=1)[0]

    print(f"Predicted class for customer {customer_id_to_predict}: {predicted_class}")

NameError: name 'preprocessing' is not defined

In [2]:
# import numpy as np

# # Load the original data
# raw_csv_data = np.loadtxt('./Audiobooks_data.csv', delimiter=',')

# # Get all customer IDs and features
# customer_ids = raw_csv_data[:, 0]
# customer_features = raw_csv_data[:, 1:-1]

# # Scale features (using the same method as training)
# customer_features_scaled = preprocessing.scale(customer_features)

# # Predict for all customers
# predictions = model.predict(customer_features_scaled)
# predicted_classes = np.argmax(predictions, axis=1)

# # Print customer IDs with their predicted class
# for idx, pred_class in enumerate(predicted_classes):
#     print(f"Customer ID: {int(customer_ids[idx])} - Predicted class: {pred_class}")