In [17]:
import numpy as np
import tensorflow as tf
from sklearn import preprocessing

In [18]:
raw_data = np.loadtxt('Audiobooks_data.csv', delimiter=',')
unscaled_input = raw_data[:,1:-1]
target = raw_data[:,-1]

## Balancing The data

In [19]:
num_one = int(np.sum(target))
zero_target_counter = 0
indices_to_remove = []

for i in range(target.shape[0]):
    if target[i] == 0:
        zero_target_counter += 1
        if zero_target_counter > num_one:
            indices_to_remove.append(i)

unscaled_input_equal = np.delete(unscaled_input, indices_to_remove, axis=0)
target_equal = np.delete(target, indices_to_remove, axis=0)


## Standardize The Inputs

In [20]:
scaled_input = preprocessing.scale(unscaled_input_equal)

## Shuffling Inputs And Targets

In [21]:
shuffle = np.arange(scaled_input.shape[0])
np.random.shuffle(shuffle)

shuffle_input = scaled_input[shuffle]
shuffle_target = target_equal[shuffle]

## Split to TRAIN, VALIDATION, TEST Dataset

In [22]:
sample_count = shuffle_input.shape[0]

train_sample = int(0.8 * sample_count)
val_sample = int(0.1 * sample_count)
test_sample = sample_count - train_sample - val_sample

train_input = shuffle_input[:train_sample]
train_target = shuffle_target[:train_sample]

val_input = shuffle_input[train_sample:train_sample + val_sample]
val_target = shuffle_target[train_sample:train_sample + val_sample]

test_input = shuffle_input[train_sample + val_sample:]
test_target = shuffle_target[train_sample + val_sample:]
print(np.sum(train_target), train_sample, np.sum(train_target)/train_sample)
print(np.sum(test_target), test_sample, np.sum(test_target)/test_sample)

1805.0 3579 0.5043308186644314
225.0 448 0.5022321428571429


## saves the dataset to npz

In [25]:
input_size = 10
output_size = 2
hidden_layer_size = 50
early_stopping = tf.keras.callbacks.EarlyStopping()
model = tf.keras.Sequential([
                             tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
                             tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
                             tf.keras.layers.Dense(output_size, activation='softmax')
                             ])

model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics=['accuracy'])

batchsize = 100
max_epoch = 100





In [26]:
model.fit(train_input, train_target,
          batch_size = batchsize,
          epochs = max_epoch,
          callbacks = [early_stopping],
          validation_data = (val_input, val_target),
          verbose=2)
       

Epoch 1/100


36/36 - 13s - loss: 0.6421 - accuracy: 0.6194 - val_loss: 0.5586 - val_accuracy: 0.7047 - 13s/epoch - 372ms/step
Epoch 2/100
36/36 - 0s - loss: 0.5024 - accuracy: 0.7443 - val_loss: 0.4699 - val_accuracy: 0.7494 - 142ms/epoch - 4ms/step
Epoch 3/100
36/36 - 0s - loss: 0.4346 - accuracy: 0.7737 - val_loss: 0.4317 - val_accuracy: 0.7718 - 118ms/epoch - 3ms/step
Epoch 4/100
36/36 - 0s - loss: 0.4014 - accuracy: 0.7921 - val_loss: 0.4108 - val_accuracy: 0.7830 - 129ms/epoch - 4ms/step
Epoch 5/100
36/36 - 0s - loss: 0.3830 - accuracy: 0.8008 - val_loss: 0.3882 - val_accuracy: 0.7785 - 131ms/epoch - 4ms/step
Epoch 6/100
36/36 - 0s - loss: 0.3711 - accuracy: 0.8030 - val_loss: 0.3772 - val_accuracy: 0.7897 - 156ms/epoch - 4ms/step
Epoch 7/100
36/36 - 0s - loss: 0.3602 - accuracy: 0.8089 - val_loss: 0.3736 - val_accuracy: 0.7875 - 124ms/epoch - 3ms/step
Epoch 8/100
36/36 - 0s - loss: 0.3543 - accuracy: 0.8108 - val_loss: 0.3802 - val_accuracy: 0.7897 - 120ms/epoch - 3ms/step


<keras.src.callbacks.History at 0x25cae19e990>

In [28]:
test_loss, test_accuracy = model.evaluate(test_input, test_target)



In [30]:
print(f"Test loss: {test_loss:.2f}. Test accuracy: {test_accuracy:.2f}")

Test loss: 0.35. Test accuracy: 0.82
