In [1]:
import numpy as np
import tensorflow as tf
from sklearn import preprocessing

In [2]:
raw_csv_data= np.loadtxt('Audiobooks_data.csv',delimiter=',')

In [3]:
unscaled_inputs_all=raw_csv_data[:,1:-1]
targets_all=raw_csv_data[:,-1]

In [4]:
num_one_targets= int(np.sum(targets_all))
num_one_targets


2237

In [5]:
zero_targets_counter=0
indices_to_remove=[]

for i in range(targets_all.shape[0]):
    if targets_all[i]==0:
        zero_targets_counter+=1
        if zero_targets_counter>num_one_targets:
            indices_to_remove.append(i)

inputs_equal_priors=np.delete(unscaled_inputs_all, indices_to_remove,axis=0)
targets_equal_priors=np.delete(targets_all, indices_to_remove, axis=0)

In [6]:
scaled_inputs= preprocessing.scale(inputs_equal_priors)

In [7]:
shuffled_indices=np.arange(scaled_inputs.shape[0])
np.random.shuffle(shuffled_indices)
shuffled_inputs= scaled_inputs[shuffled_indices]
shuffled_targets=targets_equal_priors[shuffled_indices]
shuffled_indices

array([1944, 1370, 1823, ..., 4377, 3611, 4432])

In [8]:
num_samples= shuffled_targets.shape[0]
num_samples

4474

In [9]:
num_train= int(0.8*num_samples)
num_validation= int(0.1*num_samples)
num_test= num_samples-num_train-num_validation

train_inputs= shuffled_inputs[:num_train]
train_targets= shuffled_targets[:num_train]
validation_inputs= shuffled_inputs[num_train:num_train+num_validation]
validation_targets= shuffled_targets[num_train:num_train+num_validation]
test_inputs=shuffled_inputs[num_train+num_validation:]
test_targets= shuffled_targets[num_train+num_validation:]

print(np.sum(train_targets), num_train, np.sum(train_targets)/num_train)
print(np.sum(validation_targets), num_validation, np.sum(validation_targets)/num_validation)
print(np.sum(test_targets), num_test, np.sum(test_targets)/num_test)

1792.0 3579 0.5006985191394244
219.0 447 0.4899328859060403
226.0 448 0.5044642857142857


In [10]:
np.savez('Audiobooks_train', inputs=train_inputs, targets=train_targets)
np.savez('Audiobooks_validation', inputs=validation_inputs, targets=validation_targets)
np.savez('Audiobooks_test', inputs=test_inputs, targets=test_targets)


In [11]:
npz= np.load('Audiobooks_train.npz')
train_inputs, train_targets= npz['inputs'].astype(np.float32), npz['targets'].astype(np.int64)
npz= np.load('Audiobooks_validation.npz')
validation_inputs, validation_targets= npz['inputs'].astype(np.float32), npz['targets'].astype(np.int64)
npz= np.load('Audiobooks_test.npz')
test_inputs, test_targets= npz['inputs'].astype(np.float32), npz['targets'].astype(np.int64)


In [20]:
input_size=10
output_size=2
hidden_layer_size=100

model= tf.keras.Sequential([
    
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    
                            
                            
    
                             tf.keras.layers.Dense(output_size, activation='softmax')
                    ])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
batch_size=100
max_epochs=100
early_stopping= tf.keras.callbacks.EarlyStopping(patience=2)

model.fit( train_inputs,
          train_targets,
          batch_size=batch_size,
          epochs=max_epochs,
          callbacks=[early_stopping],
          validation_data=(validation_inputs,validation_targets),
          verbose=2 )




Epoch 1/100
36/36 - 1s - loss: 0.5326 - accuracy: 0.7245 - val_loss: 0.4313 - val_accuracy: 0.7897 - 1s/epoch - 34ms/step
Epoch 2/100
36/36 - 0s - loss: 0.4101 - accuracy: 0.7843 - val_loss: 0.3762 - val_accuracy: 0.8009 - 125ms/epoch - 3ms/step
Epoch 3/100
36/36 - 0s - loss: 0.3766 - accuracy: 0.7999 - val_loss: 0.3593 - val_accuracy: 0.8166 - 156ms/epoch - 4ms/step
Epoch 4/100
36/36 - 0s - loss: 0.3647 - accuracy: 0.8041 - val_loss: 0.3454 - val_accuracy: 0.8076 - 134ms/epoch - 4ms/step
Epoch 5/100
36/36 - 0s - loss: 0.3530 - accuracy: 0.8033 - val_loss: 0.3407 - val_accuracy: 0.8143 - 156ms/epoch - 4ms/step
Epoch 6/100
36/36 - 0s - loss: 0.3469 - accuracy: 0.8153 - val_loss: 0.3444 - val_accuracy: 0.8098 - 172ms/epoch - 5ms/step
Epoch 7/100
36/36 - 0s - loss: 0.3445 - accuracy: 0.8094 - val_loss: 0.3310 - val_accuracy: 0.8188 - 125ms/epoch - 3ms/step
Epoch 8/100
36/36 - 0s - loss: 0.3414 - accuracy: 0.8167 - val_loss: 0.3419 - val_accuracy: 0.7942 - 125ms/epoch - 3ms/step
Epoch 9/10

<keras.src.callbacks.History at 0x2e5da418190>

In [21]:
test_loss, test_accuracy= model.evaluate(test_inputs, test_targets)
print(test_loss, test_accuracy)

0.33921000361442566 0.8058035969734192
