## Import relevant libraries

In [1]:
import numpy as np
from sklearn import preprocessing
import tensorflow as tf

## Load data

In [2]:
raw_audio_data = np.loadtxt('Audiobooks_data.csv', delimiter=',')
raw_audio_data

unscaled_inputs_all = raw_audio_data[:,1:-1]
targets_all = raw_audio_data[:,-1]

## Balance the dataset

In [3]:
num_one_targets = int(np.sum(targets_all))
zero_targets_counter = 0

indices_to_remove = []

for i in range (targets_all.shape[0]):
    if targets_all[i] == 0:
        zero_targets_counter += 1
        if zero_targets_counter > num_one_targets:
            indices_to_remove.append(i)
            
unscaled_inputs_equal_priors = np.delete(unscaled_inputs_all, indices_to_remove, axis = 0)
targets_equal_prior = np.delete(targets_all, indices_to_remove, axis = 0)

## Standardize the inputs

In [4]:
scaled_inputs = preprocessing.scale(unscaled_inputs_equal_priors)

## Shuffle the data

In [5]:
shuffled_indices = np.arange(scaled_inputs.shape[0])
np.random.shuffle(shuffled_indices)

shuffled_inputs = scaled_inputs[shuffled_indices]
shuffled_targets = targets_equal_prior[shuffled_indices]

## Split data into train, validation and test data

In [6]:
sample_count = shuffled_inputs.shape[0]

train_sample_count = int(0.8*sample_count)
validation_sample_count = int(0.1*sample_count)
test_sample_count = sample_count - train_sample_count - validation_sample_count

train_inputs = shuffled_inputs[:train_sample_count]
train_targets = shuffled_targets[:train_sample_count]

validation_inputs = shuffled_inputs[train_sample_count:validation_sample_count+train_sample_count]
validation_targets  = shuffled_targets[train_sample_count:validation_sample_count+train_sample_count]

test_inputs = shuffled_inputs[validation_sample_count+train_sample_count:]
test_targets = shuffled_targets[validation_sample_count+train_sample_count:]

print('Training Data: ', np.sum(train_targets),'  ',train_sample_count,'  ', np.sum(train_targets)/train_sample_count)
print('Validation Data: ',np.sum(validation_targets),'  ', validation_sample_count,'  ', np.sum(validation_targets)/validation_sample_count)
print('Test Data: ', np.sum(test_targets),'  ', test_sample_count,'  ', np.sum(test_targets)/test_sample_count)
    

Training Data:  1809.0    3579    0.5054484492875104
Validation Data:  214.0    447    0.47874720357941836
Test Data:  214.0    448    0.47767857142857145


## Save the three datasets in *.npz

In [7]:
np.savez('Audiobook_data_train', inputs = train_inputs, targets = train_targets)
np.savez('Audiobook_data_validation', inputs = validation_inputs, targets = validation_targets)
np.savez('Audiobook_data_test', inputs = test_inputs, targets = test_targets)

## Creating machine learning algorithm

### Data

In [8]:
npz = np.load('Audiobook_data_train.npz')

train_inputs = npz['inputs'].astype(np.float)
train_targets = npz['targets'].astype(np.int)

npz = np.load('Audiobook_data_validation.npz')

validation_inputs = npz['inputs'].astype(np.float)
validation_targets = npz['targets'].astype(np.int)

npz = np.load('Audiobook_data_test.npz')

test_inputs = npz['inputs'].astype(np.float)
test_targets = npz['targets'].astype(np.int)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  train_inputs = npz['inputs'].astype(np.float)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  train_targets = npz['targets'].astype(np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  validation_inputs = npz['inputs'].astype(np.float)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  validation_targets = npz['targets'].astype(np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  test_inputs = npz['inputs'].astype(np.float)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  test_targets = np

### Building the model

In [9]:
input_size = 10
output_size = 2
hidden_layer_size = 100

model = tf.keras.Sequential([
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
                            tf.keras.layers.Dense(output_size, activation='softmax') 
                            ])
custom_optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer = custom_optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

### Fitting the model

In [10]:
batch_size = 1

max_epochs = 50

early_stopping = tf.keras.callbacks.EarlyStopping(patience = 2)

model.fit(train_inputs,
         train_targets,
         batch_size = batch_size,
         epochs = max_epochs,
         callbacks=[early_stopping],
         validation_data = (validation_inputs, validation_targets),
         verbose = 2)

Epoch 1/50
3579/3579 - 3s - loss: 0.4059 - accuracy: 0.8650 - val_loss: 0.3214 - val_accuracy: 0.9038 - 3s/epoch - 877us/step
Epoch 2/50
3579/3579 - 3s - loss: 0.3549 - accuracy: 0.8857 - val_loss: 0.3020 - val_accuracy: 0.8859 - 3s/epoch - 794us/step
Epoch 3/50
3579/3579 - 3s - loss: 0.3387 - accuracy: 0.8921 - val_loss: 0.3087 - val_accuracy: 0.8456 - 3s/epoch - 798us/step
Epoch 4/50
3579/3579 - 3s - loss: 0.2855 - accuracy: 0.9014 - val_loss: 0.2802 - val_accuracy: 0.8993 - 3s/epoch - 801us/step
Epoch 5/50
3579/3579 - 3s - loss: 0.4155 - accuracy: 0.8944 - val_loss: 0.3186 - val_accuracy: 0.8926 - 3s/epoch - 802us/step
Epoch 6/50
3579/3579 - 3s - loss: 0.3250 - accuracy: 0.8955 - val_loss: 0.2732 - val_accuracy: 0.9016 - 3s/epoch - 797us/step
Epoch 7/50
3579/3579 - 3s - loss: 0.2742 - accuracy: 0.9036 - val_loss: 0.2862 - val_accuracy: 0.8993 - 3s/epoch - 800us/step
Epoch 8/50
3579/3579 - 3s - loss: 0.2683 - accuracy: 0.9061 - val_loss: 0.2915 - val_accuracy: 0.8971 - 3s/epoch - 800

<keras.callbacks.History at 0x1d30236bca0>

## Test the model

In [11]:
test_loss, test_accuracy = model.evaluate(test_inputs, test_targets)



In [12]:
print('Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy * 100.))

Test loss: 0.29. Test accuracy: 91.29%
