# Audio Book Buisness Case Study

## Import Relevent Libraries

In [76]:
import numpy as np
from sklearn import preprocessing
import tensorflow as tf

## Import DataSet

In [77]:
raw_data = np.loadtxt('Audiobooks_data.csv', delimiter = ',')

unscaled_inputs_all = raw_data[:,1:-1]
targets_all = raw_data[:,-1]

## Balance the DataSet

In [78]:
num_one_targets = int(np.sum(targets_all))
zero_count = 0
indices_to_remove = []
for i in range(targets_all.shape[0]):
    if targets_all[i] == 0:
        zero_count += 1
        if num_one_targets < zero_count:
            indices_to_remove.append(i)

unscaled_input_equal_priors = np.delete(unscaled_inputs_all,indices_to_remove,axis=0)
targets_equal_prior = np.delete(targets_all , indices_to_remove, axis = 0)

## Standardize the Inputs

In [79]:
scaled_inputs = preprocessing.scale(unscaled_input_equal_priors)

## Shuffle the DataSet

In [80]:
shuffled_indices = np.arange(scaled_inputs.shape[0])
np.random.shuffle(shuffled_indices)

shuffled_inputs = scaled_inputs[shuffled_indices]
shuffled_targets = targets_equal_prior[shuffled_indices]

## Split the dataset into train, test and validation

In [81]:
sample_count = shuffled_inputs.shape[0]

train_sample_count = int(0.8*sample_count)
validation_sample_count = int(0.1 * sample_count)
test_sample_count = train_sample_count - validation_sample_count

#train data
train_input_data = shuffled_inputs[:train_sample_count]
train_target_data = shuffled_targets[:train_sample_count]

#validation data 
validation_inputs_data = shuffled_inputs[train_sample_count : train_sample_count+validation_sample_count]
validation_targets_data = shuffled_targets[train_sample_count : train_sample_count+validation_sample_count]

#test data 
test_inputs_data = shuffled_inputs[train_sample_count+validation_sample_count : train_sample_count+validation_sample_count + test_sample_count]
test_targets_data = shuffled_targets[train_sample_count+validation_sample_count : train_sample_count+validation_sample_count + test_sample_count]

print(np.sum(train_target_data),train_sample_count,np.sum(train_target_data)/train_sample_count)
print(np.sum(validation_targets_data),validation_sample_count,np.sum(validation_targets_data)/validation_sample_count)
print(np.sum(test_targets_data),test_sample_count,np.sum(test_targets_data)/test_sample_count)

1780.0 3579 0.4973456272701872
229.0 447 0.5123042505592841
228.0 3132 0.07279693486590039


# Create Machine Learning Algorithm

In [82]:
input_size = 10
hidden_layer_size = 50
output_size = 2

model = tf.keras.Sequential([

    tf.keras.layers.Dense(hidden_layer_size, activation = 'relu'),
    tf.keras.layers.Dense(hidden_layer_size, activation = 'relu'),
    tf.keras.layers.Dense(output_size , activation = 'softmax')
])


model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy' , metrics = ['accuracy'])

batch_size = 100

max_epochs = 100

early_stopping = tf.keras.callbacks.EarlyStopping(patience = 2)

model.fit(
    train_input_data,
    train_target_data,
    batch_size = batch_size,
    epochs = max_epochs,
    callbacks = [early_stopping],
    validation_data = (validation_inputs_data, validation_targets_data),
    verbose = 2)

Epoch 1/100
36/36 - 1s - 37ms/step - accuracy: 0.6887 - loss: 0.5984 - val_accuracy: 0.7673 - val_loss: 0.5090
Epoch 2/100
36/36 - 0s - 4ms/step - accuracy: 0.7656 - loss: 0.4718 - val_accuracy: 0.7763 - val_loss: 0.4359
Epoch 3/100
36/36 - 0s - 4ms/step - accuracy: 0.7849 - loss: 0.4196 - val_accuracy: 0.7852 - val_loss: 0.4054
Epoch 4/100
36/36 - 0s - 4ms/step - accuracy: 0.7999 - loss: 0.3941 - val_accuracy: 0.7718 - val_loss: 0.3923
Epoch 5/100
36/36 - 0s - 4ms/step - accuracy: 0.7952 - loss: 0.3802 - val_accuracy: 0.7987 - val_loss: 0.3789
Epoch 6/100
36/36 - 0s - 4ms/step - accuracy: 0.8053 - loss: 0.3686 - val_accuracy: 0.7852 - val_loss: 0.3716
Epoch 7/100
36/36 - 0s - 4ms/step - accuracy: 0.8089 - loss: 0.3602 - val_accuracy: 0.8031 - val_loss: 0.3651
Epoch 8/100
36/36 - 0s - 4ms/step - accuracy: 0.8069 - loss: 0.3574 - val_accuracy: 0.8054 - val_loss: 0.3591
Epoch 9/100
36/36 - 0s - 6ms/step - accuracy: 0.8013 - loss: 0.3553 - val_accuracy: 0.8166 - val_loss: 0.3555
Epoch 10/

<keras.src.callbacks.history.History at 0x25fd2618d30>

## Test The Model

In [83]:
test_loss , test_accuracy = model.evaluate(test_inputs_data, test_targets_data)

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8426 - loss: 0.3039 


In [85]:
print('\n Test Loss : {0:.2f}. Test Accuracy : {0:.2f}%'.format(test_loss, test_accuracy*100.))


 Test Loss : 0.33. Test Accuracy : 0.33%
