# **Import the relevant libraries :**

In [1]:
import numpy as np
from sklearn import preprocessing
import tensorflow as tf

# **Extract the data from the csv :**

In [2]:
raw_csv_data = np.loadtxt('../input/audiobook/Audiobooks_data.csv', delimiter = ',')

unscaled_inputs_all = raw_csv_data[:,1:-1]
targets_all = raw_csv_data[:,-1]

# **Balancing the dataset :**

In [3]:
num_one_targets = int(np.sum(targets_all))
zero_targets_count = 0
indices_to_remove = []

for i in range(targets_all.shape[0]):
    if targets_all[i] == 0:
        zero_targets_count += 1
        if zero_targets_count > num_one_targets :
            indices_to_remove.append(i)
          

In [4]:
unscaled_inputs_equal_priors = np.delete(unscaled_inputs_all,indices_to_remove,axis = 0)

targets_equal_priors = np.delete(targets_all,indices_to_remove,axis = 0)


# **Standardizing the inputs :**

In [5]:
scaled_inputs = preprocessing.scale(unscaled_inputs_equal_priors)

# **Shuffle the data :**

In [6]:
shuffled_indices = np.arange(scaled_inputs.shape[0])
np.random.shuffle(shuffled_indices)

shuffled_inputs = scaled_inputs[shuffled_indices]

shuffled_targets = targets_equal_priors[shuffled_indices]

# **Split the dataset into training , validation and testing dataset :**

In [7]:
sample_count = shuffled_inputs.shape[0]
train_sample_count = int(0.8 * sample_count)
validation_sample_count = int(0.1 * sample_count)
test_sample_count = sample_count - train_sample_count - validation_sample_count

train_inputs = shuffled_inputs[:train_sample_count]
train_targets = shuffled_targets[:train_sample_count]

validation_inputs = shuffled_inputs[train_sample_count:train_sample_count + validation_sample_count]
validation_targets = shuffled_targets[train_sample_count:train_sample_count + validation_sample_count]

test_inputs = shuffled_inputs[train_sample_count + validation_sample_count:]
test_targets = shuffled_targets[train_sample_count + validation_sample_count:]

print(np.sum(train_targets),train_sample_count,np.sum(train_targets)/train_sample_count)
print(np.sum(validation_targets),validation_sample_count, np.sum(validation_targets)/validation_sample_count)
print(np.sum(test_targets),test_sample_count, np.sum(test_targets)/test_sample_count)

1799.0 3579 0.5026543727298128
205.0 447 0.45861297539149887
233.0 448 0.5200892857142857


# **Save the three datasets in *.npz format :**

In [8]:
np.savez('Audiobooks_data_train',inputs = train_inputs, targets = train_targets)
np.savez('Audiobooks_data_validation',inputs = validation_inputs, targets = validation_targets)
np.savez('Audiobooks_data_test',inputs = test_inputs, targets = test_targets)

# **Loading the *.npz format dataset :**

In [9]:
npz = np.load('Audiobooks_data_train.npz')
train_inputs = npz['inputs'].astype(np.float)
train_targets = npz['targets'].astype(np.int)

npz = np.load('Audiobooks_data_validation.npz')
validation_inputs = npz['inputs'].astype(np.float)
validation_targets = npz['targets'].astype(np.int)

npz = np.load('Audiobooks_data_test.npz')
test_inputs = npz['inputs'].astype(np.float)
test_targets = npz['targets'].astype(np.int)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  This is separate from the ipykernel package so we can avoid doing imports until
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  import sys
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  # Remove the CWD from sys.path while we load stuff.
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  # This is added back by InteractiveShellApp.init_path()


# **Model :**

In [10]:
input_size = 10
output_size = 2
hidden_layer = 100

model = tf.keras.Sequential([
    tf.keras.layers.Dense(hidden_layer, activation = 'relu'),
    tf.keras.layers.Dense(hidden_layer, activation = 'relu'),
    tf.keras.layers.Dense(output_size,activation = 'softmax')
    ])

model.compile(optimizer = 'adam',
              loss='sparse_categorical_crossentropy',
              metrics = ['accuracy']
              )

batch_size = 100

max_epochs = 100

early_stopping = tf.keras.callbacks.EarlyStopping(patience = 2)

model.fit(train_inputs,
          train_targets,
          batch_size = batch_size,
          epochs = max_epochs,
          callbacks = early_stopping,
          validation_data = (validation_inputs,validation_targets),
          verbose = 2)

2022-04-11 01:48:23.205054: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
2022-04-11 01:48:23.334553: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/100
36/36 - 1s - loss: 0.5595 - accuracy: 0.6913 - val_loss: 0.4612 - val_accuracy: 0.7472
Epoch 2/100
36/36 - 0s - loss: 0.4261 - accuracy: 0.7809 - val_loss: 0.3957 - val_accuracy: 0.8412
Epoch 3/100
36/36 - 0s - loss: 0.3871 - accuracy: 0.7860 - val_loss: 0.3679 - val_accuracy: 0.8412
Epoch 4/100
36/36 - 0s - loss: 0.3688 - accuracy: 0.7974 - val_loss: 0.3391 - val_accuracy: 0.8456
Epoch 5/100
36/36 - 0s - loss: 0.3609 - accuracy: 0.8033 - val_loss: 0.3309 - val_accuracy: 0.8456
Epoch 6/100
36/36 - 0s - loss: 0.3500 - accuracy: 0.8106 - val_loss: 0.3348 - val_accuracy: 0.8345
Epoch 7/100
36/36 - 0s - loss: 0.3494 - accuracy: 0.8047 - val_loss: 0.3433 - val_accuracy: 0.8143


<keras.callbacks.History at 0x7f85be78af50>

# **Test the model :**

In [11]:
test_loss,test_accuracy = model.evaluate(test_inputs,test_targets)

print('\nTest loss : {0:.2f}. Test accuracy : {1:.2f}%' . format(test_loss,test_accuracy * 100))


Test loss : 0.38. Test accuracy : 81.03%
