In [11]:
import numpy as np
from sklearn import preprocessing

# Parse the comma-separated file into a single 2-D numeric array.
raw_csv_data = np.loadtxt("Audiobooks_data.csv", delimiter=',')

# Targets are the last column of every row.
targets_all = raw_csv_data[:, -1]

# Inputs are every column except the first and the last.
# NOTE(review): the first column is presumably a record ID - confirm against the CSV.
unscaled_inputs_all = raw_csv_data[:, 1:-1]

In [12]:
#Balance the dataset: rows whose target is 0 are kept only until their number
#equals the sum of the targets; every later target-0 row is dropped.
#Rows with a non-zero target are never removed.
#NOTE(review): using the sum as "number of ones" assumes targets are 0/1 - confirm.

ones_in_target=int(np.sum(targets_all))

#Positions of all target-0 rows, in their original order
zero_positions=np.flatnonzero(targets_all==0)
zeros_counter=int(zero_positions.size)

#Everything past the first `ones_in_target` zeros is surplus
indices_to_remove=zero_positions[max(ones_in_target,0):].tolist()

unscaled_inputs_balanced=np.delete(unscaled_inputs_all,indices_to_remove,axis=0)
targets_all_balanced=np.delete(targets_all,indices_to_remove,axis=0)

In [13]:
#Standardize the balanced inputs column by column (axis=0).
#The keyword arguments spell out the defaults of preprocessing.scale.
#NOTE(review): the scaling statistics are computed on the full balanced dataset,
#i.e. before the train/validation/test split, so they include validation and test rows.
scaled_inputs=preprocessing.scale(
    unscaled_inputs_balanced,
    axis=0,
    with_mean=True,
    with_std=True,
)

In [14]:
#Shuffle the data
#A fixed seed makes the permutation - and therefore the train/validation/test
#split and the saved .npz files - identical on every run of the notebook.
SHUFFLE_SEED=42
shuffle_rng=np.random.default_rng(SHUFFLE_SEED)

#Random permutation of the row indices 0..N-1
shuffled_indices=shuffle_rng.permutation(scaled_inputs.shape[0])

#Apply the same permutation to inputs and targets so rows stay aligned
shuffled_inputs=scaled_inputs[shuffled_indices]
shuffled_targets=targets_all_balanced[shuffled_indices]


In [15]:
#Split data into train / validation / test by 80-10-10

samples_count=shuffled_inputs.shape[0]

train_count=int(0.8*samples_count)
validation_count=int(0.1*samples_count)
#Whatever is left after the two truncated counts goes to the test split
test_count=samples_count-train_count-validation_count

#Row ranges of the three consecutive splits
train_rows=slice(None,train_count)
validation_rows=slice(train_count,train_count+validation_count)
test_rows=slice(train_count+validation_count,None)

train_inputs,train_targets=shuffled_inputs[train_rows],shuffled_targets[train_rows]
validation_inputs,validation_targets=shuffled_inputs[validation_rows],shuffled_targets[validation_rows]
test_inputs,test_targets=shuffled_inputs[test_rows],shuffled_targets[test_rows]


In [16]:
#Write each split to its own archive, storing the arrays under the keys
#"inputs" and "targets"
for archive_name,split_inputs,split_targets in (
    ("Audiobooks_data_train",train_inputs,train_targets),
    ("Audiobooks_data_validation",validation_inputs,validation_targets),
    ("Audiobooks_data_test",test_inputs,test_targets),
):
    np.savez(archive_name,inputs=split_inputs,targets=split_targets)

# Machine Learning Algorithm


In [35]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Flatten,Dense
from tensorflow.keras import Sequential

In [36]:
def _load_npz_split(path):
    """Load one saved split and return ``(inputs, targets)``.

    The archive must contain the arrays ``inputs`` and ``targets``.
    Inputs are cast with the builtin ``float`` and targets with the builtin
    ``int``. These are the types the removed ``np.float`` / ``np.int``
    aliases pointed to, so the resulting dtypes are unchanged while the code
    also runs on NumPy >= 1.24, where the aliases no longer exist.
    """
    # The context manager closes the underlying file once the arrays are read
    with np.load(path) as npz_file:
        inputs=npz_file['inputs'].astype(float)
        targets=npz_file['targets'].astype(int)
    return inputs,targets


train_inputs,train_targets=_load_npz_split('Audiobooks_data_train.npz')

validation_inputs,validation_targets=_load_npz_split('Audiobooks_data_validation.npz')

test_inputs,test_targets=_load_npz_split('Audiobooks_data_test.npz')

In [37]:
#Network dimensions
#NOTE(review): input_size is not referenced by the model below; no input shape is
#declared, so the input width is presumably inferred from the data on first use.
input_size=10
output_size=2
hidden_layer_size=50

#Two ReLU hidden layers of equal width followed by a softmax output layer
model=Sequential()
for _ in range(2):
    model.add(Dense(hidden_layer_size,activation='relu'))
model.add(Dense(output_size,activation='softmax'))

#Sparse categorical cross-entropy is used with the integer targets
model.compile(
    optimizer='adam',
    loss="sparse_categorical_crossentropy",
    metrics=['accuracy'],
)



In [39]:
BATCH_SIZE=100
NUM_EPOCHS=100

#Early stopping: 'val_loss' is the callback's default monitored quantity, written
#out explicitly here; training halts after 3 epochs without improvement.
early_stopping=tf.keras.callbacks.EarlyStopping(monitor='val_loss',patience=3)

fit_options=dict(
    batch_size=BATCH_SIZE,
    epochs=NUM_EPOCHS,
    callbacks=[early_stopping],
    validation_data=(validation_inputs,validation_targets),
    verbose=2,
)

#Left as the last expression so the returned History object is shown as cell output
model.fit(train_inputs,train_targets,**fit_options)


Train on 3579 samples, validate on 447 samples
Epoch 1/100
3579/3579 - 0s - loss: 0.2412 - accuracy: 0.9078 - val_loss: 0.2724 - val_accuracy: 0.8926
Epoch 2/100
3579/3579 - 0s - loss: 0.2381 - accuracy: 0.9086 - val_loss: 0.2654 - val_accuracy: 0.8949
Epoch 3/100
3579/3579 - 0s - loss: 0.2347 - accuracy: 0.9098 - val_loss: 0.2645 - val_accuracy: 0.8949
Epoch 4/100
3579/3579 - 0s - loss: 0.2361 - accuracy: 0.9106 - val_loss: 0.2653 - val_accuracy: 0.9016
Epoch 5/100
3579/3579 - 0s - loss: 0.2334 - accuracy: 0.9111 - val_loss: 0.2576 - val_accuracy: 0.8971
Epoch 6/100
3579/3579 - 0s - loss: 0.2321 - accuracy: 0.9117 - val_loss: 0.2577 - val_accuracy: 0.8971
Epoch 7/100
3579/3579 - 0s - loss: 0.2306 - accuracy: 0.9103 - val_loss: 0.2581 - val_accuracy: 0.8971
Epoch 8/100
3579/3579 - 0s - loss: 0.2301 - accuracy: 0.9120 - val_loss: 0.2593 - val_accuracy: 0.8993


<tensorflow.python.keras.callbacks.History at 0x26b2a1e8a90>

In [40]:

#Evaluate on the test split; the model was compiled with metrics=['accuracy'],
#so the printed list holds the loss followed by the accuracy
test_metrics=model.evaluate(test_inputs,test_targets)
print(test_metrics)


[0.21911550260015897, 0.92410713]
