In [1]:
import numpy as np
from sklearn import preprocessing
import tensorflow as tf

### Loading the data

In [2]:
data = np.loadtxt("Audiobooks_data.csv",delimiter=",")
data

array([[9.9400e+02, 1.6200e+03, 1.6200e+03, ..., 5.0000e+00, 9.2000e+01,
        0.0000e+00],
       [1.1430e+03, 2.1600e+03, 2.1600e+03, ..., 0.0000e+00, 0.0000e+00,
        0.0000e+00],
       [2.0590e+03, 2.1600e+03, 2.1600e+03, ..., 0.0000e+00, 3.8800e+02,
        0.0000e+00],
       ...,
       [3.1134e+04, 2.1600e+03, 2.1600e+03, ..., 0.0000e+00, 0.0000e+00,
        0.0000e+00],
       [3.2832e+04, 1.6200e+03, 1.6200e+03, ..., 0.0000e+00, 9.0000e+01,
        0.0000e+00],
       [2.5100e+02, 1.6740e+03, 3.3480e+03, ..., 0.0000e+00, 0.0000e+00,
        1.0000e+00]])

In [3]:
unScaledData = data[:,1:-1]
unScaledData

array([[1620.  , 1620.  ,   19.73, ..., 1603.8 ,    5.  ,   92.  ],
       [2160.  , 2160.  ,    5.33, ...,    0.  ,    0.  ,    0.  ],
       [2160.  , 2160.  ,    5.33, ...,    0.  ,    0.  ,  388.  ],
       ...,
       [2160.  , 2160.  ,    6.14, ...,    0.  ,    0.  ,    0.  ],
       [1620.  , 1620.  ,    5.33, ...,  615.6 ,    0.  ,   90.  ],
       [1674.  , 3348.  ,    5.33, ...,    0.  ,    0.  ,    0.  ]])

In [4]:
targets = data[:,-1]
targets

array([0., 0., 0., ..., 0., 0., 1.])

### Balancing the datasets

In [5]:
numOneTargets = int(np.sum(targets))
counter = 0
indicesToRemove = []

#Balancing the the 1s and 0s
for i in range(targets.shape[0]):
    if targets[i] == 0:
        counter +=1
        
        if counter > numOneTargets :
            indicesToRemove.append(i)
        
balancedData = np.delete(unScaledData,indicesToRemove,axis=0)        

### Standardization

In [6]:
preprocessedData = preprocessing.scale(balancedData)

### Shuffling the data

In [7]:
shuffledIndices = np.arange(preprocessedData.shape[0])
np.random.shuffle(shuffledIndices)

shuffledInputs = preprocessedData[shuffledIndices]
shuffledTargets = targets[shuffledIndices]

### Splitting the data into training and testing the data

In [15]:
m = shuffledInputs.shape[0]
trainSampleCount = int(0.8*m)
validationSampleCount = int(m*0.1)
testSampleCount = m - (trainSampleCount+validationSampleCount)

trainInputs = shuffledInputs[:trainSampleCount]
trainTargets = shuffledTargets[:trainSampleCount]

validationInputs = shuffledInputs[trainSampleCount:trainSampleCount+validationSampleCount]
validationTargets = shuffledTargets[trainSampleCount:trainSampleCount+validationSampleCount]


testInputs = shuffledInputs[trainSampleCount+validationSampleCount:]
testTargets = shuffledTargets[trainSampleCount+validationSampleCount:]

### Saving the preprocessed data

In [16]:
np.savez('Audiobook_TrainData',inputs=trainInputs,targets=trainTargets)
np.savez('Audiobook_TestData',inputs=testInputs,targets=testTargets)
np.savez('Audiobook_ValidationData',inputs=validationInputs,targets=validationTargets)

### Deep Learning

In [17]:
###Loading data
npzTrainData = np.load("Audiobook_TrainData.npz")
npzTestData = np.load("Audiobook_TestData.npz")
npzValData = np.load("Audiobook_ValidationData.npz")

In [23]:
X = npzTrainData['inputs'].astype(np.float64)
Y = npzTrainData['targets'].astype(np.int64)
valX = npzValData['inputs'].astype(np.float64)
valY = npzValData['targets'].astype(np.int64)
testX = npzTestData['inputs'].astype(np.float64)
testY = npzTestData['targets'].astype(np.int64)

### Creating Neural Network

In [22]:

inputSize = 10
outputSize = 2
hiddenLayerSize = 50

#Stopping before overfit
earlyStop = tf.keras.callbacks.EarlyStopping(patience=6)
model = tf.keras.Sequential([
                            tf.keras.layers.Dense(hiddenLayerSize,activation='relu'),
                            tf.keras.layers.Dense(hiddenLayerSize,activation='relu'),
                            tf.keras.layers.Dense(outputSize,activation='softmax')
                            ])
model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])

batchSize = 100
maxEpochs = 100
model.fit(X,
          Y,
          batch_size=batchSize,
          epochs=maxEpochs,
          validation_data=(valX,valY),
          verbose=2,
          callbacks=[earlyStop])

Epoch 1/100
36/36 - 1s - loss: 0.5210 - accuracy: 0.7977 - val_loss: 0.4783 - val_accuracy: 0.7875 - 1s/epoch - 32ms/step
Epoch 2/100
36/36 - 0s - loss: 0.4555 - accuracy: 0.8053 - val_loss: 0.4612 - val_accuracy: 0.7875 - 153ms/epoch - 4ms/step
Epoch 3/100
36/36 - 0s - loss: 0.4403 - accuracy: 0.8041 - val_loss: 0.4519 - val_accuracy: 0.7942 - 154ms/epoch - 4ms/step
Epoch 4/100
36/36 - 0s - loss: 0.4326 - accuracy: 0.8044 - val_loss: 0.4463 - val_accuracy: 0.7919 - 154ms/epoch - 4ms/step
Epoch 5/100
36/36 - 0s - loss: 0.4281 - accuracy: 0.8050 - val_loss: 0.4382 - val_accuracy: 0.8009 - 162ms/epoch - 5ms/step
Epoch 6/100
36/36 - 0s - loss: 0.4219 - accuracy: 0.8039 - val_loss: 0.4367 - val_accuracy: 0.7919 - 148ms/epoch - 4ms/step
Epoch 7/100
36/36 - 0s - loss: 0.4191 - accuracy: 0.8097 - val_loss: 0.4313 - val_accuracy: 0.7942 - 165ms/epoch - 5ms/step
Epoch 8/100
36/36 - 0s - loss: 0.4156 - accuracy: 0.8086 - val_loss: 0.4299 - val_accuracy: 0.7942 - 153ms/epoch - 4ms/step
Epoch 9/10

<keras.callbacks.History at 0x20013318bb0>

### Test on the data

In [27]:
testLoss,testAccuracy = model.evaluate(testX,testY)
print("\nTrain Loss: {}.\nTest accuracy: {}".format(round(testLoss,2),round(testAccuracy,2)))


Train Loss: 0.4.
Test accuracy: 0.81
