### Importing Packages:

In [1]:
import numpy as np
import tensorflow as tf
from sklearn import preprocessing

### Importing Dataset: 

In [2]:
# Read the comma-separated audiobook records into a single NumPy array.
raw_data = np.loadtxt(fname='./Audiobooks_data.csv', delimiter=',')

### Balancing the dataset:

In [3]:
# Balance the two classes by dropping surplus rows whose target (last column) is 0.
# Rows with a non-zero target are always kept; of the zero-target rows, only the
# first `num_ones` (in file order) survive.
# `num_ones` is the sum of the target column, i.e. the count of 1s assuming the
# targets are strictly 0/1 -- TODO confirm against the dataset description.
# NOTE(review): surplus zeros are dropped by position, not at random; if the CSV is
# ordered in some meaningful way this could bias the kept sample -- worth verifying.
targets = raw_data[:, -1]
num_ones = np.sum(targets)

is_zero = targets == 0
zero_count = int(np.count_nonzero(is_zero))

# Running 1-based rank of every zero-target row; ranks beyond `num_ones` are surplus.
zero_rank = np.cumsum(is_zero)
indices_to_delete = np.flatnonzero(is_zero & (zero_rank > num_ones)).tolist()

data = np.delete(raw_data, indices_to_delete, axis=0)
data.shape

(4474, 12)

### Shuffling the dataset

In [4]:
# Shuffle the rows of `data` in place, so that the consecutive slices taken later
# for the training/validation/testing splits do not follow the original row order.
# A fixed seed makes the shuffle -- and therefore the splits, the printed class
# ratios and the saved files -- reproducible on Restart & Run All. A dedicated
# RandomState is used so NumPy's global random state is left untouched.
SHUFFLE_SEED = 42
np.random.RandomState(SHUFFLE_SEED).shuffle(data)

### Splitting the dataset into training, validation and testing sets:

In [5]:
# Split sizes: 80% of the rows for training, 10% for validation, and whatever
# remains for testing, so the three counts always add up to the row count.
total_rows = data.shape[0]
num_training = int(0.8 * total_rows)
num_validate = int(0.1 * total_rows)
num_testing = total_rows - num_training - num_validate

# Targets are the last column; inputs are every column strictly between the first
# and the last (column 0 is left out -- presumably a record ID, TODO confirm).
output_data = data[:, -1]
input_data = data[:, 1:-1]


### Scaling the inputs:

In [6]:
# Standardize the inputs (zero mean, unit variance per column) using statistics
# computed from the training rows only. The first `num_training` rows are the
# ones sliced off as the training split, so fitting the scaler on just those rows
# keeps validation/testing statistics from leaking into the preprocessing step.
# The fitted transform is then applied to every row, so the later slicing of
# `input_data` into the three splits works unchanged.
scaler = preprocessing.StandardScaler().fit(input_data[:num_training])
input_data = scaler.transform(input_data)

In [7]:

# Carve the rows into three consecutive, non-overlapping slices:
#   [0, num_training)              -> training
#   [num_training, validation_end) -> validation
#   [validation_end, end)          -> testing
validation_end = num_training + num_validate

training_input, training_output = input_data[:num_training], output_data[:num_training]

validating_input = input_data[num_training:validation_end]
validating_output = output_data[num_training:validation_end]

testing_input, testing_output = input_data[validation_end:], output_data[validation_end:]

In [8]:
# Sanity check: print the mean target value of each split (sum of targets divided
# by the number of rows), in the order training, validation, testing. Values near
# 0.5 indicate each split stayed roughly balanced after shuffling.
for split_targets in (training_output, validating_output, testing_output):
    print(np.sum(split_targets) / split_targets.shape[0])

0.49678681195864766
0.5145413870246085
0.5111607142857143


### Saving the datasets

In [11]:
# Persist each split as its own archive via np.savez, storing the feature array
# under the key "inputs" and the target array under the key "outputs".
for archive_name, split_inputs, split_outputs in (
    ("Audiobook_training", training_input, training_output),
    ("Audiobook_validate", validating_input, validating_output),
    ("Audiobook_testing", testing_input, testing_output),
):
    np.savez(archive_name, inputs=split_inputs, outputs=split_outputs)
