In [25]:
from google.colab import files
# Open Colab's file-upload prompt and keep the return value of files.upload()
# in `uploaded`. Presumably these are the CSV files that the glob cell picks up
# later -- TODO confirm they land in the working directory.
uploaded = files.upload()

In [36]:
import os
import glob
import pandas as pd
import tensorflow

In [37]:
# Collect every file in the working directory whose name ends in '.csv'.
# glob returns matches in an order that depends on the file system, so the list
# is sorted: the row order of the combined frame feeds the seeded train/test
# split later in the notebook and has to be identical on every run.
extension = 'csv'
all_filenames = sorted(glob.glob('*.{}'.format(extension)))

# Fail with a clear message instead of pandas' generic "No objects to concatenate".
if not all_filenames:
    raise ValueError(
        "No '*.{}' files found in {!r}; upload the dataset files first.".format(
            extension, os.getcwd()
        )
    )

# Read each file and stack them into one DataFrame. ignore_index=True gives the
# result a fresh 0..n-1 index, so no separate in-place reset_index is needed.
df = pd.concat((pd.read_csv(f) for f in all_filenames), ignore_index=True)

In [38]:
# Remove the three identifier columns; every remaining column is kept for modelling.
df2 = df.drop(columns=['track', 'artist', 'uri'])

In [39]:
# Check how strongly the remaining columns are correlated with one another.
import numpy as np

# Pairwise Pearson correlation matrix of df2's columns (pandas' default method,
# spelled out here for the reader).
df3 = df2.corr(method='pearson')

In [40]:
# Convert the frame to a plain array and split it column-wise:
# columns 0-14 become the feature matrix, column 15 becomes the label vector.
data = df2.values
X, y = data[:, :15], data[:, 15]

# Sanity check of the resulting shapes.
print(data.shape, X.shape, y.shape)

(41106, 16) (41106, 15) (41106,)


In [41]:
from sklearn.model_selection import train_test_split

# Hold out part of the data for testing. The fixed random_state makes the
# shuffle (and therefore the split) repeatable for a given row order.
split_arrays = train_test_split(X, y, random_state=10)
X_train, X_test, y_train, y_test = split_arrays

## Data Preprocessing

In [42]:
# Rescale every feature column into the [0, 1] range.
from sklearn.preprocessing import MinMaxScaler

# The scaler is fitted on the training split only, then applied to both splits.
X_scaler = MinMaxScaler(feature_range=(0,1))
X_scaler.fit(X_train)

X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

X_train_scaled

array([[0.33299595, 0.9329815 , 0.        , ..., 0.8       , 0.08111681,
        0.0295858 ],
       [0.82591093, 0.61089261, 0.18181818, ..., 0.8       , 0.08631337,
        0.07100592],
       [0.77024291, 0.68991442, 1.        , ..., 0.8       , 0.16145375,
        0.0591716 ],
       ...,
       [0.82287449, 0.88196742, 0.63636364, ..., 0.8       , 0.0936038 ,
        0.07100592],
       [0.59412955, 0.04923759, 1.        , ..., 0.8       , 0.05270625,
        0.07100592],
       [0.52732794, 0.70191773, 0.        , ..., 0.8       , 0.05222867,
        0.0591716 ]])

One-hot encode the labels

In [43]:
#One-hot encode output labels (y)
from tensorflow.keras.utils import to_categorical

# Fix the one-hot width once, from the labels of both splits. When num_classes
# is omitted, to_categorical infers it from the largest label in the array it
# is given, so a split that happened to miss the highest class would be encoded
# with fewer columns than the other split.
label_class_count = int(max(y_train.max(), y_test.max())) + 1

y_train_categorical = to_categorical(y_train, num_classes=label_class_count)
y_test_categorical = to_categorical(y_test, num_classes=label_class_count)

y_train_categorical

array([[1., 0.],
       [0., 1.],
       [0., 1.],
       ...,
       [0., 1.],
       [1., 0.],
       [0., 1.]], dtype=float32)

## Creating and defining our Deep Learning Model Architecture

In [44]:
# Define the network as a stack of fully connected layers.
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

number_inputs = 15   # input width expected by the first layer
number_classes = 2   # one output unit per class

model = Sequential([
    # Hidden layers
    Dense(units=14, activation='relu', input_dim=number_inputs),
    Dense(units=120, activation='relu'),
    Dense(units=80, activation='relu'),
    # Output layer: softmax over the classes
    Dense(units=number_classes, activation='softmax'),
])

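Reference for choosing the number of hidden nodes:
[How to choose the number of hidden layers and nodes in a feedforward neural network? (Cross Validated)](https://stats.stackexchange.com/questions/181/how-to-choose-the-number-of-hidden-layers-and-nodes-in-a-feedforward-neural-netw#:~:text=The%20number%20of%20hidden%20neurons,size%20of%20the%20input%20layer.)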

In [45]:
#Model Summary
# Print the layer-by-layer table (layer type, output shape, parameter count)
# so the architecture defined above can be checked before training.
model.summary() 

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 14)                224       
                                                                 
 dense_5 (Dense)             (None, 120)               1800      
                                                                 
 dense_6 (Dense)             (None, 80)                9680      
                                                                 
 dense_7 (Dense)             (None, 2)                 162       
                                                                 
Total params: 11,866
Trainable params: 11,866
Non-trainable params: 0
_________________________________________________________________


In [46]:
# Configure the model for training.
import tensorflow as tf

# Adam optimizer with an explicitly stated learning rate.
opt = tf.keras.optimizers.Adam(learning_rate=0.001)

# Categorical cross-entropy is the loss being minimised; accuracy is reported
# alongside it during training and evaluation.
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

## Training the Model

In [47]:
# Fit the model on the scaled training features and one-hot training labels.
# verbose=2 prints one summary line per epoch.
fit_options = dict(epochs=500, batch_size=2000, shuffle=True, verbose=2)
history = model.fit(X_train_scaled, y_train_categorical, **fit_options)

Epoch 1/500
16/16 - 1s - loss: 0.6640 - accuracy: 0.6342 - 887ms/epoch - 55ms/step
Epoch 2/500
16/16 - 0s - loss: 0.6014 - accuracy: 0.6930 - 134ms/epoch - 8ms/step
Epoch 3/500
16/16 - 0s - loss: 0.5681 - accuracy: 0.7036 - 147ms/epoch - 9ms/step
Epoch 4/500
16/16 - 0s - loss: 0.5565 - accuracy: 0.7092 - 131ms/epoch - 8ms/step
Epoch 5/500
16/16 - 0s - loss: 0.5452 - accuracy: 0.7194 - 137ms/epoch - 9ms/step
Epoch 6/500
16/16 - 0s - loss: 0.5332 - accuracy: 0.7294 - 124ms/epoch - 8ms/step
Epoch 7/500
16/16 - 0s - loss: 0.5249 - accuracy: 0.7361 - 135ms/epoch - 8ms/step
Epoch 8/500
16/16 - 0s - loss: 0.5171 - accuracy: 0.7433 - 128ms/epoch - 8ms/step
Epoch 9/500
16/16 - 0s - loss: 0.5128 - accuracy: 0.7434 - 125ms/epoch - 8ms/step
Epoch 10/500
16/16 - 0s - loss: 0.5086 - accuracy: 0.7480 - 151ms/epoch - 9ms/step
Epoch 11/500
16/16 - 0s - loss: 0.5041 - accuracy: 0.7493 - 128ms/epoch - 8ms/step
Epoch 12/500
16/16 - 0s - loss: 0.5017 - accuracy: 0.7517 - 133ms/epoch - 8ms/step
Epoch 13/500

## Validation of the Model

In [48]:
# Score the trained model on both splits so that training and testing
# performance can be compared side by side.
model_loss_train, model_accuracy_train = model.evaluate(
    x=X_train_scaled, y=y_train_categorical, verbose=2
)
model_loss, model_accuracy = model.evaluate(
    x=X_test_scaled, y=y_test_categorical, verbose=2
)

print(f"TRAINING DATA --> Loss: {model_loss_train}, Accuracy: {model_accuracy_train}")
print(f"TESTING DATA --> Loss: {model_loss}, Accuracy: {model_accuracy}")

964/964 - 1s - loss: 0.4340 - accuracy: 0.7952 - 1s/epoch - 1ms/step
322/322 - 0s - loss: 0.4817 - accuracy: 0.7689 - 375ms/epoch - 1ms/step
TRAINING DATA --> Loss: 0.4339863061904907, Accuracy: 0.7951928377151489
TESTING DATA --> Loss: 0.48166176676750183, Accuracy: 0.7689014077186584


## Saving the Trained Model

In [50]:
# Persist the trained model to Google Drive.
from google.colab import drive

# Mount Drive inside the Colab runtime so the target path below is reachable.
drive.mount('/content/gdrive')

# Destination file for the saved model.
path = "/content/gdrive/My Drive/model.h5"
model.save(path)

Mounted at /content/gdrive
