## FUTURE CHANGES
* Remove the `train_test_split` call from this cell, because the train/test split needs to be done before the datasets are segmented

In [30]:
import json
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras
import tensorflow as tf

# Enable TensorFlow's logging of which device each operation is placed on.
tf.debugging.set_log_device_placement(True)

# JSON dataset file passed to load_data(); it is read for its "mfcc" and "labels" entries.
DATASET_PATH = "data_static_60_5.json"

# load data
def load_data(dataset_path):
    """Load the inputs and targets stored in a JSON dataset file.

    Args:
        dataset_path: Path to a JSON file whose top-level object has an
            "mfcc" entry (the inputs) and a "labels" entry (the targets).

    Returns:
        A tuple ``(inputs, targets)`` of NumPy arrays built from the
        "mfcc" and "labels" entries respectively.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the "mfcc" or "labels" entry is missing.
    """
    # JSON text is UTF-8 by specification; state it explicitly so decoding
    # does not depend on the platform's default locale encoding.
    with open(dataset_path, "r", encoding="utf-8") as fp:
        data = json.load(fp)

    # convert lists into numpy arrays
    inputs = np.array(data["mfcc"])
    targets = np.array(data["labels"])

    return inputs, targets

if __name__ == "__main__":

    # read the inputs and their labels from the dataset file and report their shapes
    inputs, targets = load_data(DATASET_PATH)
    print(inputs.shape)
    print(targets.shape)

    # hold out 30% of the samples as the test set; the rest is used for training
    split = train_test_split(inputs, targets, test_size=0.3)
    inputs_train, inputs_test, targets_train, targets_test = split

    # assemble the network one layer at a time
    model = keras.Sequential()

    # input layer: flattens each sample into a single vector.
    # inputs is a 3D array; axis 0 indexes the samples, so only axes 1 and 2
    # (per the original notes: intervals x MFCC coefficients) define the shape.
    model.add(keras.layers.Flatten(input_shape=(inputs.shape[1], inputs.shape[2])))

    # three fully connected hidden layers of decreasing width
    for units in (512, 256, 64):
        model.add(keras.layers.Dense(units, activation="relu"))

    # output layer: one neuron per category (5 categories: clacky, ...)
    model.add(keras.layers.Dense(5, activation="softmax"))

    # compile the network; labels are integer class ids, hence the sparse loss
    optimizer = keras.optimizers.Adam(learning_rate=1e-4)
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=optimizer,
        metrics=["accuracy"],
    )

    model.summary()

    # train the network, evaluating on the held-out split after every epoch
    model.fit(
        x=inputs_train,
        y=targets_train,
        batch_size=32,  # number of samples in each batch
        epochs=50,
        validation_data=(inputs_test, targets_test),
    )
    
    
    
    
    
    
    

(2336, 22, 13)
(2336,)
Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_11 (Flatten)         (None, 286)               0         
_________________________________________________________________
dense_40 (Dense)             (None, 512)               146944    
_________________________________________________________________
dense_41 (Dense)             (None, 256)               131328    
_________________________________________________________________
dense_42 (Dense)             (None, 64)                16448     
_________________________________________________________________
dense_43 (Dense)             (None, 5)                 325       
Total params: 295,045
Trainable params: 295,045
Non-trainable params: 0
_________________________________________________________________
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Ep