In [20]:
import keras
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, LSTM
import pickle 

class MusicSentencesDataGenerator(keras.utils.Sequence):

    '''
    song_IDs : np.ndarray
        List of song IDs in the data
    labels : dict
        Dictionary mapping each song ID to time period label
    data_path : string
        Path of the .npy data files
    batch_size : int
        Batch size
    sentence_length : int (default 30)
        Number of seconds each data sample (.npy file) represents
    word_length : int (default 4)
        Number of seconds each word represents; each sentence is split into words of this length
    SR : int (default 44100)
        Sample rate; number of values per second
    '''
    def __init__(self, song_IDs, labels, data_path, batch_size, sentence_length=30,
                 word_length=5, SR=44100, shuffle=True):
        self.song_IDs = song_IDs
        self.labels = labels
        self.data_path = data_path
        self.batch_size = batch_size
        self.sentence_length = sentence_length
        self.word_length = word_length
        self.SR = SR
        self.data_dim = (sentence_length // word_length, word_length * SR)
        self.shuffle = shuffle
        self.labels2ind = list(set(labels.values()))
        self.on_epoch_end()
        
    def __len__(self):
        'Returns the number of batches per epoch'
        return int(np.floor(len(self.song_IDs) / self.batch_size))
    
    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Find list of IDs
        song_IDs_temp = [self.song_IDs[k] for k in indexes]

        # Generate data
        X, y = self.__data_generation(song_IDs_temp)

        return X, y
    
    def __data_generation(self, song_IDs_temp):
        'Generates data containing batch_size samples' # X : (batch_size, num_words, word_length)
        # Initialization
        X = np.empty((self.batch_size, *self.data_dim))
        y = np.empty((self.batch_size), dtype=object)

        # Generate data
        for i, ID in enumerate(song_IDs_temp):
            # Store sample
            X[i,] = (np.load(self.data_path + str(ID) + '.npy')).reshape((self.data_dim))

            # Store class
            y[i] = self.labels[ID]
        
        # Categorical to numerical labels
        y = [self.labels2ind.index(a) for a in y]
        print(y)

        return X, y
    
    
    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.song_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.song_IDs)

In [2]:
# Generate npy files to test
for i in range(100):
    data = np.random.rand(10 * 30)
    np.save("sample_data/" + str(i), data)

In [22]:
# Testing DataGenerator
data_path = '/Users/estberg/Projects/490F/490_Deep_Learning/data/'
song_ids_file = '/Users/estberg/Projects/490F/490_Deep_Learning/data/song_ids.pkl'
labels_file = '/Users/estberg/Projects/490F/490_Deep_Learning/data/labels.pkl'

with open(song_ids_file, 'rb') as handle:
    ids = pickle.load(handle)

with open(labels_file, 'rb') as handle:
    labels = pickle.load(handle)

shuffled = np.random.permutation(ids)
train_ids = shuffled[:int(len(shuffled) * 0.8)]
test_ids = shuffled[int(len(shuffled) * 0.8):]

train_generator = MusicSentencesDataGenerator(train_ids, labels, data_path, batch_size=20, SR=44100)
test_generator = MusicSentencesDataGenerator(test_ids, labels, data_path, batch_size=20, SR=44100)

model = Sequential()
model.add(LSTM(100, input_shape=(6, 5*44100), return_sequences=True))
model.add(Flatten())
model.add(Dense(5, activation='sigmoid'))
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

EPOCHS = 100
BATCH_SIZE=20
model.fit_generator(generator=train_generator, validation_data=test_generator, epochs=EPOCHS)

scores = model.evaluate_generator(test_generator, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

[4, 3, 3, 4, 2, 4, 4, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2]
[4, 4, 1, 3, 4, 2, 3, 1, 4, 2, 3, 2, 4, 4, 4, 3, 3, 4, 3, 4]
[3, 4, 3, 2, 1, 2, 3, 2, 4, 4, 4, 2, 4, 4, 4, 4, 3, 4, 4, 4]
[4, 2, 1, 4, 4, 3, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4]
Epoch 1/100
[3, 3, 3, 3, 4, 4, 3, 4, 2, 2, 3, 4, 4, 3, 4, 2, 3, 4, 1, 0]
[4, 4, 4, 2, 4, 3, 2, 4, 3, 3, 4, 4, 4, 3, 4, 4, 3, 4, 4, 3]
[4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 2, 2, 4]
[2, 4, 2, 4, 2, 3, 4, 4, 4, 2, 3, 3, 3, 1, 3, 4, 4, 4, 4, 2]
[3, 4, 4, 4, 3, 1, 4, 2, 4, 2, 4, 4, 4, 2, 4, 4, 2, 4, 2, 4]
[4, 2, 4, 3, 3, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 3, 4]
[3, 4, 4, 3, 3, 3, 4, 3, 4, 3, 4, 3, 4, 3, 3, 2, 3, 4, 3, 4]
[4, 4, 2, 4, 2, 4, 4, 4, 4, 1, 4, 2, 4, 2, 4, 2, 4, 3, 4, 3]
[2, 4, 2, 4, 3, 3, 2, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 2, 3, 3]
[4, 4, 2, 4, 3, 4, 2, 4, 4, 1, 2, 4, 0, 3, 4, 2, 2, 4, 3, 2]
[1, 4, 4, 4, 1, 2, 4, 2, 2, 4, 4, 3, 1, 4, 4, 2, 4, 4, 4, 1]
[4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 2, 4, 4, 2, 2, 4]
[2, 4, 2, 4,