In [None]:
# import libraries
import tensorflow as tf
import numpy as np
import time
import os

In [None]:
# define hyperparameters
# All tunable constants for the notebook live in this cell.

# file I/O parameters
# Directory whose entries are read and concatenated into the training corpus.
INPUT_DATA_PATH = '../data/'
# Path prefix under which the trained model is saved (timestamp + '.h5').
OUTPUT_MODEL_PATH = '../models/'
# Path prefix under which generated music files are written.
OUTPUT_MUSIC_PATH = '../music/'
# Extension appended to the timestamped name of each generated music file.
OUTPUT_MUSIC_FORMAT = '.abc'

# tokenizer parameters
# NOTE(review): not referenced by any cell visible in this notebook — confirm whether still needed.
OOV_TOKEN = '<oov>'

# padding parameters
# NOTE(review): TRUNC, PADDING and MAXLEN are not referenced by any visible cell — confirm whether still needed.
TRUNC = 'post'
PADDING = 'pre'
MAXLEN = 10
# Length, in characters, of each window sliced from the corpus; the first
# INP_LEN-1 entries of a window become the model input and the last one the target.
INP_LEN = 10

# embedding parameters
# Second argument passed to the Embedding layer.
EMB_DIM = 16

# model compiling parameters
# Passed straight through to model.compile().
OPTIMIZER = 'adam'
LOSS = 'sparse_categorical_crossentropy'
METRICS = ['accuracy']

In [None]:
# load data
# Read every regular file in INPUT_DATA_PATH and concatenate the contents
# (each followed by a newline) into one corpus string.
# The names are sorted because os.listdir() returns entries in arbitrary
# order; without sorting, the corpus order (and therefore the training
# windows and the generation seed derived from it) could differ between runs.
# Sub-directories are skipped so that open() is only called on files.
filenames = sorted(
    name for name in os.listdir(INPUT_DATA_PATH)
    if os.path.isfile(os.path.join(INPUT_DATA_PATH, name))
)
pieces = []
for file in filenames:
    with open(os.path.join(INPUT_DATA_PATH, file), 'r') as f:
        pieces.append(f.read() + '\n')
# Join once at the end instead of growing the string inside the loop.
corpus = ''.join(pieces)

# The vocabulary is the sorted set of distinct characters in the corpus.
vocab = sorted(set(corpus))
VOCAB_SIZE = len(vocab)
print(VOCAB_SIZE)

In [None]:
# build lookup tables to map characters to numbers and vice versa
char2idx = dict(zip(vocab, range(len(vocab))))
idx2char = {index: char for char, index in char2idx.items()}

In [None]:
# create array of numbers from corpus
# Encode the whole corpus to indices once, then slice fixed-length windows
# from the encoded sequence: row i holds the indices of corpus[i:i+INP_LEN].
n = len(corpus)
encoded = [char2idx[c] for c in corpus]
data = np.array([encoded[i:i+INP_LEN] for i in range(n-INP_LEN)])

In [None]:
# split every window into model input (all but the last index) and target (the last index)
x, y = data[:, :-1], data[:, -1]
for arr in (x, y):
    print(arr.shape)

In [None]:
# define model
# Character embedding -> two stacked bidirectional LSTMs -> dense layer ->
# softmax with one output per vocabulary entry.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(VOCAB_SIZE, EMB_DIM),
    # return_sequences=True so the next recurrent layer receives the full sequence
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(1024, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(512)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(VOCAB_SIZE, activation='softmax')
])

In [None]:
# compile model with the configured optimizer, loss and metrics, then print its summary
model.compile(
    optimizer=OPTIMIZER,
    loss=LOSS,
    metrics=METRICS,
)
model.summary()

In [None]:
# define callbacks
class myCallback(tf.keras.callbacks.Callback):
    """Stop training once the logged 'accuracy' metric reaches a threshold.

    Args:
        threshold: accuracy (as a fraction) at or above which training is
            stopped. Defaults to 0.8.
    """

    def __init__(self, threshold=0.8):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Check the 'accuracy' entry of `logs` and request a stop if it is high enough.

        Args:
            epoch: index of the epoch that just finished (unused).
            logs: dict of metric values for the epoch; may be None.
        """
        # `logs` defaults to None rather than a shared mutable dict, and a
        # missing 'accuracy' entry is ignored instead of raising a TypeError
        # on the comparison.
        accuracy = (logs or {}).get('accuracy')
        if accuracy is not None and accuracy >= self.threshold:
            print(f"Accuracy reached {self.threshold:.0%}. Stopping learning!")
            self.model.stop_training = True
callback = myCallback()

In [None]:
# fit model; the callback may request an early stop before all epochs have run
model.fit(
    x,
    y,
    epochs=100,
    verbose=1,
    validation_split=0.1,
    batch_size=256,
    callbacks=[callback],
)

In [None]:
# save model
# The file is named after the current local time, e.g. 20240131_235959.h5.
output_file_name = time.strftime("%Y%m%d_%H%M%S") # TODO: change it to parameters of model instead of timestamp
# Create the output directory if needed so saving does not fail on a fresh checkout.
os.makedirs(OUTPUT_MODEL_PATH, exist_ok=True)
model.save(os.path.join(OUTPUT_MODEL_PATH, output_file_name + '.h5'))

In [None]:
# predict on new data

In [1]:
# create new music and save to file
# Requires the earlier cells to have been run in this kernel: it reads
# `x`, `corpus`, `idx2char` and `model`.
MUSIC_LENGTH = 1000
seed = x[0]

# Start the output with the corpus text that precedes the first "K:" field,
# followed by an empty "K:" line. The field is matched at the start of a line
# so that a capital 'K' elsewhere (e.g. inside a title) does not cut the
# header short. If no "K:" line exists, find() returns -1, key_pos becomes 0
# and the output starts with just "K:\n".
if corpus.startswith('K:'):
    key_pos = 0
else:
    key_pos = corpus.find('\nK:') + 1
generated = [corpus[:key_pos], "K:\n"]

# Greedy generation: predict the next character index from the current
# window, append its character, then slide the window forward by one.
for _ in range(MUSIC_LENGTH):
    # verbose=0 keeps predict() from printing a progress bar on every step
    probs = model.predict(np.array([seed]), verbose=0)
    next_idx = int(np.argmax(probs))
    generated.append(idx2char[next_idx])
    seed = np.append(seed, next_idx)[1:]

music_string = ''.join(generated)

# Create the output directory if needed, then write the generated text to a
# timestamped file.
os.makedirs(OUTPUT_MUSIC_PATH, exist_ok=True)
output_music_file = time.strftime("%Y%m%d_%H%M%S") + OUTPUT_MUSIC_FORMAT
with open(os.path.join(OUTPUT_MUSIC_PATH, output_music_file), 'w') as f:
    f.write(music_string)

NameError: name 'x' is not defined

In [None]:
# convert music from string to audio and play