In [0]:
import glob
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from music21 import converter, instrument, note, chord, stream
from tensorflow.contrib import rnn

In [0]:
#Preprocessing
def get_notes():
    """ Get all the notes and chords from the midi files """
    notes = []

    for file in glob.glob("Pokemon MIDIs/*.mid"):
        midi = converter.parse(file)

        print("Parsing %s" % file)

        notes_to_parse = None

        try: # file has instrument parts
            s2 = instrument.partitionByInstrument(midi)
            notes_to_parse = s2.parts[0].recurse() 
        except: # file has notes in a flat structure
            notes_to_parse = midi.flat.notes
            
        for element in notes_to_parse:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder))

    return notes


def to_categorical(y, num_classes=None, dtype='float32'):
    y = np.array(y, dtype='int')
    input_shape = y.shape
    if input_shape and input_shape[-1] == 1 and len(input_shape) > 1:
        input_shape = tuple(input_shape[:-1])
    y = y.ravel()
    if not num_classes:
        num_classes = np.max(y) + 1
    n = y.shape[0]
    categorical = np.zeros((n, num_classes), dtype=dtype)
    categorical[np.arange(n), y] = 1
    output_shape = input_shape + (num_classes,)
    categorical = np.reshape(categorical, output_shape)
    return categorical


def prepare_sequences(notes, n_vocab):
    """ Prepare the sequences used by the Neural Network """
    sequence_length = 100

    # get all pitch names
    pitchnames = sorted(set(item for item in notes))

     # create a dictionary to map pitches to integers
    note_to_int = dict((note, number) for number, note in enumerate(pitchnames))

    network_input = []
    network_output = []

    # create input sequences and the corresponding outputs
    for i in range(0, len(notes) - sequence_length, 1):
        sequence_in = notes[i:i + sequence_length]
        sequence_out = notes[i + sequence_length]
        network_input.append([note_to_int[char] for char in sequence_in])
        network_output.append(note_to_int[sequence_out])

    # reshape the input into a format compatible with LSTM layers
    n_patterns = len(network_input)
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    
    # normalize input between 0 and 1
    network_input = network_input / float(n_vocab)
    print(len(network_output))
    
    print(type(network_output))
    #network_output = np_utils.to_categorical(network_output)
    network_output = to_categorical(network_output)

    with open("storeinput.data","wb") as ip:
        pickle.dump(network_input,ip)
    with open("storeoutput.data","wb") as op:
        pickle.dump(network_output,op)
        
    return (network_input, network_output)

In [0]:
#First time preprocessing and storing

notes = get_notes()
n_vocab = len(set(notes))
network_input, network_output = prepare_sequences(notes, n_vocab)


Parsing Pokemon MIDIs/Pokemon RedBlueYellow - Viridian City.mid
Parsing Pokemon MIDIs/Pokemon XY - FIN.mid
Parsing Pokemon MIDIs/Pokemon - Pallet Town.mid
Parsing Pokemon MIDIs/Pokemon - Oracion.mid
Parsing Pokemon MIDIs/Pokemon RedBlueYellow - Game Corner Theme.mid
Parsing Pokemon MIDIs/Pokemon DiamondPearlPlatinum - Stark Mountain.mid
Parsing Pokemon MIDIs/Pokemon Mystery Dungeon BlueRed Rescue Team - Rescue Team Base.mid
Parsing Pokemon MIDIs/Pokemon Mystery Dungeon Explorers of TimeDarknessSky - Goodnight.mid
Parsing Pokemon MIDIs/Pokemon Black _ White - Village Bridge.mid
Parsing Pokemon MIDIs/Pokemon Mystery Dungeon Explorers of TimeDarknessSky - Temporal Tower.mid
Parsing Pokemon MIDIs/Pokemon RubySapphireEmerald - Pokemart.mid
Parsing Pokemon MIDIs/Pokemon XD Gale of Darkness - The Hexagon Bros.mid
Parsing Pokemon MIDIs/Pokemon BlackWhite - Ns Castle.mid
Parsing Pokemon MIDIs/Pokemon Ranger - Ranger Base.mid
Parsing Pokemon MIDIs/Pokemon RubySapphireEmerald - Sootopolis City.mi

In [0]:
#Reading preprocessed input and output from their respective files
network_input=[]
network_output=[]
with open("storeinput.data","rb") as ip:
    network_input=pickle.load(ip)
with open("storeoutput.data","rb") as op:
    network_output=pickle.load(op)
print(network_input.shape[1],network_input.shape[2])
print(network_output.shape[1])

100 1
497


In [0]:
def weight_variable(shape):
    """
    Create a weight variable with appropriate initialization
    :param name: weight name
    :param shape: weight shape
    :return: initialized weight variable
    """
    initer = tf.truncated_normal_initializer(stddev=0.01)
    return tf.get_variable('W',
                           dtype=tf.float32,
                           shape=shape,
                           initializer=initer)


def bias_variable(shape):
    """
    Create a bias variable with appropriate initialization
    :param name: bias variable name
    :param shape: bias variable shape
    :return: initialized bias variable
    """
    initial = tf.constant(0., shape=shape, dtype=tf.float32)
    return tf.get_variable('b',
                           dtype=tf.float32,
                           initializer=initial)
    

   
def BiRNN(x, weights,biases,timesteps, num_hidden):
    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, timesteps, n_input)
    # Required shape: 'timesteps' tensors list of shape (batch_size, num_input)

    # Unstack to get a list of 'timesteps' tensors of shape (batch_size, num_input)
    x = tf.unstack(x, timesteps, 1)
    drop_prob=0.2
    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)

    # Get BiRNN cell output
    outputs1,_,_ = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                                 dtype=tf.float32,scope='BLSTM_1')
    
    #Second layer
    lstm_fw_cell2 = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    lstm_fw_cell2 = tf.nn.rnn_cell.DropoutWrapper(lstm_fw_cell2,output_keep_prob=1-drop_prob,input_keep_prob=1-drop_prob)
    lstm_bw_cell2 = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    lstm_bw_cell2 = tf.nn.rnn_cell.DropoutWrapper(lstm_bw_cell2,output_keep_prob=1-drop_prob,input_keep_prob=1-drop_prob)
    outputs2,_,_ = rnn.static_bidirectional_rnn(lstm_fw_cell2, lstm_bw_cell2, outputs1,
                                                 dtype=tf.float32,scope='BLSTM_2')
    

    #Third layer
    lstm_fw_cell3 = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    lstm_bw_cell3 = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    outputs3,_,_ = rnn.static_bidirectional_rnn(lstm_fw_cell3, lstm_bw_cell3, outputs2,
                                                 dtype=tf.float32,scope='BLSTM_3')
    
    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs3[-1], weights) + biases

In [0]:
# Data Dimension
num_input = 1         # Each input is a single note
timesteps = 100          # Timesteps
n_classes = 497        # Output is a one-hot vector with 497 classes

#Hyperparameters
learning_rate = 0.001 # The optimization initial learning rate
epochs = 15           # Total number of training epochs
batch_size = 100      # Training batch size
display_freq = 100    # Frequency of displaying the training results

#Network parameter
num_hidden_units1 = 512  # Number of hidden units of the first RNN
num_hidden_units2 = 512  # Number of hidden units of the second RNN
num_hidden_units3 = 512  # Number of hidden units of the third RNN

In [0]:
# Placeholders for inputs (x) and outputs(y)
x = tf.placeholder(tf.float32, shape=(None, timesteps, num_input), name='X')
y = tf.placeholder(tf.float32, shape=(None, n_classes), name='Y')

# create weight matrix initialized randomely from N~(0, 0.01)
W = weight_variable(shape=[2*num_hidden_units1, n_classes])


# create bias vector initialized as zero
b = bias_variable(shape=[n_classes])


output_logits = BiRNN(x, W, b,timesteps, num_hidden_units1)
y_pred = tf.nn.softmax(output_logits)

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell, unroll=True))`, which is equivalent to this API
Instructions for updating:
Please use `keras.layers.RNN(cell, unroll=True)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [0]:
# Model predictions
cls_prediction = tf.argmax(output_logits, axis=1, name='predictions')

# Define the loss function, optimizer, and accuracy
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=output_logits), name='loss')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, name='Adam-op').minimize(loss)
correct_prediction = tf.equal(tf.argmax(output_logits, 1), tf.argmax(y, 1), name='correct_pred')
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')

# Creating the op for initializing all variables
init = tf.global_variables_initializer()

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [0]:
def randomize(x, y):
    """ Randomizes the order of data samples and their corresponding labels"""
    permutation = np.random.permutation(y.shape[0])
    shuffled_x = x[permutation, :]
    shuffled_y = y[permutation]
    return shuffled_x, shuffled_y

def get_next_batch(x, y, start, end):
    x_batch = x[start:end]
    y_batch = y[start:end]
    return x_batch, y_batch

x_train, y_train = network_input, network_output
x_train, y_train = randomize(x_train, y_train)
print("x_train shape:",x_train.shape)
print("y_train shape:",y_train.shape)


start=0
end=batch_size
x_batch, y_batch = get_next_batch(x_train, y_train, start, end)
print("x_batch shape:",x_batch.shape,type(x_batch[0][0][0]))
print("y_batch shape:",y_batch.shape,type(y_batch[0][0]))

x_batch = x_batch.reshape((batch_size, timesteps, num_input))
print("x_batch after adjusting shape:",x_batch.shape)

feed_dict_batch = {x: x_batch, y: y_batch}
print(feed_dict_batch)

x_train shape: (163585, 100, 1)
y_train shape: (163585, 497)
x_batch shape: (100, 100, 1) <class 'numpy.float64'>
y_batch shape: (100, 497) <class 'numpy.float32'>
x_batch after adjusting shape: (100, 100, 1)
{<tf.Tensor 'X:0' shape=(?, 100, 1) dtype=float32>: array([[[0.63581489],
        [0.40442656],
        [0.63581489],
        ...,
        [0.89738431],
        [0.92555332],
        [0.63782696]],

       [[0.89537223],
        [0.96579477],
        [0.55533199],
        ...,
        [0.31991952],
        [0.9054326 ],
        [0.84708249]],

       [[0.06841046],
        [0.96579477],
        [0.92354125],
        ...,
        [0.55533199],
        [0.63581489],
        [0.88933602]],

       ...,

       [[0.83903421],
        [0.97987928],
        [0.33802817],
        ...,
        [0.9637827 ],
        [0.89336016],
        [0.95975855]],

       [[0.90342052],
        [0.27967807],
        [0.        ],
        ...,
        [0.62575453],
        [0.27967807],
        [0.2796

In [0]:
#Training 

x_train,y_train=network_input,network_output
sess = tf.InteractiveSession()
sess.run(init)
global_step = 0
# Number of training iterations in each epoch
num_tr_iter = int(len(y_train) / batch_size)
for epoch in range(epochs):
    print('Training epoch: {}'.format(epoch + 1))
    x_train, y_train = randomize(x_train, y_train)
    for iteration in range(num_tr_iter):
        global_step += 1
        start = iteration * batch_size
        end = (iteration + 1) * batch_size
        x_batch, y_batch = get_next_batch(x_train, y_train, start, end)
        x_batch = x_batch.reshape((batch_size, timesteps, num_input))
        # Run optimization op (backprop)
        feed_dict_batch = {x: x_batch, y: y_batch}
        sess.run(optimizer, feed_dict=feed_dict_batch)

        if iteration % display_freq == 0:
            # Calculate and display the batch loss and accuracy
            loss_batch, acc_batch = sess.run([loss, accuracy],
                                             feed_dict=feed_dict_batch)

            print("iter {0:3d}:\t Loss={1:.2f},\tTraining Accuracy={2:.01%}".
                  format(iteration, loss_batch, acc_batch))

    # Run validation after every epoch
"""
    feed_dict_valid = {x: x_test[:1000].reshape((-1, timesteps, num_input)), y: y_test[:1000]}
    loss_valid, acc_valid = sess.run([loss, accuracy], feed_dict=feed_dict_valid)
    print('---------------------------------------------------------')
    print("Epoch: {0}, validation loss: {1:.2f}, validation accuracy: {2:.01%}".
          format(epoch + 1, loss_valid, acc_valid))
    print('---------------------------------------------------------')
"""

Training epoch: 1
iter   0:	 Loss=6.17,	Training Accuracy=6.0%
iter 100:	 Loss=4.99,	Training Accuracy=1.0%
iter 200:	 Loss=4.83,	Training Accuracy=1.0%
iter 300:	 Loss=4.88,	Training Accuracy=2.0%
iter 400:	 Loss=4.72,	Training Accuracy=2.0%
iter 500:	 Loss=5.00,	Training Accuracy=2.0%
iter 600:	 Loss=4.94,	Training Accuracy=0.0%
iter 700:	 Loss=4.73,	Training Accuracy=0.0%
iter 800:	 Loss=4.89,	Training Accuracy=2.0%
iter 900:	 Loss=4.78,	Training Accuracy=2.0%
iter 1000:	 Loss=4.76,	Training Accuracy=2.0%
iter 1100:	 Loss=4.78,	Training Accuracy=3.0%
iter 1200:	 Loss=4.90,	Training Accuracy=1.0%
iter 1300:	 Loss=4.81,	Training Accuracy=1.0%
iter 1400:	 Loss=4.71,	Training Accuracy=6.0%
iter 1500:	 Loss=4.79,	Training Accuracy=2.0%
iter 1600:	 Loss=4.80,	Training Accuracy=0.0%
Training epoch: 2
iter   0:	 Loss=4.76,	Training Accuracy=2.0%
iter 100:	 Loss=4.59,	Training Accuracy=4.0%
iter 200:	 Loss=4.69,	Training Accuracy=3.0%
iter 300:	 Loss=4.74,	Training Accuracy=3.0%
iter 400:	 L

'\n    feed_dict_valid = {x: x_test[:1000].reshape((-1, timesteps, num_input)), y: y_test[:1000]}\n    loss_valid, acc_valid = sess.run([loss, accuracy], feed_dict=feed_dict_valid)\n    print(\'---------------------------------------------------------\')\n    print("Epoch: {0}, validation loss: {1:.2f}, validation accuracy: {2:.01%}".\n          format(epoch + 1, loss_valid, acc_valid))\n    print(\'---------------------------------------------------------\')\n'

In [0]:
#Saving model
saver = tf.train.Saver()
saver.save(sess, 'Bidirectionallstm')

'Bidirectionallstm'

In [0]:
#Loading model
saver2=tf.train.import_meta_graph('Bidirectionallstm.meta')
saver2.restore(sess,tf.train.latest_checkpoint('./'))

INFO:tensorflow:Restoring parameters from ./Bidirectionallstm


In [0]:
#Generation
notes = get_notes()

#y_pred=tf.Graph.get_tensor_by_name('y_pred:0')

pitchnames = sorted(set(item for item in notes))
int_to_note = dict((number, note) for number, note in enumerate(pitchnames))

def generate_seed():
  start_point = np.random.randint(0, len(network_input)-1)
  pattern = network_input[start_point]
  prediction_output=[]
  n_vocab = len(set(notes))
  for i in range(500):
    prediction_input = np.reshape(pattern, (1, len(pattern), 1))
    prediction_input = prediction_input / float(n_vocab)
    prediction = sess.run(y_pred,feed_dict={x:prediction_input})
    index = np.argmax(prediction)
    result = int_to_note[index]
    prediction_output.append(result)
          
    pattern = np.append(pattern,index)
    pattern = pattern[1:len(pattern)]
  return prediction_output




Parsing Pokemon MIDIs/Pokemon RedBlueYellow - Viridian City.mid
Parsing Pokemon MIDIs/Pokemon XY - FIN.mid
Parsing Pokemon MIDIs/Pokemon - Pallet Town.mid
Parsing Pokemon MIDIs/Pokemon - Oracion.mid
Parsing Pokemon MIDIs/Pokemon RedBlueYellow - Game Corner Theme.mid
Parsing Pokemon MIDIs/Pokemon DiamondPearlPlatinum - Stark Mountain.mid
Parsing Pokemon MIDIs/Pokemon Mystery Dungeon BlueRed Rescue Team - Rescue Team Base.mid
Parsing Pokemon MIDIs/Pokemon Mystery Dungeon Explorers of TimeDarknessSky - Goodnight.mid
Parsing Pokemon MIDIs/Pokemon Black _ White - Village Bridge.mid
Parsing Pokemon MIDIs/Pokemon Mystery Dungeon Explorers of TimeDarknessSky - Temporal Tower.mid
Parsing Pokemon MIDIs/Pokemon RubySapphireEmerald - Pokemart.mid
Parsing Pokemon MIDIs/Pokemon XD Gale of Darkness - The Hexagon Bros.mid
Parsing Pokemon MIDIs/Pokemon BlackWhite - Ns Castle.mid
Parsing Pokemon MIDIs/Pokemon Ranger - Ranger Base.mid
Parsing Pokemon MIDIs/Pokemon RubySapphireEmerald - Sootopolis City.mi

In [0]:
#Converting output to music files
def create_midi(prediction_output, filename):
    """ convert the output from the prediction to notes and create a midi file
        from the notes """
    offset = 0
    output_notes = []

    # create note and chord objects based on the values generated by the model
    for pattern in prediction_output:
        # pattern is a chord
        if ('.' in pattern) or pattern.isdigit():
            notes_in_chord = pattern.split('.')
            notes = []
            for current_note in notes_in_chord:
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                notes.append(new_note)
            new_chord = chord.Chord(notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        # pattern is a note
        else:
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)
        # increase offset each iteration so that notes do not stack
        offset += 0.5

    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp='{}.mid'.format(filename))

create_midi(generate_seed(),'Genmusic-1')
create_midi(generate_seed(),'Genmusic-2')
create_midi(generate_seed(),'Genmusic-3')