In [None]:
import pretty_midi
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Model
from keras.layers import Dense, Input, Lambda, Concatenate, LSTM

from keras import backend as K

import tensorflow as tf
#import tensorflow_probability as tfp # for tf version 2.0.0, tfp version 0.8 is needed 
import numpy as np

import matplotlib.pyplot as plt
import csv
from sys import stdout
import random

# My code
from loading import *
from models import *

%matplotlib inline

In [None]:
print("TensorFlow version: {}".format(tf.__version__))
print("GPU is available: {}".format(tf.test.is_gpu_available()))

# Load data

In [3]:
file = 'maestro-v2.0.0/maestro-v2.0.0.csv'

In [4]:
# Build the training data object from the MAESTRO metadata CSV named in `file`.
# NOTE(review): DataObject and Batch arrive through the wildcard imports above
# (loading / models), so their exact semantics are not visible in this notebook.
# fs = 20 is the same value later passed as `fs` to piano_roll_to_pretty_midi.
data = DataObject(file, what_type = 'train', train_sec = 15, test_sec = 5, fs = 20, window_size = 15)

# Create a batch class which we will iterate over; generate() advances it with next().
# batch_size = 128 matches the leading 128 hard-coded in the scratch cells further down.
train_batch = Batch(data, batch_size = 128, songs_per_batch = 4)

# Define model

In [5]:
# Keep a handle on the batch currently held by train_batch: it is used to build
# the model here and is reused below for model.predict and for the MIDI export.
# NOTE(review): simple_model presumably reads the input shapes off this batch — confirm in models.py.
curr_batch = train_batch.data
model = simple_model(curr_batch)
# Binary cross-entropy loss with the Adam optimizer.
model.compile(loss = tf.keras.losses.BinaryCrossentropy(), optimizer = 'adam')

In [6]:
model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input_layer_context (InputLa (128, 59, 15, 88)         0         
_________________________________________________________________
Reshape_layer_1 (Lambda)     (7552, 15, 88)            0         
_________________________________________________________________
Encoder_lstm_1 (LSTM)        (7552, 15, 512)           1230848   
_________________________________________________________________
Encoder_lstm_2 (LSTM)        (7552, 512)               2099200   
_________________________________________________________________
Encoder_dense_1 (Dense)      (7552, 512)               262656    
_________________________________________________________________
Encoder_dense_2 (Dense)      (7552, 256)               131328    
_________________________________________________________________
Encoder_dense_3 (Dense)      (7552, 10)                2570

In [7]:
def generate(train_batch):
    """Adapt a batch iterator to the stream expected by model.fit_generator.

    Args:
        train_batch: an iterator; every ``next(train_batch)`` must return an
            object exposing ``.context`` and ``.target``.

    Yields:
        ``([context, target], target)`` -- the target is passed both as the
        second model input and as the label.

    The stream ends cleanly once ``train_batch`` is exhausted. Letting the
    StopIteration from ``next()`` escape a generator body would instead be
    converted into a RuntimeError (PEP 479).
    """
    while True:
        try:
            # next() rather than a for-loop: only the iterator's __next__ is required.
            new_batch = next(train_batch)
        except StopIteration:
            return
        yield ([new_batch.context, new_batch.target], new_batch.target)

In [23]:
# Train from the batch generator; steps_per_epoch sets how many batches are
# drawn from generate() per epoch, so this run consumes 2 x 1000 batches.
# NOTE(review): fit_generator is deprecated in newer TensorFlow/Keras releases
# in favour of fit(); check the installed version before switching.
history = model.fit_generator(
                    generate(train_batch),
                    steps_per_epoch=1000,
                    epochs=2)

Epoch 1/2
Epoch 2/2


In [97]:
history.history['loss']

[0.03718636266607791, 0.015437088180333377]

In [24]:
output = model.predict([curr_batch.context, curr_batch.target], steps = 1)

# Experiment space

In [28]:
curr_batch.target[0,6,:]

<tf.Tensor: id=2696599, shape=(88,), dtype=float32, numpy=
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0.], dtype=float32)>

In [29]:
output[0,7,:]

array([5.70243901e-06, 5.52789024e-05, 1.79611234e-04, 1.12407703e-04,
       3.00861080e-04, 4.02287260e-04, 9.27054934e-05, 2.36979425e-02,
       8.84433030e-05, 7.80734190e-05, 4.11175337e-04, 8.23952782e-04,
       1.80022523e-03, 6.96475734e-04, 1.19857641e-03, 7.39105931e-03,
       1.12812640e-03, 3.75009561e-03, 1.59647316e-03, 9.78283465e-01,
       4.01845988e-04, 4.36160044e-04, 5.41753648e-03, 9.55974101e-04,
       3.11712874e-03, 1.15243101e-03, 3.14946496e-03, 5.08662593e-03,
       3.35655757e-03, 3.84370866e-03, 8.07309407e-04, 6.04331121e-02,
       1.02264108e-03, 8.04050942e-04, 2.84986906e-02, 3.01948981e-03,
       1.45442309e-02, 8.04961100e-03, 7.30445096e-03, 9.62767005e-01,
       3.77063872e-03, 5.24997246e-03, 3.11924564e-03, 9.95260477e-01,
       7.74603279e-04, 9.23627755e-04, 9.86645460e-01, 3.70945525e-03,
       1.14304014e-02, 6.14788989e-03, 1.23634832e-02, 7.11906888e-03,
       5.62151009e-03, 1.68517313e-03, 6.74427662e-04, 2.42075231e-02,
      

# Generate MIDI and see how it looks

In [81]:
# Inspect the slice directly: `igor` is only assigned in the next cell, so
# referring to it here breaks on a fresh kernel (Restart & Run All).
curr_batch.target[0,:,:].shape

TensorShape([100, 88])

In [89]:
igor = curr_batch.target[0,:,:]
np.transpose(igor).shape

(88, 100)

In [90]:
def piano_roll_to_pretty_midi(piano_roll, fs=100, program=0):
    """Turn a (frames, pitches) piano roll into a one-instrument PrettyMIDI object.

    Args:
        piano_roll: 2-D array-like indexed as [frame, pitch]; a nonzero cell
            means the pitch sounds in that frame, and its value is used as the
            note velocity.
        fs: frames per second; a frame index is converted to seconds as index / fs.
        program: MIDI program number given to the single instrument.

    Returns:
        A pretty_midi.PrettyMIDI holding one Instrument with the recovered notes.

    The pitch axis is zero-padded by 20 on each side, so input column i is
    written out as MIDI pitch i + 20.
    NOTE(review): an 88-key roll starting at A0 would need an offset of 21 --
    confirm against how the rolls are cropped when they are loaded.
    """
    # Widen the pitch axis, then flip to (pitch, frame) for the scan below.
    roll = np.pad(piano_roll, [(0, 0), (20, 20)], 'constant', constant_values=0).T
    n_pitches = roll.shape[0]

    # One silent frame at each end, so notes sounding in the first or last
    # frame still produce an on/off transition.
    roll = np.pad(roll, [(0, 0), (1, 1)], 'constant')

    midi = pretty_midi.PrettyMIDI()
    track = pretty_midi.Instrument(program=program)

    # Per-pitch state: velocity of the currently sounding note (0 = silent)
    # and the time at which it started.
    active_velocity = np.zeros(n_pitches, dtype=int)
    onset_seconds = np.zeros(n_pitches)

    # Every nonzero frame-to-frame difference is a candidate note-on/note-off.
    change_frames, change_pitches = np.nonzero(np.diff(roll).T)
    for frame, pitch in zip(change_frames, change_pitches):
        # frame + 1 compensates for the silent frame prepended above
        new_velocity = roll[pitch, frame + 1]
        seconds = frame / fs
        if new_velocity > 0:
            # Only a silent pitch starts a new note; a change between two
            # nonzero values leaves the running note untouched.
            if active_velocity[pitch] == 0:
                onset_seconds[pitch] = seconds
                active_velocity[pitch] = new_velocity
            continue
        # The pitch fell back to zero: close the running note.
        track.notes.append(
            pretty_midi.Note(
                velocity=active_velocity[pitch],
                pitch=pitch,
                start=onset_seconds[pitch],
                end=seconds))
        active_velocity[pitch] = 0

    midi.instruments.append(track)
    return midi

def extract_piano_roll(predicted_pr, threshold):
    """Binarize predicted note activations into a 0/1 piano roll.

    Args:
        predicted_pr: array-like of scores (e.g. a slice of model.predict output).
        threshold: scores greater than or equal to this value become 1,
            everything else (including NaN) becomes 0.

    Returns:
        A new array with the same shape and dtype as the input. The input is
        left untouched: thresholding in place would overwrite the caller's
        prediction array when a view such as ``output[0, :, :]`` is passed in,
        and would zero every cell whenever ``threshold`` exceeds 1.
    """
    scores = np.asarray(predicted_pr)
    # A single comparison against the original scores, so the result never
    # depends on values written by an earlier masking step.
    return (scores >= threshold).astype(scores.dtype)

In [91]:
my_midi = piano_roll_to_pretty_midi(curr_batch.target[0,:,:], fs = 20)

In [92]:
my_midi.write('example_target.mid')

In [93]:
predicted_target = extract_piano_roll(output[0,:,:], threshold = 0.5)
predicted_midi = piano_roll_to_pretty_midi(predicted_target, fs = 20)

In [94]:
predicted_midi.write('example_predicted.mid')

In [71]:
idx = 10
print(predicted_target[idx,:])
print(curr_batch.target[0,idx,:])

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 1. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
tf.Tensor(
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 1. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], shape=(88,), dtype=float32)


In [72]:
curr_batch.link[0]

'2006/MIDI-Unprocessed_01_R1_2006_01-09_ORIG_MID--AUDIO_01_R1_2006_01_Track01_wav.midi'

In [76]:
midi_data = pretty_midi.PrettyMIDI('maestro-v2.0.0/'+'2006/MIDI-Unprocessed_01_R1_2006_01-09_ORIG_MID--AUDIO_01_R1_2006_01_Track01_wav.midi')

In [77]:
midi_data.estimate_tempo()

194.1653240798144

In [None]:
target_shape  = curr_batch.target.shape

In [None]:
print(target_shape)

In [None]:
igor = tf.zeros((128, 10))

In [None]:
new_igor=tf.tile(tf.expand_dims(igor, 1), [1,target_shape[1],1])

In [None]:
tf.concat([curr_batch.target, new_igor], axis = 2)

In [None]:
curr_batch.target[0,0,:]

In [None]:
output[0,0,:]

In [None]:
igor = tf.convert_to_tensor([[1,1,1,1], [2,2,2,2], [3,3,3,3]])

In [None]:
igor.shape

In [None]:
tf.reshape(igor, [4,3])

# Encoder

In [None]:
lstm = LSTM(100)

In [None]:
output = lstm(curr_batch.context)

In [None]:
output.shape

In [None]:
res_output = K.mean(tf.reshape(output, [128, 59, 100]), axis = -2)

In [None]:
res_output.shape

In [None]:
tile_output = tf.tile(tf.expand_dims(res_output, 1), [1,150,1])

In [None]:
tile_output.shape

In [None]:
K.mean(res_output, axis = -2).shape

In [None]:
# Symbolic inputs for the encoder sketch. The first variable was previously
# bound as `input_context_`, so the `encoder = input_context_xy` line below
# raised NameError; the name now matches its use.
input_context_xy = Input((None, 2), name="Input_layer_contxt_xy") # [num_pts, 2]
input_target_x = Input((None, 1), name="Input_layer_target_x")  # [num_pts, 1]

encoder = input_context_xy

In [None]:
curr_batch.context.shape