In [None]:
import os
# Setting CUDA_VISIBLE_DEVICES to "-1" hides every GPU from CUDA, so the
# notebook runs on the CPU. It is set here, before TensorFlow is imported in
# the next cell.
os.environ["CUDA_VISIBLE_DEVICES"]="-1"  

In [None]:
import pretty_midi
import midi
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Model
from keras.layers import Dense, Input, Lambda, Concatenate, LSTM

from keras.optimizers import Adam

from keras import backend as K

import tensorflow as tf
#import tensorflow_probability as tfp # for tf version 2.0.0, tfp version 0.8 is needed 
import numpy as np

import matplotlib.pyplot as plt
import csv
from sys import stdout
import random

# My code
from loading import *
from models import *
from data import *
from midi_to_statematrix import *

%matplotlib inline

In [3]:
# Report the TensorFlow version in use and whether TensorFlow can see a GPU.
# NOTE(review): tf.test.is_gpu_available() is deprecated in later TensorFlow
# 2.x releases in favour of tf.config.list_physical_devices('GPU') — revisit
# when upgrading from 2.0.0.
print("TensorFlow version: {}".format(tf.__version__))
print("GPU is available: {}".format(tf.test.is_gpu_available()))

TensorFlow version: 2.0.0
GPU is available: False


# Load data

In [4]:
file = 'maestro-v2.0.0/maestro-v2.0.0.csv'

In [5]:
# Call data class
data = DataObject(file, what_type = 'train', train_tms = 100, test_tms = 100, fs = 20, window_size = 15)

# Create a batch class which we will iterate over
train_batch = Batch(data, batch_size = 32, songs_per_batch = 4)

In [6]:
# Keep a handle on the data object held by the batch iterator; it is used
# below to build the model and to inspect predictions.
curr_batch = train_batch.data
# The return value is discarded, so featurize() presumably fills attributes
# such as target_train / target_pred on the object itself — confirm in data.py.
curr_batch.featurize(use_biaxial = False)

# Define model

In [None]:
def my_binary_loss_seq(y_true, y_pred):
    """Binary cross-entropy between targets and predictions flattened to rows of 78.

    Both tensors are reshaped to ``[-1, 78]`` before being handed to a
    default-configured ``tf.keras.losses.BinaryCrossentropy``.

    Parameters
    ----------
    y_true : tensor
        Ground-truth values; the total element count must be divisible by 78.
    y_pred : tensor
        Predicted values; the total element count must be divisible by 78.

    Returns
    -------
    tensor
        The value returned by the ``BinaryCrossentropy`` loss object.
    """
    flat_shape = [-1, 78]
    labels_2d = tf.reshape(y_true, flat_shape)
    preds_2d = tf.reshape(y_pred, flat_shape)

    loss_fn = tf.keras.losses.BinaryCrossentropy()
    return loss_fn(labels_2d, preds_2d)

In [7]:
# Build the network from the current batch (the builder comes from the local
# `models` module) and compile it with Adam at a learning rate of 0.0005.
# Note that the stock Keras BinaryCrossentropy is used as the loss here, not
# the my_binary_loss_seq function defined in the previous cell.
model = biaxial_target_model_oneseq(curr_batch)
model.compile(loss = tf.keras.losses.BinaryCrossentropy(), optimizer = Adam(learning_rate=0.0005))

In [8]:
curr_batch.target_train.shape

(32, 85, 78, 3)

In [9]:
model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input_layer_context (InputLa (32, 2, 100, 78)          0         
_________________________________________________________________
Encoder_layer_1 (Lambda)     (64, 100, 78)             0         
_________________________________________________________________
Encoder_lstm_1 (LSTM)        (64, 512)                 1210368   
_________________________________________________________________
Encoder_dense_1 (Dense)      (64, 512)                 262656    
_________________________________________________________________
Encoder_output (Dense)       (64, 10)                  5130      
_________________________________________________________________
Encoder_concat_representatio (32, 20)                  0         
_________________________________________________________________
Encoder_output_reshape (Lamb (32, 85, 78, 20)          0   

In [None]:
def inputize(curr_batch):
    """Build the model input pair ``[context, shifted_target]`` from a batch.

    The batch target is rolled forward by one position along axis 1, and the
    first position of the rolled copy is overwritten with
    ``curr_batch.context[:, -1, -1, :]``.

    Parameters
    ----------
    curr_batch : object
        Must expose ``target`` (accepted by ``tf.roll``) and ``context``
        (indexable with four indices).

    Returns
    -------
    list
        ``[curr_batch.context, shifted_target]`` where ``shifted_target`` is a
        NumPy array.
    """
    shifted = tf.roll(curr_batch.target, shift=1, axis=1).numpy()
    # The rolled copy wrapped the last step around to the front; replace that
    # wrapped step with the final step of the final context slice.
    shifted[:, 0, :] = curr_batch.context[:, -1, -1, :]
    return [curr_batch.context, shifted]

def generate(train_batch):
    """Endless batch generator suitable for ``model.fit_generator``.

    Each iteration pulls the next batch from ``train_batch``, calls
    ``featurize(use_biaxial = False)`` on it and yields
    ``([context, target_train], target_pred)`` with every element converted to
    a float32 tensor.

    Parameters
    ----------
    train_batch : iterator
        Object supporting ``next()`` that returns batches exposing
        ``featurize``, ``context``, ``target_train`` and ``target_pred``.
    """
    def as_float32(values):
        return tf.convert_to_tensor(values, dtype = tf.float32)

    while True:
        batch = next(train_batch)
        batch.featurize(use_biaxial = False)

        context = as_float32(batch.context)
        decoder_input = as_float32(batch.target_train)
        labels = as_float32(batch.target_pred)

        yield ([context, decoder_input], labels)

In [10]:
# Run one prediction step on the current batch; this cell sits above the
# training cell, so in notebook order it shows the untrained model's output.
out = model.predict([tf.convert_to_tensor(curr_batch.context, dtype = tf.float32), 
                tf.convert_to_tensor(curr_batch.target_train, dtype = tf.float32)], steps = 1)

In [14]:
curr_batch.target_pred

array([[[0, 0],
        [0, 0],
        [0, 0],
        ...,
        [0, 0],
        [0, 0],
        [0, 0]],

       [[0, 0],
        [0, 0],
        [0, 0],
        ...,
        [0, 0],
        [0, 0],
        [0, 0]],

       [[0, 0],
        [0, 0],
        [0, 0],
        ...,
        [0, 0],
        [0, 0],
        [0, 0]],

       ...,

       [[0, 0],
        [0, 0],
        [0, 0],
        ...,
        [0, 0],
        [0, 0],
        [0, 0]],

       [[0, 0],
        [0, 0],
        [0, 0],
        ...,
        [0, 0],
        [0, 0],
        [0, 0]],

       [[0, 0],
        [0, 0],
        [0, 0],
        ...,
        [0, 0],
        [0, 0],
        [0, 0]]])

In [12]:
out

array([[0.2810245 , 0.27203536, 0.28445378, ..., 0.29772553, 0.2977431 ,
        0.29775432],
       [0.2810251 , 0.27203637, 0.28445524, ..., 0.29772615, 0.29774356,
        0.29775462],
       [0.2810286 , 0.27204183, 0.28446218, ..., 0.297738  , 0.2977559 ,
        0.29776746],
       ...,
       [0.28102383, 0.27203327, 0.2844499 , ..., 0.29771766, 0.29773495,
        0.29774594],
       [0.28102252, 0.2720318 , 0.28444922, ..., 0.29773825, 0.2977559 ,
        0.29776722],
       [0.28102183, 0.27202824, 0.28444186, ..., 0.29774076, 0.2977584 ,
        0.29776973]], dtype=float32)

In [None]:
# Train from the endless generate() stream: 5 epochs of 1024 steps each, then
# persist the weights to an HDF5 file.
# NOTE(review): fit_generator is deprecated in later TensorFlow/Keras releases
# (fit accepts generators there) — revisit when upgrading.
history = model.fit_generator(
                    generate(train_batch),
                    steps_per_epoch=1024,
                    epochs=5)
model.save_weights('model_biaxial_oneseq_nofeat.h5')

In [None]:
model.save_weights('model_rolled_target.h5')

In [None]:
history.history['loss']

In [None]:
# Same prediction call as the earlier predict cell, repeated below the training
# cell; it overwrites the earlier `out`.
out = model.predict([tf.convert_to_tensor(curr_batch.context, dtype = tf.float32), 
                tf.convert_to_tensor(curr_batch.target_train, dtype = tf.float32)], steps = 1)

In [None]:
curr_batch.target_pred[57,2,:]

In [None]:
out[57,5,:]

In [None]:
out[:,:,:].max()

In [None]:
curr_batch.target_train.shape

In [None]:
def get_decoder_simple(model):
    """Build a stand-alone two-layer LSTM decoder and copy matching weights from ``model``.

    The decoder input shape is derived from the output of the layer named
    ``"lambda_2"`` in ``model``: its first dimension becomes the fixed batch
    size, its third dimension the feature size, and the time dimension is left
    variable (``None``).

    Parameters
    ----------
    model : keras Model
        Source model. It must contain a layer named ``"lambda_2"``. Every
        layer of the new decoder whose name also exists in ``model`` receives
        that layer's weights.

    Returns
    -------
    keras Model
        Model mapping the embedding input to the output of ``Decoder_lstm_2``.
    """
    input_shape = model.get_layer("lambda_2").output.shape

    input_embedding = Input(batch_shape = 
                          (input_shape[0],  # batch_size
                           None,            # timesteps (variable length)
                           input_shape[2]),  # note_size
                          name="Input_layer_embedding")

    decoder, _, _ = LSTM(units = 512, 
                      return_sequences = True,
                      return_state = True,
                      activation = 'tanh',
                      name = 'Decoder_lstm_1')(input_embedding)

    decoder = LSTM(units = 88, 
                      activation = 'sigmoid',
                      name = 'Decoder_lstm_2')(decoder)

    new_model = Model(input_embedding, decoder)

    # Copy weights layer by layer. Model.get_weights() returns one flat list of
    # every weight array in the model, so it cannot be indexed by layer
    # position; each layer's own get_weights() yields exactly the list that the
    # same-named layer's set_weights() expects.
    source_layer_names = {layer.name for layer in model.layers}
    for layer in new_model.layers:
        if layer.name in source_layer_names:
            layer.set_weights(model.get_layer(layer.name).get_weights())

    return new_model

In [None]:
#decoder = get_decoder_simple(model)
#decoder.summary()

#weights_list = model.get_weights()

In [None]:
# Call the model object directly on the raw batch arrays instead of going
# through model.predict(); this overwrites the earlier `out`.
out = model([curr_batch.context, curr_batch.target_train])

# Create music! (inefficient version)

In [None]:
def create_music(model, input_context):
    """Generate music from ``input_context`` with ``model`` (not implemented yet).

    This is a placeholder: the generation procedure has not been written.
    The function raises instead of having an empty body, which would be a
    syntax error and would stop the notebook at this cell.

    Parameters
    ----------
    model : object
        Model intended to produce the music.
    input_context : object
        Context intended to seed the generation.

    Raises
    ------
    NotImplementedError
        Always, until the generation logic is added.
    """
    raise NotImplementedError("create_music is not implemented yet")

# Experiment space

## How to change the target so that it keeps only 50 timesteps starting from the first non-silent one

In [None]:
change_target = curr_batch.target.numpy()

In [None]:
igor = np.nonzero(curr_batch.target)

In [None]:
# Positions in the flattened non-zero listing where the batch index changes,
# i.e. the first non-zero entry of every sample. np.diff already has one
# element fewer than its input, so prepending a 1 restores the full length;
# trimming the diff further would drop the last boundary and miss a sample
# whose only non-zero entry is the final one.
first_index = np.nonzero(np.r_[1, np.diff(igor[0])])[0]

In [None]:
first_note_index = igor[1][first_index]

In [None]:
# Zero everything from 50 timesteps after each sample's first note onwards.
# The batch index is taken from the non-zero listing itself rather than from
# enumerate(): a completely silent sample has no entry in first_note_index, so
# counting positions would shift every later sample by one.
for batch, idx in zip(igor[0][first_index], first_note_index):
    change_target[batch,(idx+50):,:] = 0

# Generate MIDI and see how it looks

In [None]:
def piano_roll_to_pretty_midi(piano_roll, fs=100, program=0):
    """Convert a 2-D piano roll indexed ``[frame, pitch]`` into a ``pretty_midi.PrettyMIDI`` object.

    The pitch axis is first padded with 20 zero columns on each side, so
    column ``c`` of the input becomes MIDI pitch ``c + 20``. A note starts
    where a cell goes from zero to a positive value and ends where it drops
    back to zero; its velocity is the value at the onset, stored as an integer.

    Parameters
    ----------
    piano_roll : 2-D array-like
        Rows are time frames, columns are pitches.
    fs : number
        Frames per second; frame ``t`` is placed at ``t / fs`` seconds.
    program : int
        Program number passed to ``pretty_midi.Instrument``.

    Returns
    -------
    pretty_midi.PrettyMIDI
        Object holding a single instrument with the extracted notes.
    """
    # Pad the pitch axis, then flip to (pitch, frame) orientation.
    roll = np.pad(piano_roll, [(0, 0), (20, 20)], 'constant', constant_values=0)
    roll = np.transpose(roll)
    n_pitches, _ = roll.shape

    midi_out = pretty_midi.PrettyMIDI()
    track = pretty_midi.Instrument(program=program)

    # pad 1 column of zeros so we can acknowledge initial and ending events
    roll = np.pad(roll, [(0, 0), (1, 1)], 'constant')

    # Any non-zero frame-to-frame difference is a candidate note-on / note-off;
    # transposing first makes the events come out ordered by time.
    change_frames, change_pitches = np.nonzero(np.diff(roll).T)

    # Per-pitch state: velocity of the currently sounding note (0 = silent)
    # and the time at which it started.
    active_velocity = np.zeros(n_pitches, dtype=int)
    onset_seconds = np.zeros(n_pitches)

    for frame, pitch in zip(change_frames, change_pitches):
        # frame + 1 compensates for the zero column padded in front
        level = roll[pitch, frame + 1]
        seconds = frame / fs

        if level > 0:
            # Only a transition out of silence starts a new note.
            if active_velocity[pitch] == 0:
                onset_seconds[pitch] = seconds
                active_velocity[pitch] = level
            continue

        # The value dropped to zero (or below): close the sounding note.
        track.notes.append(
            pretty_midi.Note(
                velocity=active_velocity[pitch],
                pitch=pitch,
                start=onset_seconds[pitch],
                end=seconds))
        active_velocity[pitch] = 0

    midi_out.instruments.append(track)
    return midi_out

def extract_piano_roll(predicted_pr, threshold):
    """Binarise predicted values into a 0/1 piano roll.

    Parameters
    ----------
    predicted_pr : array-like
        Predicted values; anything ``np.asarray`` accepts.
    threshold : number
        Entries greater than or equal to ``threshold`` become 1, all others 0.

    Returns
    -------
    numpy.ndarray
        A new array with the shape and dtype of the input, containing only
        0 and 1. The input itself is left unmodified, so slices of a model
        output can be passed safely and re-thresholded later.
    """
    scores = np.asarray(predicted_pr)
    # A single comparison against the untouched values: writing 1s in place and
    # then testing `< threshold` would wipe them again whenever threshold > 1.
    return (scores >= threshold).astype(scores.dtype)

In [None]:
# Render the first target sequence at 20 frames per second: the data object is
# created with fs = 20 and the predicted roll is rendered with fs = 20, so the
# same rate keeps the two MIDI files comparable in tempo.
my_midi = piano_roll_to_pretty_midi(curr_batch.target[0,:,:], fs = 20)

In [None]:
my_midi.write('example_target.mid')

In [None]:
# Threshold the first predicted sequence at 0.2 and turn it into MIDI at
# 20 frames per second.
# NOTE(review): no assignment to `output` is visible above this cell (the only
# one is in the "Encoder" section further down; the model predictions are
# stored in `out`), so this fails on a fresh Restart & Run All — confirm which
# tensor is meant.
predicted_target = extract_piano_roll(output[0,:,:], threshold = 0.2)
predicted_midi = piano_roll_to_pretty_midi(predicted_target, fs = 20)

In [None]:
predicted_midi.write('example_predicted.mid')

In [None]:
idx = 10
print(predicted_target[idx,:])
print(curr_batch.target[0,idx,:])

In [None]:
curr_batch.link[0]

In [None]:
midi_data = pretty_midi.PrettyMIDI('maestro-v2.0.0/'+'2006/MIDI-Unprocessed_01_R1_2006_01-09_ORIG_MID--AUDIO_01_R1_2006_01_Track01_wav.midi')

In [None]:
midi_data.estimate_tempo()

In [None]:
target_shape  = curr_batch.target.shape

In [None]:
print(target_shape)

In [None]:
igor = tf.zeros((128, 10))

In [None]:
new_igor=tf.tile(tf.expand_dims(igor, 1), [1,target_shape[1],1])

In [None]:
tf.concat([curr_batch.target, new_igor], axis = 2)

In [None]:
curr_batch.target[0,0,:]

In [None]:
output[0,0,:]

In [None]:
igor = tf.convert_to_tensor([[1,1,1,1], [2,2,2,2], [3,3,3,3]])

In [None]:
igor.shape

In [None]:
tf.reshape(igor, [4,3])

# Encoder

In [None]:
lstm = LSTM(100)

In [None]:
output = lstm(curr_batch.context)

In [None]:
output.shape

In [None]:
res_output = K.mean(tf.reshape(output, [128, 59, 100]), axis = -2)

In [None]:
res_output.shape

In [None]:
tile_output = tf.tile(tf.expand_dims(res_output, 1), [1,150,1])

In [None]:
tile_output.shape

In [None]:
K.mean(res_output, axis = -2).shape

In [None]:
# Symbolic inputs for the encoder experiment. The first input is bound to
# `input_context_xy`, the name the encoder line below reads; the original
# binding was `input_context_`, which made that line raise NameError.
input_context_xy = Input((None, 2), name="Input_layer_contxt_xy") # [num_pts, 2]
input_target_x = Input((None, 1), name="Input_layer_target_x")  # [num_pts, 1]

encoder = input_context_xy

In [None]:
curr_batch.context.shape