# Jazz Improv with LSTM

This notebook trains an LSTM network on a corpus of jazz music and then uses the trained network to generate new jazz music.

In [1]:
# Mount Google Drive at /content/drive (requires the Google Colab runtime)
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [26]:
# Change the working directory to the project folder on the mounted Drive,
# so the relative paths used later (./data, ./output) resolve inside it
%cd /content/drive/MyDrive/jazz

/content/drive/MyDrive/jazz


In [3]:
# Ask Colab to use TensorFlow 1.x (Colab-only magic); run this before the first tensorflow import
%tensorflow_version 1.x

TensorFlow 1.x selected.


In [4]:
# Import TensorFlow and print its version to confirm which release is active
import tensorflow
print(tensorflow.__version__)

1.15.2


In [8]:
# Install Keras pinned to version 2.1.5 (the recorded output shows it replacing Keras 2.4.3)
!pip install keras==2.1.5

Collecting keras==2.1.5
  Using cached https://files.pythonhosted.org/packages/ba/65/e4aff762b8696ec0626a6654b1e73b396fcc8b7cc6b98d78a1bc53b85b48/Keras-2.1.5-py2.py3-none-any.whl
Installing collected packages: keras
  Found existing installation: Keras 2.4.3
    Uninstalling Keras-2.4.3:
      Successfully uninstalled Keras-2.4.3
Successfully installed keras-2.1.5


In [9]:
#Importing libraries
from __future__ import print_function
import IPython
import sys
from music21 import *
import numpy as np
from grammar import *
from qa import *
from preprocess import * 
from music_utils import *
from data_utils import *
from keras.models import load_model, Model
from keras.layers import Dense, Activation, Dropout, Input, LSTM, Reshape, Lambda, RepeatVector
from keras.initializers import glorot_uniform
from keras.utils import to_categorical
from keras.optimizers import Adam
from keras import backend as K

Using TensorFlow backend.


In [10]:
# Play a short audio snippet of the music training corpus
IPython.display.Audio('./data/30s_seq.mp3')

In [11]:
# Load the preprocessed music corpus, then report its dimensions
X, Y, n_values, indices_values = load_music_utils()
for _label, _value in (
    ('shape of X:', X.shape),
    ('number of training examples:', X.shape[0]),
    ('Tx (length of sequence):', X.shape[1]),
    ('total # of unique values:', n_values),
    ('Shape of Y:', Y.shape),
):
    print(_label, _value)

shape of X: (60, 30, 78)
number of training examples: 60
Tx (length of sequence): 30
total # of unique values: 78
Shape of Y: (30, 60, 78)


In [12]:
# Size of the LSTM state: n_a is used as the unit count of LSTM_cell and as the
# width of the initial hidden/cell state inputs (a0, c0)
n_a = 64

In [13]:
# Create the layers once at module level: djmodel() calls the same three layer
# objects at every time step, and LSTM_cell/densor are later handed to
# music_inference_model(), so training and inference share the same weights
n_values  = 78 # number of music values (same value load_music_utils() reported above)
reshapor  = Reshape((1, n_values))                        # reshapes one time-step slice to (1, n_values) inside djmodel()
LSTM_cell = LSTM(n_a, return_state = True)         # LSTM with n_a units; return_state=True makes it return its states too
densor    = Dense(n_values, activation='softmax')     # maps the LSTM hidden state to a softmax over n_values classes




In [15]:
#Implementing the model
def djmodel(Tx, n_a, n_values):
    """
    Build the training model: one LSTM step applied Tx times, once per time step.

    The function calls the module-level layers `reshapor`, `LSTM_cell` and `densor`
    on every iteration, so all time steps share the same layer objects (and weights).

    Arguments:
    Tx -- number of time steps in each input sequence
    n_a -- width of the initial hidden state a0 and cell state c0
    n_values -- number of values in each one-hot time-step vector

    Returns:
    model -- Keras Model with inputs [X, a0, c0] and a list of Tx outputs,
             one `densor` output per time step
    """

    # Define the input of the model: a sequence of Tx vectors of size n_values
    X = Input(shape=(Tx, n_values))

    # Define the initial hidden state a0 and cell state c0 of the LSTM
    a0 = Input(shape=(n_a,), name='a0')
    c0 = Input(shape=(n_a,), name='c0')
    a = a0
    c = c0

    # Collect the output of every time step
    outputs = []

    # Loop over the time steps
    for t in range(Tx):

        # Select the "t"th time step vector from X. The lambda slices the tensor
        # the layer receives (instead of closing over the outer X) and binds the
        # current t as a default argument, so the layer keeps slicing at its own
        # index even if its function is evaluated again after the loop has moved on.
        x = Lambda(lambda seq, t=t: seq[:, t, :])(X)
        # Use reshapor to reshape x to be (1, n_values)
        x = reshapor(x)
        # Perform one step of the LSTM_cell, carrying the states forward
        a, _, c = LSTM_cell(x, initial_state=[a, c])
        # Apply densor to the hidden state output of LSTM_cell
        out = densor(a)
        # Add the output to "outputs"
        outputs.append(out)

    # Create model instance
    model = Model(inputs=[X, a0, c0], outputs=outputs)

    return model

In [16]:
# Define the training model. The sizes are taken from the loaded data and from the
# variables the shared layers were built with, so they cannot drift apart from them.
model = djmodel(Tx = X.shape[1], n_a = n_a, n_values = n_values)





In [17]:
# Compile the model: Adam optimizer, categorical cross-entropy loss, accuracy metric
opt = Adam(
    lr=0.01,
    beta_1=0.9,
    beta_2=0.999,
    decay=0.01,
)
model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)





In [18]:
# Initialize a0 and c0 to zeros: the LSTM's initial hidden and cell state,
# one row per training example
m = X.shape[0]  # number of training examples, read from the data rather than hard-coded
a0 = np.zeros((m, n_a))
c0 = np.zeros((m, n_a))

In [19]:
# Fit and train for 100 epochs. Y (shape (30, 60, 78) above) is converted to a list
# so that each of the model's per-time-step outputs gets its own target array.
model.fit([X, a0, c0], list(Y), epochs=100)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Epoch 1/100





Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/1

<keras.callbacks.History at 0x7f7ab081e890>

In [20]:
# Predicting and sampling
def music_inference_model(LSTM_cell, densor, n_values = 78, n_a = 64, Ty = 100):
    """
    Build a generation model that feeds each prediction back in as the next input.

    Arguments:
    LSTM_cell -- the LSTM layer to apply at every generation step
    densor -- the layer applied to the LSTM hidden state at every step
    n_values -- size of each input/output value vector
    n_a -- width of the initial hidden state a0 and cell state c0
    Ty -- number of values to generate

    Returns:
    inference_model -- Keras Model with inputs [x0, a0, c0] and a list of Ty outputs
    """

    # Model inputs: the first value vector plus the LSTM's initial states
    x0 = Input(shape=(1, n_values))
    a0 = Input(shape=(n_a,), name='a0')
    c0 = Input(shape=(n_a,), name='c0')

    # Running state that is threaded through the generation loop
    hidden, cell, step_input = a0, c0, x0

    # One prediction is stored per generated step
    predictions = []

    for _step in range(Ty):
        # Advance the LSTM by one step from the current input and states
        hidden, _, cell = LSTM_cell(step_input, initial_state=[hidden, cell])

        # Turn the hidden state into this step's prediction and record it
        probs = densor(hidden)
        predictions.append(probs)

        # Feed the prediction back as the next input. `one_hot` is defined outside
        # this cell; per the original notes it produces the one-hot representation
        # of the value selected from the prediction.
        step_input = Lambda(one_hot)(probs)

    # Wire the inputs and the collected predictions into a model
    return Model(inputs=[x0, a0, c0], outputs=predictions)

In [21]:
# Define the inference model from the trained LSTM_cell and densor, generating 50 values.
# n_values and n_a are passed through from the variables the layers were built with.
inference_model = music_inference_model(LSTM_cell, densor, n_values = n_values, n_a = n_a, Ty = 50)

In [22]:
# Create zero-valued initial input and LSTM states for a single sequence (batch size 1).
# The last dimension of x_initializer uses n_values so it always matches the model input.
x_initializer = np.zeros((1, 1, n_values))
a_initializer = np.zeros((1, n_a))
c_initializer = np.zeros((1, n_a))

In [23]:
# Implement predict and sample
def predict_and_sample(inference_model, x_initializer = x_initializer, a_initializer = a_initializer, 
                       c_initializer = c_initializer):
    """
    Run the inference model once and turn its predictions into indices and one-hot vectors.

    Arguments:
    inference_model -- Keras model to call `predict` on
    x_initializer -- initial input value array; its third dimension is used as the class count
    a_initializer -- initial LSTM hidden state array
    c_initializer -- initial LSTM cell state array

    Returns:
    results -- one-hot encoding of `indices` produced by to_categorical
    indices -- argmax of the predictions along the last axis
    """
    # Predict the output sequence from the three initial arrays
    model_inputs = [x_initializer, a_initializer, c_initializer]
    pred = inference_model.predict(model_inputs, batch_size=None, verbose=0, steps=None)

    # The number of classes equals the size of the value dimension of the input
    n_classes = x_initializer.shape[2]

    # Pick the most probable value at each step, then one-hot encode those picks
    indices = np.argmax(pred, axis = -1)
    results = to_categorical(indices, num_classes=n_classes)

    return results, indices

In [24]:
# Run the inference model once and spot-check a few of the predicted values
results, indices = predict_and_sample(inference_model, x_initializer, a_initializer, c_initializer)
print("np.argmax(results[12]) =", np.argmax(results[12]))
print("np.argmax(results[17]) =", np.argmax(results[17]))
print("list(indices[12:18]) =", list(indices[12:18]))

np.argmax(results[12]) = 16
np.argmax(results[17]) = 33
list(indices[12:18]) = [array([16]), array([67]), array([30]), array([29]), array([45]), array([33])]


In [27]:
# Generate music with the inference model and keep the returned stream in out_stream
# (per the recorded output, generate_music also saves the result to output/my_music.midi)
out_stream = generate_music(inference_model)

Predicting new values for different set of chords.
Generated 51 sounds using the predicted values for the set of chords ("1") and after pruning
Generated 51 sounds using the predicted values for the set of chords ("2") and after pruning
Generated 50 sounds using the predicted values for the set of chords ("3") and after pruning
Generated 51 sounds using the predicted values for the set of chords ("4") and after pruning
Generated 51 sounds using the predicted values for the set of chords ("5") and after pruning
Your generated music is saved in output/my_music.midi


In [28]:
# Play the generated music
# NOTE(review): generate_music reports saving output/my_music.midi, while this cell
# plays ./output/my-music.mp3 -- presumably converted separately; confirm the file exists
IPython.display.Audio('./output/my-music.mp3')