# **EDM Bass Accompaniment Generation**
*by Matthew Avallone, Anish Malhotra*

# Imports

In [0]:
import numpy as np
import os

# **Loading The Data**

In [0]:
# Folder holding the pre-processed arrays; it also becomes the working directory.
data_folder = '../input/'
os.chdir(data_folder)

# Directory listing; the result is discarded because this is not the cell's
# last expression, but the call still fails early if the folder is missing.
os.listdir(data_folder)

# Load the model inputs and the bass targets saved by the pre-processing step.
X_bass, y_bass = (
    np.load(os.path.join(data_folder, file_name))
    for file_name in ('X_bass.npy', 'y_bass.npy')
)

In [0]:
from sklearn.model_selection import train_test_split

# Split inputs and targets in a single call so each melody phrase stays paired
# with its bass phrase; train_test_split also rejects arrays whose lengths
# differ, which two independent calls would silently accept.
# shuffle=False keeps the original phrase order: the last 1% becomes the test set.
Xtr_melody, Xts_melody, ytr_bass, yts_bass = train_test_split(
    X_bass, y_bass, test_size=0.01, shuffle=False
)

In [0]:
# Number of phrases available for training.
print(len(Xtr_melody))

# Keep at most the first 32 held-out phrases, slicing inputs and targets
# identically so they stay aligned.
Xts_melody, yts_bass = Xts_melody[:32], yts_bass[:32]

# Number of phrases that will be used for testing.
print(len(Xts_melody))

Creating the decoder input set: the target output shifted by 1 step (fed to the decoder during training)

In [0]:
# Build the decoder input used for teacher forcing: the target bass sequence
# delayed by one time step, with an all-zero vector as the first step. This
# mirrors inference, where predict_sequence seeds the decoder with a zero vector
# and then feeds back the previous step's output.
#
# A fresh array is allocated on purpose. `Xtr_melody_pad = ytr_bass` only creates
# a second name for the same array, so shifting it in place would overwrite the
# training targets as well.
Xtr_melody_pad = np.zeros_like(ytr_bass)

# Shift along axis 1 (time), not along the note axis; the arrays are laid out as
# (phrases, time steps, notes), as the prediction cell's buffers show.
Xtr_melody_pad[:, 1:, :] = ytr_bass[:, :-1, :]

print(Xtr_melody_pad.shape)
print(Xtr_melody_pad[0].shape)

In [0]:
# Spot-check the first training phrase of the decoder input at steps 0 and 65.
print(Xtr_melody_pad[0][0], Xtr_melody_pad[0][65], sep='\n')

# **Model Setup**

In [0]:
from keras.models import Model
from keras.layers import Input, LSTM, Dense
from keras.utils.vis_utils import plot_model

In [0]:
# --- Note representation -----------------------------------------------------
num_classes = 7
note_shift = 24
num_of_notes = 83  # width of the note vector at every time step

# --- Timing ------------------------------------------------------------------
tpqn = 96  # ticks per quarter note; varies with MIDI file, currently using same resolution
num_measures = 0.25  # phrase length in measures

# ticks per quarter note x 4 quarter notes per measure x number of measures
n_timesteps = int(4 * num_measures * tpqn)

# --- Training ----------------------------------------------------------------
batchsize = 8

# --- Network sizes -----------------------------------------------------------
# Encoder and decoder both consume one note vector per time step.
num_encoder_tokens = num_decoder_tokens = num_of_notes
latent_dim = 256  # LSTM state size

**Autoencoder Model**

In [0]:
# Training model: an LSTM encoder reads the input sequence and hands its final
# state to an LSTM decoder, which emits one softmax distribution over the note
# vector per time step.

# Encoder input: variable-length sequences of `num_encoder_tokens`-wide vectors.
encoder_inputs = Input(shape=(None, num_encoder_tokens))
encoder = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)

# We discard `encoder_outputs` and only keep the states.
encoder_states = [state_h, state_c]

# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = Input(shape=(None, num_decoder_tokens))

# We set up our decoder to return full output sequences,
# and to return internal states as well. We don't use the
# return states in the training model, but we will use them in inference.
# `decoder_lstm` and `decoder_dense` are kept as named layers so the decoder
# inference model can reuse them (and therefore their trained weights).
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Define the model that maps [encoder input, decoder input] to the decoder
# output; at training time these are fed `Xtr_melody`, `Xtr_melody_pad` and
# `ytr_bass` respectively.
autoencoder = Model([encoder_inputs, decoder_inputs], decoder_outputs)
autoencoder.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])

In [0]:
# Print the layer-by-layer summary of the training model.
autoencoder.summary()

# Optional (disabled): plot the model and save it as an image.
# plot_model(autoencoder, to_file='model.png', show_shapes=True)

**Encoder Inference Model**

In [0]:
# Encoder inference model: maps an input sequence to the encoder LSTM's final
# [hidden, cell] state, reusing the layers built for the training model.
encoder_model = Model(inputs=encoder_inputs, outputs=encoder_states)

In [0]:
# Print the layer-by-layer summary of the encoder inference model.
encoder_model.summary()

# Optional (disabled): plot the encoder inference model and save it as an image.
# plot_model(encoder_model, to_file='encoder_model.png', show_shapes=True)

**Decoder Inference Model**

In [0]:
# Decoder inference model: runs the shared decoder layers from an explicitly
# supplied state, and returns the updated state next to the note distribution
# so generation can proceed one step at a time.

# Placeholders for the decoder state carried over from the previous step
# (hidden state first, then cell state).
decoder_state_input_h, decoder_state_input_c = (
    Input(shape=(latent_dim,)),
    Input(shape=(latent_dim,)),
)
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

# Reuse the trained decoder LSTM and output layer, seeded with the supplied state.
decoder_outputs, state_h, state_c = decoder_lstm(
    decoder_inputs,
    initial_state=decoder_states_inputs,
)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)

# Inputs: [decoder input, h, c]; outputs: [prediction, new h, new c].
decoder_model = Model(
    inputs=[decoder_inputs, decoder_state_input_h, decoder_state_input_c],
    outputs=[decoder_outputs, state_h, state_c],
)

In [0]:
# Print the layer-by-layer summary of the decoder inference model.
decoder_model.summary()

# Optional (disabled): plot the decoder inference model and save it as an image.
# plot_model(decoder_model, to_file='decoder_model.png', show_shapes=True)

## Training

In [0]:
# Fit the training model: the encoder receives `Xtr_melody`, the decoder
# receives `Xtr_melody_pad`, and the output is trained against `ytr_bass`.
# The returned history object is kept in `hist_bass`.
hist_bass = autoencoder.fit(
    x=[Xtr_melody, Xtr_melody_pad],
    y=ytr_bass,
    batch_size=batchsize,
    epochs=30,
    verbose=2,
)

## Testing

Predictions are made using the encoder and decoder inference models

In [0]:
# generate target given source sequence
def predict_sequence(infenc, infdec, source, n_steps, cardinality):
    """Generate an output sequence one step at a time from a source sequence.

    Args:
        infenc: Encoder inference model; ``infenc.predict(source)`` must return
            the list of states used to seed the decoder.
        infdec: Decoder inference model; ``infdec.predict([step_input] + states)``
            must return ``(yhat, h, c)``.
        source: Input passed unchanged to ``infenc.predict``.
        n_steps: Number of decoding steps to run.
        cardinality: Width of each step's vector.

    Returns:
        Array of shape ``(n_steps, cardinality)`` holding the decoder output of
        every step, in order.
    """
    # Encode the source once; the resulting states seed the decoder.
    states = infenc.predict(source)

    # Start-of-sequence input: a single all-zero step.
    decoder_input = np.zeros((1, 1, cardinality))

    step_outputs = []
    for _ in range(n_steps):
        # Predict the next step from the previous output and the current states.
        yhat, h, c = infdec.predict([decoder_input] + states)
        step_outputs.append(yhat[0, 0, :])

        # Carry the updated states and feed the raw prediction back in.
        states = [h, c]
        decoder_input = yhat

    return np.array(step_outputs).reshape(n_steps, cardinality)

In [0]:
# Reusable batch-of-one buffer for the phrase currently being decoded.
test_phrase = np.empty((1, n_timesteps, num_of_notes))
# Predictions, shaped like the held-out targets so they can be compared directly.
ypred_bass = np.empty(yts_bass.shape)

# Decode every held-out phrase with the inference encoder/decoder pair.
for i, phrase in enumerate(Xts_melody):
    test_phrase[0] = phrase
    ypred_bass[i] = predict_sequence(
        encoder_model,
        decoder_model,
        test_phrase,
        n_timesteps,
        num_of_notes,
    )

# Saving Results

In [0]:
# Save the predicted values for post processing
os.chdir('../working/')

# Test inputs, predictions and ground truth (np.save appends the .npy suffix).
for _path, _array in (
    ('../working/Xts_bass', Xts_melody),
    ('../working/ypred_bass', ypred_bass),
    ('../working/yts_bass', yts_bass),
):
    np.save(_path, _array)

# Save the model for future use: the training model plus both inference models.
for _path, _network in (
    ('../working/bass_model.h5', autoencoder),
    ('../working/bass_encoder.h5', encoder_model),
    ('../working/bass_decoder.h5', decoder_model),
):
    _network.save(_path)