# Autoencoder music

In [1]:
import pandas as pd
import re
from music21 import *
import numpy as np

import time
import math

from keras.layers import Input, Dense, Reshape, Flatten, Dropout,LSTM
from keras.layers import BatchNormalization, Activation, ZeroPadding2D
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D
from keras.models import Sequential, Model
from keras.optimizers import Adam

import matplotlib.pyplot as plt

import sys

import numpy as np

Using TensorFlow backend.


## Data Formatting with character to character prediction

In [2]:
import os
# Switch the working directory so the data file can be opened by bare name below.
# NOTE(review): ".../data" looks like a placeholder rather than a real relative
# path — confirm the intended data directory before running.
os.chdir(".../data")

In [3]:
# Read the whole tune collection into memory. The context manager closes the
# file handle as soon as the read completes instead of leaving it open.
with open("jiggs.txt") as corpus_file:
    text = corpus_file.read()

In [4]:
# Tunes are separated by blank lines; discard the empty chunks the split leaves behind.
clean_text = text.split('\n\n')
corpus = list(filter(None, clean_text))

In [5]:
# Show the first tune as a sanity check on the split.
corpus[0]

'X: 1\nT:A and D\n% Nottingham Music Database\nS:EF\nM:4/4\nK:A\nM:6/8\nP:A\nf|"A"ecc c2f|"A"ecc c2f|"A"ecc c2f|"Bm"BcB "E7"B2f|\n"A"ecc c2f|"A"ecc c2c/2d/2|"D"efe "E7"dcB| [1"A"Ace a2:|\n [2"A"Ace ag=g||\\\nK:D\nP:B\n"D"f2f Fdd|"D"AFA f2e/2f/2|"G"g2g ecd|"Em"efd "A7"cBA|\n"D"f^ef dcd|"D"AFA f=ef|"G"gfg "A7"ABc |1"D"d3 d2e:|2"D"d3 d2||'

In [6]:
# Every split point of every tune yields one training pair:
#   prefix = the tune up to and including the split character,
#   suffix = the remainder, wrapped in '\t' at the front and '\n' at the end.
prefix_sentence = []
suffix_sentence = []

for music in corpus:
    for cut in range(1, len(music) + 1):
        prefix_sentence.append(music[:cut])
        suffix_sentence.append('\t' + music[cut:] + '\n')


In [7]:
# Seed the vocabulary with the two control characters, then add every
# character that occurs anywhere in the corpus.
vocabulary = {'\t', '\n'}
for music in corpus:
    vocabulary.update(music)

# A sorted list gives each character a stable position.
vocabulary = sorted(vocabulary)

In [8]:
# Forward and reverse lookup tables between characters and their vocabulary positions.
char_to_idx = {char: idx for idx, char in enumerate(vocabulary)}
idx_to_char = dict(enumerate(vocabulary))

In [9]:
# The longest prefix and the longest suffix determine the time dimension
# of the padded one-hot tensors built next.
max_len_prefix_sent = max(map(len, prefix_sentence))
max_len_suffix_sent = max(map(len, suffix_sentence))

In [10]:
# All three tensors are laid out as (sample, timestep, vocabulary position)
# and start out as all zeros; the one-hot entries are filled in afterwards.
vocab_size = len(vocabulary)
prefix_shape = (len(prefix_sentence), max_len_prefix_sent, vocab_size)
suffix_shape = (len(suffix_sentence), max_len_suffix_sent, vocab_size)

# Encoder input: the prefixes
input_data_prefix = np.zeros(prefix_shape, dtype='float32')

# Decoder input: the suffixes
input_data_suffix = np.zeros(suffix_shape, dtype='float32')

# Decoder target: same shape as the decoder input
target_data = np.zeros(suffix_shape, dtype='float32')

In [11]:
# One-hot encode every prefix: row i, timestep k gets a 1 at the position
# of the k-th character of prefix i.
for i, prefix in enumerate(prefix_sentence):
    for k, ch in enumerate(prefix):
        input_data_prefix[i, k, char_to_idx[ch]] = 1

In [12]:

# One-hot encode every suffix as decoder input and build the matching target.
# The outer loop over samples is essential: without it the body would run for a
# single row only (whatever value `i` happened to hold), leaving every other
# row of `input_data_suffix` and `target_data` all zeros.
for i in range(len(suffix_sentence)):
    # Iterate over each character in each suffix
    for k, ch in enumerate(suffix_sentence[i]):
        # Convert the character to a one-hot encoded vector
        input_data_suffix[i, k, char_to_idx[ch]] = 1

        # Target data is one timestep ahead and excludes start character
        if k > 0:
            target_data[i, k-1, char_to_idx[ch]] = 1

# Build the Model

### Encoder

In [13]:
# Create the input layer of the encoder: variable-length sequences of
# one-hot vectors, one slot per vocabulary character
encoder_input = Input(shape=(None, len(vocabulary)))

# Create an LSTM layer with 50 units; return_state=True makes the layer
# return its final hidden and cell states alongside its output
encoder_LSTM = LSTM(50, return_state = True)

# Save encoder output, hidden and cell state
encoder_outputs, encoder_h, encoder_c = encoder_LSTM(encoder_input)

# Save encoder states; they are used as the decoder's initial state
encoder_states = [encoder_h, encoder_c]

### Decoder

In [14]:
# Create decoder input layer: variable-length sequences of one-hot vectors
decoder_input = Input(shape=(None, len(vocabulary)))

# Create an LSTM layer with 50 units (same size as the encoder LSTM, whose
# states initialise it). return_sequences=True yields an output per timestep.
decoder_LSTM = LSTM(50,return_sequences=True, return_state = True)

# Save decoder output; the returned states are not needed while training
decoder_out, _ , _ = decoder_LSTM(decoder_input, initial_state=encoder_states)

# Create a `Dense` layer with softmax activation: one probability per
# vocabulary character at every timestep
decoder_dense = Dense(len(vocabulary),activation='softmax')

# Save the decoder output
decoder_out = decoder_dense(decoder_out)

In [15]:
# Build model: takes the encoder input (prefix) and the decoder input (suffix)
# and outputs the decoder's per-timestep character distribution
model = Model(inputs=[encoder_input, decoder_input],outputs=[decoder_out])

# Compile the model; categorical cross-entropy matches the one-hot targets
model.compile(optimizer='adam', loss='categorical_crossentropy')

# Print model summary
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None, 88)     0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, None, 88)     0                                            
__________________________________________________________________________________________________
lstm_1 (LSTM)                   [(None, 50), (None,  27800       input_1[0][0]                    
__________________________________________________________________________________________________
lstm_2 (LSTM)                   [(None, None, 50), ( 27800       input_2[0][0]                    
                                                                 lstm_1[0][1]               

In [16]:
# Train for a single epoch on (prefix, suffix) -> target, holding out the
# last 20% of the samples for validation.
model.fit(x=[input_data_prefix, input_data_suffix], y=target_data,
          batch_size=64, epochs=1, validation_split=0.2)

Train on 103440 samples, validate on 25860 samples
Epoch 1/1


<keras.callbacks.callbacks.History at 0x2a89356bb0>

In [None]:
# Create encoder inference model: maps a prefix to the encoder's final
# hidden and cell states
encoder_model_inf = Model(encoder_input, encoder_states)

# Create decoder input states for inference
# NOTE(review): the hard-coded 50 must stay equal to the unit count passed to
# the LSTM layers above, otherwise the states will not fit the decoder.
decoder_state_input_h = Input(shape=(50,))
decoder_state_input_c = Input(shape=(50,))
decoder_input_states = [decoder_state_input_h, decoder_state_input_c]

# Get decoder output and feed it to the dense layer for final output prediction
# (reuses the trained decoder_LSTM and decoder_dense layers).
# NOTE(review): this rebinds `decoder_out`, which previously held the training
# graph's output tensor; re-running the model-building cell after this cell
# would pick up the inference tensor instead.
decoder_out, decoder_h, decoder_c = decoder_LSTM(decoder_input, initial_state=decoder_input_states)
decoder_states = [decoder_h , decoder_c]
decoder_out = decoder_dense(decoder_out)

# Create decoder inference model: (current input, h, c) -> (probabilities, new h, new c)
decoder_model_inf = Model(inputs=[decoder_input] + decoder_input_states, outputs=[decoder_out] + decoder_states )

In [None]:
# Pass ONE input prefix to the Encoder inference model and get the states.
# The slice must contain exactly one sample: the decoder seed below has batch
# size 1, and every input passed to `predict` must have the same number of samples.
inp_seq = input_data_prefix[50:51]
states_val = encoder_model_inf.predict(inp_seq)

# Seed the first character (the '\t' start token) and get output from the decoder
target_seq = np.zeros((1, 1, len(vocabulary)))
target_seq[0, 0, char_to_idx['\t']] = 1
decoder_out, decoder_h, decoder_c = decoder_model_inf.predict(x=[target_seq] + states_val)

# Find out the next character from the Decoder output (greedy: highest probability)
max_val_index = np.argmax(decoder_out[0,-1,:])
sampled_suffix_char = idx_to_char[max_val_index]

# Print the first character
print(sampled_suffix_char)

In [None]:
# Inspect the one-hot encoded prefix tensor.
input_data_prefix

In [None]:
# Second decoding step: the character predicted last time becomes the
# decoder's next one-hot input.
target_seq = np.zeros((1, 1, len(vocabulary)))
target_seq[0, 0, max_val_index] = 1

# Carry the decoder's recurrent state forward from the previous step.
states_val = [decoder_h, decoder_c]

# Run the decoder for one more timestep.
decoder_out, decoder_h, decoder_c = decoder_model_inf.predict(x=[target_seq, *states_val])

# Pick the highest-probability character at the last timestep and print it.
next_char_probs = decoder_out[0, -1]
max_val_index = np.argmax(next_char_probs)
sampled_suffix_char = idx_to_char[max_val_index]
print(sampled_suffix_char)

In [20]:
def generate_suffix_sentence(inp_seq):
    """Greedily decode a suffix for an encoded prefix.

    The prefix is run through ``encoder_model_inf`` to obtain the initial
    decoder states. Starting from the ``'\\t'`` start token, the decoder is
    then stepped one character at a time, always taking the most probable
    character and feeding it back in as the next input.

    Args:
        inp_seq: Input handed to ``encoder_model_inf.predict``.
            Assumes a single one-hot encoded prefix (batch size 1), since the
            decoder input built here has batch size 1 — TODO confirm.

    Returns:
        The generated characters as a string. Generation stops after the
        ``'\\n'`` end token has been produced (it is included in the result)
        or once the string is longer than ``max_len_suffix_sent``.
    """
    vocab_size = len(vocabulary)

    # The encoder's final states seed the decoder.
    decoder_state = encoder_model_inf.predict(inp_seq)

    # First decoder input: the one-hot start token.
    step_input = np.zeros((1, 1, vocab_size))
    step_input[0, 0, char_to_idx['\t']] = 1

    suffix_sent = ''
    while True:
        # Advance the decoder by one timestep.
        probs, state_h, state_c = decoder_model_inf.predict(x=[step_input] + decoder_state)

        # Greedy choice: the character with the highest probability.
        best_index = np.argmax(probs[0, -1, :])
        next_char = idx_to_char[best_index]
        suffix_sent += next_char

        # Stop on the end token or when the suffix has grown past the longest
        # suffix seen in the training data.
        if next_char == '\n' or len(suffix_sent) > max_len_suffix_sent:
            break

        # Feed the chosen character and the updated states into the next step.
        step_input = np.zeros((1, 1, vocab_size))
        step_input[0, 0, best_index] = 1
        decoder_state = [state_h, state_c]

    return suffix_sent

In [None]:
# Decode a suffix for each of the first 10 training prefixes and show the
# prefix next to what the model generated for it.
for seq_index in range(10):
    # Keep the batch dimension: a slice of length one, not a single row.
    inp_seq = input_data_prefix[seq_index:seq_index + 1]
    suffix_sent = generate_suffix_sentence(inp_seq)

    print('Prefix Sentence:', prefix_sentence[seq_index])
    print('Suffix Sentence:', suffix_sent)