In [5]:
import pandas as pd
import numpy as np
import pretty_midi
import music21
import re
from random import sample
from keras.utils import to_categorical

Using TensorFlow backend.


In [6]:
import os
os.chdir("/Users/desireewaugh/Desktop/MIT/Courses/6.883 - Modeling with ML/Projects/Final Project/MusicGenerator/data")

### Read text data

In [7]:
with open("Jigs.txt") as my_file:
    abc_text = my_file.read()

In [13]:
# Create vocab dictionaries
def create_dictionaries(text_data):
    char_list = list(text_data)
    vocab = list(set(char_list))
    
    # Dictionary with character to integer
    vocab_dict = {i: j for i,j in enumerate(char_list)}
    
    # Dictionary with integer to character
    vocab_dict_rev = {j: i for i,j in enumerate(char_list)}
        
    return vocab_dict, vocab_dict_rev

In [19]:
num_to_char, char_to_num = create_dictionaries(abc_text)

### Encoder & Decoder functions

In [24]:
def encoder(text_data, dictionary):
    character_nums = list(text_data)
    
    for i in range(len(character_nums)):
        character_nums[i] = dictionary[character_nums[i]]
        
    return character_nums

In [27]:
text_nums = encoder(abc_text, char_to_num)

In [29]:
def decoder(numeric_data, dictionary):
    text_list = []
    
    for i in range(len(numeric_data)):
        text_list.append(dictionary[numeric_data[i]])
    
    return "".join(text_list)
        

In [30]:
decoded_text = decoder(text_nums, num_to_char)

# Check to make sure decoder works:
abc_text == decoded_text

True

In [64]:
def create_training(char_nums, num_samples, str_length):
    # Get starting indices of the random samples for your training batch
    start_indices = sample(char_nums[0:(len(char_nums)-str_length-1)], num_samples)
    
    # The x_values begin at the starting indices and are str_length characters long
    # The y_values begin one character into the x_values and end one character longer than x_values
    x_data = np.array(char_nums[0:str_length])
    y_data = np.array(char_nums[1:str_length+1])
    for i in range(1,num_samples):
        x_data = np.vstack((x_data, np.array(char_nums[i:i+str_length])))
        y_data = np.vstack((y_data, np.array(char_nums[i+1:i+str_length+1])))
    
    #return x_data, y_data
    return x_data, y_data

In [65]:
x, y = create_training(text_nums, 10, 20)
x.shape, y.shape

((10, 20), (10, 20))

In [66]:
x

array([[   168, 135549, 135274, 135256, 135515, 130943, 135549, 135387,
        135242, 135260, 135278, 135549, 135387, 135242, 135260, 135278,
        135497, 130962, 135517, 130943],
       [135549, 135274, 135256, 135515, 130943, 135549, 135387, 135242,
        135260, 135278, 135549, 135387, 135242, 135260, 135278, 135497,
        130962, 135517, 130943, 135532],
       [135274, 135256, 135515, 130943, 135549, 135387, 135242, 135260,
        135278, 135549, 135387, 135242, 135260, 135278, 135497, 130962,
        135517, 130943, 135532, 132744],
       [135256, 135515, 130943, 135549, 135387, 135242, 135260, 135278,
        135549, 135387, 135242, 135260, 135278, 135497, 130962, 135517,
        130943, 135532, 132744, 135532],
       [135515, 130943, 135549, 135387, 135242, 135260, 135278, 135549,
        135387, 135242, 135260, 135278, 135497, 130962, 135517, 130943,
        135532, 132744, 135532, 135549],
       [130943, 135549, 135387, 135242, 135260, 135278, 135549, 135387,
   

In [67]:
y

array([[135549, 135274, 135256, 135515, 130943, 135549, 135387, 135242,
        135260, 135278, 135549, 135387, 135242, 135260, 135278, 135497,
        130962, 135517, 130943, 135532],
       [135274, 135256, 135515, 130943, 135549, 135387, 135242, 135260,
        135278, 135549, 135387, 135242, 135260, 135278, 135497, 130962,
        135517, 130943, 135532, 132744],
       [135256, 135515, 130943, 135549, 135387, 135242, 135260, 135278,
        135549, 135387, 135242, 135260, 135278, 135497, 130962, 135517,
        130943, 135532, 132744, 135532],
       [135515, 130943, 135549, 135387, 135242, 135260, 135278, 135549,
        135387, 135242, 135260, 135278, 135497, 130962, 135517, 130943,
        135532, 132744, 135532, 135549],
       [130943, 135549, 135387, 135242, 135260, 135278, 135549, 135387,
        135242, 135260, 135278, 135497, 130962, 135517, 130943, 135532,
        132744, 135532, 135549, 135497],
       [135549, 135387, 135242, 135260, 135278, 135549, 135387, 135242,
   