# The Data Proprocessing Pipeline!

In [1]:
from music21 import *

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
import glob

import pickle as p

In [2]:
def get_notes():
    notes = []
    errors = 0
    filename_e = []

    for file_i in glob.glob("TMID/*.midi"):
        try:
            # print(type(file_i))
            midi = converter.parse(file_i)
        except:
            errors+=1
            filename_e.append(file_i)
            print(f"ERROR, E_COUNT {errors}, NAME {file_i}")

        # print("Parsing %s" %file_i)
        # print(f"Previous Length: {len(notes)}")

        notes_to_parse = None

        notes_to_parse = midi.flat.notes

        for element in notes_to_parse:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch) + " " + str(element.quarterLength))
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder) + " " + str(element.quarterLength))
            elif isinstance(element, note.Rest):
                notes.append(str(element.name)  + " " + str(element.quarterLength))

    with open("notes.pkl", "wb") as pickle:
        p.dump(notes, pickle)

    return notes


In [3]:
notes = get_notes()
n_vocab = len(set(notes))

ERROR, E_COUNT 1, NAME TMID\Anime_A158320.midi
ERROR, E_COUNT 2, NAME TMID\Anime_A1925761.midi


In [None]:
print(len(notes))

69850


In [None]:
notes

['G3 1.0',
 '7 1.0',
 '0 1.5',
 '0 1.5',
 'E4 0.5',
 'E2 0.5',
 '7 1.0',
 '7 1.0',
 'G3 0.5',
 '7 0.5',
 'E-3 0.5',
 'E-2 0.5',
 'G3 0.5',
 '7 0.5',
 'B-3 0.5',
 'B-1 0.5',
 '3 1.5',
 '3 1.0',
 '3 0.5',
 'G4 0.5',
 'G2 0.5',
 '10 1.0',
 '10 1.0',
 '1 1.0',
 '1 1.0',
 '6 1.5',
 '6 1.5',
 'B-4 0.5',
 'B-2 0.5',
 '1 1.0',
 '1 1.0',
 '9 1.5',
 '9 1.5',
 'C#5 0.5',
 'C#3 0.5',
 '4 1.0',
 '4 1.0',
 '8.1 1.5',
 '1.5.8 1.0',
 '1.5.8 0.5',
 '1.5.8 0.5',
 '8.1 1.0',
 '1.5 0.5',
 '1.5.8 1.0',
 '9.1 1.5',
 '1.5.9 1.5',
 '1.5.9 0.5',
 '9.1 1.0',
 '1.5 0.5',
 '1.5.9 1.0',
 '9.1 1.5',
 '4.9 1.5',
 '9.1.4 0.5',
 '9.1 1.0',
 '1.4 0.5',
 '9.1.4 1.0',
 'A3 0.5',
 'C#4 0.5',
 'E4 0.5',
 'C#4 0.5',
 'E4 0.5',
 'E4 0.5',
 'A4 0.5',
 'A4 0.5',
 'C#5 0.5',
 'A4 0.5',
 'C#5 0.5',
 'E5 0.5',
 '1.5 0.5',
 'B-5 0.5',
 '10.1.5 1.0',
 '1.5 1.0',
 '10.1.5 1.0',
 '1.6 0.5',
 'A5 0.5',
 '6.9.1 1.0',
 '1.6 1.0',
 '6.9.1 1.0',
 '2.6 0.5',
 'A5 0.5',
 '2.6.9 1.0',
 '2.6 1.0',
 '2.6.9 1.0',
 '2.6 0.5',
 'A5 0.5',
 '11 1.0

In [5]:
def to_categorical(y, num_classes):
    """ 1-hot encodes a tensor """
    return np.eye(num_classes, dtype='uint8')[y]

## Preprocessing sequential data for 

Please note that SEQUENCE_LENGTH is a hyperparameter meaning that to predict the next note/chord/rest the LSTM model will have access to the previous N notes/chords/rests to help make a prediction.


In [6]:
SEQUENCE_LENGTH = 100
pitchnames = sorted(set(item for item in notes))

note_to_int = dict((note, number) for number, note in enumerate(pitchnames))

network_input = []
network_output = []

# Input sequences (by 100) and output sequences
for i in range(0, len(notes) - SEQUENCE_LENGTH, 1):
    sequence_in = notes[i:i + SEQUENCE_LENGTH]
    sequence_out = notes[i + SEQUENCE_LENGTH]
    network_input.append([note_to_int[char] for char in sequence_in])
    network_output.append(note_to_int[sequence_out])

    
n_patterns = len(network_input)

network_input = np.reshape(network_input, (n_patterns, SEQUENCE_LENGTH, 1))
#normalize input
network_input = network_input / float(n_vocab)
network_output = to_categorical(network_output, max(network_output)+1)

# Building a model

### MAR-Y
#### Multiple Attention RNN (You only listen once)

## Testing shit out

In [11]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, Activation, Bidirectional, Flatten
from keras import utils
from keras.callbacks import ModelCheckpoint
from keras_self_attention import SeqSelfAttention

In [12]:
def create_network(network_input, n_vocab):
    """ create the structure of the neural network """
    model = Sequential()
    model.add(Bidirectional(LSTM(512,
        input_shape=(network_input.shape[1], network_input.shape[2]), #n_time_steps, n_features?
        return_sequences=True)))
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dropout(0.3))
    
    model.add(Bidirectional(LSTM(512,return_sequences=True)))
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dropout(0.3))
    
    model.add(Bidirectional(LSTM(512,return_sequences=True)))
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dense(256))
    model.add(Dropout(0.3))
    
    model.add(Flatten()) #Supposedly needed to fix stuff before dense layer
    model.add(Dense(n_vocab))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    return model

def train(model, network_input, network_output):
    """ train the neural network """
    filepath = os.path.abspath("weights-3LSTMAttLayer-{epoch:03d}-{loss:.4f}.hdf5")
    checkpoint = ModelCheckpoint(
        filepath,
        period=10, #Every 10 epochs
        monitor='loss',
        verbose=1,
        save_best_only=False,
        mode='min'
    )
    callbacks_list = [checkpoint]

    model.fit(network_input, network_output, epochs=200, batch_size=64, callbacks=callbacks_list)

In [13]:
model = create_network(network_input, n_vocab)
train(model, network_input, network_output)

Epoch 1/200




Epoch 2/200
  49/1090 [>.............................] - ETA: 7:31:32 - loss: 6.3835

In [None]:
def generate():
    """ Generate a piano midi file """
    #load the notes used to train the model
    with open("notes.pkl", "wb") as pickle:
        p.load(notes, pickle)

    # Get all pitch names
    pitchnames = sorted(set(item for item in notes))
    # Get all pitch names
    n_vocab = len(set(notes))

    network_input, normalized_input = prepare_sequences_output(notes, pitchnames, n_vocab)
    model = create_network_add_weights(normalized_input, n_vocab)
    prediction_output = generate_notes(model, network_input, pitchnames, n_vocab)
    create_midi(prediction_output)

NameError: name 'get_notes' is not defined

In [None]:
def prepare_sequences_output(notes, pitchnames, n_vocab):
    """ Prepare the sequences used by the Neural Network """
    # map between notes and integers and back
    note_to_int = dict((note, number) for number, note in enumerate(pitchnames))

    sequence_length = 100
    network_input = []
    output = []
    for i in range(0, len(notes) - sequence_length, 1):
        sequence_in = notes[i:i + sequence_length]
        sequence_out = notes[i + sequence_length]
        network_input.append([note_to_int[char] for char in sequence_in])
        output.append(note_to_int[sequence_out])

    n_patterns = len(network_input)

    # reshape the input into a format compatible with LSTM layers
    normalized_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    # normalize input
    normalized_input = normalized_input / float(n_vocab)

    return (network_input, normalized_input)

NameError: name 'get_notes' is not defined

In [None]:
def create_network_add_weights(network_input, n_vocab):
    """ create the structure of the neural network """
    model = Sequential()

    model.add(Bidirectional(LSTM(512,return_sequences=True),input_shape=(network_input.shape[1], network_input.shape[2]))) #n_time_steps, n_features? Needed input_shape in first layer, which is Bid not LSTM
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dropout(0.3))
    
    model.add(Bidirectional(LSTM(512,return_sequences=True)))
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dropout(0.3))
    
    model.add(Bidirectional(LSTM(512,return_sequences=True)))
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dense(256))
    model.add(Dropout(0.3))
    
    model.add(Flatten()) #Supposedly needed to fix stuff before dense layer
    model.add(Dense(n_vocab))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    # Load the weights to each node
    model.load_weights('weights-3LSTMAttLayer-010-5.7410.hdf5')
    
    return model

In [None]:
def generate_notes(model, network_input, pitchnames, n_vocab):
    """ Generate notes from the neural network based on a sequence of notes """
    # pick a random sequence from the input as a starting point for the prediction
    start = np.random.randint(0, len(network_input)-1)

    int_to_note = dict((number, note) for number, note in enumerate(pitchnames))

    pattern = network_input[start]
    prediction_output = []

    # generate 500 notes
    for note_index in range(500):
        prediction_input = np.reshape(pattern, (1, len(pattern), 1))
        prediction_input = prediction_input / float(n_vocab)

        prediction = model.predict(prediction_input, verbose=0)

        index = np.argmax(prediction)
        result = int_to_note[index]
        prediction_output.append(result)

        pattern.append(index)
        pattern = pattern[1:len(pattern)]

    return prediction_output

In [None]:
def create_midi(prediction_output):
    """ convert the output from the prediction to notes and create a midi file
        from the notes """
    offset = 0
    output_notes = []

    # create note and chord objects based on the values generated by the model
    for pattern in prediction_output:
        pattern = pattern.split()
        temp = pattern[0]
        duration = pattern[1]
        pattern = temp
        # pattern is a chord
        if ('.' in pattern) or pattern.isdigit():
            notes_in_chord = pattern.split('.')
            notes = []
            for current_note in notes_in_chord:
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                notes.append(new_note)
            new_chord = chord.Chord(notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        # pattern is a rest
        elif('rest' in pattern):
            new_rest = note.Rest(pattern)
            new_rest.offset = offset
            new_rest.storedInstrument = instrument.Piano() #???
            output_notes.append(new_rest)
        # pattern is a note
        else:
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)
        # increase offset each iteration so that notes do not stack
        offset += convert_to_float(duration)

    midi_stream = stream.Stream(output_notes)

    midi_stream.write('midi', fp='test_output.mid')
 
#From: https://stackoverflow.com/questions/1806278/convert-fraction-to-float
def convert_to_float(frac_str):
    try:
        return float(frac_str)
    except ValueError:
        num, denom = frac_str.split('/')
        try:
            leading, num = num.split(' ')
            whole = float(leading)
        except ValueError:
            whole = 0
        frac = float(num) / float(denom)
        return whole - frac if whole < 0 else whole + frac

In [None]:
generate()