In [47]:
from music21 import *
import pandas as pd
import numpy as np
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


### Features
- chord_names (nominal)
        - Label encoding (C major -> 0, A major -> 13)
        - One-hot encoding?
        - Feature hashing
- roman_numerals

In [None]:
# Build a three-note chord from pitch names ("E-5" is music21's spelling of E-flat in octave 5)
# and render it with music21's default show() output.
myChord = chord.Chord(["E-5","C4","G4"])
myChord.show()

In [None]:
# Echo the chord object so the notebook displays its repr.
myChord

In [None]:
# Grab the chord's pitches and display them.
# NOTE: the global name `pitches` is rebound to a plain list by the feature-extraction cell further down.
pitches = myChord.pitches
pitches

In [None]:
# Take the last pitch of the chord as the melody note and display it.
# NOTE(review): this is only the highest pitch if `pitches` is in ascending order;
# the chord is not sorted until a later cell — confirm the intended order.
melody = pitches[-1]
melody

In [None]:
# Sort the chord's pitches from lowest to highest, modifying myChord itself (inPlace=True).
myChord.sortAscending(inPlace = True)

In [None]:
# Search the music21 corpus metadata for entries whose composer field matches 'bach',
# then parse the 23rd entry from the end into a score and render it.
bach_corpus = corpus.search('bach', 'composer')
bach_corpus[-23].parse().show()

In [None]:
# Load every corpus entry whose composer metadata matches 'bach'
# NOTE(review): the search is by composer only, so this presumably includes
# non-chorale works as well — confirm if only chorales are wanted.
bach_corpus = corpus.search('bach', 'composer')


for entry in bach_corpus:
    # Parse the metadata entry into a score; a separate loop name avoids rebinding
    # the iteration variable to a different kind of object mid-loop.
    chorale = entry.parse()
    print('Processing chorale: {}'.format(chorale.metadata.title))
    # The first Part of the score is treated as the soprano line.
    sopranoPart = chorale.getElementsByClass('Part')[0]

    for keychange in sopranoPart.recurse().getElementsByClass(key.Key):
        # recurse() yields each Key from inside its containing stream (normally a
        # measure), so a bare `.offset` is only the position within that container.
        # getOffsetInHierarchy() gives the position relative to the whole part.
        print(keychange, keychange.getOffsetInHierarchy(sopranoPart))


In [None]:
#Iterate through soprano part and print all key changes
# NOTE(review): `soprano` is not assigned in any visible cell (the corpus loop uses
# `sopranoPart`), so this cell raises NameError on a fresh kernel unless it is defined elsewhere.
print('Key Changes:')
for keychange in soprano.recurse().getElementsByClass(key.Key):
    #Get key information and offset location
    print(keychange, keychange.offset)

In [None]:
# Allocate a zero-filled, object-dtype feature table with two columns and one row per item of `bach`,
# then display it.
# NOTE(review): `bach` is not defined in any visible cell — confirm where it comes from.
bachFeatures = pd.DataFrame(np.zeros((len(bach),2)), columns = ['roman_numerals', 'mel_notes'], dtype='object')
bachFeatures

In [None]:
# Inspect the column dtypes of the feature table.
bachFeatures.dtypes

In [None]:
# Build the V Roman numeral in the key of F-sharp minor and display the key attached to it.
k = key.Key('f#')  # 3-sharps minor
rn = roman.RomanNumeral('V', k)
rn.key

In [None]:
# Display the Roman numeral chord's pitched common name.
rn.pitchedCommonName

### Feature Extraction

In [50]:
# Extract the melody of every Bach piece in the corpus and encode it as a sequence
# of one-hot style feature vectors. Each piece contributes ONE sequence to x_train.
#
# Feature vector layout (FEATURE_SIZE = 50 slots):
#   slots 0 .. 38   -> pitch, indexed by (MIDI number - PITCH_OFFSET)
#   slots 39 .. 49  -> duration, indexed through duration_indices
# A rest sets only its duration slot; the pitch slots stay all zero.

N_PITCH_SLOTS = 39   # number of pitch slots at the front of the vector
PITCH_OFFSET = 50    # MIDI number that maps to pitch slot 0
FEATURE_SIZE = 50    # 39 pitch slots + 11 duration slots

bach_corpus = corpus.getComposer('bach')
x_train = []
durations = [0.125, 0.25, 0.5, 0.75, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0, 8.0]
# Duration slots start right after the pitch slots.
duration_indices = {length: slot + N_PITCH_SLOTS for slot, length in enumerate(durations)}

pitches = []  # every MIDI pitch seen, kept for inspecting the corpus pitch range
for piece in bach_corpus:
    sample = []  # the sequence of time steps for this piece
    piece_score = corpus.parse(piece)  # stream.Score object
    melody_part = piece_score.getElementsByClass('Part')[0]  # melody parts are always the first part in the score
    melody_notes = melody_part.flat.getElementsByClass(['Note', 'Rest'])

    # DON'T CHANGE n to Note or note (namespace conflict with `from music21 import *`)
    for n in melody_notes.recurse():  # iterates through all notes and rests in the melody
        new_note = np.zeros(FEATURE_SIZE)  # the next time step in the sample

        note_duration = n.duration.quarterLength
        if note_duration == 0.0:
            continue  # this is a grace note, toss it
        # A duration missing from `durations` raises KeyError here.
        new_note[duration_indices[note_duration]] = 1

        if n.isNote:
            midi_pitch = n.pitch.midi
            pitches.append(midi_pitch)
            # NOTE(review): assumes PITCH_OFFSET <= midi_pitch < PITCH_OFFSET + N_PITCH_SLOTS.
            # A lower pitch yields a negative index that wraps into the duration slots,
            # and a higher one collides with them — confirm against the corpus range.
            new_note[midi_pitch - PITCH_OFFSET] = 1

        sample.append(new_note)  # appends the next time step to the sample

    # Append once per piece. Doing this inside the note loop (as before) added the
    # same list object to x_train once for every note, duplicating each piece.
    x_train.append(sample)

# pad the sequences to be all the same length (all-zero time steps appended at the end)
# NOTE(review): dtype='object' produces an object array; a float dtype is presumably
# what a Keras model expects — confirm before training.
padded_seqs = pad_sequences(x_train, padding="post", dtype='object', value=np.zeros(FEATURE_SIZE))