In [None]:
from music21 import *
import pandas as pd
import numpy as np
import pickle
from collections import Counter
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import os
# Make sure the output directory for saved figures exists before any plotting cell runs.
# makedirs(..., exist_ok=True) does nothing when the directory is already present, so the
# cell can be re-run safely and there is no gap between checking for and creating it.
os.makedirs('../figures', exist_ok=True)

### Feature Extraction
Features extracted from each piece:

- MIDI pitches of the melody (the first part of the score)
- Rhythms (note durations, in quarter lengths)
- Chords (Roman numerals)

In [None]:
# Extract features from every piece returned for the composer 'bach'.
# Per-piece results are appended to chorales_notes_train / chorales_chords_train;
# the all_* lists keep a flat running record of every value seen across pieces.
# --------------------------------------------------------------------------------
bachCorpus = corpus.getComposer('bach')
chorales_notes_train = []   # one list of (MIDI_pitch, note_duration) tuples per piece
chorales_chords_train = []  # one list of roman-numeral figure strings per piece

# Keep track of all features encountered (flat, across the whole corpus)
all_durations = []
all_notes = []
all_chords = []

for piece in bachCorpus:
    pieceScore = corpus.parse(piece)  # stream.Score object
    melodyPart = pieceScore.getElementsByClass('Part')[0]  # the melody is taken to be the first part in the score
    melodyNotes = melodyPart.flat.getElementsByClass('Note')  # all Note objects of that part

    print('Processing chorale: {}'.format(pieceScore.metadata.title))

    # Determine the key used as context for the roman-numeral analysis
    # ----------------------------------------------------------------
    # If the melody part contains several key.Key objects, the last one wins.
    bKey = None
    for keychange in melodyPart.recurse().getElementsByClass(key.Key):
        bKey = keychange
    if bKey is None:
        # No explicit Key object in the melody part. Passing None on would, per the
        # music21 documentation, make romanNumeralFromChord treat each chord's own
        # root as the key, so estimate the key from the whole score instead.
        bKey = pieceScore.analyze('key')

    # Chordify and extract the chord sequence
    # ---------------------------------------
    bChords = pieceScore.chordify()
    chordSequence = []
    for c in bChords.recurse().getElementsByClass('Chord'):
        # Condense the chord into the middle range of treble clef
        c.closedPosition(forceOctave=4, inPlace=True)

        rn = roman.romanNumeralFromChord(c, bKey)
        figure = str(rn.figure)
        c.addLyric(figure)  # keep the label on the chord so the chordified score stays annotated

        # Store the figure itself rather than reading it back through c.lyric, so the
        # recorded label is exactly what the analysis produced.
        chordSequence.append(figure)

    all_chords.extend(chordSequence)  # keep a running total of all chords seen
    chorales_chords_train.append(chordSequence)  # store this piece's chord sequence in master list

    # Extract note pitches and rhythms
    # --------------------------------
    noteSequence = []  # series of all melody notes in order, from start to end of piece
    for n in melodyNotes:  # melodyNotes is already flattened and filtered to Note objects
        midiNote = n.pitch.midi
        noteDuration = n.duration.quarterLength
        noteTuple = (midiNote, noteDuration)  # (MIDI_pitch, note_duration)

        # Keep track of all notes and durations seen thus far
        all_durations.append(noteDuration)
        all_notes.append(midiNote)

        noteSequence.append(noteTuple)  # appends the next time step to the sample

    chorales_notes_train.append(noteSequence)  # store this piece's sequence of notes in master list

In [None]:
# Count how many times each note duration (in quarter lengths) was collected in all_durations
durations_dict = Counter()
durations_dict.update(all_durations)
durations_dict

In [None]:
# Bar chart of how often each note duration occurs across the corpus.
# A Counter iterates in first-seen order, which would scatter the durations along the
# x-axis; sort them so the bars run from the shortest to the longest duration.
sorted_durations = sorted(durations_dict)
duration_positions = range(len(sorted_durations))

plt.figure(figsize=(14,8))
plt.bar(duration_positions, [durations_dict[d] for d in sorted_durations], align='center', color='blue')
plt.xticks(duration_positions, sorted_durations)
plt.xlabel("Quarter_length", fontsize=18)
plt.ylabel("Frequency", fontsize=18)
plt.title("Bach Chorale Note Length Distribution", fontsize=22)
plt.savefig('../figures/note_duration_distr.png', dpi=300)
plt.show()

In [None]:
# Tally how often each MIDI pitch value was collected in all_notes,
# then list the distinct pitch values in ascending order
notes_dict = Counter()
notes_dict.update(all_notes)
sorted(notes_dict.keys())

In [None]:
# Bar chart of how often each MIDI pitch occurs in the collected melody notes.
# A Counter iterates in first-seen order, so sort the pitches to get a readable
# low-to-high distribution along the x-axis.
sorted_pitches = sorted(notes_dict)
pitch_positions = range(len(sorted_pitches))

plt.figure(figsize=(14,8))
plt.bar(pitch_positions, [notes_dict[p] for p in sorted_pitches], align='center', color='blue')
plt.xticks(pitch_positions, sorted_pitches)
plt.xlabel("MIDI Pitch Value")
plt.ylabel("Frequency")
plt.title("Bach Chorale MIDI Pitch")

plt.show()

In [None]:
# Preview the (MIDI_pitch, note_duration) tuples extracted for the first piece
chorales_notes_train[0]

In [None]:
# One note sequence is appended per parsed piece, so the list length is the piece count
print(f"Number of chorales analyzed: {len(chorales_notes_train)}")

In [None]:
# Inspect the chord sequence (roman-numeral figures) extracted for the first piece

chorales_chords_train[0]

In [None]:
# Count the occurrences of every chord label collected in all_chords (entire corpus)
chords_dict = Counter()
chords_dict.update(all_chords)
chords_dict

In [None]:
# Report how many different chord labels were counted across the corpus
print("Number of distinct chords in corpus: ", len(chords_dict))

In [None]:
# Bar chart of chord label frequencies over the whole corpus.
# Bars are ordered from most to least frequent (instead of first-seen order) and the
# tick labels are rotated so that a large number of distinct labels does not overlap.
chord_counts = chords_dict.most_common()
chord_positions = range(len(chord_counts))

plt.figure(figsize=(16,8))
plt.bar(chord_positions, [count for label, count in chord_counts], align='center', color='green')
plt.xticks(chord_positions, [label for label, count in chord_counts], rotation=90)
plt.xlabel("Chords")
plt.ylabel("Frequency")
plt.title("Bach Chorale Chords (Roman Numerals)")

plt.show()

In [None]:
# Load the starting-note likelihood tables from the pickles/ directory.
# These files are not created anywhere in the cells shown here.
# NOTE(review): pickle.load can execute arbitrary code; only load files this project produced.
with open('pickles/starting_duration_likelihood.pickle', 'rb') as duration_file:
    starting_duration_likelihood = pickle.load(duration_file)

with open('pickles/starting_pitch_likelihood.pickle', 'rb') as pitch_file:
    starting_pitch_likelihood = pickle.load(pitch_file)

In [None]:
# Bar chart of the loaded starting-duration table, one bar per key in stored order.
# NOTE(review): presumably maps note duration -> likelihood; confirm against the code
# that writes the pickle (the y-axis label says "Frequency").
start_duration_labels = list(starting_duration_likelihood.keys())
start_duration_heights = list(starting_duration_likelihood.values())
start_duration_positions = range(len(starting_duration_likelihood))

fig, ax = plt.subplots(figsize=(16, 8))
ax.bar(start_duration_positions, start_duration_heights, color="blue")
ax.set_xticks(start_duration_positions)
ax.set_xticklabels(start_duration_labels)
ax.set_xlabel("Note Duration")
ax.set_ylabel("Frequency")
ax.set_title("Bach Chorale Starting Note Durations")
fig.savefig('../figures/start_duration_distr.png', dpi=300)
plt.show()

In [None]:
# Bar chart of the loaded starting-pitch table, one bar per key in stored order.
# NOTE(review): presumably maps MIDI pitch -> likelihood; confirm against the code
# that writes the pickle (the y-axis label says "Frequency").
start_pitch_labels = list(starting_pitch_likelihood.keys())
start_pitch_heights = list(starting_pitch_likelihood.values())
start_pitch_positions = range(len(starting_pitch_likelihood))

fig, ax = plt.subplots(figsize=(16,8))
ax.bar(start_pitch_positions, start_pitch_heights, color="blue")
ax.set_xticks(start_pitch_positions)
ax.set_xticklabels(start_pitch_labels)
ax.set_xlabel("MIDI Pitch")
ax.set_ylabel("Frequency")
ax.set_title("Bach Chorale Starting Note Pitches")
fig.savefig('../figures/start_pitch_distr.png', dpi=300)
plt.show()