In [2]:
from music21 import *
import collections
import numpy as np
import pandas as pd
import gzip

In [None]:
# Tell music21 where the local MuseScore 3 executable lives. The same binary
# is registered under both settings keys.
museScoreExe = 'C:/Program Files/MuseScore 3/bin/MuseScore3.exe'
us = environment.UserSettings()
for settingKey in ('musicxmlPath', 'musescoreDirectPNGPath'):
    us[settingKey] = museScoreExe

### Get corpus and parse to pieces

In [3]:
# NOTE(review): coreCorpus is not referenced by any cell visible in this notebook.
coreCorpus = corpus.corpora.CoreCorpus()
# Corpus entries matching the query '6/8' (presumably the time signature - confirm).
sixEight = corpus.search('6/8')

# Parse every search hit into a score object; this is the slow step of the notebook.
bachCorpusScores = [entry.parse() for entry in sixEight]

### Collect notes from a single piece

In [None]:
# Load one piece from the music21 corpus to try the note extraction on a small example
sBach = corpus.parse('bach/bwv57.8')

In [None]:
# Number of measures in the first part (informational; not used by the cells shown here)
measures = len(sBach.getElementsByClass(stream.Part)[0].getElementsByClass(stream.Measure))
# Every Note found inside the measures of the first part
noteIterator = sBach.parts[0].getElementsByClass(stream.Measure).flat.getElementsByClass('Note')

# Note names (with octave) of that part, in iteration order.
# Names containing a flat ('-') or a double sharp ('##') are replaced by their
# enharmonic spelling - the same rule the corpus-wide collection applies - so
# both cells produce names from the same vocabulary.
allNotes = []
for el in noteIterator:
    noteName = el.nameWithOctave
    if '-' in noteName or '##' in noteName:
        noteName = el.pitch.getEnharmonic().nameWithOctave
    allNotes.append(noteName)

### Collect notes from a corpus

In [4]:
possibleNotes = set()  # every distinct note name seen; later used as the matrix labels
allNotesPerPiece = []  # one list of note names per piece, in iteration order
for p in bachCorpusScores:
    # Only the notes inside the measures of the first part are used
    noteIterator = p.parts[0].getElementsByClass(stream.Measure).flat.getElementsByClass('Note')
    if len(noteIterator) == 0:
        # Pieces without notes contribute nothing and are left out entirely
        continue

    currNotes = []
    for el in noteIterator:
        noteName = el.nameWithOctave
        # Respell flats ('-') and double sharps ('##') enharmonically so that
        # differently spelled names collapse onto a smaller set of labels
        if '-' in noteName or '##' in noteName:
            noteName = el.pitch.getEnharmonic().nameWithOctave
        currNotes.append(noteName)

    possibleNotes.update(currNotes)
    allNotesPerPiece.append(currNotes)

In [None]:
def flatten(l):
    """Return one flat list holding the items of every sub-iterable of ``l``, in order."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat
# Preview only the first few notes; displaying the whole flattened corpus would flood the cell output
flatten(allNotesPerPiece)[:20]

In [12]:
# Count how often each note name occurs across all pieces
counter = collections.Counter()
for pieceNotes in allNotesPerPiece:
    counter.update(pieceNotes)


In [13]:
# Sanity check: the two printed numbers are equal when no collected piece is empty
pieceCount = len(allNotesPerPiece)
nonEmptyCount = sum(1 for pieceNotes in allNotesPerPiece if len(pieceNotes) > 0)
print(pieceCount)
print(nonEmptyCount)


2158
2158


### Create frequency matrix

In [8]:
# Build the transition-count matrix.
# Layout: column = current note, row = note that follows it. The label ' '
# stands for the boundary of a piece: column ' ' holds the first notes of the
# pieces, row ' ' holds the notes a piece ends on.

# An empty piece has no first or last note, so it is ignored here.
pieces = [pieceNotes for pieceNotes in allNotesPerPiece if pieceNotes]

# Occurrences per note. Every occurrence is followed by exactly one transition
# (to the next note or to ' '), so these are also the column totals that the
# normalisation step divides by.
counter = collections.Counter(flatten(pieces))
# Each piece leaves the boundary state exactly once
counter[' '] = len(pieces)

possibleNotes.add(' ')
# set.add() returns None, so the labels are taken from the set afterwards.
# Sorting keeps the row/column order identical from run to run; iterating
# the set directly gives an arbitrary order.
noteRows = sorted(possibleNotes)
position = {note: k for k, note in enumerate(noteRows)}

# Accumulate in a plain array and wrap it once at the end: chained writes such
# as matrix[a][b] = ... may land on a temporary copy instead of the frame.
counts = np.zeros((len(noteRows), len(noteRows)), dtype=float)
for pieceNotes in pieces:
    padded = [' '] + list(pieceNotes) + [' ']
    for currNote, nextNote in zip(padded, padded[1:]):
        counts[position[nextNote], position[currNote]] += 1

matrix = pd.DataFrame(counts, index=noteRows, columns=noteRows)


In [9]:
### Divide each column by the occurrence count of its note to get a probabilistic model
# Column j holds the transitions that leave note j, and counter[j] is their total,
# so after the division every column holds probabilities.
# The division is done in one aligned operation: element-wise chained writes
# (matrix[j][i] = ...) may modify a temporary copy instead of the frame.
# NOTE: re-running this cell without rebuilding `matrix` divides the values again.
columnTotals = pd.Series({note: counter[note] for note in matrix.columns})
matrix = matrix.div(columnTotals, axis='columns')

In [15]:
# Every column should add up to 1 once the counts are turned into probabilities
matrix.sum(axis=0)

C#4    1.0
D#4    1.0
F3     1.0
A#6    1.0
D#6    1.0
C5     1.0
F4     1.0
D6     1.0
C6     1.0
G#5    1.0
A3     1.0
G5     1.0
C#7    1.0
G3     1.0
B5     1.0
B3     1.0
A6     1.0
       1.0
E4     1.0
D#5    1.0
B#3    1.0
A4     1.0
E#4    1.0
E6     1.0
C#5    1.0
E5     1.0
C3     1.0
A#5    1.0
D#3    1.0
E3     1.0
A#4    1.0
C7     1.0
F5     1.0
D3     1.0
B6     1.0
F#6    1.0
B#4    1.0
G#6    1.0
A#3    1.0
C#6    1.0
B#5    1.0
D4     1.0
F#5    1.0
G#3    1.0
F6     1.0
G6     1.0
G4     1.0
F#3    1.0
E#6    1.0
A5     1.0
E#5    1.0
F#4    1.0
G#4    1.0
B4     1.0
D5     1.0
C4     1.0
dtype: float64

In [16]:
# Write the transition matrix, including its row and column labels, to a CSV file next to the notebook
matrix.to_csv("./transition-matrix.csv")