In [None]:
from music21 import *
import collections
import numpy as np
import pandas as pd
import gzip

In [None]:
us = environment.UserSettings()
us['musicxmlPath'] = 'C:/Program Files/MuseScore 3/bin/MuseScore3.exe'
us['musescoreDirectPNGPath'] = 'C:/Program Files/MuseScore 3/bin/MuseScore3.exe'

### Get corpus and parse to pieces

In [None]:
coreCorpus = corpus.corpora.CoreCorpus()
sixEight = corpus.search('6/8')

bachCorpusScores = []
for c in sixEight:
    score = c.parse()
    bachCorpusScores.append(score)

### Collect notes from a single piece

In [None]:
sBach = corpus.parse('bach/bwv57.8')

In [None]:
len(sBach.getElementsByClass(stream.Part))

measures = len(sBach.getElementsByClass(stream.Part)[0].getElementsByClass(stream.Measure))
# Get a part of the piece
noteIterator = sBach.parts[0].getElementsByClass(stream.Measure).flat.getElementsByClass('Note')
allNotes = []

for el in noteIterator:
    if('-' in el.nameWithOctave):
        noteName = el.pitch.getEnharmonic().nameWithOctave
        allNotes.append(noteName)
    else:
        noteName = el.nameWithOctave
        allNotes.append(noteName)

# Collect notes from a corpus

In [90]:
possibleNotes = set() # set containing all possible notes for matrix creation
allNotesPerPiece = [] # Multidimensional array of all notes per piece
for p in bachCorpusScores:
    currNotes = []
    measures = len(p.getElementsByClass(stream.Part)[0].getElementsByClass(stream.Measure))
    # Get a part of the piece
    noteIterator = p.parts[0].getElementsByClass(stream.Measure).flat.getElementsByClass('Note')
    if(len(noteIterator) == 0):
        continue
    for el in noteIterator:
        pitchName = el.nameWithOctave
        if('-' in pitchName or '##' in pitchName):
            noteName = el.pitch.getEnharmonic().nameWithOctave
            possibleNotes.add(noteName)
            currNotes.append(noteName)
        else:
            noteName = el.nameWithOctave
            possibleNotes.add(noteName)
            currNotes.append(noteName)
            
    allNotesPerPiece.append(currNotes)

In [None]:
flatten = lambda l: [item for sublist in l for item in sublist]
flatten(allNotesPerPiece)

In [91]:
# Get frequency array
counter = collections.Counter(flatten(allNotesPerPiece))


In [92]:
# Check if no empty pieces
print(len(allNotesPerPiece))
print(len(list(filter(lambda x: x > 0, map(len, allNotesPerPiece)))))


2158
2158


### Create frequency matrix

In [98]:
# Get frequency array
counter = collections.Counter(flatten(allNotesPerPiece))
# Initial note counter
counter[' '] = len(allNotesPerPiece)

noteRows = possibleNotes.add(' ')

zeros = np.full((len(possibleNotes), len(possibleNotes)), 0)
matrix = pd.DataFrame(zeros, index=possibleNotes, columns=possibleNotes)
matrix = matrix.astype(float)

for allNotes in allNotesPerPiece:
    # Fill transition matrix frequencies
    for i in range(len(allNotes)+1):
        # First note
        if(i == 0):
            matrix[' '][allNotes[i]] = matrix[' '][allNotes[i]] + 1
            continue
        # Last note
        if(i == len(allNotes)):
            matrix[allNotes[i-1]][' '] = matrix[allNotes[i-1]][' '] + 1
            continue

        currNote = allNotes[i-1]
        nextNote = allNotes[i]

        matrix[currNote][nextNote] = matrix[currNote][nextNote] + 1


In [99]:
### Divide each row to get probabilistic model
for i in possibleNotes:
    for j in possibleNotes:
        matrix[j][i] = matrix[j][i] / counter[j]

In [107]:
# Check if p sum up to one
matrix.sum(1)

A3     1.827102
E#6    0.019851
F5     0.609683
C4     1.318996
F#4    1.353596
B#4    0.000367
E#5    0.122903
C7     0.166667
D#5    0.992001
B4     1.545471
F#3    0.487904
A#5    0.507488
A4     2.639199
G6     1.865281
B#3    0.005101
B6     0.262331
G#6    1.204058
C5     1.135592
E#4    0.006072
B3     1.794710
C6     0.718268
C#7    0.166667
C#4    1.027662
D5     2.480647
D#3    0.123049
C#5    1.045989
E6     1.279134
A6     0.738204
G#5    0.496701
G#4    0.297665
A#3    0.630199
F3     0.690065
F#5    1.788985
D6     1.033378
E4     1.911423
A5     1.924106
B5     0.964886
G3     2.284740
E3     1.288508
D#4    0.437788
F#6    1.404401
A#4    0.539282
D3     0.411561
F4     0.846327
C3     0.553731
G5     1.666921
E5     1.854352
       0.454691
D#6    1.100008
B#5    0.206116
F6     0.957146
G#3    0.641316
G4     1.898486
C#6    1.519213
A#6    0.286755
D4     2.467257
dtype: float64

In [101]:
matrix.to_csv(path_or_buf="./transition-matrix.csv")