In [1]:
from music21 import *

In [2]:
import numpy as np
import pandas as pd
import os

In [3]:
#music21 user settings: set autoDownload to 'allow' and use MuseScore for both
#direct PNG rendering and MusicXML output.
#The MuseScore location is machine-specific, so it can be overridden with the
#MUSESCORE_PATH environment variable; the default is the original Windows install path.
musescorePath = os.environ.get("MUSESCORE_PATH", "C:/Program Files/MuseScore 3/bin/MuseScore3.exe")
environment.set('autoDownload','allow')
environment.set("musescoreDirectPNGPath", musescorePath)
environment.set("musicxmlPath", musescorePath)

Read in a single score to test the feature-extraction process

In [4]:
#folder containing the krn file used for the single-score test
filepath='data/classical/chopin/prelude/'

In [5]:
#parse one krn file with music21; the result is kept in `s` and reused by the following cells
s=converter.parse(filepath+'prelude28-01.krn')

In [7]:
#overall dynamic range feature; .vector returns the extracted value(s) as a list
features.jSymbolic.OverallDynamicRangeFeature(s).extract().vector

[0]

In [8]:
#average time (in seconds) between attacks, i.e. between Note On events regardless of channel
fe = features.jSymbolic.AverageTimeBetweenAttacksFeature(s)
f = fe.extract()
#the extracted value is exposed as a one-element list
f.vector

[0.11453488370930233]

In [9]:
#initial tempo of the score
fe = features.jSymbolic.InitialTempoFeature(s)
f = fe.extract()
#the extracted value is exposed as a one-element list
f.vector  

[80.0]

In [10]:
#note density (average number of notes per second, taking local tempo into account)
fe = features.jSymbolic.NoteDensityFeature(s)
f = fe.extract()
#the extracted value is exposed as a one-element list
f.vector

[15.595959597534945]

In [11]:
#pitch range: distance in semitones between the highest and the lowest pitch
fe = features.jSymbolic.RangeFeature(s)
fe.extract().vector

[55]

In [12]:
#woodwinds fraction (shown as the full one-element vector)
features.jSymbolic.WoodwindsFractionFeature(s).extract().vector

[0.0]

In [13]:
#orchestral strings fraction; [0] unwraps the single value from the vector
features.jSymbolic.OrchestralStringsFractionFeature(s).extract().vector[0]

0.0

In [14]:
#percussion prevalence (proportion of Note Ons that are unpitched percussion)
#[0] unwraps the single value from the vector
features.jSymbolic.PercussionPrevalenceFeature(s).extract().vector[0]

0

In [15]:
#string keyboard fraction (fraction of Note Ons that belong to string keyboard instruments such as the piano)
#[0] unwraps the single value from the vector
features.jSymbolic.StringKeyboardFractionFeature(s).extract().vector[0]

1.0

In [16]:
#title as read from the krn file's metadata
s.metadata.title

'Prelude in C Minor, Op. 28, No. 1'

In [17]:
#composer as read from the krn file's metadata
s.metadata.composer

'Chopin, Frederic'

In [18]:
#date from the metadata; for this file it comes back as the string 'None' (no date entry is listed by metadata.all())
s.metadata.date

'None'

In [20]:
#list every (field, value) pair available in the score's metadata
s.metadata.all()

[('composer', 'Chopin, Frederic'),
 ('copyright', 'Copyright 2008 by Craig Stuart Sapp'),
 ('electronic encoder', 'Craig Stuart Sapp'),
 ('movementName', 'Agitato'),
 ('number', 'No. 1'),
 ('opusNumber', 'Op. 28'),
 ('title', 'Prelude in C Minor, Op. 28, No. 1')]

Read in the Excel workbook (`workList.xlsx`) that lists the works to process: an include flag, the folder path, the krn file name, and the Spotify ID

In [39]:
#load the list of works from the Excel workbook
#columns used later in the notebook: include, folder, file, spotifyID
workList = pd.read_excel("workList.xlsx")

In [40]:
#display the work list to check that it was read correctly
workList

Unnamed: 0,include,folder,file,spotifyID
0,1,adam,fimaris.krn,0XKQIVQ97pPTJg2uDEivD4
1,1,alkan/op38,02-fa.krn,3jS5RqvjbgnThgzu9iqDD8
2,1,bach/violin,partita2-1.krn,2shAgoNd95hE6pAjAxxor7
3,1,bachcpe/vol01,Wq117-37.krn,0LSMmwJL5Nym0L7Vxt2FeH
4,1,beethoven/piano/sonata,sonata08-2.krn,4l5zWNPyET6PqBhxQEyKgt
5,1,bononcini,perlagloria.krn,2PhaOWw5VeChQCJcDBXpfD
6,1,brahms/op39,op39-02.krn,3j4FRAKjO93gupOyzvLjWM
7,1,buxtehude/op1,op1-3-1.krn,3xBWKAOczzfaLalXRzxntl
8,1,byrd,aveverum.krn,7J6b58JOnf4RGGqJmxq0bD
9,1,chopin/scherzo,scherzo2.krn,6OwavBgVS1N4lAXZ2zKUfc


Note: the instrument-family features are commented out for now because they are not calculated correctly for all files (some krn files have no instruments encoded).

In [7]:
#test code: insert composer name where it did not get populated from krn file
#first case: parse the Petrus de Cruce motet and list the metadata it provides
sPetrus=converter.parse('data/classical/petrus/annun.krn')
sPetrus.metadata.all()

[('attributed composer', 'Petrus de Cruce; Pierre de la Croix'),
 ('title', 'Motet: Aucun - Lonc tans - Aunnuntiantes')]

In [9]:
#set the composer explicitly: the file only provides an 'attributed composer' entry
sPetrus.metadata.composer='Petrus de Cruce'

In [10]:
#check that a 'composer' entry is now listed
sPetrus.metadata.all()

[('attributed composer', 'Petrus de Cruce; Pierre de la Croix'),
 ('composer', 'Petrus de Cruce'),
 ('title', 'Motet: Aucun - Lonc tans - Aunnuntiantes')]

In [11]:
#second case: parse a Susato dance and list the metadata it provides
sSusato=converter.parse('data/classical/susato/danserye/reihentaenze-07.krn')
sSusato.metadata.all()

[('arranger', 'Susato, Tielman'),
 ('electronic encoder', 'Craig Stuart Sapp'),
 ('movementNumber', '7. Herkulestanz'),
 ('parentTitle', 'Het Derde Mussyck Boexken, Vol 3'),
 ('title', 'Reihent&auml;nze Branlen')]

In [12]:
#the file names Susato only as 'arranger'; set him as composer and check the result
sSusato.metadata.composer='Susato, Tielman'
sSusato.metadata.all()

[('arranger', 'Susato, Tielman'),
 ('composer', 'Susato, Tielman'),
 ('electronic encoder', 'Craig Stuart Sapp'),
 ('movementNumber', '7. Herkulestanz'),
 ('parentTitle', 'Het Derde Mussyck Boexken, Vol 3'),
 ('title', 'Reihent&auml;nze Branlen')]

In [41]:
#calculate features and get metadata for each piece

#composer names to fill in for folders whose krn files have no 'composer' metadata entry
#(keys use '/' as the separator, matching the normalized folder computed in the loop)
composerOverrides={'susato/danserye':'Susato, Tielman',
                   'petrus':'Petrus de Cruce'}

featureList=[]
for index, row in workList.iterrows():
    if row['include']==1:
        #workList contains folders written with '\' as well as with '/'; normalize to '/'
        #so the path resolves on any OS and the override lookup matches either spelling
        #(the previous test against 'susato\danserye' also relied on the invalid escape '\d')
        folder=str(row['folder']).replace('\\','/')
        fp='data/classical/'+folder+'/' + row['file']
        print(fp)
        s = converter.parse(fp)

        #fill in the composer where the krn file does not provide one
        if folder in composerOverrides:
            s.metadata.composer=composerOverrides[folder]

        composer=s.metadata.composer
        title=s.metadata.title
        date=s.metadata.date
        #read from the work list but not yet appended to featureList
        spotifyID=row['spotifyID']

        #average number of independent voices sounding simultaneously
        avgNumVoices = features.jSymbolic.AverageNumberOfIndependentVoicesFeature(s).extract().vector[0]

        #key and type (e.g. C major)
        #NOTE: this rebinds the name `key`, shadowing the music21 `key` module from the star import
        key = s.analyze('key')

        #key type
        keyType=key.type

        #number of sharps or flats in the key signature; abs() drops the sign,
        #so flats (negative) and sharps (positive) are not distinguished
        sharpsOrFlats=abs(key.sharps)

        #initial tempo
        tempo = features.jSymbolic.InitialTempoFeature(s).extract().vector[0]

        #initial time signature (kept as the full vector, e.g. [3, 4])
        timeSig = features.jSymbolic.InitialTimeSignatureFeature(s).extract().vector

        #note density (average number of notes per second, taking local tempo into account)
        noteDensity = features.jSymbolic.NoteDensityFeature(s).extract().vector[0]

        #proportions of note onsets from different instrument families
        #note - this is not working for all the scores because some krn files do not have instrument labels
        #stringsP = features.jSymbolic.OrchestralStringsFractionFeature(s).extract().vector[0]
        #keysP = features.jSymbolic.StringKeyboardFractionFeature(s).extract().vector[0]
        #woodwindsP = features.jSymbolic.WoodwindsFractionFeature(s).extract().vector[0]
        #percussionP = features.jSymbolic.PercussionPrevalenceFeature(s).extract().vector[0]
        #TBD: vocals
        #featureList.append([composer,title,date, avgNumVoices, keyType, sharpsOrFlats, tempo,
        #                   timeSig, noteDensity, stringsP, keysP, woodwindsP, percussionP, spotifyID])

        #one row per work; the order must match the column list used to build the DataFrame
        featureList.append([composer,title,date, avgNumVoices, keyType, sharpsOrFlats, tempo,
                            timeSig, noteDensity])

data/classical/adam/fimaris.krn
data/classical/alkan/op38/02-fa.krn
data/classical/bach/violin/partita2-1.krn
data/classical/bachcpe/vol01/Wq117-37.krn
data/classical/beethoven/piano/sonata/sonata08-2.krn
data/classical/bononcini/perlagloria.krn
data/classical/brahms/op39/op39-02.krn
data/classical/buxtehude/op1/op1-3-1.krn
data/classical/byrd/aveverum.krn
data/classical/chopin/scherzo/scherzo2.krn
data/classical/clementi/op36/sonatina-36-3-1.krn
data/classical/dufay/omnes_amici.krn
data/classical/dunstable/veni.krn
data/classical/faure/apres.krn
data/classical/flecha/bomba.krn
data/classical/frescobaldi\canzoni/canzoni14.krn
data/classical/gabrieli/mysterium.krn
data/classical/gabrielia\contrafacta/coppini23.krn
data/classical/gibbons/silverswan.krn
data/classical/giovannelli\contrafacta/coppini15.krn
data/classical/grieg\op46/op46-4.krn
data/classical/handel/largo.krn
data/classical/haydn\keyboard\uesonatas/sonata33-3.krn
data/classical/haydnm\6menutets/menuet1.krn
data/classical/hof

In [42]:
#build the feature table: one row per processed work, columns in the same order as the featureList rows
featureColumns=['composer','title', 'date','avgNumVoices','keyTypes','sharpsOrFlats',
                'tempo','timeSig','noteDensity']
df=pd.DataFrame(data=featureList, columns=featureColumns)


In [43]:
#display the feature table
df

Unnamed: 0,composer,title,date,avgNumVoices,keyTypes,sharpsOrFlats,tempo,timeSig,noteDensity
0,Adam de la Halle,Fi Maris de vostre Amour,,3.0,minor,1,80.0,"[3, 4]",3.764706
1,"Alkan, Charles-Valentin","Fa, Op. 38, No. 2",,2.879581,minor,1,72.0,"[3, 8]",11.781818
2,"Bach, Johann Sebastian",Movement 1: Allemande,,1.0,minor,1,94.0,"[4, 4]",6.822581
3,"Bach, Carl Philipp Emanuel",La Gause,,1.694444,major,1,110.0,"[2, 4]",5.991071
4,"Beethoven, Ludwig van",Piano Sonata no. 8 in C minor,,1.561379,major,4,40.0,"[2, 4]",5.465753
5,"Bononcini, Giovanni",For the love my heart doth prize,,2.001898,major,2,80.0,"[3, 4]",4.703108
6,"Brahms, Johannes","Waltz in E Major, Op.39 No.2",,0.419355,major,4,172.0,"[3, 4]",6.652137
7,"Buxtehude, Dietrich","Sonata in A Minor, Op. 1, No. 3",,2.396226,minor,0,72.0,"[4, 4]",3.785714
8,"Byrd, William",Ave verum corpus,,3.260274,minor,0,120.0,"[4, 4]",5.041667
9,"Chopin, Frederic",Scherzo in B-flat Minor,,1.354289,major,7,300.0,"[3, 4]",10.08959


In [32]:
#replace special characters
#TBD: get these html codes to display correctly in p5.js instead of replacing them with English letters
#(html entity, plain-letter substitute) pairs, applied to the title column in this order
titleEntities=(('&egrave;', 'e'),
               ('&ecirc;', 'e'),
               ('&ouml;', 'o'),
               ('&szlig;','ss'),
               ('&auml;','ae'),
               ('&agrave;','a'))
for htmlEntity, plainText in titleEntities:
    df['title'] = df['title'].str.replace(htmlEntity, plainText)

In [33]:
#same substitution for the composer column: accented-letter html entities become plain letters
for htmlEntity, plainText in (('&aacute;','a'), ('&oacute;','o')):
    df['composer'] = df['composer'].str.replace(htmlEntity, plainText)

In [34]:
#display the feature table again after the special-character replacements
df

Unnamed: 0,composer,title,date,avgNumVoices,keyTypes,sharpsOrFlats,tempo,timeSig,noteDensity
0,Adam de la Halle,Fi Maris de vostre Amour,,3.0,minor,1,80.0,"[3, 4]",3.764706
1,"Alkan, Charles-Valentin","Fa, Op. 38, No. 2",,2.879581,minor,1,72.0,"[3, 8]",11.781818
2,"Bach, Johann Sebastian",Movement 1: Allemande,,1.0,minor,1,94.0,"[4, 4]",6.822581
3,"Bach, Carl Philipp Emanuel",La Gause,,1.694444,major,1,110.0,"[2, 4]",5.991071
4,"Beethoven, Ludwig van",Piano Sonata no. 8 in C minor,,1.561379,major,4,40.0,"[2, 4]",5.465753
5,"Bononcini, Giovanni",For the love my heart doth prize,,2.001898,major,2,80.0,"[3, 4]",4.703108
6,"Brahms, Johannes","Waltz in E Major, Op.39 No.2",,0.419355,major,4,172.0,"[3, 4]",6.652137
7,"Buxtehude, Dietrich","Sonata in A Minor, Op. 1, No. 3",,2.396226,minor,0,72.0,"[4, 4]",3.785714
8,"Byrd, William",Ave verum corpus,,3.260274,minor,0,120.0,"[4, 4]",5.041667
9,"Chopin, Frederic",Scherzo in B-flat Minor,,1.354289,major,7,300.0,"[3, 4]",10.08959


In [25]:
#export the feature table to CSV without the index column
#note: timeSig holds a list per row (e.g. [3, 4])
df.to_csv('features.csv',index=False)

In [26]:
#export the feature table to JSON as a list of records (one object per work)
df.to_json('features.json',orient='records')

In [45]:
#NOTE(review): disabled draft that walks a directory tree and samples krn files; kept for reference only.
#Before re-enabling:
#  - `path` is not defined anywhere in the visible notebook
#  - the condition `np.mod(i,100)==0 & i<600` does not apply the i<600 limit: `&` binds tighter
#    than the comparisons, so it is evaluated as np.mod(i,100) == (0 & i) < 600; use `and` instead
#filelist=[]
#i=1
#for (dirpath, dirnames, filenames) in os.walk(path):
#    for file in filenames:
#            name, ext = os.path.splitext(file)
#            if (ext == '.krn'):            
#                #filelist += [os.path.join(dirpath, file)]
#                i+=1
#                #select every 100th file for now (TBD: pick 1 at random per composer)
#                if (np.mod(i,100)==0 & i<600):
#                    filelist += [[i,dirpath,file]]
#                    s = converter.parse(dirpath + "/" + file)                  