In [1]:
from music21 import *

In [2]:
import numpy as np
import pandas as pd
import os

In [3]:
environment.set('autoDownload','allow')
environment.set("musescoreDirectPNGPath", "C:/Program Files/MuseScore 3/bin/MuseScore3.exe")
environment.set("musicxmlPath", "C:/Program Files/MuseScore 3/bin/MuseScore3.exe")

Read in one score to test the process

In [6]:
filepath='data/classical/chopin/prelude/'

In [7]:
s=converter.parse(filepath+'prelude28-01.krn')

In [37]:
features.jSymbolic.OverallDynamicRangeFeature(s).extract().vector

[0]

In [19]:
#average time between attacks in seconds between Note On events regardless of channel
fe = features.jSymbolic.AverageTimeBetweenAttacksFeature(s)
f = fe.extract()
f.vector

[0.11453488370930233]

In [23]:
#initial tempo
fe = features.jSymbolic.InitialTempoFeature(s)
f = fe.extract()
f.vector  

[80.0]

In [24]:
#note density (average number of notes per second, taking local tempo into account)
fe = features.jSymbolic.NoteDensityFeature(s)
f = fe.extract()
f.vector

[15.595959597534945]

In [26]:
#range (semitones) between highest and lowest pitch
fe = features.jSymbolic.RangeFeature(s)
fe.extract().vector

[55]

In [126]:
#woodwinds fraction
#fe = features.jSymbolic.WoodwindsFractionFeature(s)
#fe.extract().vector
features.jSymbolic.WoodwindsFractionFeature(s).extract().vector

[0.0]

In [123]:
#strings fraction
#fe = features.jSymbolic.OrchestralStringsFractionFeature(s)
#fe.extract().vector
features.jSymbolic.OrchestralStringsFractionFeature(s).extract().vector[0]

0.0

In [124]:
#percussion prevalence (proportion of Note Ons that are unpitched percussion)
#fe = features.jSymbolic.PercussionPrevalenceFeature(s)
#fe.extract().vector
features.jSymbolic.PercussionPrevalenceFeature(s).extract().vector[0]

0

In [125]:
#string keyboard prevalence (proportion of Note Ons that are unpitched percussion)
#fe = features.jSymbolic.StringKeyboardFractionFeature(s)
#fe.extract().vector[0]
features.jSymbolic.StringKeyboardFractionFeature(s).extract().vector[0]

1.0

In [132]:
s.metadata.title

'Piano Sonata no. 28 in A major'

In [133]:
s.metadata.composer

'Beethoven, Ludwig van'

In [8]:
s.metadata.date

'None'

Read in csv containing list of works (path, krn file name, and spotify ID)

In [4]:
workList = pd.read_excel("workList.xlsx")

In [5]:
workList

Unnamed: 0,include,folder,file,spotifyID
0,1,adam,fimaris.krn,0XKQIVQ97pPTJg2uDEivD4
1,0,alkan/op38,02-fa.krn,3jS5RqvjbgnThgzu9iqDD8
2,1,bach/violin,partita2-1.krn,2shAgoNd95hE6pAjAxxor7
3,0,bachcpe/vol01,Wq117-37.krn,0LSMmwJL5Nym0L7Vxt2FeH
4,1,beethoven/piano/sonata,sonata08-2.krn,4l5zWNPyET6PqBhxQEyKgt
5,1,bononcini,perlagloria.krn,2PhaOWw5VeChQCJcDBXpfD
6,0,brahms/op39,op39-02.krn,3j4FRAKjO93gupOyzvLjWM
7,1,buxtehude/op1,op1-3-1.krn,3xBWKAOczzfaLalXRzxntl
8,1,byrd,aveverum.krn,7J6b58JOnf4RGGqJmxq0bD
9,1,chopin/scherzo,scherzo2.krn,6OwavBgVS1N4lAXZ2zKUfc


Note: excluding some pieces because they are generating errors calculating the instrument families
(no instruments encoded in humdrum)

In [7]:
#calculate features and get metadata for each piece
featureList=[]
for index, row in workList.iterrows():
    if row['include']==1:
        fp='data/classical/'+row['folder']+'/' + row['file']
        print(fp)
        s = converter.parse(fp)
        composer=s.metadata.composer
        title=s.metadata.title
        date=s.metadata.date
        spotifyID=row['spotifyID']

        #average number of independent voices sounding simultaneously
        avgNumVoices = features.jSymbolic.AverageNumberOfIndependentVoicesFeature(s).extract().vector[0]

        #key and type (e.g. C major)
        key = s.analyze('key')
        
        #key type
        keyType=key.type
        
        #number of sharps or flats (flats will be negative, sharps positive)
        sharpsOrFlats=abs(key.sharps)

        #key signature (e.g. 1 sharp)
        #keySignature= s.getElementsByClass(stream.Part)[0].getElementsByClass(stream.Measure)[0].getElementsByClass(stream.key.KeySignature)[0]
        
        #initial tempo
        tempo = features.jSymbolic.InitialTempoFeature(s).extract().vector[0]
        
        #initial time signature
        timeSig = features.jSymbolic.InitialTimeSignatureFeature(s).extract().vector
        
        #note density (average number of notes per second, taking local tempo into account)
        noteDensity = features.jSymbolic.NoteDensityFeature(s).extract().vector[0]

        #proportions of note onsets from different instrument families
        stringsP = features.jSymbolic.OrchestralStringsFractionFeature(s).extract().vector[0]
        keysP = features.jSymbolic.StringKeyboardFractionFeature(s).extract().vector[0]
        woodwindsP = features.jSymbolic.WoodwindsFractionFeature(s).extract().vector[0]
        percussionP = features.jSymbolic.PercussionPrevalenceFeature(s).extract().vector[0]
        #TBD: vocals
        featureList.append([composer,title,date, avgNumVoices, keyType, sharpsOrFlats, tempo,
                            timeSig, noteDensity, stringsP, keysP, woodwindsP, percussionP, spotifyID])

data/classical/adam/fimaris.krn


NameError: name 'spotifyID' is not defined

In [25]:
#create dataframe, export to csv and JSON
df=pd.DataFrame(featureList, columns=['composer','title', 'date','avgNumVoices','keyTypes','sharpsOrFlats', 
                                      'tempo','timeSig','noteDensity','stringsP','keysP','woodwindsP','percussionP',
                                      'spotifyID'])
df.to_csv('features.csv',index=False)
df.to_json('features.json',orient='records')

In [45]:
#filelist=[]
#i=1
#for (dirpath, dirnames, filenames) in os.walk(path):
#    for file in filenames:
#            name, ext = os.path.splitext(file)
#            if (ext == '.krn'):            
#                #filelist += [os.path.join(dirpath, file)]
#                i+=1
#                #select every 100th file for now (TBD: pick 1 at random per composer)
#                if (np.mod(i,100)==0 & i<600):
#                    filelist += [[i,dirpath,file]]
#                    s = converter.parse(dirpath + "/" + file)                  