In [13]:
from music21 import *
import numpy as np
import pandas as pd
import os
import shutil

Read in csv containing list of works (path, krn file name, etc.)

In [14]:
workList = pd.read_excel("workList.xlsx")

In [15]:
#workList

In [None]:
#calculate features and get metadata for each piece
featureList=[]

for index, row in workList.iterrows():
    if row['include']==1:
        file=row['file']
        folder=row['folder']
        #location of original krn file
        fp='data/classical/' + folder + '/' + file
        print(fp)
        
        filename = folder.split("/")[0] + '_' + file
        #copy the ones we are using into the corpus folder with composer name as a prefix
        if os.path.exists("corpus/kernfiles/" + filename)==False:
            shutil.copyfile(fp, "corpus/kernfiles/" + filename)
        
        #parse file and get flattened score
        s = converter.parse(fp)
        sflat=s.flat
        
        #export midi
        #filename=file.split('.')[0]
        #filename = folder.split("/")[0] + '_' + file
        midiFileName = filename.split('.')[0] + '.mid'
        midifp='docs/midi/' + midiFileName
        if os.path.exists(midifp)==False:
            midifp=s.write('midi',midifp)
               
        ####manual edits
        #insert missing composer info
        #for Susato piece, insert missing instrumentation (SATB recorders from .krn not recognized)
        #music21 does not distinguish between type of recorder (SATB) so just assign all parts as recorder.
    
        if folder == 'susato/danserye' and file =='reihentaenze-07.krn':
            s.metadata.composer='Susato, Tielman'
            for p in s.parts.stream():
                p.insert(instrument.Recorder())
        elif folder =='petrus' and file =='annun.krn':
            s.metadata.composer='Petrus de Cruce'
      
          
        composer=s.metadata.composer
        title=s.metadata.title
        date=s.metadata.date
        spotifyID=row['spotifyID']

        #average number of independent voices sounding simultaneously
        avgNumVoices = round(features.jSymbolic.AverageNumberOfIndependentVoicesFeature(s).extract().vector[0],2)
        
        #key and type (e.g. C major)
        key = s.analyze('key')
        
        #key type
        keyType=key.type
        
        #number of sharps or flats (flats will be negative, sharps positive)
        sharpsOrFlats=abs(key.sharps)

        #initial tempo
        tempo = features.jSymbolic.InitialTempoFeature(s).extract().vector[0]
        
        #initial time signature
        timeSig = features.jSymbolic.InitialTimeSignatureFeature(s).extract().vector
        
        #note density (average number of notes per second, taking local tempo into account)
        noteDensity = round(features.jSymbolic.NoteDensityFeature(s).extract().vector[0],2)
        
        #1 if the initial meter is compound, 0 otherwise
        compoundMeter = features.jSymbolic.CompoundOrSimpleMeterFeature(s).extract().vector[0]
        
        #1 if there was at least one meter change, 0 otherwise
        meterChanges = features.jSymbolic.ChangesOfMeterFeature(s).extract().vector[0]
        
        #length of piece in minutes
        minutes = round(sflat.seconds/60,2)
        
        #number of parts
        nParts=len(s.parts)
        
        #list of parts as text
        partList=[]
        for p in s.parts.stream():
            partList.append(p.partName)
        #print(partList)
        
        #Create indicators for instrument families

        if len(sflat.getElementsByClass(instrument.StringInstrument))>0:
            strings=1
        else:
            strings=0
            
        if len(sflat.getElementsByClass(instrument.WoodwindInstrument))>0:
            woodwinds=1
        else:
            woodwinds=0
            
        if len(sflat.getElementsByClass(instrument.BrassInstrument))>0:
            brass=1
        else:
            brass=0
            
        if len(sflat.getElementsByClass(instrument.Vocalist))>0:
            voice=1
        else:
            voice=0
            
        if len(sflat.getElementsByClass(instrument.KeyboardInstrument))>0:
            keys=1
        else:
            keys=0
            
        if len(sflat.getElementsByClass(instrument.Percussion))>0:
            percussion=1
        else:
            percussion=0
              
        #this piece has a hurdy-gurdy, which is not a standard music21 instrument.
        #should it be keys or strings?
        if folder == 'vaqueiras' and file =='kalenda_maya.krn':
            keys=1;
        
        #print('strings:',strings,', keys:',keys,', woodwinds:',woodwinds,', percussion:',percussion,
        #     ', voice:',voice)
        
         
        #percentage of notes that are accidentals (max across parts)
        #range in semitones within each part (max across parts)
        p_accidentals_list=[]
        partRange_list=[]
        duration_list=[]
        
        for part in s.parts:
            acc=0
            notes=0
            partRange=part.analyze('ambitus').semitones
            partRange_list.append(partRange)
            for n in part.recurse().getElementsByClass('Note'):
                if n.pitch.accidental is None:
                    acc+=0
                else: 
                    acc+=1
                notes+=1
                duration_list.append(n.duration.quarterLength)
            if notes>0:
                p_accidentals_list.append(round(acc/notes,2))
            
        max_p_accidentals=max(p_accidentals_list)
        max_partRange=max(partRange_list)
        
        #find the proportion of notes with note length equal to the shortest duration in the piece
        #example: if shortest note in piece is a sixteenth note, find the proportion of notes that are sixteenths
        durationSeries = pd.Series(duration_list)
        vals, counts = np.unique(durationSeries, return_counts=True)
        p_shortestDuration = counts[np.argmin(vals)]/sum(counts)

        featureList.append([composer, title, date, minutes,
                            key, keyType, sharpsOrFlats, max_p_accidentals, max_partRange,
                            tempo, timeSig, compoundMeter, meterChanges, noteDensity, p_shortestDuration,
                            avgNumVoices, nParts, partList, strings, keys, woodwinds, percussion, brass, voice, 
                            filename, midiFileName])
    

data/classical/adam/fimaris.krn
data/classical/alkan/op38/02-fa_edited.krn


In [9]:
#create dataframe
df=pd.DataFrame(featureList, columns=['composer','title','date', 'avgNumVoices', 'maxNumVoices', 'key', 'keyType', 'sharpsOrFlats', 'tempo',
                            'timeSig', 'compoundMeter', 'meterChanges', 'noteDensity', 'partList', 'strings', 'keys', 
                            'woodwinds', 'percussion', 'brass', 'voice', 'filename', 'midiFileName'])
#df

In [10]:
#replace special characters
#TBD: get these html codes to display correctly in p5.js instead of replacing them with English letters 
df.title = df.title.str.replace('&egrave;', 'e') 
df.title = df.title.str.replace('&ecirc;', 'e') 
df.title = df.title.str.replace('&ouml;', 'o') 
df.title = df.title.str.replace('&szlig;','ss')
df.title = df.title.str.replace('&auml;','ae')
df.title = df.title.str.replace('&agrave;','a')
#df

In [11]:
df.composer=df.composer.str.replace('&aacute;','a')
df.composer=df.composer.str.replace('&oacute;','o')

In [14]:
#df

Unnamed: 0,composer,title,date,avgNumVoices,key,keyType,sharpsOrFlats,tempo,timeSig,compoundMeter,...,noteDensity,partList,strings,keys,woodwinds,percussion,brass,voice,filename,midiFileName
0,Adam de la Halle,Fi Maris de vostre Amour,,3.0,e minor,minor,1,80.0,"[3, 4]",0,...,3.764706,"[Voice, Voice, Voice]",0,0,0,0,0,1,fimaris,adam_fimaris.mid
1,"Alkan, Charles-Valentin","Fa, Op. 38, No. 2",,2.879581,d minor,minor,1,72.0,"[3, 8]",0,...,11.781818,"[Piano, Piano, Piano]",0,1,0,0,0,0,02-fa_edited,alkan_02-fa_edited.mid
2,"Bach, Johann Sebastian",Movement 1: Allemande,,1.0,d minor,minor,1,94.0,"[4, 4]",0,...,6.822581,[Violin],1,0,0,0,0,0,partita2-1,bach_partita2-1.mid
3,"Bach, Carl Philipp Emanuel",La Gause,,1.694444,F major,major,1,110.0,"[2, 4]",0,...,5.991071,"[Piano, Piano]",0,1,0,0,0,0,Wq117-37_edited,bachcpe_Wq117-37_edited.mid
4,"Beethoven, Ludwig van",Piano Sonata no. 8 in C minor,,1.561379,A- major,major,4,40.0,"[2, 4]",0,...,5.465753,"[Piano, Piano]",0,1,0,0,0,0,sonata08-2,beethoven_sonata08-2.mid
5,"Bononcini, Giovanni",For the love my heart doth prize,,2.001898,D major,major,2,80.0,"[3, 4]",0,...,4.703108,"[Voice, Piano, Piano]",0,1,0,0,0,1,perlagloria,bononcini_perlagloria.mid
6,"Brahms, Johannes","Waltz in E Major, Op.39 No.2",,0.419355,E major,major,4,172.0,"[3, 4]",0,...,6.652137,"[Piano, Piano, Piano]",0,1,0,0,0,0,op39-02_edited,brahms_op39-02_edited.mid
7,"Buxtehude, Dietrich","Sonata in A Minor, Op. 1, No. 3",,2.396226,a minor,minor,0,72.0,"[4, 4]",0,...,3.785714,"[Violin, Viola, Harpsichord]",1,1,0,0,0,0,op1-3-1,buxtehude_op1-3-1.mid
8,"Byrd, William",Ave verum corpus,,3.260274,a minor,minor,0,120.0,"[4, 4]",0,...,5.041667,"[Voice, Voice, Voice, Voice]",0,0,0,0,0,1,aveverum,byrd_aveverum.mid
9,"Chopin, Frederic",Scherzo in B-flat Minor,,1.354289,C# major,major,7,300.0,"[3, 4]",0,...,10.08959,"[Piano, Piano]",0,1,0,0,0,0,scherzo2,chopin_scherzo2.mid


In [12]:
df.to_csv('features.csv',index=False)

In [13]:
df.to_json('docs/features.json',orient='records')

OverflowError: Maximum recursion level reached