In [1]:
import xml, xmltodict
from xml.parsers.expat import ExpatError
from operator import itemgetter


# Nominal number of measures that make up one part of an exercise.
NUM_MEASURES_PER_PART = 4


def parse_one_measure(measure, measure_dur):
    """Render the notes of one parsed MusicXML measure as an annotation string.

    Parameters
    ----------
    measure : dict
        One measure as produced by ``xmltodict``. Its ``'note'`` entry may be
        missing, ``None``, a single note dict, or a list of note dicts. Every
        note must provide ``'@default-x'`` and ``'duration'``.
    measure_dur : int
        Duration of a full measure, in the same units as the notes'
        ``'duration'`` values.

    Returns
    -------
    str
        Space-separated ``label:figure`` tokens ordered by ``@default-x``.
        ``label`` is ``display-step + display-octave`` for a note carrying an
        ``'unpitched'`` entry, with simultaneous hits joined by ``'.'``; any
        other note is rendered as ``'X'``. ``figure`` is
        ``measure_dur // duration``. An empty string is returned when the
        measure has no notes.

    Raises
    ------
    KeyError
        If a note lacks ``'@default-x'`` or ``'duration'``.
    """
    # A measure without any <note> element simply has nothing to render.
    notes = measure.get('note')
    if not notes:
        return ''
    # xmltodict yields a bare dict for a single <note> and a list for several;
    # wrap the single case so both go through the same code path.
    if not isinstance(notes, list):
        notes = [notes]

    # Each event is [time_position, label, figure]; at most one per position.
    events = []
    for note in notes:
        time_position = int(note['@default-x'])
        is_hit = 'unpitched' in note
        if is_hit:
            unpitched = note['unpitched']
            label = unpitched['display-step'] + unpitched['display-octave']
        else:
            label = 'X'
        figure = measure_dur // int(note['duration'])

        existing = next((ev for ev in events if ev[0] == time_position), None)
        if existing is None:
            events.append([time_position, label, figure])
        elif is_hit:
            if existing[1] != 'X':
                # Simultaneous hit: extend the label, keep the first figure.
                existing[1] = existing[1] + '.' + label
            else:
                # A hit takes over a slot previously filled by an 'X' entry.
                existing[1] = label
                existing[2] = figure
        # A non-hit landing on an occupied position leaves the slot untouched.

    events.sort(key=itemgetter(0))
    return ' '.join('{}:{}'.format(ev[1], ev[2]) for ev in events)


def ConvertXML2Annotation(infilename, init_silence_measure=0):
    """Convert a single-part MusicXML file into an annotation dictionary.

    Parameters
    ----------
    infilename : str
        Path of the MusicXML file; it is read as UTF-8 and parsed with
        ``xmltodict``.
    init_silence_measure : int, optional
        Number of leading measures that are not parsed. Each of them is
        represented by an empty list in ``"interpretation"`` and is left out
        of ``"parts"``.

    Returns
    -------
    dict
        ``"title"``: the work title; ``"meter"``: ``"beats/beat-type"`` of the
        first measure; ``"bpm"``: the metronome value as found in the file;
        ``"parts"``: the written measures grouped at every measure (other than
        the first) that carries a ``print`` element; ``"interpretation"``: the
        measures with repeat sections expanded; ``"beats"``: timestamps
        ``i * 60 / bpm`` covering every beat of the interpretation plus two
        extra measures' worth of beats.

    Raises
    ------
    ValueError
        If the first measure holds several directions and none of them
        carries a metronome mark.
    KeyError
        If the document lacks one of the elements that are read
        unconditionally (title, time signature, divisions, direction, ...).
    """
    # LOAD XML FILE
    with open(infilename, encoding='utf-8') as fd:
        doc = xmltodict.parse(fd.read())

    DATA = dict()

    # PARSE TITLE INFO
    DATA["title"] = doc['score-partwise']['work']['work-title']

    score_measures = doc['score-partwise']['part']['measure']
    first_measure = score_measures[0]

    # PARSE METER INFO
    meter_nom = first_measure['attributes']['time']['beats']
    meter_denom = first_measure['attributes']['time']['beat-type']
    DATA["meter"] = meter_nom + '/' + meter_denom

    # PARSE TEMPO (BPM) INFO
    tempo_dur = int(first_measure['attributes']['divisions'])
    measure_dur = tempo_dur * int(meter_nom)  # length of one measure in divisions

    # A single <direction> is parsed as a mapping, several as a list. Test with
    # isinstance: depending on the xmltodict version the mapping is an
    # OrderedDict or a plain dict, and OrderedDict is a dict subclass.
    directions = first_measure['direction']
    BPM = None
    if isinstance(directions, dict):
        BPM = directions['direction-type']['metronome']['per-minute']
    else:
        for direction in directions:
            if 'metronome' in direction['direction-type'].keys():
                BPM = direction['direction-type']['metronome']['per-minute']
                break
    if BPM is None:
        raise ValueError(
            "no metronome mark found in the first measure of {!r}".format(infilename))

    DATA["bpm"] = BPM
    BPM = int(BPM)

    # number of measures in the exercise
    num_measures = len(score_measures)

    # PARSE PARTS INFO
    parts = []
    # Skipped leading measures are kept as empty placeholders in the interpretation.
    measures = [[] for _ in range(init_silence_measure)]
    printable_measures = []

    repetition_measure = False  # True while inside a measure-repeat section
    init_loop = -1              # index where the currently open repeat section starts
    acc_loop_counter = 0        # measures already added to `measures` by repeat expansion
    newline_idx_list = []       # document indices of measures carrying a 'print' element

    for i in range(init_silence_measure, num_measures):
        actual_measure = score_measures[i]

        if i > 0 and 'print' in actual_measure:
            newline_idx_list.append(i)

        if 'attributes' not in actual_measure:
            if 'barline' in actual_measure:
                barline = actual_measure['barline']
                bar_style = barline['bar-style']
                if bar_style == 'light-heavy':
                    if 'repeat' in barline:
                        if repetition_measure:
                            measures.append(measures[-repetition_acc])
                            printable_measures.append(printable_measures[-repetition_acc])
                        else:
                            measure = parse_one_measure(actual_measure, measure_dur)
                            measures.append(measure)
                            printable_measures.append(measure)
                        # Play the repeat section a second time. The start index is
                        # shifted by the measures earlier expansions already added.
                        loop_body = measures[(init_loop + acc_loop_counter):]
                        measures = measures + loop_body
                        acc_loop_counter = acc_loop_counter + len(loop_body)
                        init_loop = -1
                        continue
                    else:
                        # Final barline without repeat: last measure of the piece.
                        measure = parse_one_measure(actual_measure, measure_dur)
                        measures.append(measure)
                        printable_measures.append(measure)
                        break
                elif bar_style == 'light-light':
                    # A double barline closes any running measure-repeat section.
                    if repetition_measure:
                        measures.append(measures[-repetition_acc])
                        printable_measures.append(printable_measures[-repetition_acc])
                        repetition_measure = False
                        continue
                    measure = parse_one_measure(actual_measure, measure_dur)
                    measures.append(measure)
                    printable_measures.append(measure)
                    repetition_measure = False
                    continue

            if repetition_measure:
                measures.append(measures[-repetition_acc])
                printable_measures.append(printable_measures[-repetition_acc])
                continue

        elif 'measure-style' in actual_measure['attributes'].keys():
            measure_style = actual_measure['attributes']['measure-style']
            if not isinstance(measure_style, list):
                repeat_info = measure_style['measure-repeat']
                if repeat_info['@type'] == 'start':
                    repetition_measure = True
                    repetition_acc = int(repeat_info['#text'])
                if repeat_info['@type'] == 'stop':
                    repetition_measure = False
            else:
                # Several measure-style entries: the first one is taken as the
                # start of a measure-repeat section.
                repetition_measure = True
                repetition_acc = int(measure_style[0]['measure-repeat']['#text'])

        elif 'time' in actual_measure['attributes'].keys():
            # NOTE(review): measure_dur is not recomputed after a meter change --
            # confirm whether that is intended.
            meter_nom = actual_measure['attributes']['time']['beats']
            meter_denom = actual_measure['attributes']['time']['beat-type']

        if repetition_measure:
            measures.append(measures[-repetition_acc])
            # Index the written measures themselves: `measures` has just grown,
            # so indexing it here would pick a shifted element.
            printable_measures.append(printable_measures[-repetition_acc])
        else:
            # if measure is not a repetition
            measure = parse_one_measure(actual_measure, measure_dur)
            measures.append(measure)
            printable_measures.append(measure)

        if 'barline' in actual_measure:
            barline = actual_measure['barline']
            bar_style = barline['bar-style']
            if bar_style == 'heavy-light':
                init_loop = i
            if bar_style == 'light-heavy':
                if 'repeat' in barline:
                    measures = measures + measures[init_loop:]
                    init_loop = -1
                else:
                    break

    # Split the written measures at every recorded 'print' position.
    # NOTE(review): the recorded positions are document indices, whereas
    # printable_measures omits the skipped leading measures -- confirm the
    # split points are correct when init_silence_measure > 0.
    init = 0
    for end in newline_idx_list:
        parts.append(printable_measures[init:end])
        init = end
    parts.append(printable_measures[init:])

    DATA["parts"] = parts
    DATA["interpretation"] = measures

    beats_per_measure = int(meter_nom)
    num_beats = len(measures) * beats_per_measure + 2 * beats_per_measure
    DATA['beats'] = [beat * 60 / BPM for beat in range(num_beats)]
    return DATA

In [None]:
from scripts.utils import get_files_in_dir 
import json

# Convert every Grade 0 MusicXML exercise into a JSON annotation file.
annotations_folder = 'data/MusicSchool/Grade0/annotations/'
xml_dir = 'data/MusicSchool/Grade0/annotations/musicxml/'
json_dir = 'data/MusicSchool/Grade0/annotations/json/'
xml_files = get_files_in_dir(xml_dir)
# Leading measures to skip for each file, matched to xml_files by position.
# NOTE(review): this relies on the order returned by get_files_in_dir -- confirm it is stable.
init_silence = [0,0,0,0,2,0,0,0]
for i, file in enumerate(xml_files):
    song = ConvertXML2Annotation(file, init_silence[i])
    # File name: the title text before the first '-', minus its last character.
    title_prefix = song['title'].split('-')[0][:-1]
    out_path = annotations_folder + 'json/results/' + title_prefix + '.json'
    with open(out_path, 'w') as outfile:
        json.dump(song, outfile)

In [None]:
# Convert every Grade 1 MusicXML exercise into a JSON annotation file.
annotations_folder = 'data/MusicSchool/Grade1/annotations/'
xml_dir = 'data/MusicSchool/Grade1/annotations/musicxml/'
json_dir = 'data/MusicSchool/Grade1/annotations/json/'
xml_files = get_files_in_dir(xml_dir)
# Leading measures to skip for each file, matched to xml_files by position.
# NOTE(review): this relies on the order returned by get_files_in_dir -- confirm it is stable.
init_silence = [1,0,0,0,0,4,1,3]
for i, file in enumerate(xml_files):
    song = ConvertXML2Annotation(file, init_silence[i])
    # File name: the title text before the first '-', minus its last character.
    title_prefix = song['title'].split('-')[0][:-1]
    out_path = annotations_folder + 'json/results/' + title_prefix + '.json'
    with open(out_path, 'w') as outfile:
        json.dump(song, outfile)