## Add harmony annotations to mscx-files (uncompressed MuseScore)

For harmony textfiles in the format

```
@piece: op.12_no.3

@key: E
@meter: 4/4

m1 1 .E.I
m2 1 IV
m2 3 vi
```
The annotations start in line 6; each annotation line has the form `measure beat label`. The first 5 lines are ignored.

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
from fractions import Fraction

# Conversion table: duration name -> note value as a fraction of a whole note.
# "measure" maps to 1.0, the same value as "whole"; every following name is
# half the value of the previous one.
durations = {
    "measure": 1.0,
    **{
        name: 1 / 2 ** power
        for power, name in enumerate(
            ("whole", "half", "quarter", "eighth", "16th", "32nd", "64th", "128th")
        )
    },
}

In [8]:
class Piece():
    """Parsed uncompressed MuseScore file (.mscx) that harmony labels can be added to.

    Attributes:
        measure: dict mapping a measure key to a dict with the keys
            'number', 'node', 'len', 'start' and 'events' (see __setitem__).
        timesig: Fraction holding the last time signature that was detected.
        len: accumulated length of all stored measures in ticks
            (quarter note = 480 ticks, whole note = 1920 ticks).
        tuplets: dict mapping a tuplet id to its scaling fraction
            (normalNotes / actualNotes).
        pickup: True if the second measure carries the number "1".
        soup: BeautifulSoup object representing the XML structure to be altered.
    """

    def __init__(self, xml_file):
        """Parse xml_file and index the measures of the first staff.

        Raises whatever open() raises for an unreadable path, and IndexError
        if the file holds fewer than two <Staff id="1"> tags.
        """
        self.measure = {}           # dict to access information about each measure
        self.timesig = Fraction()   # value of the last detected time signature
        self.len = 0                # length of the piece in ticks
        self.tuplets = {}           # dict with id: tuplet fraction

        # mscx files are XML; read them as UTF-8 instead of the platform default
        with open(xml_file, 'r', encoding='utf-8') as file:
            self.soup = BeautifulSoup(file.read(), 'xml')

        # save all tuplets
        for x in self.soup.find_all("Tuplet", id=True):
            ratio = Fraction(int(x.normalNotes.string), int(x.actualNotes.string))
            parent = x.find('Tuplet')
            if parent is not None:
                # nested tuplet: multiply by the fraction of the parent tuplet,
                # which has to be stored already
                ratio *= self.tuplets[parent.string]
            self.tuplets[x['id']] = ratio

        # Measure nodes of the first staff ([0] would be the tag within the <part> declaration)
        measures = self.soup.find_all("Staff", id="1")[1].find_all('Measure')
        # With a pickup measure the first two measures both have number "1".
        # The length check keeps one-measure scores from raising IndexError.
        self.pickup = len(measures) > 1 and measures[1]['number'] == "1"

        for i, x in enumerate(measures):
            if x.find('TimeSig'):
                self.timesig = Fraction(int(x.find('sigN').string), int(x.find('sigD').string))

            # without pickup, measure 1 is stored under key 1 instead of 0
            key = i if self.pickup else i + 1
            self[key] = x  # calls __setitem__, which fills the measure dictionary

    def __getitem__(self, key):
        """Return the information dict stored for measure `key`."""
        return self.measure[key]

    @staticmethod
    def _dot_scalar(dots):
        """Return the exact duration factor for `dots` augmentation dots (1 -> 3/2, 2 -> 7/4)."""
        return sum(Fraction(1, 2 ** k) for k in range(dots + 1))

    def __setitem__(self, key, node):
        """Analyse the <Measure> tag `node` and store the result under `key`.

        Stores the measure's number, node, length and onset (both in ticks)
        together with its events (rests and chords), keyed by their offset
        from beat 1 in ticks. Adds the measure's length to self.len.
        """
        events = {}        # all the rests and chords in this measure
        pos = Fraction(0)  # pointer in ticks, 0 is beat 1 of this measure; kept exact

        try:
            # measure has an explicit length (e.g. pickup measure)
            length = int(Fraction(node['len']) * 1920)
        except (KeyError, ValueError, ZeroDivisionError):
            # no usable 'len' attribute: fall back to the current time signature
            length = int(self.timesig * 1920)

        for i, x in enumerate(node.find_all(['Rest', 'Chord'])):
            if x.find("track"):
                continue  # events containing a <track> tag are skipped (secondary voices)
            dur_tag = x.find('durationType')
            value = dur_tag.string  # note value in words such as "half"
            # NOTE(review): presumably the first previous_sibling is the whitespace
            # between the tags and the second one a potential <dots> tag - confirm
            prev = dur_tag.previous_sibling.previous_sibling
            if prev and prev.name == 'dots':
                sca = self._dot_scalar(int(prev.string))
            else:
                sca = Fraction(1)
            tuplet_ref = x.find('Tuplet')
            if tuplet_ref is not None:
                sca *= self.tuplets[tuplet_ref.string]  # event belongs to a tuplet
            # Exact arithmetic: summing float durations and truncating with int()
            # can end up one tick short for tuplets such as septuplets.
            duration = Fraction(durations[value]) * 1920 * sca
            events[int(pos)] = {  # the key is the offset from beat 1 in ticks
                'i': i,           # event's index (not used)
                'node': x,        # needed to insert a harmony label
                'type': x.name,   # Rest or Chord? (not needed)
                'value': value,
                'duration': float(duration)
                }
            pos += duration  # move the pointer to the beginning of the next event

        self.measure[key] = {
            'number': node['number'],
            'node': node,       # harmonies that cannot be linked to an event are inserted here
            'len': length,      # in ticks
            'start': self.len,  # measure's onset in ticks
            'events': events
            }
        self.len += length

    def add_harmony(self, m, b, h):
        """Insert the harmony label `h` at beat `b` (quarter beats) of measure `m`.

        Returns a tuple (tick, tag): the label's offset from the beginning of
        the piece in ticks and the newly created <Harmony> tag.
        Raises KeyError if measure `m` has not been stored.
        """
        beat = int((b - 1) * 480)  # the beat's offset from beat 1 in ticks
        measure = self.measure[m]
        tick = measure['start'] + beat  # offset from the beginning of the piece
        events = measure['events']

        # create the new <Harmony><name>h</name></Harmony> structure
        h_tag = self.soup.new_tag("Harmony")
        h_name = self.soup.new_tag("name")
        h_name.string = h
        h_tag.append(h_name)

        if beat in events:
            # an event starts at this beat: attach the harmony to it
            events[beat]['node'].insert_before(h_tag)
            return tick, h_tag

        # no event at this beat: an additional <tick>offset</tick> tag is created
        t_tag = self.soup.new_tag("tick")
        t_tag.string = str(tick)
        # NOTE(review): the event keys are offsets within the measure whereas
        # `tick` counts from the beginning of the piece, so for measures with
        # start > 0 this list is mostly empty and the label ends up at the end
        # of the measure. Kept as is, because inserting in front of a later
        # event changes where the tags are placed in the file - confirm the
        # intended behaviour before changing the comparison.
        events_after = [offset for offset in events if offset > tick]
        if events_after:
            # insert before the earliest of these events
            events[min(events_after)]['node'].insert_before(h_tag)
        else:
            # empty measure or nothing follows: insert at the end of the measure
            measure['node'].append(h_tag)
        h_tag.insert_before(t_tag)

        return tick, h_tag

    def add_space(self, s):
        """Insert <harmonyY>s</harmonyY> as first child of the first <Style> tag."""
        space = self.soup.new_tag("harmonyY")
        space.string = str(s)
        self.soup.find("Style").insert(0, space)

    def dump(self, filename):
        """Write the altered XML structure to `filename` and return the markup.

        Every tag is stringified and replaced by a format placeholder before
        prettify() is called, so that <opening>and</closing> are written into
        the same line; the placeholders are then filled in again.
        NOTE(review): the tags of self.soup are replaced in place, so the
        object is presumably not usable after this call - confirm.
        """
        unformatted_tag_list = []
        for i, tag in enumerate(self.soup.find_all()):
            unformatted_tag_list.append(str(tag))
            tag.replace_with('{' + 'unformatted_tag_list[{0}]'.format(i) + '}')
        pretty_markup = self.soup.prettify().format(unformatted_tag_list=unformatted_tag_list)

        # write as UTF-8 instead of the platform default encoding
        with open(filename, "w", encoding="utf-8") as file:
            file.write(pretty_markup)
        return pretty_markup

In [9]:
def merge(score,harmonies,target, spacing = 0, eighths=False):
    """Write a copy of an mscx file with the harmony labels of a text file added.

    Every inserted label is printed as a (tick, tag) tuple.

    Args:
        score: mscx file to be annotated.
        harmonies: text file with harmonic annotations (`measure beat label`).
        target: new file to write to.
        spacing: optional value adding space between the labels and the upper
            system (good values go from 5 to approx. 8); 0 adds nothing.
        eighths: should be True if the annotations work with eighth beats.
    """
    with open(harmonies, 'r') as file:
        # skiprows=4 means that line 6 will be the first row of the dataframe.
        # A warning is issued for each bad line, i.e. where the harmony symbol
        # is followed by additional information without comment sign, and the
        # line is skipped.
        # on_bad_lines replaces error_bad_lines/warn_bad_lines, which were removed
        # in pandas 2.0; sep=r'\s+' replaces the deprecated delim_whitespace=True.
        an = pd.read_table(file, sep=r'\s+', header=None, names=['measure','beat','label'],
                           skiprows=4, comment='?', on_bad_lines='warn')

    an['measure'] = an['measure'].apply(lambda x: int(x[1:])) # delete the m

    if eighths:
        an['beat'] = an['beat'].apply(lambda x: (x - 1) / 2 + 1) # convert eighth beats into quarter beats

    s = Piece(score)
    for m, b, h in an.itertuples(index = False, name=None):
        print(s.add_harmony(m, b, h))
    if spacing != 0:
        s.add_space(spacing)
    s.dump(target)

### Before use

* [Score] If the score has a pickup measure, make sure it is not counted, so that the first full measure has number 1
* [Labels] Check whether the text file has the correct initial symbol in measure 1, e.g. `.F.I`
* [Labels] Replace all `"` (old-style organ point) with the correct notation with `[` and `]`
* [Labels] Correct the text file so it doesn't contain a 4th column. The only 4th column allowed would be after the comment symbol `?`. That is to say harmonies will be ignored if the line holds more information than `measure beat label ?(optional comment)`.
* [Labels] Change all comments (currently marked with `#`) so that they start with `?`

In [14]:
# Annotate the pieces no. 1 to 8 of the opus and write each result to a "_labelled" copy
op = "12"
for nr in (str(number) for number in range(1, 9)):
    score_file = f"op{op}-{nr}.mscx"
    label_file = f"../Annotations/op{op}No{nr}.txt"
    labelled_file = f"op{op}-{nr}_labelled.mscx"
    merge(score_file, label_file, labelled_file)
#s = Piece("op12-8.mscx")

(0, <Harmony><name>.Eb.I[I</name></Harmony>)
(960, <Harmony><name>viio2(4)</name></Harmony>)
(1200, <Harmony><name>viio2(2)</name></Harmony>)
(1440, <Harmony><name>viio2</name></Harmony>)
(2880, <Harmony><name>I(9)</name></Harmony>)
(3120, <Harmony><name>I(7)</name></Harmony>)
(3360, <Harmony><name>I]</name></Harmony>)
(3840, <Harmony><name>vi(#6)</name></Harmony>)
(4320, <Harmony><name>iii6(6)</name></Harmony>)
(4800, <Harmony><name>vi64(#6)</name></Harmony>)
(5280, <Harmony><name>iii64(6)</name></Harmony>)
(5760, <Harmony><name>vi(#6)</name></Harmony>)
(6240, <Harmony><name>iii6(6)</name></Harmony>)
(6720, <Harmony><name>vi64(#6)</name></Harmony>)
(7200, <Harmony><name>iii64(6)</name></Harmony>)
(7680, <Harmony><name>vi(#6)</name></Harmony>)
(7920, <Harmony><name>vi7</name></Harmony>)
(8640, <Harmony><name>#viio2/iii</name></Harmony>)
(9120, <Harmony><name>I</name></Harmony>)
(9600, <Harmony><name>IV(6)</name></Harmony>)
(9840, <Harmony><name>IVM7</name></Harmony>)
(10560, <Harmony><

(0, <Harmony><name>.E.I</name></Harmony>)
(480, <Harmony><name>IV</name></Harmony>)
(1440, <Harmony><name>vi</name></Harmony>)
(2400, <Harmony><name>IV(6)</name></Harmony>)
(3360, <Harmony><name>V</name></Harmony>)
(4320, <Harmony><name>I\\</name></Harmony>)
(6240, <Harmony><name>@none</name></Harmony>)
(8160, <Harmony><name>IV</name></Harmony>)
(8640, <Harmony><name>viio6</name></Harmony>)
(9120, <Harmony><name>vi</name></Harmony>)
(10080, <Harmony><name>IV(6)</name></Harmony>)
(10560, <Harmony><name>ii7</name></Harmony>)
(11040, <Harmony><name>V7(4)</name></Harmony>)
(11520, <Harmony><name>V7</name></Harmony>)
(12000, <Harmony><name>I\\</name></Harmony>)
(13920, <Harmony><name>iii</name></Harmony>)
(14400, <Harmony><name>iii7</name></Harmony>)
(14880, <Harmony><name>vi(9)</name></Harmony>)
(15360, <Harmony><name>vi7</name></Harmony>)
(15840, <Harmony><name>vi</name></Harmony>)
(16320, <Harmony><name>V7/V</name></Harmony>)
(16800, <Harmony><name>V\\</name></Harmony>)
(17760, <Harmony>

FileNotFoundError: [Errno 2] No such file or directory: 'op12-5.mscx'

In [None]:
# Show the 'events' entry stored under key 4 of `s`.
# NOTE(review): assumes `s` is a Piece created in this session, e.g. by the
# commented-out assignment in the previous cell - confirm.
s[4]['events']