In [108]:
import re
from fractions import Fraction

import numpy as np

In [109]:
def read_file(file_name):
    """Return the whole content of ``file_name`` as a string.

    The file is decoded explicitly as UTF-8 instead of relying on the
    platform's default encoding, so the result does not depend on the
    machine's locale and matches the UTF-8 decoding applied to archive
    members elsewhere in this notebook.

    Args:
        file_name: Path of the text file to read.

    Returns:
        The decoded file content.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_name, 'r', encoding='utf-8') as f:
        return f.read()

In [110]:
# Load 'test.mei' as one string; the cells below pass `file_content` to
# find_measures and find_score_def.
file_content = read_file('test.mei')

def find_measures(file_content):
    """Collect every ``<measure ... measure>`` span found in ``file_content``.

    The match is non-greedy, so each span starts at ``<measure`` and stops at
    the first ``measure>`` that follows it (newlines included).

    Returns:
        A list of matched substrings, in document order.
    """
    measure_pattern = re.compile(r'<measure[\s\S]*?measure>')
    return [hit.group(0) for hit in measure_pattern.finditer(file_content)]

# Raw text of every measure found in the loaded file.
measures = find_measures(file_content)

In [111]:
def extract_measure_content(measure):
    """Return the beams, notes, rests and multi-rests of one measure, in order.

    Each returned item is the raw text of one element:

    * ``<beam>...</beam>`` (kept whole; the notes inside are not listed again),
    * ``<note .../>`` or ``<note ...>...</note>``,
    * ``<rest .../>`` or ``<rest ...>...</rest>``,
    * ``<multiRest .../>`` or ``<multiRest ...>...</multiRest>``.

    Args:
        measure: Text of a single ``<measure>`` element.

    Returns:
        A list of element strings in document order.
    """
    # ``[^>]*`` keeps a self-closing match inside its own tag: with ``.*?/>`` a
    # note that has children (or an explicit ``</note>``) was extended up to
    # the ``/>`` of a *later* element on the same line, merging or swallowing
    # the elements in between. ``[^>]*`` also spans newlines, so attributes
    # spread over several lines are handled.
    # ``\b`` stops ``<beam`` from matching ``<beamSpan`` and similar names.
    pattern = (
        r'<beam\b[\s\S]*?</beam>'
        r'|<note\b[^>]*/>'
        r'|<note\b[^>]*>[\s\S]*?</note>'
        r'|<rest\b[^>]*/>'
        r'|<rest\b[^>]*>[\s\S]*?</rest>'
        r'|<multiRest\b[^>]*/>'
        r'|<multiRest\b[^>]*>[\s\S]*?</multiRest>'
    )
    return re.findall(pattern, measure)

In [112]:
# Octave marks appended after the pitch letter, keyed by the integer `oct`
# value: commas below 5, nothing at 5, apostrophes above.
octaves = {
    octave: ',' * (5 - octave) if octave < 5 else "'" * (octave - 5)
    for octave in range(8)
}

# Length suffix keyed by the integer `dur` value, counted in sixteenths
# (16 -> 1); a 32nd is half a unit and is written '/'.
duration_mapping = {32: '/'}
duration_mapping.update({dur: 16 // dur for dur in (16, 8, 4, 2, 1)})

# Accidental prefix keyed by the `accid` value (s, f, n).
accid_map = dict(s='^', f='_', n='=')

In [113]:
def parse_note(note):
    """Convert one MEI ``<note>`` element (as text) into an ABC note token.

    The token has the form ``[accidental]pname[octave marks]length`` where the
    length is counted in sixteenth notes (see ``duration_mapping``).

    Dotted notes: every augmentation dot adds half of the previously added
    value, i.e. the base length is multiplied by ``2 - 1/2**dots`` (x3/2 for
    one dot, x7/4 for two). Lengths that are not whole numbers are written as
    an ABC fraction such as ``3/2``.

    Args:
        note: Text of a ``<note>`` element carrying ``pname``, ``oct`` and
            ``dur`` attributes, optionally ``dots`` and ``accid``.

    Returns:
        The ABC token as a string.

    Raises:
        AttributeError: If ``pname``, ``oct`` or ``dur`` is missing.
        ValueError: If ``oct`` or ``dur`` is not an integer.
        KeyError: If ``oct`` or ``dur`` has no entry in the lookup tables.
    """
    value = re.search(r'pname="([^"]*)"', note).group(1)
    octave = octaves[int(re.search(r'oct="([^"]*)"', note).group(1))]
    dur = int(re.search(r'dur="([^"]*)"', note).group(1))
    duration = duration_mapping[dur]

    # Handle dotted notes. The length is recomputed exactly from `dur` rather
    # than by scaling the table entry: the previous int(duration * 1.5)
    # truncated a dotted 16th to 1, failed on the '/' entry used for 32nds,
    # and treated any number of dots (including dots="0") as a single dot.
    dots_match = re.search(r'\bdots="(\d+)"', note)
    dots = int(dots_match.group(1)) if dots_match else 0
    if dots:
        # str(Fraction) yields '6' for whole values and '3/2' otherwise.
        duration = str(Fraction(16, dur) * (2 - Fraction(1, 2 ** dots)))

    # Handle sharps and flats; accidental values missing from accid_map are
    # dropped (empty prefix).
    accid_match = re.search(r'accid="([^"]*)"', note)
    if accid_match:
        value = f"{accid_map.get(accid_match.group(1), '')}{value}"

    return f"{value}{octave}{duration}"

In [114]:
def _rest_length(rest):
    """Return the ABC length (in sixteenths) of an MEI ``<rest>`` string, honouring ``dots``."""
    dur = int(re.search(r'dur="([^"]*)"', rest).group(1))
    length = duration_mapping[dur]

    dots_match = re.search(r'\bdots="(\d+)"', rest)
    dots = int(dots_match.group(1)) if dots_match else 0
    if dots:
        # Each dot adds half of the previously added value: x3/2, x7/4, ...
        # str(Fraction) yields '6' for whole values and '3/2' otherwise.
        length = str(Fraction(16, dur) * (2 - Fraction(1, 2 ** dots)))
    return length


def get_measures_labels(measures):
    """Translate each measure into a list of ABC tokens.

    For every measure the elements returned by ``extract_measure_content`` are
    converted as follows:

    * ``<beam>``: the notes inside are parsed with ``parse_note`` and
      concatenated into a single token (no spaces);
    * ``<note>``: one token from ``parse_note``;
    * ``<rest>``: ``z`` followed by its length (dotted rests included);
    * ``<multiRest>``: ``Z`` followed by its ``num`` attribute.

    Args:
        measures: Iterable of measure strings.

    Returns:
        A dict mapping the measure's position (0-based) to its token list.
    """
    # A note is either self-closing or has children. ``[^>]*`` keeps the
    # self-closing form inside a single tag, and trying it first prevents a
    # self-closing note from being merged with a later ``...</note>``.
    note_pattern = re.compile(r'<note\b[^>]*/>|<note\b[^>]*>[\s\S]*?</note>')
    measures_content = {}

    for i, measure in enumerate(measures):
        measure_notes = []

        for symbol in extract_measure_content(measure):
            if symbol.startswith('<beam'):
                beam_notes = note_pattern.findall(symbol)
                measure_notes.append(''.join(parse_note(n) for n in beam_notes))

            elif symbol.startswith('<note'):
                measure_notes.append(parse_note(symbol))

            elif symbol.startswith('<rest'):
                measure_notes.append(f'z{_rest_length(symbol)}')

            elif symbol.startswith('<multiRest'):
                duration = re.search(r'num="([^"]*)"', symbol).group(1)
                measure_notes.append(f'Z{duration}')

        measures_content[i] = measure_notes

    return measures_content


In [115]:
# "gammes" is French for "scales": key name for each `key.sig` value
# (number of sharps 's' or flats 'f').
gammes = {'0':'C', '1s':'G', '2s':'D', '3s':'A', '4s':'E', '5s':'B', '6s':'F#', '7s':'C#', '1f':'F', '2f':'Bb', '3f':'Eb', '4f':'Ab', '5f':'Db', '6f':'Gb', '7f':'Cb'}


def _first_attr(name, text):
    """Return the value of the first ``name="..."`` attribute in ``text``, or '' if absent."""
    match = re.search(rf'{re.escape(name)}="([^"]*)"', text)
    return match.group(1) if match else ''


def find_score_def(file_content):
    """Extract key, meter and clef from the first ``<scoreDef>`` of an MEI text.

    Every piece of information is optional: when the ``<scoreDef>``, the
    ``<staffDef>`` or an individual attribute is missing, the corresponding
    entry is the empty string instead of raising ``IndexError``.

    Args:
        file_content: Text of the MEI document.

    Returns:
        A dict with the keys

        * ``'key'``: key name looked up in ``gammes``, or ``''``;
        * ``'meter_count'`` / ``'meter_unit'``: ints when both attributes are
          present, otherwise both ``''``;
        * ``'clef'``: ``clef.shape`` followed by ``clef.line`` of the first
          ``<staffDef>`` tag (e.g. ``'C1'``), or ``''``.

    Raises:
        KeyError: If ``key.sig`` has a value that is not in ``gammes``.
        ValueError: If ``meter.count`` or ``meter.unit`` is not an integer.
    """
    score_defs = re.findall(r'<scoreDef[\s\S]*?scoreDef>', file_content)
    header = score_defs[0] if score_defs else ''

    count = _first_attr('meter.count', header)
    unit = _first_attr('meter.unit', header)
    if count and unit:
        meter_count, meter_unit = int(count), int(unit)
    else:
        meter_count, meter_unit = '', ''

    key_sig = _first_attr('key.sig', header)
    key = gammes[key_sig] if key_sig else ''

    # Only the opening tag of the first staffDef is inspected, so the clef is
    # found whether the element is self-closing or has children.
    staff_match = re.search(r'<staffDef\b[^>]*>', header)
    staff_tag = staff_match.group(0) if staff_match else ''
    clef_shape = _first_attr('clef.shape', staff_tag)
    clef_line = _first_attr('clef.line', staff_tag)

    score_def = {'key':key, 'meter_count': meter_count, 'meter_unit': meter_unit, 'clef': clef_shape+clef_line}
    return score_def

# Key, meter and clef of the file loaded above; used for the ABC header below.
score_def = find_score_def(file_content)

In [116]:
def mei_to_abc(measures_content, score_def):
    """Assemble an ABC tune from per-measure tokens and score information.

    The header is ``X:1``, then ``M:<count>/<unit>`` (or ``M:C`` when
    ``meter_count`` is ``''``), then ``K:<key>`` when a key is set, then
    ``L:1/16`` and ``K: clef=<clef>``. The body writes every token preceded by
    a space, closes each measure with `` |`` and ends with ``]``.

    Args:
        measures_content: Token lists indexed by ``0 .. len - 1``.
        score_def: Mapping with ``meter_count``, ``meter_unit``, ``key`` and
            ``clef`` entries.

    Returns:
        The ABC text as a single string.
    """
    if score_def["meter_count"] != '':
        meter_line = f'M:{score_def["meter_count"]}/{score_def["meter_unit"]}\n'
    else:
        meter_line = 'M:C\n'

    header = ['X:1\n', meter_line]
    if score_def["key"] != '':
        header.append(f'K:{score_def["key"]}\n')
    header.append('L:1/16\n')
    header.append(f'K: clef={score_def["clef"]}\n')

    # Index by position rather than iterating directly, so the container is
    # read through the same 0..len-1 lookups whatever its type.
    bars = [
        ''.join(f' {token}' for token in measures_content[index]) + ' |'
        for index in range(len(measures_content))
    ]
    return ''.join(header) + ''.join(bars) + ']'

# Run the full pipeline on the loaded file and show the resulting ABC text.
abc_content = mei_to_abc(get_measures_labels(find_measures(file_content)), score_def)
print(abc_content)

X:1
M:2/4
K:Eb
L:1/16
K: clef=C1
 Z23 | z4 z2 b,2 | b,6 g,2 | e6 d2 | c2 c2 z4 |]


In [117]:
def convert_file(file_content):
    """Convert the text of one MEI document into ABC text.

    The measures are located with ``find_measures`` and the header data with
    ``find_score_def``; the measures are then tokenised by
    ``get_measures_labels`` and everything is assembled by ``mei_to_abc``.

    Args:
        file_content: Text of the MEI document.

    Returns:
        The ABC text produced by ``mei_to_abc``.
    """
    raw_measures = find_measures(file_content)
    header_info = find_score_def(file_content)
    tokens_by_measure = get_measures_labels(raw_measures)
    return mei_to_abc(tokens_by_measure, header_info)

In [118]:
# Convert a second MEI file through convert_file; note that this rebinds
# the notebook-level `file_content`.
file_content = read_file('000051778-1_1_1.mei')
abc = convert_file(file_content)

print(abc)

X:1
M:C
L:1/16
K: clef=C1
 e,4 | a,1b,1a,1b,1 b,3a,/b,/ c4 b,1c/d/c1b,1 | a,1a,1b,1c1 b,3a,1 ^g,3^f,1 e,2 |]


In [None]:
from zipfile import ZipFile, ZipInfo

# Convert the MEI members found among the first ten archive entries and print
# each ABC result.
# NOTE(review): the [:10] slice is taken before the '.mei' filter, so fewer
# than ten files are converted when some of those entries are not MEI files.
with ZipFile('data_mei/data.zip', 'r') as myzip:
    for myfile in myzip.infolist()[:10]:
        if myfile.filename.endswith('.mei'):
            # `myfile` is rebound here from the archive entry to the opened
            # member stream.
            with myzip.open(myfile) as myfile:
                file_content = myfile.read().decode('utf-8')
                abc = convert_file(file_content)
                print(abc)

X:1
M:3/4
K:Eb
L:1/16
K: clef=G2
 b4 e2b2c'2b2 | a2a2 z1 a1g1a1 b1a1g1a1 |]
X:1
M:3/4
K:Eb
L:1/16
K: clef=C1
 Z10 | b,4 e,2 e2 e2 c2 | a,2 a,2 z4 z4 | f4 e3 c1 b,3 a,1 | a,2 g,2 z4 |]
X:1
M:6/8
L:1/16
K: clef=C1
 Z14 | z4 z2 z4 g,2 | c4 z2 z4 z2 | z4 c2 e4 c2 | a,4 a,2 z4 z2 |]
X:1
M:C
K:Eb
L:1/16
K: clef=C1
 Z4 | z8 g,2 g,1 g,1 g,2 =a,2 | =b,2 b,2 z2 b,2 b,2 b,2 =a,2 b,2 | g,4 |]
X:1
M:2/4
K:Eb
L:1/16
K: clef=C1
 Z23 | z4 z2 b,2 | b,6 g,2 | e6 d2 | c2 c2 z4 |]
