Please use the current development version and report any bugs you encounter:
`pip install git+https://github.com/DCMLab/wavescapes.git@corpus_structure`

In [1]:
import os, shutil, subprocess
import ms3
import ray

In [2]:
# Source data folder (on the HPC: /scratch/data/musescore.com/)
DATA_FOLDER = os.path.abspath('../mscz')
# Folder scanned for the sample of scores that get converted
SAMPLE_FOLDER = os.path.abspath('./mscz')
# Destination folder for the converted .mscx files
CONVERSION_FOLDER = os.path.abspath('./mscx')
# One absolute output folder per facet, keyed by the facet name
OUTPUT_PATHS = {
    facet: os.path.abspath(f'./{facet}')
    for facet in ('events', 'notes', 'measures', 'labels', 'metadata')
}


Creating a sample of 1000 zipped MuseScore 3 files for development purposes:

Using the MuseScore 3 binary to convert the file format to the current version of MuseScore 3 (needs to be installed or available as AppImage). [MuseScore commandline options](https://musescore.org/en/handbook/3/command-line-options)

ToDos:

* parallelize the conversion (anyone interested in learning [ray](https://www.ray.io/)?)
* avoid processing files more than once, making use of the fact that the file names correspond to IDs
* add proper error handling, keeping track of a mapping ID -> `error message` for files that cannot be converted successfully (check out the stdout and stderr arguments of [subprocess.run()](https://docs.python.org/3/library/subprocess.html#subprocess.run))

In [3]:
# Name or path of the MuseScore 3 executable that performs the conversion.
# Alternative: try the standard path via ms3.get_musescore('auto').
musescore_cmd = 'mscore-portable'
print(musescore_cmd)

mscore-portable


In [4]:
# Start (or connect to) a local Ray instance. `ignore_reinit_error=True` keeps this cell
# re-runnable in a live kernel instead of raising because Ray is already initialised.
ray.init(ignore_reinit_error=True)

2022-11-24 18:43:49,956	INFO worker.py:1528 -- Started a local Ray instance.


0,1
Python version:,3.8.8
Ray version:,2.1.0


In [5]:
@ray.remote
def process_chunk(start_idx, end_idx):
    """Convert one slice of the files in ``SAMPLE_FOLDER`` with MuseScore.

    The directory entries are sorted by file name and those at positions
    ``start_idx <= i < end_idx`` are handed to the executable ``musescore_cmd``,
    which is asked to write ``<ID>.mscx`` into ``CONVERSION_FOLDER`` and to
    output the score metadata (``--score-meta``). Exit code, stdout and stderr
    of every call are printed.

    Args:
        start_idx: Position of the first entry to convert (inclusive).
        end_idx: Position after the last entry to convert (exclusive).

    Returns:
        dict: Maps the ID (file name without extension) of every file for which
        MuseScore exited with a non-zero code to the captured stderr text.
        Empty when all conversions in the chunk succeeded.
    """
    # os.scandir() yields entries in arbitrary order and every worker lists the
    # directory on its own, so sort by name to give all workers the same
    # partition. The context manager closes the directory handle right away.
    with os.scandir(SAMPLE_FOLDER) as directory:
        entries = sorted(directory, key=lambda e: e.name)

    failed = {}
    # Clamp to zero so negative indices keep their original meaning
    # (a negative start begins at the first entry, a negative end selects nothing).
    for entry in entries[max(start_idx, 0):max(end_idx, 0)]:
        ID, file_extension = os.path.splitext(entry.name)
        converted_file_path = os.path.join(CONVERSION_FOLDER, ID + '.mscx')
        print(f"Converting {entry.path} to {converted_file_path}...", end=' ')

        result = subprocess.run(
            [musescore_cmd, "--score-meta", "-o", converted_file_path, entry.path],
            capture_output=True,
            text=True,
        )
        print(f"Exit code: {result.returncode}")
        print(f"Result: {result.stdout.strip()}") # the extraction of metadata as JSON does not work on Windows; please store the JSON to the metadata output folder
        print(f"Errors: {result.stderr.strip()}")
        # stderr also carries harmless start-up messages on success, so only
        # the exit code decides whether a conversion counts as failed.
        if result.returncode != 0:
            failed[ID] = result.stderr.strip()
    return failed

In [7]:
# Launch ten Ray tasks, each covering a window of 100 consecutive directory entries
# (0-100, 100-200, ..., 900-1000).
futures = [process_chunk.remote(start, start + 100) for start in range(0, 1000, 100)]

[2m[36m(process_chunk pid=237239)[0m Converting /home/erwan/epfl/s1/ml/ComposersClassifier/data/mscz/101555.mscz to /home/erwan/epfl/s1/ml/ComposersClassifier/data/mscx/101555.mscx... 
[2m[36m(process_chunk pid=237240)[0m Converting /home/erwan/epfl/s1/ml/ComposersClassifier/data/mscz/102059.mscz to /home/erwan/epfl/s1/ml/ComposersClassifier/data/mscx/102059.mscx... 
[2m[36m(process_chunk pid=237242)[0m Converting /home/erwan/epfl/s1/ml/ComposersClassifier/data/mscz/102715.mscz to /home/erwan/epfl/s1/ml/ComposersClassifier/data/mscx/102715.mscx... 
[2m[36m(process_chunk pid=237235)[0m Converting /home/erwan/epfl/s1/ml/ComposersClassifier/data/mscz/101289.mscz to /home/erwan/epfl/s1/ml/ComposersClassifier/data/mscx/101289.mscx... 
[2m[36m(process_chunk pid=237236)[0m Converting /home/erwan/epfl/s1/ml/ComposersClassifier/data/mscz/100753.mscz to /home/erwan/epfl/s1/ml/ComposersClassifier/data/mscx/100753.mscx... 
[2m[36m(process_chunk pid=237237)[0m Converting /home/erw

In [8]:
# Collect the results of all chunk tasks and print them as one list (one entry per future).
print(ray.get(futures))

[2m[36m(process_chunk pid=237242)[0m Exit code: 0
[2m[36m(process_chunk pid=237242)[0m Result: {
[2m[36m(process_chunk pid=237242)[0m "metadata": {"composer":"Written by Daniel Ingram\nArranged by Rudie16 ","duration":82,"fileVersion":206,"hasHarmonies":"false","hasLyrics":"false","keysig":1,"lyrics":"","measures":41,"mscoreVersion":"2.0.1","pageFormat":{"height":279,"twosided":"true","width":216},"pages":1,"parts":[{"harmonyCount":0,"hasDrumStaff":"false","hasPitchedStaff":"true","hasTabStaff":"false","instrumentId":"wind.flutes.flute","isVisible":"true","lyricCount":0,"name":"Flute","program":73}],"poet":"","previousSource":"","subtitle":"","tempo":0,"tempoText":"","textFramesData":{"composers":["Written by Daniel Ingram\nArranged by Rudie16 "],"poets":[],"subtitles":[],"titles":["Failure Success song "]},"timesig":"4/4","title":"Failure Success song "}
[2m[36m(process_chunk pid=237242)[0m }
[2m[36m(process_chunk pid=237242)[0m Errors: /lib/x86_64-linux-gnu/libjack.so.

KeyboardInterrupt: 

In [11]:
# for i, entry in enumerate(os.scandir(SAMPLE_FOLDER)):
#     if i == 10:
#         break
#     ID, file_extension = os.path.splitext(entry.name)
#     converted_file_path = os.path.join(CONVERSION_FOLDER, ID + '.mscx')
#     print(f"Converting {entry.path} to {converted_file_path}...", end=' ')
    
#     result = subprocess.run([musescore_cmd,"--score-meta", "-o", converted_file_path, entry.path], capture_output=True, text=True)
#     print(f"Exit code: {result.returncode}")
#     print(f"Result: {result.stdout.strip()}") # the extraction of metadata as JSON does not work on Windows; please store the JSON to the metadata output folder
#     print(f"Errors: {result.stderr.strip()}")

Converting /home/erwan/epfl/s1/ml/ComposersClassifier/data/mscz/100462.mscz to /home/erwan/epfl/s1/ml/ComposersClassifier/data/mscx/100462.mscx... Exit code: 0
Result: {
"metadata": {"composer":"","duration":16,"fileVersion":114,"hasHarmonies":"false","hasLyrics":"false","keysig":0,"lyrics":"","measures":8,"mscoreVersion":"1.3","pageFormat":{"height":297,"twosided":"true","width":210},"pages":1,"parts":[{"harmonyCount":0,"hasDrumStaff":"false","hasPitchedStaff":"true","hasTabStaff":"false","instrumentId":"keyboard.piano","isVisible":"true","lyricCount":0,"name":"<font face=\"Times New Roman\"/>Piano","program":0}],"poet":"","previousSource":"","subtitle":"","tempo":0,"tempoText":"","textFramesData":{"composers":[],"poets":[],"subtitles":[],"titles":["Erin Taylor Nicole"]},"timesig":"4/4","title":"Erin Taylor Nicole"}
}
Errors: /lib/x86_64-linux-gnu/libjack.so.0
/lib/x86_64-linux-gnu/libnss3.so
Creating main window…
Reading translations…
Converting /home/erwan/epfl/s1/ml/ComposersClassi

Using the ms3 parsing library to extract score information:

In [5]:
# Make sure every output folder exists: DataFrame.to_csv() does not create
# missing directories and would fail on a fresh checkout.
for folder in OUTPUT_PATHS.values():
    os.makedirs(folder, exist_ok=True)

# Parse each converted score and store its facets as one TSV file per facet folder.
with os.scandir(CONVERSION_FOLDER) as conversion_entries:
    for entry in conversion_entries:
        if entry.is_dir():
            continue
        parsed = ms3.Score(entry.path, read_only=True)
        ID, _ = os.path.splitext(entry.name)
        tsv_name = f"{ID}.tsv"
        dataframes = dict(
            events = parsed.mscx.events(),
            notes = parsed.mscx.notes(),
            measures = parsed.mscx.measures(),
            labels = parsed.mscx.labels(),
        )
        for facet, df in dataframes.items():
            # Facets for which no DataFrame is returned are skipped.
            if df is None:
                continue
            tsv_path = os.path.join(OUTPUT_PATHS[facet], tsv_name)
            df.to_csv(tsv_path, sep='\t', index=False)
        # NOTE: the metadata is tagged with the ID but not written anywhere yet.
        metadata = parsed.mscx.metadata # please add this nested dictionary to the JSON stored in the previous step
        metadata['id'] = ID

	MC 15, the 1st measure of a 2nd volta, should have MN 14, not MN 15.
