In [1]:
import fastai
from fastai.text.all import *
import os
from PIL import Image
from pathlib import Path
import numpy as np
import seaborn as sns
from tqdm import tqdm
from matplotlib import pyplot as plt
import pickle
from collections import Counter
from seaborn import barplot
import shutil

In [2]:
# Folder holding the MEI text corpus; it is passed to the DataLoaders as the
# data folder and to the learner as its working path.
MEI_PATH = Path('/home/ibukey/ttmp/mei-txt/')

In [3]:
# Tokenizer instance handed to the DataLoaders as `tok_tfm`.
# NOTE(review): presumably splits on spaces only (as the variable name
# suggests) — confirm against fastai's BaseTokenizer.
whitespace_tokenizer = BaseTokenizer()

In [4]:
# Language-model DataLoaders over the text files under MEI_PATH, tokenized
# with the tokenizer created above; valid_pct=0.1 asks for a 10% validation
# split.
data_loader = TextDataLoaders.from_folder(
    MEI_PATH,
    tok_tfm=whitespace_tokenizer,
    is_lm=True,
    valid_pct=0.1,
)

Rules: []


In [5]:
# AWD-LSTM language model on top of the MEI DataLoaders, starting from
# pretrained weights, reporting accuracy and perplexity, and switched to
# fp16 via `to_fp16()`.
learn = (
    language_model_learner(
        data_loader,
        AWD_LSTM,
        path=str(MEI_PATH),
        pretrained=True,
        wd=0.1,
        metrics=[accuracy, Perplexity()],
    )
    .to_fp16()
)

In [6]:
# Restore previously saved weights named 'mei-model' into the learner.
# NOTE(review): the checkpoint is presumably looked up relative to the
# learner's `path` (MEI_PATH) — confirm where the file actually lives.
learn.load('mei-model')

<fastai.text.learner.LMLearner at 0x7f13dfecd9d0>

In [8]:
# Show the vocabulary held by the DataLoaders (the `xx*` special tokens come
# first, followed by the MEI element and attribute tokens).
data_loader.vocab

['xxunk',
 'xxpad',
 'xxbos',
 'xxeos',
 'xxfld',
 'xxrep',
 'xxwrep',
 'xxup',
 'xxmaj',
 '>',
 '/>',
 'dur=',
 'pname=',
 'oct=',
 '<note',
 '"4"',
 '"1"',
 'n=',
 '"5"',
 '"8"',
 'right=',
 '<staff',
 '<layer',
 '</layer',
 '</staff',
 '</measure',
 '<measure',
 '"single"',
 '"16"',
 '"d"',
 '"c"',
 '<beam',
 '</beam',
 '"2"',
 '"a"',
 '"b"',
 '"g"',
 '"e"',
 '"f"',
 '"3"',
 '<rest',
 'dots=',
 '<scoreDef',
 '<music',
 '<body',
 '<mdiv',
 '<score',
 '<staffGrp',
 'clef.shape=',
 'clef.line=',
 'lines=',
 '</staffGrp',
 '</scoreDef',
 '<section',
 '</section',
 '</score',
 '</mdiv',
 '</body',
 '</music',
 '',
 '<staffDef',
 'key.sig=',
 '"G"',
 'meter.sym=',
 'meter.count=',
 'meter.unit=',
 '"invis"',
 'accid=',
 '<accid',
 '</note',
 '"32"',
 '"C"',
 '"common"',
 'tie=',
 'grace=',
 'stem.dir=',
 '"up"',
 '<multiRest',
 'num=',
 '"s"',
 '"6"',
 '"unacc"',
 '"1f"',
 '"cut"',
 '"i"',
 '"t"',
 '"2s"',
 '"1s"',
 '"2f"',
 '"n"',
 '"3f"',
 '"F"',
 '"3s"',
 'fermata=',
 '"above"',
 '"bre

In [7]:
# Attribute snippets that get spliced into the seed text. Element order is
# significant for anything that indexes into these lists, so it is preserved.

# Key-signature attributes. Every non-empty entry carries its own trailing
# space; the empty entry means "emit no key.sig attribute at all".
KEYSIGS = [
    f'key.sig= "{code}" ' if code else ''
    for code in (
        '1f', '5f', '1s', '4s', '2s', '7s', '3s',
        '4f', '6s', '', '6f', '5s', '2f', '3f',
    )
]

# Clef attributes; each two-character code unpacks into (shape, line).
CLEFS = [
    f'clef.shape= "{shape}" clef.line= "{line}"'
    for shape, line in (
        'G2', 'C5', 'C2', 'F4', 'C1',
        'C4', 'C3', 'F3', 'F5', 'G1',
    )
]

# Time-signature attributes; 'common' uses meter.sym instead of count/unit.
TIMESIGS = [
    'meter.sym= "common"' if meter == 'common'
    else 'meter.count= "{}" meter.unit= "{}"'.format(*meter.split('/'))
    for meter in ('2/4', '3/4', 'common', '5/4', '6/4', '7/4', '3/8', '6/8')
]

In [8]:
def generate_randomized_mei_seed(timesig, start_token=""):
    """Build the opening of an MEI token stream with a random key signature and clef.

    The result is the space-separated prefix
    ``<music > <body > <mdiv > <score > <scoreDef ... > <staffGrp > <staffDef ...``
    and is deliberately left open after the clef attributes.

    Args:
        timesig: Time-signature attribute text placed inside ``<scoreDef``,
            e.g. ``'meter.count= "2" meter.unit= "4"'``.
        start_token: Text placed before ``<music``. A single space always
            follows it, so the default yields a seed with one leading space.

    Returns:
        The seed string, with the key signature drawn uniformly from KEYSIGS
        and the clef drawn uniformly from CLEFS.
    """
    # Imported locally so the function does not rely on `random` leaking into
    # the namespace through a wildcard import.
    import random

    keysig = random.choice(KEYSIGS)
    clef = random.choice(CLEFS)
    # KEYSIGS entries carry their own trailing space (or are empty), which is
    # why keysig and timesig are concatenated without a separator.
    return f'{start_token} <music > <body > <mdiv > <score > <scoreDef {keysig}{timesig} > <staffGrp > <staffDef {clef}'

In [None]:
# Folder that receives the generated MEI documents.
PRED_DIR = Path('/home/ibukey/ttmp/mei-preds')

# (time-signature attributes placed in the seed, label used in the file name)
PREDICTION_RUNS = [
    ('meter.count= "2" meter.unit= "4"', '2_4'),
    ('meter.count= "3" meter.unit= "4"', '3_4'),
    ('meter.sym= "common"', 'common'),
    ('meter.count= "5" meter.unit= "4"', '5_4'),
    ('meter.count= "6" meter.unit= "4"', '6_4'),
    ('meter.count= "7" meter.unit= "4"', '7_4'),
    ('meter.count= "3" meter.unit= "8"', '3_8'),
    ('meter.count= "6" meter.unit= "8"', '6_8'),
]


def _truncate_at_closing_music(text):
    """Cut generated text right after its closing ``</music`` token.

    Tokens are re-joined with single spaces. The first token after the
    opening one that contains ``music`` ends the document and gets `` >``
    appended.

    Returns:
        The truncated document when it ends with ``</music >``; ``None`` when
        no such token was generated or the token found was not a closing tag
        (for example a second ``<music``).
    """
    pieces = []
    for i, token in enumerate(text.split()):
        # i != 0 skips the opening `<music` that comes from the seed.
        if i != 0 and 'music' in token:
            pieces.append(token + ' >')
            break
        pieces.append(token + ' ')
    document = ''.join(pieces)
    return document if document.endswith('</music >') else None


def write_predictions(seed, label, n_samples=2000, n_words=300, temperature=.8):
    """Sample documents from `learn` and save the complete ones.

    Args:
        seed: Prompt text handed to ``learn.predict``.
        label: Tag used in the output file name ``pred_<label>_<j>.txt``.
        n_samples: Number of generations to attempt.
        n_words: Number of tokens requested per generation.
        temperature: Sampling temperature passed to ``learn.predict``.

    Returns:
        How many documents were written. Generations that never reach a
        closing ``</music`` are skipped, so file indices can have gaps.
    """
    # Create the output folder up front instead of failing on the first open().
    PRED_DIR.mkdir(parents=True, exist_ok=True)
    n_written = 0
    for j in range(n_samples):
        text = learn.predict(seed, n_words, temperature=temperature, no_unk=True)
        document = _truncate_at_closing_music(text)
        if document is None:
            continue
        with open(PRED_DIR / f'pred_{label}_{j}.txt', 'w') as fout:
            fout.write(document)
        n_written += 1
    return n_written


# NOTE(review): the seed (random key signature and clef) is drawn once per
# time signature, so every sample of a run shares it. Move the call into
# write_predictions' loop if per-sample variety was intended.
for timesig, label in PREDICTION_RUNS:
    SEED = generate_randomized_mei_seed(timesig)
    n_written = write_predictions(SEED, label)
    print(f'{label}: wrote {n_written} complete documents')