In [1]:
import os
from PIL import Image
from pathlib import Path
from tqdm import tqdm
import re

In [2]:
def get_accuracy(measures, sixteenths, is_complete):
    if is_complete:
        last_measure = len(measures)
    else:
        last_measure = len(measures) - 1

    acc = 0
    invalid = 0   # dot before note
    for i in range(1, last_measure):  # exclude first and, if not complete, last
        last_duration = 0
        num_sixteenths = 0
        gracenote = False
        ignore = False
        durations = {"sixty_fourth": 0.25, "thirty_second": 0.5, "sixteenth": 1, "eighth": 2, "quarter": 4, 
                     "half": 8, "whole": 16, "double_whole": 32, "quadruple_whole": 64}
        
        for word in measures[i].split():
            if word in durations.keys():
                if not gracenote:
                    last_duration = durations[word]
                    num_sixteenths += last_duration
                else:
                    last_duration = 0
                    gracenote = False
            elif word == "dot" and last_duration != 0:
                last_duration = last_duration * 0.5
                num_sixteenths += last_duration
            elif word == "dotdot" and last_duration != 0:
                last_duration = last_duration * 0.75
                num_sixteenths += last_duration
            elif (word == "dot" or word == "dotdot") and last_duration == 0:
                invalid += 1
                ignore = True
                break
            elif word == 'gracenote':
                gracenote = True
                last_duration = 0
            elif word[:13] == 'timeSignature':
                break
            else:
                last_duration = 0
            
        if num_sixteenths == sixteenths:
            acc += 1

    wrong = last_measure - 1 - (acc + invalid)
        
    return acc, invalid, wrong

In [3]:
def semantic_to_pseudo(contents):
    contents = re.sub('\s+', ' ', contents)
    contents = re.sub('-|_', ' ', contents)
    contents = re.sub('thirty second', 'thirty_second', contents)
    contents = re.sub('sixty fourth', 'sixty_fourth', contents)
    contents = re.sub('quadruple whole', 'quadruple_whole', contents)
    contents = re.sub('double whole', 'double_whole', contents)
    contents = re.sub('clef (\w+)', r'clef-\1', contents)
    contents = re.sub('keySignature (\w+)', r'keySignature-\1', contents)
    contents = re.sub('timeSignature (\w+)', r'timeSignature-\1', contents)
    contents = re.sub('\.\.', ' dotdot', contents)
    contents = re.sub('\.', ' dot', contents)
    return contents

## GPT-2 semantic preds

## Final

In [4]:
SEM_2_4_PATH = Path('/home/macosta/ttmp/generated-semantic-final/timeSignature-2-4')
SEM_3_4_PATH = Path('/home/macosta/ttmp/generated-semantic-final/timeSignature-3-4')
SEM_4_4_PATH = Path('/home/macosta/ttmp/generated-semantic-final/timeSignature-C')
SEM_5_4_PATH = Path('/home/macosta/ttmp/generated-semantic-final/timeSignature-5-4')
SEM_6_4_PATH = Path('/home/macosta/ttmp/generated-semantic-final/timeSignature-6-4')
SEM_7_4_PATH = Path('/home/macosta/ttmp/generated-semantic-final/timeSignature-7-4')
SEM_3_8_PATH = Path('/home/macosta/ttmp/generated-semantic-final/timeSignature-3-8')
SEM_6_8_PATH = Path('/home/macosta/ttmp/generated-semantic-final/timeSignature-6-8')

In [5]:
results_2_4 = []
for file in tqdm(os.listdir(SEM_2_4_PATH)):
    with open(SEM_2_4_PATH/file) as f:
        complete = False
        t = f.read()
        t = semantic_to_pseudo(t)
        if t.split()[-1] == "barline":
            complete = True
        measures = t.split("barline")
    results_2_4.append(get_accuracy(measures, 2*4, complete))

100%|█████████████████████████████████████| 2000/2000 [00:00<00:00, 8136.69it/s]


In [6]:
results_3_4 = []
for file in tqdm(os.listdir(SEM_3_4_PATH)):
    with open(SEM_3_4_PATH/file) as f:
        complete = False
        t = f.read()
        t = semantic_to_pseudo(t)
        if t.split()[-1] == "barline":
            complete = True
        measures = t.split("barline")
    results_3_4.append(get_accuracy(measures, 3*4, complete))

100%|█████████████████████████████████████| 2000/2000 [00:00<00:00, 8411.25it/s]


In [7]:
results_4_4 = []
for file in tqdm(os.listdir(SEM_4_4_PATH)):
    with open(SEM_4_4_PATH/file) as f:
        complete = False
        t = f.read()
        t = semantic_to_pseudo(t)
        if t.split()[-1] == "barline":
            complete = True
        measures = t.split("barline")
    results_4_4.append(get_accuracy(measures, 4*4, complete))

100%|█████████████████████████████████████| 2000/2000 [00:00<00:00, 7987.88it/s]


In [8]:
results_5_4 = []
for file in tqdm(os.listdir(SEM_5_4_PATH)):
    with open(SEM_5_4_PATH/file) as f:
        complete = False
        t = f.read()
        t = semantic_to_pseudo(t)
        if t.split()[-1] == "barline":
            complete = True
        measures = t.split("barline")
    results_5_4.append(get_accuracy(measures, 5*4, complete))

100%|█████████████████████████████████████| 2000/2000 [00:00<00:00, 8808.04it/s]


In [9]:
results_6_4 = []
for file in tqdm(os.listdir(SEM_6_4_PATH)):
    with open(SEM_6_4_PATH/file) as f:
        complete = False
        t = f.read()
        t = semantic_to_pseudo(t)
        if t.split()[-1] == "barline":
            complete = True
        measures = t.split("barline")
    results_6_4.append(get_accuracy(measures, 6*4, complete))

100%|█████████████████████████████████████| 2000/2000 [00:00<00:00, 8979.57it/s]


In [10]:
results_7_4 = []
for file in tqdm(os.listdir(SEM_7_4_PATH)):
    with open(SEM_7_4_PATH/file) as f:
        complete = False
        t = f.read()
        t = semantic_to_pseudo(t)
        if t.split()[-1] == "barline":
            complete = True
        measures = t.split("barline")
    results_7_4.append(get_accuracy(measures, 7*4, complete))

100%|█████████████████████████████████████| 2000/2000 [00:00<00:00, 9097.45it/s]


In [11]:
results_3_8 = []
for file in tqdm(os.listdir(SEM_3_8_PATH)):
    with open(SEM_3_8_PATH/file) as f:
        complete = False
        t = f.read()
        t = semantic_to_pseudo(t)
        if t.split()[-1] == "barline":
            complete = True
        measures = t.split("barline")
    results_3_8.append(get_accuracy(measures, 3*2, complete))

100%|█████████████████████████████████████| 2000/2000 [00:00<00:00, 8258.62it/s]


In [12]:
results_6_8 = []
for file in tqdm(os.listdir(SEM_6_8_PATH)):
    with open(SEM_6_8_PATH/file) as f:
        complete = False
        t = f.read()
        t = semantic_to_pseudo(t)
        if t.split()[-1] == "barline":
            complete = True
        measures = t.split("barline")
    results_6_8.append(get_accuracy(measures, 6*2, complete))

100%|█████████████████████████████████████| 2000/2000 [00:00<00:00, 7060.11it/s]


In [13]:
acc_2_4 = [sum(x) for x in zip(*results_2_4)]
print(acc_2_4)
acc_2_4[0] / (acc_2_4[0] + acc_2_4[-1])

[7111, 2, 1681]


0.808803457688808

In [14]:
acc_3_4 = [sum(x) for x in zip(*results_3_4)]
print(acc_3_4)
acc_3_4[0] / (acc_3_4[0] + acc_3_4[-1])

[6503, 5, 1683]


0.7944050818470559

In [15]:
acc_4_4 = [sum(x) for x in zip(*results_4_4)]
print(acc_4_4)
acc_4_4[0] / (acc_4_4[0] + acc_4_4[-1])

[4622, 1, 1313]


0.7787700084245999

In [16]:
acc_5_4 = [sum(x) for x in zip(*results_5_4)]
print(acc_5_4)
acc_5_4[0] / (acc_5_4[0] + acc_5_4[-1])

[1851, 3, 3422]


0.35103356722928125

In [17]:
acc_6_4 = [sum(x) for x in zip(*results_6_4)]
print(acc_6_4)
acc_6_4[0] / (acc_6_4[0] + acc_6_4[-1])

[4275, 2, 1276]


0.7701315078364259

In [18]:
acc_7_4 = [sum(x) for x in zip(*results_7_4)]
print(acc_7_4)
acc_7_4[0] / (acc_7_4[0] + acc_7_4[-1])

[21, 0, 5926]


0.0035311921977467632

In [19]:
acc_3_8 = [sum(x) for x in zip(*results_3_8)]
print(acc_3_8)
acc_3_8[0] / (acc_3_8[0] + acc_3_8[-1])

[7753, 6, 2310]


0.7704461890092418

In [20]:
acc_6_8 = [sum(x) for x in zip(*results_6_8)]
print(acc_6_8)
acc_6_8[0] / (acc_6_8[0] + acc_6_8[-1])

[5531, 1, 1771]


0.7574637085729937

In [21]:
overall = [sum(x) for x in zip(acc_2_4,acc_3_4,acc_4_4,acc_5_4,acc_6_4,acc_7_4,acc_3_8,acc_6_8)]
print(overall)
overall[0] / (overall[0] + overall[-1])

[37667, 20, 19382]


0.6602569720766359

## AWD-LSTM semantic preds

## Final

In [22]:
SEM_2_4_PATH = Path('/home/ibukey/ttmp/semantic-nonrhythmic-preds/2_4')
SEM_3_4_PATH = Path('/home/ibukey/ttmp/semantic-nonrhythmic-preds/3_4')
SEM_4_4_PATH = Path('/home/ibukey/ttmp/semantic-nonrhythmic-preds/C')
SEM_5_4_PATH = Path('/home/ibukey/ttmp/semantic-nonrhythmic-preds/5_4')
SEM_6_4_PATH = Path('/home/ibukey/ttmp/semantic-nonrhythmic-preds/6_4')
SEM_7_4_PATH = Path('/home/ibukey/ttmp/semantic-nonrhythmic-preds/7_4')
SEM_3_8_PATH = Path('/home/ibukey/ttmp/semantic-nonrhythmic-preds/3_8')
SEM_6_8_PATH = Path('/home/ibukey/ttmp/semantic-nonrhythmic-preds/6_8')

In [23]:
results_2_4 = []
for file in tqdm(os.listdir(SEM_2_4_PATH)):
    with open(SEM_2_4_PATH/file) as f:
        complete = False
        t = f.read()
        if t.split()[-1] == "barline":
            complete = True
        measures = t.split("barline")
    results_2_4.append(get_accuracy(measures, 2*4, complete))

100%|████████████████████████████████████| 2000/2000 [00:00<00:00, 20879.81it/s]


In [24]:
results_3_4 = []
for file in tqdm(os.listdir(SEM_3_4_PATH)):
    with open(SEM_3_4_PATH/file) as f:
        complete = False
        t = f.read()
        if t.split()[-1] == "barline":
            complete = True
        measures = t.split("barline")
    results_3_4.append(get_accuracy(measures, 3*4, complete))

100%|████████████████████████████████████| 2000/2000 [00:00<00:00, 20795.02it/s]


In [25]:
results_4_4 = []
for file in tqdm(os.listdir(SEM_4_4_PATH)):
    with open(SEM_4_4_PATH/file) as f:
        complete = False
        t = f.read()
        if t.split()[-1] == "barline":
            complete = True
        measures = t.split("barline")
    results_4_4.append(get_accuracy(measures, 4*4, complete))

100%|████████████████████████████████████| 2000/2000 [00:00<00:00, 22932.91it/s]


In [26]:
results_5_4 = []
for file in tqdm(os.listdir(SEM_5_4_PATH)):
    with open(SEM_5_4_PATH/file) as f:
        complete = False
        t = f.read()
        if t.split()[-1] == "barline":
            complete = True
        measures = t.split("barline")
    results_5_4.append(get_accuracy(measures, 5*4, complete))

100%|████████████████████████████████████| 2000/2000 [00:00<00:00, 11587.72it/s]


In [27]:
results_6_4 = []
for file in tqdm(os.listdir(SEM_6_4_PATH)):
    with open(SEM_6_4_PATH/file) as f:
        complete = False
        t = f.read()
        if t.split()[-1] == "barline":
            complete = True
        measures = t.split("barline")
    results_6_4.append(get_accuracy(measures, 6*4, complete))

100%|████████████████████████████████████| 2000/2000 [00:00<00:00, 22400.92it/s]


In [28]:
results_7_4 = []
for file in tqdm(os.listdir(SEM_7_4_PATH)):
    with open(SEM_7_4_PATH/file) as f:
        complete = False
        t = f.read()
        if t.split()[-1] == "barline":
            complete = True
        measures = t.split("barline")
    results_7_4.append(get_accuracy(measures, 7*4, complete))

100%|████████████████████████████████████| 2000/2000 [00:00<00:00, 22353.23it/s]


In [29]:
results_3_8 = []
for file in tqdm(os.listdir(SEM_3_8_PATH)):
    with open(SEM_3_8_PATH/file) as f:
        complete = False
        t = f.read()
        if t.split()[-1] == "barline":
            complete = True
        measures = t.split("barline")
    results_3_8.append(get_accuracy(measures, 3*2, complete))

100%|████████████████████████████████████| 2000/2000 [00:00<00:00, 20816.64it/s]


In [30]:
results_6_8 = []
for file in tqdm(os.listdir(SEM_6_8_PATH)):
    with open(SEM_6_8_PATH/file) as f:
        complete = False
        t = f.read()
        if t.split()[-1] == "barline":
            complete = True
        measures = t.split("barline")
    results_6_8.append(get_accuracy(measures, 6*2, complete))

100%|████████████████████████████████████| 2000/2000 [00:00<00:00, 21524.70it/s]


In [31]:
acc_2_4 = [sum(x) for x in zip(*results_2_4)]
print(acc_2_4)
acc_2_4[0] / (acc_2_4[0] + acc_2_4[-1])

[4373, 1, 1446]


0.7515036947929198

In [32]:
acc_3_4 = [sum(x) for x in zip(*results_3_4)]
print(acc_3_4)
acc_3_4[0] / (acc_3_4[0] + acc_3_4[-1])

[4954, 1, 1274]


0.7954399486191394

In [33]:
acc_4_4 = [sum(x) for x in zip(*results_4_4)]
print(acc_4_4)
acc_4_4[0] / (acc_4_4[0] + acc_4_4[-1])

[3187, 1, 921]


0.7758033106134372

In [34]:
acc_5_4 = [sum(x) for x in zip(*results_5_4)]
print(acc_5_4)
acc_5_4[0] / (acc_5_4[0] + acc_5_4[-1])

[11, 1, 3750]


0.0029247540547726668

In [35]:
acc_6_4 = [sum(x) for x in zip(*results_6_4)]
print(acc_6_4)
acc_6_4[0] / (acc_6_4[0] + acc_6_4[-1])

[2563, 1, 1076]


0.7043143720802418

In [36]:
acc_7_4 = [sum(x) for x in zip(*results_7_4)]
print(acc_7_4)
acc_7_4[0] / (acc_7_4[0] + acc_7_4[-1])

[2, 1, 3778]


0.0005291005291005291

In [37]:
acc_3_8 = [sum(x) for x in zip(*results_3_8)]
print(acc_3_8)
acc_3_8[0] / (acc_3_8[0] + acc_3_8[-1])

[5523, 1, 1567]


0.7789844851904091

In [38]:
acc_6_8 = [sum(x) for x in zip(*results_6_8)]
print(acc_6_8)
acc_6_8[0] / (acc_6_8[0] + acc_6_8[-1])

[3283, 2, 1049]


0.7578485687903971

In [39]:
overall = [sum(x) for x in zip(acc_2_4,acc_3_4,acc_4_4,acc_5_4,acc_6_4,acc_7_4,acc_3_8,acc_6_8)]
print(overall)
overall[0] / (overall[0] + overall[-1])

[23896, 9, 14861]


0.6165595892354929