In [64]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import os
import re
import functools
from tqdm.notebook import tqdm
import subprocess
import glob
from IPython import display as ipythondisplay

In [57]:
def make_datapath_list(rootpath):
    """Collect the paths of every ``.abc`` file in the A-Z subfolders of ``rootpath``.

    Each uppercase ASCII letter is treated as a subdirectory name and is
    searched (non-recursively) for files matching ``*.abc``.

    Args:
        rootpath: Directory containing the per-letter subfolders. A trailing
            path separator is optional.

    Returns:
        list[str]: Matching file paths, grouped by letter and sorted within
        each letter so the order is reproducible across runs and machines.
    """
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

    path_list = []

    for c in tqdm(alphabet):
        # Join the components separately so a rootpath without a trailing
        # separator still resolves to <rootpath>/<letter>/*.abc.
        target_path = os.path.join(rootpath, c, '*.abc')
        # glob.glob returns matches in arbitrary order; sort them so that
        # positional indexing into the result is stable.
        path_list.extend(sorted(glob.glob(target_path)))

    return path_list

In [35]:
def extract_song_data(text):
    """Clean raw ABC-notation text.

    Removes every ``%%...`` run up to the end of its line, trims leading and
    trailing whitespace, and collapses consecutive newlines into one.

    Args:
        text: Raw contents of an ABC file.

    Returns:
        str: The cleaned text.
    """
    without_directives = re.sub(r'%%.*', '', text)
    trimmed = without_directives.strip()
    return re.sub(r'\n{2,}', r'\n', trimmed)

In [None]:
def save_song_to_abc(song, filename="tmp", output_dir="/result"):
    """Write ``song`` to ``<output_dir>/<filename>.abc``.

    Args:
        song: ABC-notation text to write.
        filename: File name without the ``.abc`` extension.
        output_dir: Directory receiving the file; created if it is missing.
            NOTE(review): the default keeps the original absolute "/result"
            location -- confirm a cwd-relative directory was not intended.

    Returns:
        str: Path of the written file without the ``.abc`` extension, so that
        appending ".abc" yields the actual location of the file on disk.
    """
    os.makedirs(output_dir, exist_ok=True)
    basename = os.path.join(output_dir, filename)
    with open(basename + ".abc", 'w') as f:
        f.write(song)

    return basename

def abc2wav(abc_file):
    """Convert an ABC file to WAV by running ``abc2midi`` and then ``timidity``.

    The intermediate ``.mid`` file and the ``.wav`` file are named after
    ``abc_file`` with its ``.abc`` extension removed.

    Args:
        abc_file: Path to the input ``.abc`` file.

    Returns:
        The value returned by ``os.system`` for the ``timidity`` command
        (0 conventionally indicates success).
    """
    # Drop only a trailing ".abc"; str.rstrip('.abc') would strip any run of
    # the characters '.', 'a', 'b', 'c' and mangle names such as "abba.abc".
    ext = '.abc'
    suf = abc_file[:-len(ext)] if abc_file.endswith(ext) else abc_file

    cmd = "abc2midi {} -o {}".format(abc_file, suf + ".mid")
    os.system(cmd)

    cmd = "timidity {}.mid -Ow {}.wav".format(suf, suf)
    return os.system(cmd)

def play_wav(wav_file):
    """Wrap ``wav_file`` in an ``ipythondisplay.Audio`` object and return it."""
    audio_widget = ipythondisplay.Audio(wav_file)
    return audio_widget

def play_song(song):
    """Save an ABC-notation song, convert it to WAV and return it for playback.

    Args:
        song: ABC-notation text.

    Returns:
        The result of ``play_wav`` for the generated WAV when ``abc2wav``
        returns 0, otherwise ``None``.
    """
    # save_song_to_abc needs a file name; calling it with the song alone
    # raised TypeError before anything was written.
    basename = save_song_to_abc(song, "tmp")
    if abc2wav(basename + ".abc") != 0:
        return None

    return play_wav(basename + ".wav")

In [58]:
# Notation files are looked up under <current working directory>/notation/.
cwd = os.getcwd()
rootpath = '{}/notation/'.format(cwd)
notation_list = make_datapath_list(rootpath)

In [61]:
# Read every notation file and keep its cleaned-up text in `songs`.
songs = []

for notation in tqdm(notation_list):
    with open(notation, 'r') as f:
        text = f.read()
    # The raw text is fully read, so the file can be closed before cleaning.
    song = extract_song_data(text)
    songs.append(song)

  0%|          | 0/1744 [00:00<?, ?it/s]

In [69]:
# Print one loaded song as a quick sanity check.
example_song = songs[119]
print("Example song: ", example_song, sep="\n")

Example song: 
X: 0
T: Austria
Z: Franz Joseph Hayden, 1797
Z: Public  domain
L: 1/4
M: 4/4
V: P1 name="Unnamed-000"
V: P2 name="Unnamed-001"
V: P3 name="Tempo Track"
K: Eb
[V: P1]  [B,3/E3/] [B,/F/] [EG] [DF] | [FA] [EG] [D/F/]D/ [B,E] | [C/c/]D/ [EB] [FA] [EG] | [CF] [E/G/]E/ [D2B2] | [B,3/E3/] [B,/F/] [EG] [DF] | [FA] [EG] [D/F/]D/ [B,E] | [C/c/]D/ [EB] [FA] [EG] | [CF] [E/G/]E/ [D2B2] | [B,F] [B,G] [B,/F/]D/ B, | [C/A/]D/ [EG] [D/F/][B,/D/] B, | [B,B] [C/A/]D/ [E3/G3/] [E/G/] | [E3/=A3/] [E/A/] [D2B2] | [E3/e3/] [E/d/] [E/d/]c/ [EB] | [E3/c3/] [E/B/] [D/B/]A/ [EG] | [DF] [D/G/]A/ [E/B/]c/ [C/A/]F/ | [B,E] [D/G/]F/ E2- | E2z2|]
[V: P2]  [E,3/G,3/] [E,/A,/] [E,B,] [B,,B,] | [D,B,] [E,B,] [B,,A,] [E,G,] | A, [G,B,] [D,B,] [E,B,] | [A,,A,] [=A,,C] [B,,2F,2] | [E,3/G,3/] [E,/A,/] [E,B,] [B,,B,] | [D,B,] [E,B,] [B,,A,] [E,G,] | A, [G,B,] [D,B,] [E,B,] | [A,,A,] [=A,,C] [B,,2F,2] | [B,,D,] [B,,E,] [B,,/D,/]F,/ [B,,/A,/]G,/ | [B,,F,] [B,,/E,/]G,/ [B,,/B,/]F,/ [B,,/D,/]A,,/ | [G,,G,] [A,,/F

In [72]:
# Concatenate all songs (separated by a blank line) and collect the distinct characters.
joined = "\n\n".join(songs)
vocab = sorted(set(joined))
print("There are {} unique characters in the dataset".format(len(vocab)))

There are 109 unique characters in the dataset


In [66]:
# Lookup tables: character -> integer id (its position in vocab) and id -> character.
char2idx = dict(zip(vocab, range(len(vocab))))
idx2char = np.array(vocab)

In [73]:
def vectorize_string(string):
    """Map each character of ``string`` to its id in ``char2idx``.

    Args:
        string: Text whose characters are all keys of ``char2idx``.

    Returns:
        numpy.ndarray: One id per character, in the order they appear.

    Raises:
        KeyError: If a character is not present in ``char2idx``.
    """
    ids = [char2idx[ch] for ch in string]
    return np.array(ids)

# Encode the whole joined corpus as a single array of character ids.
vectorized_songs = vectorize_string(joined)