In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import os
import re
import functools
from tqdm.notebook import tqdm
import subprocess
import glob
from IPython import display as ipythondisplay
from torch.utils.data import Dataset, DataLoader
from torch.distributions import Categorical

In [2]:
def make_datapath_list(rootpath):
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    

    path_list = []
    
    for c in tqdm(alphabet):
        target_path = os.path.join(rootpath + c + '/*.abc')
        for path in glob.glob(target_path):
            path_list.append(path)

    return path_list

In [3]:
def extract_song_data(text):
    # extract song from abc notation
    # remove header
    text = re.sub(r'%%.*', '', text)
    text = re.sub(r'Z:.*\n', '', text)
    text = re.sub(r'K:.*\n', '', text)
    text = re.sub(r'Q:.*\n', '', text)
    text = text.strip()
    text = re.sub(r'\n{2,}',r'\n',text)
    return text

In [4]:
cwd = os.getcwd()
rootpath = cwd + '/notation/'
notation_list = make_datapath_list(rootpath)

  0%|          | 0/26 [00:00<?, ?it/s]

In [5]:
songs = []

for notation in tqdm(notation_list):
    with open(notation, 'r') as f:
        text = f.read()
        song = extract_song_data(text)
        songs.append(song)

  0%|          | 0/1744 [00:00<?, ?it/s]

In [6]:
example_song = songs[119]
print("Example song: ")
print(example_song)

Example song: 
X: 0
T: Austria
L: 1/4
M: 4/4
V: P1 name="Unnamed-000"
V: P2 name="Unnamed-001"
V: P3 name="Tempo Track"
[V: P1]  [B,3/E3/] [B,/F/] [EG] [DF] | [FA] [EG] [D/F/]D/ [B,E] | [C/c/]D/ [EB] [FA] [EG] | [CF] [E/G/]E/ [D2B2] | [B,3/E3/] [B,/F/] [EG] [DF] | [FA] [EG] [D/F/]D/ [B,E] | [C/c/]D/ [EB] [FA] [EG] | [CF] [E/G/]E/ [D2B2] | [B,F] [B,G] [B,/F/]D/ B, | [C/A/]D/ [EG] [D/F/][B,/D/] B, | [B,B] [C/A/]D/ [E3/G3/] [E/G/] | [E3/=A3/] [E/A/] [D2B2] | [E3/e3/] [E/d/] [E/d/]c/ [EB] | [E3/c3/] [E/B/] [D/B/]A/ [EG] | [DF] [D/G/]A/ [E/B/]c/ [C/A/]F/ | [B,E] [D/G/]F/ E2- | E2z2|]
[V: P2]  [E,3/G,3/] [E,/A,/] [E,B,] [B,,B,] | [D,B,] [E,B,] [B,,A,] [E,G,] | A, [G,B,] [D,B,] [E,B,] | [A,,A,] [=A,,C] [B,,2F,2] | [E,3/G,3/] [E,/A,/] [E,B,] [B,,B,] | [D,B,] [E,B,] [B,,A,] [E,G,] | A, [G,B,] [D,B,] [E,B,] | [A,,A,] [=A,,C] [B,,2F,2] | [B,,D,] [B,,E,] [B,,/D,/]F,/ [B,,/A,/]G,/ | [B,,F,] [B,,/E,/]G,/ [B,,/B,/]F,/ [B,,/D,/]A,,/ | [G,,G,] [A,,/F,/]B,,/ [C,3/E,3/] [C,/C/] | [F,3/C3/] [F,,/F,/] [B,,

In [7]:
# define a class to read data
# this class is used to read data from a list of songs and return a data
# the input is a list of songs, sequence length, batch size
# the output is a batch of data

class SongDataset(Dataset):
    def __init__(self, songs, seq_len):
        self.songs = '\n\n'.join(songs)
        self.seq_len = seq_len
        self.vocab = set()
        self.vocab_to_int = {}
        self.int_to_vocab = {}
        self.vocab_size = 0
        self.song_len = len(self.songs)
        self.create_vocab()
        self.create_int()
        self.n_batch = int(self.song_len / self.seq_len)
        
    def create_vocab(self):
        self.vocab = set(self.songs)
        self.vocab = sorted(self.vocab)
        self.vocab_size = len(self.vocab)
        
    def create_int(self):
        self.vocab_to_int = {c: i for i, c in enumerate(self.vocab)}
        self.int_to_vocab = dict(enumerate(self.vocab))
        
    def __len__(self):
        return self.n_batch
    
    def __getitem__(self, index):
        index = index * self.seq_len
        x_str = self.songs[index : index + self.seq_len]
        y_str = self.songs[index+1 : index + self.seq_len + 1]
        
        if len(x_str) < self.seq_len:
            x_str = x_str + ' ' * (self.seq_len - len(x_str))

        if len(y_str) < self.seq_len:
            y_str = y_str + ' ' * (self.seq_len - len(y_str))
        

        x = torch.tensor([self.vocab_to_int[c] for c in x_str])
        y = torch.tensor([self.vocab_to_int[c] for c in y_str])

        return x, y

In [8]:
# define a class to build a RNN model
# the input is the number of hidden units, the number of layers, the size of the vocabulary, the batch size, the sequence length

class RNN(nn.Module):
    def __init__(self, n_hidden, n_layers, embedding_dim, vocab_size, batch_size, seq_len):
        super(RNN, self).__init__()
        self.n_hidden = n_hidden
        self.n_layers = n_layers
        self.vocab_size = vocab_size
        self.batch_size = batch_size
        self.seq_len = seq_len
        self.embed = nn.Embedding(self.vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, self.n_hidden, self.n_layers, batch_first=True)
        self.fc = nn.Linear(self.n_hidden, self.vocab_size)
        
    def forward(self, x, prev_state):
        x = self.embed(x)
        r_out, hidden = self.lstm(x, prev_state)
        
        out = self.fc(r_out)
        
        return out, hidden
    
    def init_hidden(self):
        weight = next(self.parameters()).data

        hidden = (weight.new(self.n_layers, self.batch_size, self.n_hidden).zero_(),
                    weight.new(self.n_layers, self.batch_size, self.n_hidden).zero_())
        
        return hidden

In [9]:
# define a training function
# the input is the model, the number of epochs, optimzer, loss function, the dataloader, the sequence length, the batch size

def train(model, epochs, optimizer, criterion, dataloader, seq_len, batch_size):
    model.train()
    for epoch in tqdm(range(epochs)):
        h = model.init_hidden()
        for x, y in dataloader:
            x = x.to(DEVICE)
            y = y.to(DEVICE)
            h = tuple([e.data for e in h])
            model.zero_grad()
            output, h = model(x, h)
            loss = criterion(output.view(batch_size * seq_len, -1), y.view(batch_size * seq_len))
            loss.backward()
            optimizer.step()

        print("Epoch: {}/{}...".format(epoch+1, epochs),
            "Loss: {:.4f}...".format(loss.item()))

    return model

In [10]:
# define a function to generate new text
# the input is the model, dataset, the size of the vocabulary, the number of characters to generate, the start string

def generate(model, dataset, vocab_size, size, start_str):
    model.eval()
    h = model.init_hidden()
    x = torch.tensor([dataset.vocab_to_int[c] for c in start_str]).to(DEVICE)
    x = x.view(1,-1)
    chars = [c for c in start_str]
    
    for i in range(size):
        h = tuple([e.data for e in h])
        output, h = model(x, h)
        # p = F.softmax(output, dim=1).data
        output = F.softmax(torch.squeeze(output), dim=0)
        dist = Categorical(output)
        index = dist.sample()
        # top_c = np.random.choice(vocab_size, 1, p=p.numpy().ravel())[0]
        chars.append(dataset.int_to_vocab[index.item()])
        x = torch.tensor([index.item()]).to(DEVICE)
        x = x.view(1,-1)
        
    return ''.join(chars)

In [11]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 16
SEQ_LEN = 10
N_EPOCHS = 100
N_HIDDEN = 512
N_LAYERS = 3
LEARNING_RATE = 0.001
EMBEDDING_DIM = 256

In [12]:
songdata = SongDataset(songs, SEQ_LEN)

In [13]:
trainloader = DataLoader(songdata, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

In [14]:
batch_iterator = iter(trainloader)
inputs, targets = next(batch_iterator)
print(inputs)
print(targets.shape)

tensor([[11, 57, 56, 33, 10, 10, 13, 22, 35, 10],
        [32, 57, 86,  1, 56, 35, 10, 17, 13, 18],
        [83, 74, 10,  1, 49, 68, 65,  1, 30, 74],
        [34, 10, 13,  1, 33, 10, 13,  1, 87,  1],
        [10,  1, 31, 10, 10,  1, 87,  1, 30, 10],
        [61, 67, 65,  1, 62, 78, 69, 74, 67, 11],
        [31, 10, 10,  1, 87,  1, 34, 10, 17, 87],
        [35, 17, 13, 63, 17, 13, 57,  1, 56, 34],
        [10, 13, 18,  1, 32, 10,  1, 35, 10,  1],
        [57,  1, 87,  1, 56, 66, 64,  7, 57,  1],
        [ 1, 76, 61, 69, 78,  1, 50, 74, 11,  1],
        [13, 18, 11, 32, 17, 13, 18, 11, 34, 17],
        [86,  1, 56, 35, 36, 58, 30, 57, 86, 16],
        [10, 13,  1, 87,  1,  8, 30, 10, 13, 33],
        [ 0, 56, 51, 24,  1, 45, 16, 57,  1,  1],
        [ 0, 51, 24,  1, 45, 16,  1, 74, 61, 73]])
torch.Size([16, 10])


In [15]:
model = RNN(N_HIDDEN, N_LAYERS, EMBEDDING_DIM, songdata.vocab_size, BATCH_SIZE, SEQ_LEN)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
loss_fn = nn.CrossEntropyLoss()

In [16]:
model = model.to(DEVICE)

In [17]:
trained_model = train(model, N_EPOCHS, optimizer, loss_fn, trainloader, SEQ_LEN, BATCH_SIZE)

  0%|          | 0/100 [00:00<?, ?it/s]

RuntimeError: Expected hidden[0] size (3, 14, 512), got [3, 16, 512]

In [None]:
song = generate(trained_model, songdata, songdata.vocab_size, 500, "X")

In [None]:
print(song)

X: 0
T: Angelus Ad Virginum
L: Ad Virginum
L: 1/4
M: 6/8
V: P1 name="melody"
V: P1 1/4
M: 6/8
V: P1 name="melody"
V: P1 name="melody"
V: P1 name="melody"
V: P1 name="melody"
M: 6/8
V: P1 name="melody"
V: P1 name="melody"
V: P1 name="melody"
V: P1 name="melody"
V: P1 name="melody"
V: Angelus Ad Virginum
L: 1/4
M: 6/8
V: P1 name="melody"
M: 6/8
V: P1 name=" Virginum
L: 1/4
M: 6/8
V: P1 name="melody"
V: P1 name="melody"
V: P1 name="melody"
V: P1 name="melody"
V: P1 name="melody"
Virginum
L: 1/4
M: 6
