In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import os
import re
import functools
from tqdm.notebook import tqdm
import subprocess
import glob
from IPython import display as ipythondisplay
from torch.utils.data import Dataset, DataLoader

In [2]:
def make_datapath_list(rootpath):
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    

    path_list = []
    
    for c in tqdm(alphabet):
        target_path = os.path.join(rootpath + c + '/*.abc')
        for path in glob.glob(target_path):
            path_list.append(path)

    return path_list

In [3]:
def extract_song_data(text):
    # extract song from abc notation
    # remove header
    text = re.sub(r'%%.*', '', text)
    text = text.strip()
    text = re.sub(r'\n{2,}',r'\n',text)
    return text

In [4]:
def save_song_to_abc(song, filename):
    save_name = "/result/{}.abc".format(filename)
    with open(save_name, 'w') as f:
        f.write(song)
    
    return filename

def abc2wav(abc_file):
    suf = abc_file.rsrip('.abc')
    cmd = "abc2midi {} -o {}".format(abc_file, suf + ".mid")
    os.system(cmd)
    cmd = "timidity {}.mid -Ow {}.wav".format(suf, suf)

    return os.system()

def play_wav(wav_file):
    return ipythondisplay.Audio(wav_file)

def play_song(song):
    basename = save_song_to_abc(song)
    ret = abc2wav(basename + ".abc")
    if ret == 0:
        return play_wav(basename + ".wav")
    
    return None

In [5]:
cwd = os.getcwd()
rootpath = cwd + '/notation/'
notation_list = make_datapath_list(rootpath)

  0%|          | 0/26 [00:00<?, ?it/s]

In [6]:
songs = []

for notation in tqdm(notation_list):
    with open(notation, 'r') as f:
        text = f.read()
        song = extract_song_data(text)
        songs.append(song)

  0%|          | 0/1744 [00:00<?, ?it/s]

In [7]:
example_song = songs[119]
print("Example song: ")
print(example_song)

Example song: 
X: 0
T: Austria
Z: Franz Joseph Hayden, 1797
Z: Public  domain
L: 1/4
M: 4/4
V: P1 name="Unnamed-000"
V: P2 name="Unnamed-001"
V: P3 name="Tempo Track"
K: Eb
[V: P1]  [B,3/E3/] [B,/F/] [EG] [DF] | [FA] [EG] [D/F/]D/ [B,E] | [C/c/]D/ [EB] [FA] [EG] | [CF] [E/G/]E/ [D2B2] | [B,3/E3/] [B,/F/] [EG] [DF] | [FA] [EG] [D/F/]D/ [B,E] | [C/c/]D/ [EB] [FA] [EG] | [CF] [E/G/]E/ [D2B2] | [B,F] [B,G] [B,/F/]D/ B, | [C/A/]D/ [EG] [D/F/][B,/D/] B, | [B,B] [C/A/]D/ [E3/G3/] [E/G/] | [E3/=A3/] [E/A/] [D2B2] | [E3/e3/] [E/d/] [E/d/]c/ [EB] | [E3/c3/] [E/B/] [D/B/]A/ [EG] | [DF] [D/G/]A/ [E/B/]c/ [C/A/]F/ | [B,E] [D/G/]F/ E2- | E2z2|]
[V: P2]  [E,3/G,3/] [E,/A,/] [E,B,] [B,,B,] | [D,B,] [E,B,] [B,,A,] [E,G,] | A, [G,B,] [D,B,] [E,B,] | [A,,A,] [=A,,C] [B,,2F,2] | [E,3/G,3/] [E,/A,/] [E,B,] [B,,B,] | [D,B,] [E,B,] [B,,A,] [E,G,] | A, [G,B,] [D,B,] [E,B,] | [A,,A,] [=A,,C] [B,,2F,2] | [B,,D,] [B,,E,] [B,,/D,/]F,/ [B,,/A,/]G,/ | [B,,F,] [B,,/E,/]G,/ [B,,/B,/]F,/ [B,,/D,/]A,,/ | [G,,G,] [A,,/F

In [8]:
# define a class to read data
# this class is used to read data from a list of songs and return a data
# the input is a list of songs, sequence length, batch size
# the output is a batch of data

class SongDataset(Dataset):
    def __init__(self, songs, seq_len):
        self.songs = songs
        self.seq_len = seq_len
        self.vocab = set()
        self.vocab_to_int = {}
        self.int_to_vocab = {}
        self.vocab_size = 0
        self.song_index = 0
        self.song = self.songs[self.song_index]
        self.song_len = len(self.song)
        self.song_index += 1
        self.create_vocab()
        self.create_int()
        self.n_batch = int(self.song_len / self.seq_len)
        
    def create_vocab(self):
        for song in self.songs:
            self.vocab = self.vocab.union(set(song))
        self.vocab = sorted(self.vocab)
        self.vocab_size = len(self.vocab)
        
    def create_int(self):
        self.vocab_to_int = {c: i for i, c in enumerate(self.vocab)}
        self.int_to_vocab = dict(enumerate(self.vocab))
        
    def __len__(self):
        return self.n_batch
    
    def __getitem__(self, index):
        x = np.zeros((self.seq_len))
        y = np.zeros((self.seq_len))

        for i in range(self.seq_len):
            x[i] = self.vocab_to_int[self.song[index * self.seq_len + i]]
            y[i] = self.vocab_to_int[self.song[index * self.seq_len + i + 1]]
            
        return x, y

In [9]:
# define a class to build a RNN model
# the input is the number of hidden units, the number of layers, the size of the vocabulary, the batch size, the sequence length

class RNN(nn.Module):
    def __init__(self, n_hidden, n_layers, embedding_dim, vocab_size, batch_size, seq_len):
        super(RNN, self).__init__()
        self.n_hidden = n_hidden
        self.n_layers = n_layers
        self.vocab_size = vocab_size
        self.batch_size = batch_size
        self.seq_len = seq_len
        self.embed = nn.Embedding(self.vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, self.n_hidden, self.n_layers, batch_first=True)
        self.fc = nn.Linear(self.n_hidden, self.vocab_size)
        
    def forward(self, x, hidden, cell):
        x = self.embed(x)
        r_out, (hidden, cell) = self.lstm(x, (hidden,cell))

        r_out = r_out.contiguous().view(-1, self.n_hidden)
        
        out = self.fc(r_out)
        
        return out, hidden, cell
    
    def init_hidden(self):
        hidden = torch.zeros(self.n_layers, self.batch_size, self.n_hidden).cuda()
        cell = torch.zeros(self.n_layers, self.batch_size, self.n_hidden).cuda()
        
        return hidden, cell

In [10]:
# define a training function
# the input is the model, the number of epochs, optimzer, loss function, the dataloader, the sequence length, the batch size

def train(model, epochs, optimizer, criterion, dataloader, seq_len, batch_size):
    model.train()
    for epoch in range(epochs):
        
        h, c = model.init_hidden()

        print(h.shape)
        
        for x, y in dataloader:
            x = x.long().to(DEVICE)
            y = y.long().to(DEVICE)

            model.zero_grad()
            
            output, h, c = model(x, h, c)
            
            loss = criterion(output.view(batch_size * seq_len, -1), y.view(batch_size * seq_len))
            # loss = criterion(output, y)
            loss_value=loss.item()
            loss.backward(retain_graph=True)
            optimizer.step()

        print("Epoch: {}/{}...".format(epoch + 1, epochs),
                "Loss: {:.4f}...".format(loss.item()))

In [11]:
# define a function to generate new text
# the input is the model, dataset, the size of the vocabulary, the number of characters to generate, the start string

def generate(model, dataset, vocab_size, size, start='X'):
    model.eval()
    chars = [ch for ch in start]
    hidden = model.init_hidden()
    x = np.zeros((1, 1))
    x[0, 0] = dataset.vocab_to_int[start]
    x = torch.from_numpy(x).long()
    
    for i in range(size):
        hidden = tuple([each.data for each in hidden])
        output, hidden = model(x, hidden)
        p = F.softmax(output, dim=1).data
        if dataset.int_to_vocab[int(torch.max(p, 1)[1])] == 'E':
            break
        else:
            chars.append(dataset.int_to_vocab[int(torch.max(p, 1)[1])])
            x = np.zeros((1, 1))
            x[0, 0] = dataset.vocab_to_int[dataset.int_to_vocab[int(torch.max(p, 1)[1])]]
            x = torch.from_numpy(x).long()
            
    return ''.join(chars)

In [12]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 16
SEQ_LEN = 10
N_EPOCHS = 100
N_HIDDEN = 512
N_LAYERS = 3
LEARNING_RATE = 0.001
EMBEDDING_DIM = 256

In [13]:
songdata = SongDataset(songs, SEQ_LEN)

In [14]:
trainloader = DataLoader(songdata, batch_size=BATCH_SIZE, shuffle=True)

In [15]:
batch_iterator = iter(trainloader)
inputs, targets = next(batch_iterator)
print(inputs)
print(targets.shape)

tensor([[57., 37., 10., 17., 13., 34., 17., 13., 58.,  1.],
        [37., 13.,  1., 88.,  1., 57., 43., 24.,  1., 20.],
        [57., 37., 10., 34., 58.,  1., 57., 37., 10., 13.],
        [74., 62., 70., 75.,  0., 42., 24.,  1., 15., 13.],
        [54., 24.,  1., 14.,  0., 50., 24.,  1., 31., 75.],
        [10., 34., 58.,  1., 57., 37., 10., 13., 34., 13.],
        [58.,  1., 57., 37., 10., 34., 58.,  1., 57., 37.],
        [13., 34., 17., 13., 58.,  1., 88.,  1., 57., 37.],
        [13., 37., 13.,  1., 31.,  1., 34., 13.,  1., 88.],
        [43., 24.,  1., 23., 13., 22., 58.,  1.,  1., 31.],
        [27.,  3., 74., 66., 73., 76., 65., 86.,  3.,  0.],
        [24.,  1., 20., 13., 22., 58.,  1.,  1., 57., 37.],
        [13., 34., 17., 13., 58.,  1., 57., 37., 10., 17.],
        [13.,  1., 88.,  1., 31., 13., 37., 13., 31., 13.],
        [88.,  1., 57., 37., 10., 34., 58.,  1., 57., 37.],
        [10., 17., 13., 34., 17., 13., 58.,  1., 88.,  1.]],
       dtype=torch.float64)
torch.Size(

In [16]:
model = RNN(N_HIDDEN, N_LAYERS, EMBEDDING_DIM, songdata.vocab_size, BATCH_SIZE, SEQ_LEN)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
loss_fn = nn.CrossEntropyLoss()

In [17]:
model = model.to(DEVICE)

In [18]:
train(model, N_EPOCHS, optimizer, loss_fn, trainloader, SEQ_LEN, BATCH_SIZE)

torch.Size([3, 16, 512])


RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [2048, 256]] is at version 3; expected version 1 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).