# RNN for Text Generation

In [None]:
import torch
from torch import nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt

## Get Text Data

In [None]:
# Load the whole corpus into memory as a single string.
with open('songs.txt', encoding='utf8') as f:
    text = f.read()
# Corpus length in characters, followed by a preview of the first 1000 characters.
print(len(text), text[:1000], sep='\n')

200670
X:1
T:Alexander's
Z: id:dc-hornpipe-1
M:C|
L:1/8
K:D Major
(3ABc|dAFA DFAd|fdcd FAdf|gfge fefd|(3efe (3dcB A2 (3ABc|!
dAFA DFAd|fdcd FAdf|gfge fefd|(3efe dc d2:|!
AG|FAdA FAdA|GBdB GBdB|Acec Acec|dfaf gecA|!
FAdA FAdA|GBdB GBdB|Aceg fefd|(3efe dc d2:|!

X:2
T:An Buachaill Dreoite
Z: id:dc-hornpipe-2
M:C|
L:1/8
K:G Major
GF|DGGB d2GB|d2GF Gc (3AGF|DGGB d2GB|dBcA F2GF|!
DGGB d2GF|DGGF G2Ge|fgaf gbag|fdcA G2:|!
GA|B2BG c2cA|d2GF G2GA|B2BG c2cA|d2DE F2GA|!
B2BG c2cA|d^cde f2 (3def|g2gf gbag|fdcA G2:|!

X:3
T:Belfast
Z: id:dc-hornpipe-3
M:C|
L:1/8
K:D Major
ag|(3faf df AdFA|DFAd f2ef|gbec dfAF|GABG E2ag|!
(3faf df AdFA|DFAd f2ef|gbed cABc|d2f2 d2:|!
(3DEF|GFGA Bcde|fgfe dcdB|A2f2 fef2|G2e2 ede2|!
GFGA Bcde|fgfe dcdB|Afed cABc|d2f2 d2:|!
ag|(3fgf (3efe (3ded (3cdc|(3BcB (3ABA G2ba|(3gag (3fgf (3efe (3ded|(3cdc (3BcB A2ag|!
(3fgf (3efe (3ded (3cdc|(3BcB (3ABA (3GAG (3FGF|Eged cABc|d2f2 d2:|!

X:4
T:Blackbird
Z: id:dc-hornpipe-4
M:C|
L:1/8
K:D Mixolydian
AG|F2FA GFD2|de (3fed d^cAF|G2GF

### Encode Entire Text

In [None]:
# Sort the unique characters so the index <-> character mapping is identical on
# every run; iterating a bare set of strings can change order between sessions,
# which would make saved weights and recorded outputs irreproducible.
all_characters = sorted(set(text))
# decoder maps integer index -> character.
decoder = dict(enumerate(all_characters))

In [None]:
# Inspect the index -> character mapping.
decoder

{0: '5',
 1: 'y',
 2: 'p',
 3: '8',
 4: 'h',
 5: 'O',
 6: '.',
 7: '9',
 8: '#',
 9: 'g',
 10: "'",
 11: '2',
 12: '\n',
 13: 'P',
 14: 'U',
 15: 'l',
 16: '4',
 17: 't',
 18: 'M',
 19: 'b',
 20: 'x',
 21: '=',
 22: '!',
 23: '"',
 24: '/',
 25: 'u',
 26: '<',
 27: ']',
 28: '_',
 29: '(',
 30: '0',
 31: '6',
 32: 'a',
 33: 'S',
 34: 's',
 35: '[',
 36: 'z',
 37: 'X',
 38: ',',
 39: 'k',
 40: 'G',
 41: '-',
 42: 'B',
 43: 'D',
 44: '7',
 45: '3',
 46: 'K',
 47: 'W',
 48: 'n',
 49: 'v',
 50: 'J',
 51: 'F',
 52: '>',
 53: 'H',
 54: 'Q',
 55: 'Z',
 56: '^',
 57: 'R',
 58: 'e',
 59: 'd',
 60: 'L',
 61: ')',
 62: '1',
 63: ':',
 64: ' ',
 65: 'I',
 66: 'V',
 67: 'j',
 68: 'o',
 69: 'w',
 70: 'C',
 71: 'A',
 72: '|',
 73: 'i',
 74: 'c',
 75: 'N',
 76: 'E',
 77: 'T',
 78: 'q',
 79: 'f',
 80: 'Y',
 81: 'r',
 82: 'm'}

In [None]:
# Invert decoder so each character maps back to its integer index.
encoder = dict(zip(decoder.values(), decoder.keys()))

In [None]:
# Inspect the character -> index mapping.
encoder

{'5': 0,
 'y': 1,
 'p': 2,
 '8': 3,
 'h': 4,
 'O': 5,
 '.': 6,
 '9': 7,
 '#': 8,
 'g': 9,
 "'": 10,
 '2': 11,
 '\n': 12,
 'P': 13,
 'U': 14,
 'l': 15,
 '4': 16,
 't': 17,
 'M': 18,
 'b': 19,
 'x': 20,
 '=': 21,
 '!': 22,
 '"': 23,
 '/': 24,
 'u': 25,
 '<': 26,
 ']': 27,
 '_': 28,
 '(': 29,
 '0': 30,
 '6': 31,
 'a': 32,
 'S': 33,
 's': 34,
 '[': 35,
 'z': 36,
 'X': 37,
 ',': 38,
 'k': 39,
 'G': 40,
 '-': 41,
 'B': 42,
 'D': 43,
 '7': 44,
 '3': 45,
 'K': 46,
 'W': 47,
 'n': 48,
 'v': 49,
 'J': 50,
 'F': 51,
 '>': 52,
 'H': 53,
 'Q': 54,
 'Z': 55,
 '^': 56,
 'R': 57,
 'e': 58,
 'd': 59,
 'L': 60,
 ')': 61,
 '1': 62,
 ':': 63,
 ' ': 64,
 'I': 65,
 'V': 66,
 'j': 67,
 'o': 68,
 'w': 69,
 'C': 70,
 'A': 71,
 '|': 72,
 'i': 73,
 'c': 74,
 'N': 75,
 'E': 76,
 'T': 77,
 'q': 78,
 'f': 79,
 'Y': 80,
 'r': 81,
 'm': 82}

In [None]:
# Translate every character of the corpus into its integer index.
encoded_text = np.array(list(map(encoder.__getitem__, text)))
# Preview the first 100 encoded characters.
encoded_text[:100]

array([37, 63, 62, 12, 77, 63, 71, 15, 58, 20, 32, 48, 59, 58, 81, 10, 34,
       12, 55, 63, 64, 73, 59, 63, 59, 74, 41,  4, 68, 81, 48,  2, 73,  2,
       58, 41, 62, 12, 18, 63, 70, 72, 12, 60, 63, 62, 24,  3, 12, 46, 63,
       43, 64, 18, 32, 67, 68, 81, 12, 29, 45, 71, 42, 74, 72, 59, 71, 51,
       71, 64, 43, 51, 71, 59, 72, 79, 59, 74, 59, 64, 51, 71, 59, 79, 72,
        9, 79,  9, 58, 64, 79, 58, 79, 59, 72, 29, 45, 58, 79, 58])

### One Hot Encoding

In [None]:
def one_hot_encoder(encoded_text, num_uni_chars):
    """One-hot encode an integer array.

    Args:
        encoded_text: NumPy array of integer indices, any shape.
        num_uni_chars: Length of each one-hot vector (number of classes).

    Returns:
        float32 array of shape ``encoded_text.shape + (num_uni_chars,)`` with a
        1.0 at each index position and 0.0 everywhere else.
    """
    flat_codes = encoded_text.ravel()
    num_items = flat_codes.shape[0]
    # Work on a 2-D (item, class) table, then restore the caller's leading shape.
    table = np.zeros((num_items, num_uni_chars), dtype=np.float32)
    table[np.arange(num_items), flat_codes] = 1.0
    return table.reshape(tuple(encoded_text.shape) + (num_uni_chars,))

In [None]:
# Sanity check: one-hot encode the first three characters of the corpus.
one_hot_encoder(encoded_text[:3], len(set(text)))

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.

## Creating Training Batches

In [None]:
def generate_batches(encoded_text, samp_per_batch=10, seq_len=50):
    """Yield (x, y) batches of input sequences and next-character targets.

    The text is truncated to a whole number of batches and reshaped to
    ``samp_per_batch`` rows; each batch is a window of ``seq_len`` columns.

    Args:
        encoded_text: 1-D NumPy array of integer character indices.
        samp_per_batch: Number of sequences (rows) in every batch.
        seq_len: Number of characters in every sequence.

    Yields:
        Tuples ``(x, y)`` of arrays shaped ``(samp_per_batch, seq_len)``.
        ``y`` is ``x`` shifted left by one character; its last column is the
        column following the window, or column 0 of the reshaped text for the
        final window.
    """
    char_per_batch = samp_per_batch * seq_len
    num_batches_avail = len(encoded_text) // char_per_batch
    # Drop the tail that does not fill a complete batch.
    encoded_text = encoded_text[:num_batches_avail * char_per_batch]
    encoded_text = encoded_text.reshape((samp_per_batch, -1))
    row_len = encoded_text.shape[1]
    for n in range(0, row_len, seq_len):
        x = encoded_text[:, n:n + seq_len]
        y = np.zeros_like(x)
        y[:, :-1] = x[:, 1:]
        next_col = n + seq_len
        # Explicit boundary check instead of catching every exception: only the
        # final window has no following column, so it wraps around to column 0.
        if next_col < row_len:
            y[:, -1] = encoded_text[:, next_col]
        else:
            y[:, -1] = encoded_text[:, 0]
        yield x, y

## Creating the LSTM Model

In [None]:
class CharModel(nn.Module):
    """Character-level model: multi-layer LSTM, dropout, then a linear layer.

    Args:
        all_chars: Sized iterable of the unique characters (the vocabulary).
            Its iteration order defines the character indices.
        num_hidden: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        drop_prob: Dropout probability, used between LSTM layers and on the
            LSTM output.
        use_gpu: When True, ``hidden_state`` allocates its tensors on CUDA.
    """

    def __init__(self, all_chars, num_hidden=256, num_layers=4, drop_prob=0.5, use_gpu=False):
        super().__init__()
        self.drop_prob = drop_prob
        self.num_layers = num_layers
        self.num_hidden = num_hidden
        self.use_gpu = use_gpu
        self.all_chars = all_chars
        # index -> character
        self.decoder = dict(enumerate(all_chars))
        # character -> index. Built from this model's own decoder rather than a
        # notebook-level global, so the two mappings always agree with all_chars.
        self.encoder = {char: ind for ind, char in self.decoder.items()}
        self.lstm = nn.LSTM(len(self.all_chars), num_hidden, num_layers, dropout=drop_prob, batch_first=True)
        self.dropout = nn.Dropout(drop_prob)
        self.fc_linear = nn.Linear(num_hidden, len(self.all_chars))

    def forward(self, x, hidden):
        """Run the LSTM, dropout and linear layer on a batch.

        Args:
            x: Input tensor passed to the batch-first LSTM.
            hidden: Tuple ``(h, c)`` of LSTM states.

        Returns:
            ``(final_out, hidden)`` where ``final_out`` has every time step
            flattened into rows of ``len(all_chars)`` scores, and ``hidden`` is
            the updated LSTM state tuple.
        """
        lstm_output, hidden = self.lstm(x, hidden)
        drop_output = self.dropout(lstm_output)
        # Collapse (batch, step) into one axis so the linear layer scores every step.
        drop_output = drop_output.contiguous().view(-1, self.num_hidden)
        final_out = self.fc_linear(drop_output)
        return final_out, hidden

    def hidden_state(self, batch_size):
        """Return a zero-initialised ``(h, c)`` tuple for ``batch_size`` sequences.

        Each tensor has shape ``(num_layers, batch_size, num_hidden)`` and lives
        on CUDA when ``use_gpu`` is True, otherwise on the CPU.
        """
        device = torch.device('cuda' if self.use_gpu else 'cpu')
        shape = (self.num_layers, batch_size, self.num_hidden)
        hidden = (torch.zeros(shape, device=device),
                  torch.zeros(shape, device=device))
        return hidden

### Instance of the Model

In [None]:
# Loss over the per-character class scores.
criterion = nn.CrossEntropyLoss()
# Three-layer LSTM with 512 hidden units; use the GPU whenever CUDA is available.
model = CharModel(
    all_chars=all_characters,
    num_hidden=512,
    num_layers=3,
    drop_prob=0.5,
    use_gpu=torch.cuda.is_available(),
)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Number of elements in each parameter tensor; the sum is the model's total parameter count.
total_param = [int(p.numel()) for p in model.parameters()]
sum(total_param)

5467731

Aim for a total parameter count of roughly the same order of magnitude as the number of characters in the text.

### Training Data and Validation Data

In [None]:
# Fraction of the encoded text used for training; the remainder is validation data.
train_percent = 0.7
# Index of the first validation character.
train_ind = int(train_percent * len(encoded_text))

In [None]:
# Contiguous split: everything before train_ind trains, everything from it onward validates.
train_data, val_data = encoded_text[:train_ind], encoded_text[train_ind:]

## Training the Network

In [None]:
# Training hyperparameters.
epochs = 40       # full passes over train_data
batch_size = 128  # sequences per batch
seq_len = 100     # characters per sequence
tracker = 0       # running count of training steps across all epochs
# Width of the one-hot vectors: largest character index + 1.
# Use the vectorised ndarray method; builtin max() walks the array element by element.
num_char = encoded_text.max() + 1

In [None]:
model.train()
if model.use_gpu:
    model.cuda()

for i in range(epochs):
    # Every epoch starts from a zero hidden state.
    hidden = model.hidden_state(batch_size)
    for x, y in generate_batches(train_data, batch_size, seq_len):
        tracker += 1
        x = one_hot_encoder(x, num_char)
        inputs = torch.from_numpy(x)
        targets = torch.from_numpy(y)
        if model.use_gpu:
            inputs = inputs.cuda()
            targets = targets.cuda()
        # Carry the state values forward but cut the autograd history, so
        # backpropagation does not reach into earlier batches.
        hidden = tuple(state.detach() for state in hidden)
        lstm_output, hidden = model(inputs, hidden)
        loss = criterion(lstm_output, targets.view(batch_size * seq_len).long())
        model.zero_grad()
        loss.backward()
        # Clip the gradient norm before the optimizer step.
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=5)
        optimizer.step()

        # Every 40 training steps, evaluate on the validation split.
        if tracker % 40 == 0:
            val_hidden = model.hidden_state(batch_size)
            val_losses = []
            model.eval()
            # No gradients are needed for evaluation.
            with torch.no_grad():
                for val_x, val_y in generate_batches(val_data, batch_size, seq_len):
                    val_x = one_hot_encoder(val_x, num_char)
                    val_inputs = torch.from_numpy(val_x)
                    val_targets = torch.from_numpy(val_y)
                    if model.use_gpu:
                        val_inputs = val_inputs.cuda()
                        val_targets = val_targets.cuda()
                    val_hidden = tuple(state.detach() for state in val_hidden)
                    val_output, val_hidden = model(val_inputs, val_hidden)
                    val_loss = criterion(val_output, val_targets.view(batch_size * seq_len).long())
                    val_losses.append(val_loss.item())
            model.train()
            # Report the mean over all validation batches, not just the last one;
            # NaN when the validation split is too small to yield a batch.
            mean_val_loss = float(np.mean(val_losses)) if val_losses else float('nan')
            print(f"Epoch: {i} Step: {tracker} Val Loss: {mean_val_loss}")

Epoch: 3 Step: 40 Val Loss: 3.4219772815704346
Epoch: 7 Step: 80 Val Loss: 3.1488876342773438
Epoch: 11 Step: 120 Val Loss: 2.856606960296631
Epoch: 15 Step: 160 Val Loss: 2.390146255493164
Epoch: 19 Step: 200 Val Loss: 2.004641056060791
Epoch: 23 Step: 240 Val Loss: 1.8238532543182373
Epoch: 27 Step: 280 Val Loss: 1.7271721363067627
Epoch: 31 Step: 320 Val Loss: 1.6527515649795532
Epoch: 35 Step: 360 Val Loss: 1.6052765846252441
Epoch: 39 Step: 400 Val Loss: 1.5591416358947754


## Generating Predictions

In [None]:
def predict_next_char(model, char, hidden=None, k=1):
    """Sample the next character from the model's top-k predictions.

    Args:
        model: Model exposing ``encoder``, ``decoder``, ``all_chars``,
            ``use_gpu`` and ``hidden_state`` (as CharModel does).
        char: The current character; must be a key of ``model.encoder``.
        hidden: LSTM state tuple from the previous step, or None to start from
            a fresh zero state.
        k: Number of most probable characters to sample from.

    Returns:
        ``(next_char, hidden)``: the sampled character and the updated state.

    Raises:
        KeyError: If ``char`` is not in ``model.encoder``.
    """
    encoded_text = model.encoder[char]
    # Shape (1, 1): a batch of one sequence containing one character.
    encoded_text = np.array([[encoded_text]])
    encoded_text = one_hot_encoder(encoded_text, len(model.all_chars))
    inputs = torch.from_numpy(encoded_text)
    if model.use_gpu:
        inputs = inputs.cuda()
    # The default hidden=None previously crashed when iterated; start from zeros.
    if hidden is None:
        hidden = model.hidden_state(1)
    hidden = tuple(state.detach() for state in hidden)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        lstm_out, hidden = model(inputs, hidden)
        probs = F.softmax(lstm_out, dim=1)
    if model.use_gpu:
        probs = probs.cpu()
    probs, index_positions = probs.topk(k)
    # flatten() keeps a 1-D array even when k == 1; squeeze() would return a
    # 0-d array, which np.random.choice does not treat as a list of candidates.
    index_positions = index_positions.numpy().flatten()
    probs = probs.numpy().flatten().astype(np.float64)
    # Renormalise so the k retained probabilities sum to one.
    probs = probs / probs.sum()
    char = np.random.choice(index_positions, p=probs)
    return model.decoder[int(char)], hidden

In [None]:
def generate_text(model, size, seed='The', k=1):
    """Generate text by repeatedly sampling the next character from the model.

    The model is first primed with every character of ``seed``; the prediction
    that follows the last seed character is kept, and then ``size`` further
    characters are sampled, each from the previously generated one.

    Args:
        model: Trained model accepted by ``predict_next_char``.
        size: Number of sampling steps after priming.
        seed: Non-empty starting string; every character must be known to the model.
        k: Number of top predictions to sample from at each step.

    Returns:
        The seed followed by ``size + 1`` generated characters.

    Raises:
        ValueError: If ``seed`` is empty.
    """
    # Fail early and clearly: with an empty seed there is no character to
    # start from (previously this surfaced later as an unbound-variable error).
    if not seed:
        raise ValueError("seed must contain at least one character")
    if model.use_gpu:
        model.cuda()
    else:
        model.cpu()
    model.eval()
    output_chars = [c for c in seed]
    hidden = model.hidden_state(1)
    # Prime the hidden state on the seed; only the last prediction is kept.
    for char in seed:
        char, hidden = predict_next_char(model, char, hidden, k=k)
    output_chars.append(char)
    for _ in range(size):
        char, hidden = predict_next_char(model, output_chars[-1], hidden, k=k)
        output_chars.append(char)
    return ''.join(output_chars)

In [None]:
# Sample text starting from the seed 'The ', choosing among the top 3 predictions at each step.
# Requires `model` from the model-instance cell above to exist in the kernel.
print(generate_text(model, 1000, seed='The ', k=3))

NameError: name 'model' is not defined

## Music generation

Now, we will explore using the previous Recurrent Neural Network for music generation. We will train the model to learn the patterns in raw sheet music in ABC notation and then use this model to generate new music.

In [None]:
!pip install mitdeeplearning
!apt-get install abcmidi timidity
import mitdeeplearning as mdl
from IPython import display as ipythondisplay

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
abcmidi is already the newest version (20220218+ds1-1).
timidity is already the newest version (2.14.0-8ubuntu1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


In [None]:
# Read the ABC-notation corpus.
with open('songs.txt', 'r', encoding='utf8') as f:
    textABC = f.read()
# NOTE(review): extract_song_snippet presumably splits the text into individual
# songs (the recorded output says "Found 817 songs in text") — confirm in mitdeeplearning.
songs = mdl.lab1.extract_song_snippet(textABC)
# Print one extracted song as an example.
print("\nExample song: ")
print(songs[3])

Found 817 songs in text

Example song: 
X:4
T:Blackbird
Z: id:dc-hornpipe-4
M:C|
L:1/8
K:D Mixolydian
AG|F2FA GFD2|de (3fed d^cAF|G2GF GFDE|FdcA G2AG|!
F2FA GFD2|de (3fed d^cAG|AdcA GcAG|F2D2 D2:|!
fg|agfa gfeg|fd e^c d=cA2|agfa gfde|fdgf e2 fg|!
a2ge f3e|d^cde fdAG|AdcA GcAG|F2D2 D2:|!


In [None]:
# Play one song from the corpus (presumably renders the ABC text to audio — confirm in mitdeeplearning).
mdl.lab1.play_song(songs[5])

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Sorted list of the unique characters that occur in the ABC text.
all_chars = sorted(set(textABC))

# TODO: not implemented yet. Presumably a CharModel should be built from
# all_chars and trained on the ABC text before the generation cell below — confirm.

In [None]:
# Sample ABC text starting from the tune-header prefix "X:", then split it into songs.
generated_text = generate_text(model, size=1000, seed="X:", k=3)
generated_songs = mdl.lab1.extract_song_snippet(generated_text)
for i, song in enumerate(generated_songs):
    waveform = mdl.lab1.play_song(song)
    # Skip songs for which play_song returned nothing usable.
    if not waveform:
        continue
    print("Generated song", i)
    ipythondisplay.display(waveform)

Output hidden; open in https://colab.research.google.com to view.