#### Import libraries + change the search path

In [1]:
import numpy as np
import pickle
import os

In [2]:
%cd ..
%cd ..
%cd ..
%cd ..

/Users/nad/hse/2023-24/spring_proj2/res/RESO/generators/neural/transformer
/Users/nad/hse/2023-24/spring_proj2/res/RESO/generators/neural
/Users/nad/hse/2023-24/spring_proj2/res/RESO/generators
/Users/nad/hse/2023-24/spring_proj2/res/RESO


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


#### Data preprocessing

In [3]:
DATA_PATH = 'data/Bach'
notes = []
notes_file_path = os.path.join(DATA_PATH, 'notes')

input_size = 5

with open(notes_file_path, 'rb') as notes_file:
    notes = pickle.load(notes_file)

unique_notes = sorted(set(notes))
note_to_int = dict((note, number) for number, note in enumerate(unique_notes))
int_to_note = dict((number, note) for number, note in enumerate(unique_notes))

input_sequences = []
output_sequences = []

for i in range(len(notes) - 100 * input_size):
    sequence_in = notes[i:i + input_size]
    sequence_out = notes[i + input_size:i + 2 * input_size]
    
    input_sequences.append([note_to_int[char] for char in sequence_in])
    output_sequences.append([note_to_int[char] for char in sequence_out])

num_classes = len(unique_notes)
input_sequences = input_sequences[:408000]
output_sequences = output_sequences[:408000]

#### Setting up the data loader

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils.rnn import pad_sequence

In [5]:
from torch.utils.data import Dataset, DataLoader

batch_size = 64

class MyDataset(Dataset):
    def __init__(self, data, labels):
        self.data = torch.tensor(data, dtype=torch.long)
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

data = input_sequences
labels = output_sequences

dataset = MyDataset(data, labels)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

#### Model structure

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim

class CharTransformer(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers):
        super(CharTransformer, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.transformer = nn.Transformer(
            d_model=embedding_dim,
            nhead=8,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dim_feedforward=hidden_dim,
            batch_first=True
        )
        self.fc = nn.Linear(embedding_dim, vocab_size)

    def forward(self, src, tgt):
        src_emb = self.embedding(src)
        tgt_emb = self.embedding(tgt)
        output = self.transformer(src_emb, tgt_emb)
        output = self.fc(output)
        return output

vocab_size = num_classes
embedding_dim = 64
hidden_dim = 64
num_layers = 2

### Model training

In [7]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [8]:
model = CharTransformer(vocab_size, embedding_dim, hidden_dim, num_layers)

criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [9]:
print(len(input_sequences[-1]), len(output_sequences[-1]))

5 5


#### Train loop

In [None]:
model.train() 
num_epochs = 100
for epoch in range(num_epochs):
    total_loss = 0
    cnt = 0
    for inputs, labels in dataloader:
        cnt += 1
        optimizer.zero_grad()
        labels_target = labels[:, :-1]
        labels_loss = labels[:, 1:]
        output = model(inputs, labels_target)
        output = output.view(-1, output.size(-1))
        labels_loss = labels_loss.reshape(-1)
        loss = criterion(output, labels_loss)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        if cnt % 500 == 0:
            print(f'Epoch {epoch + 1}, {cnt}, Loss: {loss.item()}')
    print(f'Epoch {epoch + 1}, Loss: {total_loss}')


Epoch 1, 500, Loss: 5.050426959991455
Epoch 1, 1000, Loss: 4.451923847198486
Epoch 1, 1500, Loss: 4.266657829284668
Epoch 1, 2000, Loss: 3.7818119525909424
Epoch 1, 2500, Loss: 3.975078582763672
Epoch 1, 3000, Loss: 3.777092456817627
Epoch 1, 3500, Loss: 3.8726651668548584
Epoch 1, 4000, Loss: 3.73300838470459
Epoch 1, 4500, Loss: 3.1478729248046875
Epoch 1, 5000, Loss: 3.5711326599121094
Epoch 1, 5500, Loss: 3.2106220722198486
Epoch 1, 6000, Loss: 3.545823097229004
Epoch 1, Loss: 25492.83483362198
Epoch 2, 500, Loss: 3.3260440826416016
Epoch 2, 1000, Loss: 3.1657755374908447
Epoch 2, 1500, Loss: 3.453291654586792
Epoch 2, 2000, Loss: 3.0722949504852295
Epoch 2, 2500, Loss: 3.2846035957336426
Epoch 2, 3000, Loss: 2.941786289215088
Epoch 2, 3500, Loss: 2.9311320781707764
Epoch 2, 4000, Loss: 3.333153486251831
Epoch 2, 4500, Loss: 3.2012500762939453
Epoch 2, 5000, Loss: 3.0027685165405273
Epoch 2, 5500, Loss: 2.9729247093200684
Epoch 2, 6000, Loss: 2.9846343994140625
Epoch 2, Loss: 19877

### Predict

`index` - number of an random sequence of notes to initialize generation

`loaded_model` - model trained in the loop above and with parameters from the previous code

In [10]:
# torch.save(model, 'transformer_epoch50.pth')
loaded_model = torch.load('generators/neural/transformer/torch_transformer/torch_models/transformer_epoch640.pth', map_location=torch.device('cpu'))

In [51]:
import music21
import random


# Predicting loop
def predict(model, input_seq, index, max_length=100):
    model.eval()
    predicted_notes = []
    with torch.no_grad():
        tgt_seq = torch.tensor(output_sequences[index][:-1])
        for i in range(max_length):
            input_seq = input_seq.unsqueeze(0)
            tgt_seq = tgt_seq.unsqueeze(0)
            output = loaded_model(input_seq, tgt_seq)
            pred_char_index = output.argmax(dim=2)[:,-1].item()
            print(pred_char_index, int_to_note[pred_char_index])
            predicted_notes.append(int_to_note[pred_char_index])
            input_seq.squeeze(0)
            tgt_seq.squeeze(0)
            input_seq = input_seq.squeeze(0)
            tgt_seq = tgt_seq.squeeze(0)
            input_seq = input_seq[1:]
            new_tensor = torch.zeros(input_seq.shape[0] + 1, dtype=input_seq.dtype)
            new_tensor[:-1] = input_seq
            new_tensor[-1] = tgt_seq[0]
            input_seq = new_tensor
            tgt_seq = input_seq[1:]
            new_tensor = torch.zeros(tgt_seq.shape[0] + 1, dtype=tgt_seq.dtype)
            new_tensor[:-1] = tgt_seq
            new_tensor[-1] = pred_char_index
            tgt_seq = new_tensor
    return predicted_notes


# Get predicted sequence
index = random.randint(0, len(input_sequences) - 1)
input_seq = torch.tensor(input_sequences[index])
predicted_notes = predict(model, input_seq, index)

# Create midi file
myStream = music21.stream.Stream()
for n in predicted_notes:
    if '.' in n:
        notes_in_chord = [music21.note.Note(x, quarterLength=0.5) for x in n.split('.')]
        chord = music21.chord.Chord(notes_in_chord,
                                    quarterLength=0.5)
        chord.volume.velocity = np.random.randint(90, 100)
        myStream.append(chord)
    else:
        note = music21.note.Note(n, quarterLength=0.5)
        note.volume.velocity = np.random.randint(70, 80)
        myStream.append(note)
myStream.write('midi', fp='generators/neural/transformer/torch_transformer/out.mid')


2871 G#4
2844 F#4.G#3
291 A#3.F4
2508 D#3
2222 C#4.F3
2844 F#4.G#3
291 A#3.F4
2222 C#4.F3
2508 D#3
2222 C#4.F3
2508 D#3
2222 C#4.F3
2508 D#3
291 A#3.F4
2508 D#3
2222 C#4.F3
2844 F#4.G#3
291 A#3.F4
2508 D#3
2222 C#4.F3
2844 F#4.G#3
291 A#3.F4
2222 C#4.F3
2508 D#3
2222 C#4.F3
2508 D#3
2222 C#4.F3
2508 D#3
2222 C#4.F3
2844 F#4.G#3
291 A#3.F4
2508 D#3
2222 C#4.F3
2508 D#3
2222 C#4.F3
2508 D#3
2222 C#4.F3
2844 F#4.G#3
291 A#3.F4
2222 C#4.F3
2508 D#3
2222 C#4.F3
2508 D#3
2222 C#4.F3
2508 D#3
2222 C#4.F3
2508 D#3
2222 C#4.F3
2844 F#4.G#3
291 A#3.F4
2222 C#4.F3
2508 D#3
291 A#3.F4
2508 D#3
2222 C#4.F3
2508 D#3
2222 C#4.F3
2844 F#4.G#3
291 A#3.F4
2508 D#3
291 A#3.F4
2508 D#3
2508 D#3
2222 C#4.F3
2844 F#4.G#3
291 A#3.F4
2508 D#3
2222 C#4.F3
2508 D#3
291 A#3.F4
2508 D#3
2222 C#4.F3
2844 F#4.G#3
291 A#3.F4
2508 D#3
2222 C#4.F3
2508 D#3
2222 C#4.F3
2508 D#3
291 A#3.F4
2222 C#4.F3
2844 F#4.G#3
291 A#3.F4
2222 C#4.F3
2508 D#3
2222 C#4.F3
2508 D#3
2222 C#4.F3
2508 D#3
2222 C#4.F3
2508 D#3
2222 C#4.F3


'generators/neural/transformer/torch_transformer/out.mid'