# **Import libraries**

In [1]:
!pip install mido

Collecting mido
[?25l  Downloading https://files.pythonhosted.org/packages/20/0a/81beb587b1ae832ea6a1901dc7c6faa380e8dd154e0a862f0a9f3d2afab9/mido-1.2.9-py2.py3-none-any.whl (52kB)
[K     |██████▎                         | 10kB 19.0MB/s eta 0:00:01[K     |████████████▌                   | 20kB 16.7MB/s eta 0:00:01[K     |██████████████████▊             | 30kB 14.1MB/s eta 0:00:01[K     |█████████████████████████       | 40kB 12.8MB/s eta 0:00:01[K     |███████████████████████████████▏| 51kB 8.9MB/s eta 0:00:01[K     |████████████████████████████████| 61kB 4.8MB/s 
[?25hInstalling collected packages: mido
Successfully installed mido-1.2.9


In [2]:
import mido 
import matplotlib.pyplot as plt 
import numpy as np 
import os 
import random
import pandas as pd

from mido import MidiFile, MidiTrack, Message

from sklearn import model_selection

import torch 
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [3]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

# **Hyperparameters**

In [11]:
# --- Optimisation schedule ---
num_epochs = 100        # number of passes over the training set
batch_size = 1024       # samples per batch for both data loaders
learning_rate = 0.05    # SGD step size

# --- Input representation ---
sequence_length = 16    # number of notes fed to the model per sample
embedding_dim = 128     # width of each embedded note

# --- Network size ---
hidden_size = 512       # width of the recurrent state and of the hidden FC layer
num_layers = 3          # number of stacked recurrent layers
num_classes = 128       # distinct note values: embedding vocabulary and output logits

# **Load data**

In [13]:
# The CSV files have no header row. Downstream code treats every row as one
# sample: the leading `sequence_length` columns are the input notes and the
# column right after them is the note to predict.
df_train = pd.read_csv('train_note.csv', header=None)
df_val = pd.read_csv('val_note.csv', header=None)

# Request int64 explicitly: plain `int` is a 32-bit integer on some platforms,
# whereas nn.CrossEntropyLoss requires int64 class indices as targets.
array_train = df_train.values.astype(np.int64)
array_val = df_val.values.astype(np.int64)

# Fail early, with a readable message, if the files are narrower than what the
# training / validation loops index into.
for _name, _array in (('train_note.csv', array_train), ('val_note.csv', array_val)):
    assert _array.shape[1] >= sequence_length + 1, (
        f'{_name}: expected at least {sequence_length + 1} columns, got {_array.shape[1]}'
    )

In [14]:
# The integer arrays are used directly as datasets: one item is one CSV row,
# and the default collate function stacks the rows into a 2-D tensor batch.
train_loader = torch.utils.data.DataLoader(dataset=array_train,
                                           batch_size=batch_size,
                                           shuffle=True)

# Validation only measures accuracy, which does not depend on sample order,
# so the per-epoch reshuffle is skipped.
val_loader = torch.utils.data.DataLoader(dataset=array_val,
                                         batch_size=batch_size,
                                         shuffle=False)

# **Models**

## Classification approach

In [27]:
# RNN architecture
class RNN(nn.Module):
    """Next-token classifier built on a stacked vanilla recurrent network.

    Pipeline: Embedding -> nn.RNN -> Linear -> ReLU -> Linear. Only the
    recurrent output of the last time step is fed to the fully connected head.

    Args:
        num_classes: embedding vocabulary size and number of output logits.
        embedding_dim: width of each embedded token.
        hidden_size: width of the recurrent state and of the hidden FC layer.
        num_layers: number of stacked recurrent layers.
        drop_prob: dropout value forwarded to nn.RNN.
    """

    def __init__(self, num_classes, embedding_dim, hidden_size, num_layers, drop_prob=0.):
        super(RNN, self).__init__()

        self.embedding = nn.Embedding(num_classes, embedding_dim)

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # The attribute is named `lstm` although it holds an nn.RNN; the name is
        # kept so existing state_dict keys stay valid.
        self.lstm = nn.RNN(embedding_dim, hidden_size, num_layers, dropout=drop_prob, batch_first=True)
        self.fc_1 = nn.Linear(hidden_size, hidden_size)
        self.fc_2 = nn.Linear(hidden_size, num_classes)

        self.relu = nn.ReLU()

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for integer sequences `x` of shape (batch, seq_len)."""
        # Embedding layer
        x = self.embedding(x)  # Output shape (batch, sequence_length, embedding_dim)

        # Zero initial hidden state, created on the same device and with the
        # same dtype as the embedded input so the module does not depend on a
        # notebook-level `device` variable.
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                         device=x.device, dtype=x.dtype)

        # Forward propagate the recurrent stack
        out, _ = self.lstm(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)
        out = out[:, -1, :]  # Output of the last element of the sequence

        # Fully connected head
        out = self.fc_1(out)
        out = self.relu(out)
        out = self.fc_2(out)
        return out

In [28]:
# GRU architecture
class GRU(nn.Module):
    """Next-token classifier built on a stacked GRU.

    Pipeline: Embedding -> nn.GRU -> Linear -> ReLU -> Linear. Only the
    recurrent output of the last time step is fed to the fully connected head.

    Args:
        num_classes: embedding vocabulary size and number of output logits.
        embedding_dim: width of each embedded token.
        hidden_size: width of the recurrent state and of the hidden FC layer.
        num_layers: number of stacked recurrent layers.
        drop_prob: dropout value forwarded to nn.GRU.
    """

    def __init__(self, num_classes, embedding_dim, hidden_size, num_layers, drop_prob=0.):
        super(GRU, self).__init__()

        self.embedding = nn.Embedding(num_classes, embedding_dim)

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # The attribute is named `lstm` although it holds an nn.GRU; the name is
        # kept so existing state_dict keys stay valid.
        self.lstm = nn.GRU(embedding_dim, hidden_size, num_layers, dropout=drop_prob, batch_first=True)
        self.fc_1 = nn.Linear(hidden_size, hidden_size)
        self.fc_2 = nn.Linear(hidden_size, num_classes)

        self.relu = nn.ReLU()

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for integer sequences `x` of shape (batch, seq_len)."""
        # Embedding layer
        x = self.embedding(x)  # Output shape (batch, sequence_length, embedding_dim)

        # Zero initial hidden state on the input's device / dtype. A GRU has no
        # cell state, so nothing else needs to be allocated here.
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                         device=x.device, dtype=x.dtype)

        # Forward propagate the GRU
        out, _ = self.lstm(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)
        out = out[:, -1, :]  # Output of the last element of the sequence

        # Fully connected head
        out = self.fc_1(out)
        out = self.relu(out)
        out = self.fc_2(out)
        return out

In [29]:
# LSTM architecture
class LSTM(nn.Module):
    """Next-token classifier built on a stacked LSTM.

    Pipeline: Embedding -> nn.LSTM -> Linear -> ReLU -> Linear. Only the
    recurrent output of the last time step is fed to the fully connected head.

    Args:
        num_classes: embedding vocabulary size and number of output logits.
        embedding_dim: width of each embedded token.
        hidden_size: width of the recurrent state and of the hidden FC layer.
        num_layers: number of stacked recurrent layers.
        drop_prob: dropout value forwarded to nn.LSTM.
    """

    def __init__(self, num_classes, embedding_dim, hidden_size, num_layers, drop_prob=0.):
        super(LSTM, self).__init__()

        self.embedding = nn.Embedding(num_classes, embedding_dim)

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(embedding_dim, hidden_size, num_layers, dropout=drop_prob, batch_first=True)
        self.fc_1 = nn.Linear(hidden_size, hidden_size)
        self.fc_2 = nn.Linear(hidden_size, num_classes)

        self.relu = nn.ReLU()

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for integer sequences `x` of shape (batch, seq_len)."""
        # Embedding layer
        x = self.embedding(x)  # Output shape (batch, sequence_length, embedding_dim)

        # Zero initial hidden and cell states, created on the input's device and
        # dtype so the module does not depend on a notebook-level `device`.
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                         device=x.device, dtype=x.dtype)
        c0 = torch.zeros_like(h0)

        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))  # out: tensor of shape (batch_size, seq_length, hidden_size)
        out = out[:, -1, :]  # Output of the last element of the sequence

        # Fully connected head
        out = self.fc_1(out)
        out = self.relu(out)
        out = self.fc_2(out)
        return out

# **Training**

## Accuracy 

In [18]:
def validate_model(model, loader, sequence_length=16):
    """Return the top-1 accuracy (in percent) of `model` over `loader`.

    Args:
        model: module mapping an integer batch (batch, sequence_length) to
            logits (batch, num_classes). It is switched to eval mode and left
            in that mode.
        loader: iterable of 2-D integer tensors; in each row the first
            `sequence_length` columns are the input and the next column is
            the target class.
        sequence_length: number of leading columns used as model input.

    Returns:
        Accuracy as a float in [0, 100]; 0.0 when `loader` yields no samples.
    """
    model.eval()

    # Run on whatever device holds the model weights instead of relying on a
    # notebook-level global; parameter-free models are evaluated on the CPU.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    correct = 0
    total = 0
    with torch.no_grad():
        for batch in loader:
            sequence = batch[:, :sequence_length].to(model_device)
            target = batch[:, sequence_length].to(model_device)
            outputs = model(sequence)

            predicted = outputs.argmax(dim=1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    # An empty loader would otherwise raise ZeroDivisionError.
    if total == 0:
        return 0.0

    return 100 * correct / total

## Training loop

In [19]:
def train_model(model, optimizer, train_loader, val_loader, num_epochs, lr_scheduler=None, display_loss=False):
    """Train `model` with cross-entropy loss and print accuracies after every epoch.

    Args:
        model: module mapping an integer batch (batch, 16) to class logits.
        optimizer: optimizer already bound to `model`'s parameters.
        train_loader: iterable of 2-D integer tensors; columns 0-15 of a row
            are the input sequence and column 16 is the target class.
        val_loader: same layout as `train_loader`, used for validation only.
        num_epochs: number of passes over `train_loader`.
        lr_scheduler: when equal to 'multi_steps', every parameter group's
            learning rate is multiplied by 0.1 once, at epoch
            int(num_epochs * 0.5). Any other value leaves the rate untouched.
        display_loss: when true, print the batch loss every 300 steps.

    Returns:
        None. Progress is reported through print().
    """
    criterion = nn.CrossEntropyLoss()

    # Move batches to the device that holds the model weights, so the loop
    # does not depend on a notebook-level `device` variable.
    model_device = next(model.parameters()).device

    best_val_accuracy = 0
    best_epoch = 0

    for epoch in range(num_epochs):

        # validate_model() leaves the model in eval mode, so training mode is
        # re-enabled at the start of every epoch.
        model.train()

        #### UPDATE LEARNING RATE ####
        if lr_scheduler == 'multi_steps' and epoch == int(num_epochs * 0.5):
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.1

        for i, batch in enumerate(train_loader):
            # Same column split as validate_model: 16 inputs, then the target.
            sequence = batch[:, :16].to(model_device)
            target = batch[:, 16].to(model_device)

            optimizer.zero_grad()
            outputs = model(sequence)
            loss = criterion(outputs, target)
            loss.backward()
            optimizer.step()

            if i % 300 == 0 and display_loss:
                print(f'Epoch : {epoch}, Step: {i}, Loss: {round(loss.item(), 2)}')

        # Train accuracy (measured over the whole training loader)
        train_accuracy = round(validate_model(model, train_loader), 2)

        # Val accuracy
        val_accuracy = round(validate_model(model, val_loader), 2)
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            best_epoch = epoch

        print('################')
        print(f'Epoch : {epoch}, Train accuracy : {train_accuracy} %, Val accuracy : {val_accuracy} %')
        print(f'Best val accuracy at epoch {best_epoch}: {best_val_accuracy} %')

# **Experiments**

## RNN architecture

In [None]:
# Build the vanilla-RNN classifier from the values in the Hyperparameters cell
# rather than repeating them as literals here.
model = RNN(num_classes, embedding_dim=embedding_dim, hidden_size=hidden_size, num_layers=num_layers).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, nesterov=True, momentum=0.9)

train_model(model, optimizer, train_loader, val_loader, num_epochs=num_epochs, lr_scheduler='multi_steps')

## GRU architecture

In [None]:
# Build the GRU classifier from the values in the Hyperparameters cell rather
# than repeating them as literals here.
model = GRU(num_classes, embedding_dim=embedding_dim, hidden_size=hidden_size, num_layers=num_layers).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, nesterov=True, momentum=0.9)

train_model(model, optimizer, train_loader, val_loader, num_epochs=num_epochs, lr_scheduler='multi_steps')

## LSTM architecture

In [None]:
# Build the LSTM classifier from the values in the Hyperparameters cell rather
# than repeating them as literals here.
model = LSTM(num_classes, embedding_dim=embedding_dim, hidden_size=hidden_size, num_layers=num_layers).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, nesterov=True, momentum=0.9)

train_model(model, optimizer, train_loader, val_loader, num_epochs=num_epochs, lr_scheduler='multi_steps')

# **Test predictions**

## Mido utils

In [23]:
def notes_to_track(notes):
    """Return a MidiTrack holding one `note_on` message per entry of `notes`.

    Every message uses channel 0, velocity 64 and a delta time of 240. No
    message ending a note is emitted.
    """
    # Author's note (translated): default length 480: 4 measures. `time` is
    # the spacing between consecutive notes; below 480 they overlap.
    # An earlier variant, left disabled by the author, emitted each note as a
    # zero-delay note_on followed 240 ticks later by a velocity-0 note_on.
    track = MidiTrack()
    track.extend(
        Message('note_on', channel=0, note=pitch, velocity=64, time=240)
        for pitch in notes
    )
    return track
    

In [24]:
def save_track(track, path):
    """Write `track` to `path` as the only track of a freshly created MidiFile."""
    midi_file = MidiFile()
    midi_file.tracks.append(track)
    midi_file.save(path)

## Generation

In [26]:
# Random sampling
n_predictions = 1000
temp = 2 # temperature parameter: logits are divided by it, values > 1 flatten the distribution
list_notes = [64]  # seed note the generation starts from

# Inference only: set eval mode explicitly and skip autograd bookkeeping for
# the n_predictions forward passes.
model.eval()
with torch.no_grad():
    for i in range(n_predictions):
        # Condition on (at most) the last `sequence_length` generated notes.
        list_input = list_notes[-sequence_length:]
        input_tensor = torch.tensor(list_input, dtype=torch.long, device=device).reshape(1, -1)
        pred = model(input_tensor)

        array_proba = torch.softmax(pred / temp, 1).cpu().numpy()[0]
        # Store a plain Python int so the note list stays homogeneous.
        note = int(np.random.choice(num_classes, p=array_proba))
        list_notes.append(note)

midi = notes_to_track(list_notes)
save_track(midi, 'file.mid')