In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import torch
import torch.nn as nn

import torch.utils.data as data
import os
import random
import numpy as np
from tqdm import tqdm

import pypianoroll

In [71]:
# Notebook-wide configuration; edit the values here rather than in the cells below.

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Root folder of the dataset; the splits are read from sub-folders of it.
DATA_PATH = '../data/Nottingham/'

# Number of tunes per batch for the training / validation DataLoaders.
TRAIN_BATCH_SIZE = 60
VAL_BATCH_SIZE = 30

# Training hyper-parameters (consumed by cells that come after this section).
# NOTE(review): POSITIVE_WEIGHT is presumably the weight of the positive class in
# the loss - confirm where it is used.
LEARNING_RATE = 1e-3
NUM_EPOCHS = 5
POSITIVE_WEIGHT = 2

In [72]:
def path_to_pianoroll(path, poisson=True, resolution=4):
    """Load a MIDI file as a binary piano roll, optionally run-length compressed.

    Parameters
    ----------
    path : str
        Path of the MIDI file, handed to ``pypianoroll.read``.
    poisson : bool, default True
        When False, one row is returned per time step. When True, every run of
        consecutive identical time steps is collapsed into a single row and one
        extra trailing column stores the length of that run.
    resolution : int, default 4
        Forwarded to ``pypianoroll.read``. A low value keeps the sequences short.

    Returns
    -------
    numpy.ndarray
        ``(num_steps, 88)`` array of 0/1 values when ``poisson`` is False,
        ``(num_runs, 89)`` array when it is True; the last column then holds the
        run lengths, which add up to ``num_steps``.
    """
    midi_data = pypianoroll.read(path, resolution=resolution)

    # Keep the pitch columns 21..108 only, i.e. 88 notes.
    piano_roll = midi_data.blend()[:, 21:109]

    # We perform multi-label classification at each step, so the roll has to be
    # binary: any positive value becomes 1.
    piano_roll[piano_roll > 0] = 1

    if not poisson:
        return piano_roll

    return _collapse_repeated_steps(piano_roll)


def _collapse_repeated_steps(piano_roll):
    """Keep the first row of every run of identical rows and append the run length."""
    num_steps = piano_roll.shape[0]
    if num_steps == 0:
        # Nothing to collapse: keep zero rows but still add the count column so the
        # number of columns does not depend on the content of the file.
        return np.concatenate((piano_roll, np.zeros((0, 1), dtype=np.int64)), axis=1)

    # changed[i] is True when step i + 1 differs from step i, so a new run starts
    # at i + 1. Step 0 always starts the first run.
    changed = np.any(piano_roll[1:] != piano_roll[:-1], axis=1)
    run_starts = np.concatenate(([0], np.flatnonzero(changed) + 1))

    # Distance between consecutive run starts (and the end of the roll) = run length.
    counts = np.diff(np.append(run_starts, num_steps))

    return np.concatenate((piano_roll[run_starts], counts[:, np.newaxis]), axis=1)
    

In [73]:
# Load the same training tune twice with resolution=8: once step by step and once
# with repeated steps collapsed, so that both encodings can be compared.
midi_path = os.path.join(DATA_PATH, "train", "ashover_simple_chords_21.mid")

roll = path_to_pianoroll(midi_path, poisson=False, resolution=8)
roll2 = path_to_pianoroll(midi_path, poisson=True, resolution=8)


In [74]:
# Step-by-step roll.
print(roll.shape)
# Collapsed roll: fewer time steps (the gap grows with the resolution) and one
# additional entry in the second dimension, which holds the repeat counts.
print(roll2.shape)
print(roll2[:, -1])
# Sanity check: the repeat counts add up to the number of steps of the full roll.
print(roll2[:, -1].sum())

(512, 88)
(294, 89)
[2 1 1 1 2 1 2 1 2 1 1 1 2 1 1 1 2 1 2 1 2 1 1 1 2 1 2 2 1 2 1 2 2 1 2 1 2
 2 1 7 1 2 1 1 1 2 1 7 1 2 1 1 1 2 1 5 2 1 2 1 2 2 1 2 1 2 2 1 7 1 7 1 2 1
 1 1 2 1 2 1 2 1 1 1 2 1 1 1 2 1 2 1 2 1 1 1 2 1 2 2 1 2 1 2 2 1 2 1 2 2 1
 7 1 2 1 1 1 2 1 7 1 2 1 1 1 2 1 5 2 1 2 1 2 2 1 2 1 2 2 1 7 1 7 1 2 1 1 1
 2 1 2 1 2 1 1 1 2 1 1 1 2 1 2 1 2 1 1 1 2 1 2 2 1 2 1 2 2 1 2 1 2 2 1 7 1
 2 1 1 1 2 1 7 1 2 1 1 1 2 1 2 1 2 1 1 1 2 1 2 2 1 2 1 2 2 1 7 1 7 1 2 1 1
 1 2 1 2 1 2 1 1 1 2 1 1 1 2 1 2 1 2 1 1 1 2 1 2 2 1 2 1 2 2 1 2 1 2 2 1 7
 1 2 1 1 1 2 1 7 1 2 1 1 1 2 1 2 1 2 1 1 1 2 1 2 2 1 2 1 2 2 1 7 1 7 1]
512


In [75]:
def collate(batch):
    """Collate function for ``DataLoader``: pack variable-length samples.

    ``batch`` is a list of ``(input, target)`` tuples. Inputs and targets are each
    packed with ``pack_sequence``, so no manual padding is needed, and returned as
    the two-element list ``[packed_inputs, packed_targets]``.
    """
    pack = nn.utils.rnn.pack_sequence

    # enforce_sorted=False: samples do not have to arrive sorted by length.
    packed_inputs = pack([sample[0] for sample in batch], enforce_sorted=False)
    packed_targets = pack([sample[1] for sample in batch], enforce_sorted=False)

    return [packed_inputs, packed_targets]

In [76]:
class NotesGenerationDataset(data.Dataset):
    """Next-step prediction pairs built from the MIDI files found under a folder.

    Each item is an ``(input, target)`` pair of float32 tensors of shape
    ``(number_of_events - 1, num_of_features)`` in which ``target[t]`` is the event
    that follows ``input[t]``. The features are whatever ``path_to_pianoroll``
    returns with its default arguments.
    """

    def __init__(self, path):
        """Recursively collect every file located under ``path``.

        All files are assumed to be valid MIDI files; nothing is checked here.
        """
        self.path = path

        # Sorted so that an index maps to the same file on every run: the order in
        # which os.walk yields entries is not guaranteed.
        self.full_filenames = sorted(
            os.path.join(root, filename)
            for root, _subdirs, files in os.walk(path)
            for filename in files
        )

    def __len__(self):
        """Number of files in the dataset."""
        return len(self.full_filenames)

    def __getitem__(self, index):
        """Return the ``(input, target)`` tensors of the file at ``index``."""
        full_filename = self.full_filenames[index]

        piano_roll = path_to_pianoroll(full_filename)

        # Input and ground truth are shifted by one step w.r.t. one another.
        # piano_roll has the format [number_of_events, num_of_features], so the
        # shift has to be done along the first axis (slicing the second axis would
        # drop a feature column instead of a time step).
        input_sequence = piano_roll[:-1]
        ground_truth_sequence = piano_roll[1:]

        return (
            torch.tensor(input_sequence, dtype=torch.float32),
            torch.tensor(ground_truth_sequence, dtype=torch.float32),
        )

In [None]:
trainset = NotesGenerationDataset(os.path.join(DATA_PATH, "train"))

# Of course we want a big batch_size. However, one training sample takes quite a lot of memory.
# We will use torch.cuda.amp.autocast() so that we can make bigger batches
trainset_loader = torch.utils.data.DataLoader(
    trainset,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    drop_last=True,
    collate_fn=collate,
)

# NotesGenerationDataset only takes a folder path (it has no `train` argument, so
# passing one raises TypeError); the split is selected through the path, exactly
# like the training set above.
# NOTE(review): assumes the validation files live in DATA_PATH/valid - confirm the
# folder name against the dataset layout.
valset = NotesGenerationDataset(os.path.join(DATA_PATH, "valid"))

valset_loader = torch.utils.data.DataLoader(
    valset,
    batch_size=VAL_BATCH_SIZE,
    shuffle=False,
    drop_last=False,
    collate_fn=collate,
)