In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt


# Report whether PyTorch can see a usable CUDA device in this environment.
print(f"Using Cuda: {torch.cuda.is_available()}")

Using Cuda: True


In [2]:
# Build the event vocabulary, following the representation of Oore et al., 2018.
#
# Layout of `vocab` (index -> token):
#     0          <pad>
#     1 - 128    note_on       (128 events)  a note starts
#     129 - 256  note_off      (128 events)  a note stops
#     257 - 381  time_shift    (125 events)  time between events; one unit = 8 ms,
#                                            so a single shift covers up to 1 s
#     382 - 413  set_velocity  (32 events)   how hard the key is pressed
#     414        <start>
#     415        <end>

# Number of events of each kind.
note_on_events = 128
note_off_events = 128
time_shift_events = 125
velocity_events = 32
note_events = note_on_events + note_off_events
total_midi_events = note_events + time_shift_events + velocity_events  # excludes <pad>, <start>, <end>

# Quantisation steps.
max_time_btw_events = 1000  # ms (1 s)
one_unit_time = max_time_btw_events // time_shift_events  # ms covered by one time-shift unit
# MIDI velocity spans 0-127; with 32 velocity events each one covers 4 raw MIDI values.
vel_level = 128 // velocity_events

# Token names, one list per event kind.
note_on_vocab = [f"note_on_{n}" for n in range(note_on_events)]
note_off_vocab = [f"note_off_{n}" for n in range(note_off_events)]
time_shift_vocab = [f"time_shift_{n}" for n in range(time_shift_events)]
velocity_vocab = [f"set_velocity_{n}" for n in range(velocity_events)]

# Full vocabulary: padding first, the four event kinds in order, then the sequence markers.
vocab = [
    '<pad>',
    *note_on_vocab,
    *note_off_vocab,
    *time_shift_vocab,
    *velocity_vocab,
    '<start>',
    '<end>',
]
vocab_size = len(vocab)

# Indices of the special tokens.
pad_token = vocab.index("<pad>")      # 0
start_token = vocab.index("<start>")  # 414
end_token = vocab.index("<end>")      # 415

In [3]:
#helper functions

def events_to_indices(event_list, _vocab=None):
    """
    Translate event names into their positions in a vocabulary.

    Args:
        event_list: iterable of event names.
        _vocab: list to look the events up in; the module-level ``vocab`` is
            used when this is None.

    Returns:
        list[int]: for each event, the index of its first occurrence in the vocabulary.

    Raises:
        ValueError: if an event is not present in the vocabulary.
    """
    table = vocab if _vocab is None else _vocab
    return [table.index(name) for name in event_list]


def indices_to_events(index_list, _vocab=None):
    """
    Translate vocabulary positions back into event names.

    Args:
        index_list: iterable of indices into the vocabulary.
        _vocab: list to read the events from; the module-level ``vocab`` is
            used when this is None.

    Returns:
        list: the vocabulary entry found at each index, in input order.

    Raises:
        IndexError: if an index lies outside the vocabulary.
    """
    table = vocab if _vocab is None else _vocab
    return [table[position] for position in index_list]

def velocity_to_bin(velocity, step=vel_level):
    """
    Quantise a MIDI velocity into a coarse bin.

    MIDI velocity is an integer in 0..127, which is finer than the vocabulary
    needs, so velocities are grouped into (128 / step) bins of ``step``
    consecutive values each.

    Args:
        velocity: MIDI velocity, 0 to 127 inclusive.
        step: width of one bin; must divide 128 evenly.

    Returns:
        int: index of the bin that contains ``velocity`` (velocity // step).

    Raises:
        ValueError: if 128 is not a multiple of ``step`` or ``velocity`` is
            outside 0..127.
    """
    if 128 % step:
        raise ValueError("128 must be divisible by bins")

    in_range = 0 <= velocity <= 127
    if not in_range:
        raise ValueError(f"velocity must be between 0 and 127, not {velocity}")

    return int(velocity // step)

def bin_to_velocity(_bin,step=vel_level):
    """
    Convert a velocity bin number back into a MIDI velocity.

    The result is ``_bin * step``, i.e. the lowest velocity that falls into
    the given bin.

    Args:
        _bin: bin number to convert.
        step: width of one bin, in MIDI velocity values.

    Returns:
        int: the velocity ``_bin * step``.

    Raises:
        ValueError: if ``_bin * step`` is outside 0..127.
    """
    velocity = _bin * step
    if not (0 <= velocity <= 127):
        # Report the quantity that is actually range-checked (_bin * step);
        # quoting only _bin against "0 and 127" produced self-contradictory
        # messages such as "between 0 and 127, not 32".
        raise ValueError(
            f"_bin * step must be between 0 and 127, not {velocity} "
            f"(_bin={_bin}, step={step})"
        )

    return int(velocity)
                         


In [None]:
def round_(a):
    """
    Round to the nearest integer, with exact halves always going to the
    greater integer (e.g. 0.5 -> 1, 2.5 -> 3, -0.5 -> 0).

    Args:
        a: number to round.

    Returns:
        int: the rounded value.
    """
    # divmod(a, 1) yields the floor of a and its fractional part in [0, 1).
    whole, fraction = divmod(a, 1)
    if fraction >= 0.5:
        whole += 1
    return int(whole)

def time_cutter(time, lth=max_time_btw_events, div=one_unit_time):
    """
    Split the time between two events into time-shift unit counts.

    The time is expressed as k instances of the maximum time shift followed
    by one leftover time shift:
        time = k * lth + leftover
    where k = time // lth and leftover = time % lth. Every shift is reported
    as a number of ``div``-sized units, rounded with ``round_``.

    Args:
        time: time between events, in the same unit as ``lth`` and ``div``.
            Must not be negative.
        lth: longest time a single time shift can represent.
        div: duration of one time-shift unit; must divide ``lth`` evenly.

    Returns:
        list[int]: k entries equal to lth / div, followed by the leftover
        unit count when it rounds to more than zero. The list is empty when
        the whole time rounds to zero units.

    Raises:
        ValueError: if ``lth`` is not a multiple of ``div`` or ``time`` is negative.
    """
    if lth % div != 0:
        raise ValueError("Max time must be divisible by the unit of time you consider")
    if time < 0:
        # With a negative time, floor division and modulo would silently
        # produce a large positive leftover instead of failing.
        raise ValueError(f"time must be non-negative, not {time}")

    # int() so that a float time (float // int yields a float) can still be
    # used as a repetition count.
    k = int(time // lth)
    max_shift = round_(lth / div)
    time_shifts = [max_shift] * k

    leftover_time_shift = round_((time % lth) / div)
    if leftover_time_shift > 0:
        time_shifts.append(leftover_time_shift)

    return time_shifts

def time_to_events(time_btw_events, event_list=None, index_list=None, _vocab=None):
    """
    Append the time-shift tokens for a time gap to the given output lists.

    The gap is split into unit counts by ``time_cutter``; each count ``i`` is
    mapped to vocabulary index ``note_events + i``.

    Args:
        time_btw_events: time between two events, forwarded to ``time_cutter``.
        event_list: if not None, the vocabulary entry of every time shift is
            appended to this list in place.
        index_list: if not None, the vocabulary index of every time shift is
            appended to this list in place.
        _vocab: vocabulary used to resolve entries for ``event_list``; the
            module-level ``vocab`` is used when this is None.

    Returns:
        None. Results are delivered by mutating ``event_list`` / ``index_list``.
    """
    table = vocab if _vocab is None else _vocab

    for shift_units in time_cutter(time_btw_events):
        token_index = note_events + shift_units
        if event_list is not None:
            event_list.append(table[token_index])
        if index_list is not None:
            index_list.append(token_index)