# Imports

In [1]:
import os
import re
import sys
import torch
import hashlib
import itertools
import logging
import numpy as np

logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')


from progress.bar import Bar
from concurrent.futures import ProcessPoolExecutor

from Utils.NotesSeq import NoteSeq as ns
from Utils.EventSeq import EventSeq as es
from Utils.ControlSeq import ControlSeq as cs
from Utils import utils

import warnings
warnings.filterwarnings("ignore")

In [2]:
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical, Gumbel
from torch import optim

from config import device

In [3]:
from pretty_midi import PrettyMIDI, Note, Instrument

# options

In [4]:
sess_path = 'save/train.sess'
data_path = 'Processed-RAW'
saving_interval = 60.
reset_optimizer = False
enable_logging = False

# Dataset

In [5]:
class Dataset:
    def __init__(self, root, verbose=False):
        assert os.path.isdir(root), root
        paths = utils.find_files_by_extensions(root, ['.data'])

        self.root = root
        self.samples = []
        self.seqlens = []
        self.samples2 = []
        self.seqlens2 = []

        if verbose:
            paths = Bar(root).iter(list(paths))
        for path in paths:
            eventseq, eventseq2, controlseq, controlseq2 = torch.load(path)
            controlseq = cs.recover_compressed_array(controlseq)
            controlseq2 = cs.recover_compressed_array(controlseq2)
            assert len(eventseq) == len(controlseq)
            assert len(eventseq2) == len(controlseq2)
            self.samples.append((eventseq, controlseq))
            self.seqlens.append(len(eventseq))
            self.samples2.append((eventseq2,controlseq2))
            self.seqlens2.append(len(eventseq2))

        self.avglen = np.mean(self.seqlens)
        self.avglen2 = np.mean(self.seqlens2)
    
    def batches(self, batch_size, window_size, stride_size):
        indeces = [(i, range(j, j + window_size))
                   for i, seqlen in enumerate(self.seqlens)
                   for j in range(0, seqlen - window_size, stride_size)]
        while True:
            eventseq_batch = []
            controlseq_batch = []
            eventseq_batch2 = []
            controlseq_batch2 = []
            n = 0
            for ii in np.random.permutation(len(indeces)):
                i, r = indeces[ii]

                eventseq, controlseq = self.samples[i]
                eventseq2, controlseq2 = self.samples2[i]

                eventseq = eventseq[r.start:r.stop]
                eventseq2 = eventseq2[r.start:r.stop]

                controlseq = controlseq[r.start:r.stop]
                controlseq2 = controlseq2[r.start:r.stop]

                eventseq_batch.append(eventseq)
                controlseq_batch.append(controlseq)
                eventseq_batch2.append(eventseq2)
                controlseq_batch2.append(controlseq2)

                n += 1
                if n == batch_size:
                    yield (np.stack(eventseq_batch, axis=1),
                           np.stack(controlseq_batch, axis=1),
                           np.stack(eventseq_batch, axis=1),
                           np.stack(controlseq_batch, axis=1))
                    eventseq_batch.clear()
                    controlseq_batch.clear()
                    eventseq_batch2.clear()
                    controlseq_batch2.clear()
                    n = 0
    
    def __repr__(self):
        return (f'Dataset(root="{self.root}", '
                f'samples={len(self.samples)}, '
                f'avglen={self.avglen})')


In [6]:
dataset = Dataset(data_path, verbose=True)

In [7]:
dataset_size = len(dataset.samples)
assert dataset_size > 0

# Training

In [8]:
# Variables for training process

init_dim = 32
event_dim = es.dim()
control_dim = cs.dim()
hidden_dim = 512
gru_layers = 3
gru_droput = 0.3

In [9]:
learning_rate = 0.001
batch_size = 64
window_size = 200
stride_size = 10
use_transposition = False
control_ratio = 1.0
teacher_forcing_ratio = 1.0

In [10]:
model_config = {
    'init_dim': init_dim,
    'event_dim': event_dim,
    'control_dim': control_dim,
    'hidden_dim': hidden_dim,
    'gru_layers': gru_layers,
    'gru_dropout': gru_droput,
}

In [11]:
init = torch.randn(batch_size, init_dim).to(device)

In [12]:
from model import PerformanceRNN

model = PerformanceRNN(**model_config).to(device)

In [13]:
params = list(model.parameters()) + list(model.parameters())

optimizer = optim.Adam(params, lr=learning_rate)

In [14]:
loss_function = nn.CrossEntropyLoss()

In [15]:
def save_model(model, optimizer, model_config, sess_path):
    print('Saving to', sess_path)
    torch.save({'model_config': model_config,
                'model_state': model.state_dict(),
                'model_optimizer_state': optimizer.state_dict()}, sess_path)
    print('Done saving')

In [16]:
enable_logging = True

In [17]:
if enable_logging:
    from torch.utils.tensorboard import SummaryWriter
    writer = SummaryWriter()

2024-08-12 21:41:26,887 - DEBUG - Falling back to TensorFlow client; we recommended you install the Cloud TPU client directly with pip install cloud-tpu-client.
2024-08-12 21:41:27,208 - DEBUG - Creating converter from 7 to 5
2024-08-12 21:41:27,208 - DEBUG - Creating converter from 5 to 7
2024-08-12 21:41:27,209 - DEBUG - Creating converter from 7 to 5
2024-08-12 21:41:27,209 - DEBUG - Creating converter from 5 to 7


In [20]:
batch_gen = dataset.batches(batch_size, window_size, stride_size)
for iteration, data in enumerate(batch_gen):
    e1 = data[0]
    c1 = data[1]

    # First Model process

    events = torch.LongTensor(e1).to(device)
    assert events.shape[0] == window_size
    assert len(events.shape) == 2
    assert events.shape[0] >= window_size - 1

    if np.random.random() < control_ratio:
        controls = torch.FloatTensor(c1).to(device)
        assert controls.shape[0] == window_size
    else:
        controls = None

    init = torch.randn(batch_size, model.init_dim).to(device)
    outputs = model.generate(init, window_size, 
                            events[:-1], controls,output_type = 'logit')
    
    assert outputs.shape[:2] == events.shape[:2]
    loss = loss_function(outputs.view(-1, event_dim), events.view(-1))
    #print(outputs)

    model.zero_grad()
    loss.backward()

    #concatenated_vectors = torch.cat((outputs, outputsB),2)
    #print(outputsF.shape)
    #print(outputsB.shape)
    #print(events.shape)

    norm = utils.compute_gradient_norm(model.parameters())
    nn.utils.clip_grad_norm_(model.parameters(), 1.0)
    
    optimizer.step()

    if enable_logging:
            writer.add_scalar('model/loss', loss.item(), iteration)
            writer.add_scalar('model/norm', norm.item(), iteration)

    if iteration % 5 == 0:
         save_model(model, optimizer, model_config, 'save/tempA.sess')

    print(f'iter {iteration}, loss: {loss.item()}')


Saving to save/tempA.sess
Done saving
iter 0, loss: 5.477005481719971
iter 1, loss: 5.209493637084961
iter 2, loss: 6.461246490478516
iter 3, loss: 5.502777099609375
iter 4, loss: 4.967595100402832
Saving to save/tempA.sess
Done saving
iter 5, loss: 5.046258926391602
iter 6, loss: 4.907918930053711
iter 7, loss: 4.843714714050293
iter 8, loss: 4.799515247344971
iter 9, loss: 4.772924423217773
Saving to save/tempA.sess
Done saving
iter 10, loss: 4.765472888946533
iter 11, loss: 4.662515163421631
iter 12, loss: 4.6570515632629395
iter 13, loss: 4.678713321685791
iter 14, loss: 4.595309257507324
Saving to save/tempA.sess
Done saving
iter 15, loss: 4.534029006958008
iter 16, loss: 4.724520683288574
iter 17, loss: 4.654621124267578
iter 18, loss: 4.524623394012451
iter 19, loss: 4.5000529289245605
Saving to save/tempA.sess
Done saving
iter 20, loss: 4.362791538238525
iter 21, loss: 4.4422454833984375
iter 22, loss: 4.332684516906738
iter 23, loss: 4.154935836791992
iter 24, loss: 4.07329511

KeyboardInterrupt: 

# Generation

In [21]:
controls = None
control = 'NONE'

max_len = 10000

In [22]:
model_config = {
    'init_dim': max_len,
    'event_dim': event_dim,
    'control_dim': control_dim,
    'hidden_dim': hidden_dim,
    'gru_layers': gru_layers,
    'gru_dropout': gru_droput,
}

In [24]:
state = torch.load('save/tempA.sess', map_location=device)
model = PerformanceRNN(**state['model_config']).to(device)
model.load_state_dict(state['model_state'])

<All keys matched successfully>

In [25]:
model.eval()

PerformanceRNN(
  (inithid_fc): Linear(in_features=32, out_features=1536, bias=True)
  (inithid_fc_activation): Tanh()
  (event_embedding): Embedding(240, 240)
  (concat_input_fc): Linear(in_features=265, out_features=512, bias=True)
  (concat_input_fc_activation): LeakyReLU(negative_slope=0.1, inplace=True)
  (gru): GRU(512, 512, num_layers=3, dropout=0.3)
  (output_fc): Linear(in_features=1536, out_features=240, bias=True)
  (output_fc_activation): Softmax(dim=-1)
)

In [26]:
outputF = []
old = []

In [28]:
with torch.no_grad():
    outputs = model.generate(init,max_len, controls)

In [29]:
outputs = outputs.cpu().numpy().T 

In [30]:
outputs

array([[ 50, 197,  48, ..., 197,  48, 197],
       [ 53, 192, 192, ..., 192, 192, 192],
       [ 53, 203, 203, ..., 203, 203, 203],
       ...,
       [207, 207, 207, ..., 207, 207, 207],
       [ 53, 207, 207, ..., 207, 207, 207],
       [ 46,  46, 222, ...,  48, 197,  48]])

In [None]:
with torch.no_grad():
    
    batch_gen = dataset.batches(batch_size, window_size, stride_size)
    for iteration, data in enumerate(batch_gen):
        e1 = data[0]
        c1 = data[1]
        e2 = data[2]
        c2 = data[3]

        # First Model process
        max_len = c1.shape[0]

        events = torch.LongTensor(e1).to(device)
        assert events.shape[0] == window_size
        assert len(events.shape) == 2
        assert events.shape[0] >= window_size - 1

        if np.random.random() < control_ratio:
            controls = torch.FloatTensor(c1).to(device)
            assert controls.shape[0] == window_size
        else:
            controls = None

        init = torch.randn(batch_size, model.init_dim).to(device)
        outputs = model.generate(init,max_len, 
                                events[:-1], controls)
        
        assert outputs.shape[:2] == events.shape[:2]
        #print(outputs)

        # Second Model process

        eventsB = torch.LongTensor(e2).to(device)
        assert eventsB.shape[0] == window_size
        assert len(eventsB.shape) == 2
        assert eventsB.shape[0] >= window_size - 1

        if np.random.random() < control_ratio:
            controls = torch.FloatTensor(c2).to(device)
            assert controls.shape[0] == window_size
        else:
            controls = None

        init = torch.randn(batch_size, model2.init_dim).to(device)
        outputsB = model2.generate(init, max_len, 
                                events=eventsB[:-1], events2=outputs[:-1], controls=controls,
                                output_type='index')
        outputF.append([outputsB, events])
        old.append(outputs)
        if iteration == 50:
            break


In [None]:
old = old[0].cpu().numpy().T

In [None]:
old

array([[[-3.033329  , -3.0852256 , -2.9709718 , ..., -2.6993012 ,
         -2.7076375 , -2.7189739 ],
        [-2.7676702 , -2.940926  , -2.876566  , ..., -2.7019038 ,
         -2.6886117 , -2.6757598 ],
        [-2.8451457 , -2.9187024 , -2.854751  , ..., -2.7001088 ,
         -2.7084332 , -2.7197595 ],
        ...,
        [-2.4381003 , -2.566773  , -2.5372918 , ..., -2.651637  ,
         -2.6424255 , -2.6344972 ],
        [-3.2426212 , -3.1992981 , -3.0019915 , ..., -2.6715646 ,
         -2.6597784 , -2.6492589 ],
        [-2.4098597 , -2.4663043 , -2.4977996 , ..., -2.6990337 ,
         -2.7073605 , -2.7186904 ]],

       [[-3.2805376 , -3.2945604 , -3.1769314 , ..., -2.9629111 ,
         -2.9700105 , -2.9842064 ],
        [-2.637222  , -2.9386058 , -2.9709218 , ..., -2.9761665 ,
         -2.964363  , -2.9525466 ],
        [-2.9294026 , -3.0802238 , -3.0566714 , ..., -2.9650445 ,
         -2.972115  , -2.9862797 ],
        ...,
        [-2.4925306 , -2.6304078 , -2.6430595 , ..., -

In [None]:
len(outputF)

130

In [None]:
class Event:

    def __init__(self, type, time, value):
        self.type = type
        self.time = time
        self.value = value

    def __repr__(self):
        return 'Event(type={}, time={}, value={})'.format(
            self.type, self.time, self.value)


In [None]:
def from_array(event_indeces):
    time = 0
    events = []
    for event_index in event_indeces:
        for event_type, feat_range in es.feat_ranges().items():
            if (feat_range.start <= event_index) < feat_range.stop:
                event_value = event_index - feat_range.start
                events.append(Event(event_type, time, event_value))
                if event_type == 'time_shift':
                    time += es.time_shift_bins[event_value]
                break

    print(events)

    return es(events)

In [None]:
DEFAULT_SAVING_PROGRAM = 1
DEFAULT_LOADING_PROGRAMS = range(128)
DEFAULT_RESOLUTION = 220
DEFAULT_TEMPO = 120
DEFAULT_VELOCITY = 64
DEFAULT_PITCH_RANGE = range(21, 109)
DEFAULT_VELOCITY_RANGE = range(21, 109)
DEFAULT_NORMALIZATION_BASELINE = 60  # C4

In [None]:
USE_VELOCITY = True
BEAT_LENGTH = 60 / DEFAULT_TEMPO
DEFAULT_TIME_SHIFT_BINS = 1.15 ** np.arange(32) / 65
DEFAULT_VELOCITY_STEPS = 32
DEFAULT_NOTE_LENGTH = BEAT_LENGTH * 2
MIN_NOTE_LENGTH = BEAT_LENGTH / 2

In [None]:
def to_note_seq(events):
    time = 0
    notes = []

    velocity = DEFAULT_VELOCITY
    velocity_bins = es.get_velocity_bins()

    last_notes = {}

    print(events)

    for event in events:
        if event.type == 'note_on':
            pitch = event.value + es.pitch_range.start
            note = Note(velocity, pitch, time, None)
            notes.append(note)
            last_notes[pitch] = note

        elif event.type == 'note_off':
            pitch = event.value + es.pitch_range.start

            if pitch in last_notes:
                note = last_notes[pitch]
                note.end = max(time, note.start + MIN_NOTE_LENGTH)
                del last_notes[pitch]

        elif event.type == 'velocity':
            index = min(event.value, velocity_bins.size - 1)
            velocity = velocity_bins[index]

        elif event.type == 'time_shift':
            time += es.time_shift_bins[event.value]

    for note in notes:
        if note.end is None:
            note.end = note.start + DEFAULT_NOTE_LENGTH

        note.velocity = int(note.velocity)

    return ns(notes)

In [None]:
for processed, inputstream in outputF:
    itemS = processed.cpu().numpy().T

    arr = np.stack(itemS)
    print(arr)

    #for i, output in enumerate(itemS):
    #    name = f'output-{i:03d}.mid'
    #    print(name)
    #    print(type(output))
    #    es1 = from_array(output)

    es1 = es.from_array(itemS)
    ns = to_note_seq(es1)
    print(ns)
    break

[[[1.13601229e-04 1.20252291e-04 2.39152669e-05 ... 4.50986045e-06
   2.89303589e-05 2.58752425e-06]
  [1.06278727e-04 2.20269021e-06 1.26815185e-05 ... 2.00609811e-05
   6.55290060e-05 2.45055198e-05]
  [1.54919588e-04 8.30569261e-05 4.96174725e-05 ... 8.20196510e-05
   6.82196041e-05 6.37302510e-05]
  ...
  [9.87264357e-05 2.83538247e-06 1.13704518e-04 ... 3.17189770e-05
   4.24269456e-06 1.49969674e-05]
  [1.06813059e-04 4.25334692e-05 2.05532524e-05 ... 4.50605075e-05
   3.19493483e-05 2.19254889e-05]
  [9.12944306e-05 2.62257163e-06 1.20948229e-04 ... 1.19153128e-05
   1.30850267e-05 1.19445132e-04]]

 [[2.70751305e-04 9.92179848e-05 2.18342375e-05 ... 9.65514482e-06
   3.74713272e-05 6.27170903e-06]
  [2.34336243e-04 6.56342490e-06 1.61588468e-05 ... 2.58053551e-05
   7.41890763e-05 3.48760332e-05]
  [2.29328143e-04 5.17725202e-05 4.37416420e-05 ... 4.32448614e-05
   4.54655165e-05 3.98923112e-05]
  ...
  [2.23968862e-04 7.46548812e-06 1.24028986e-04 ... 3.45237058e-05
   9.71635

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()