# Download and unpack files (Colab)

In [None]:
# Download the project archive (Generalist.zip) from Dropbox.
!wget https://www.dropbox.com/s/0oz27mpojtbemyj/Generalist.zip

In [None]:
# Extract the archive into the current directory (-d .).
!unzip Generalist.zip -d .

In [None]:
# Install the packages listed in helpers/requirements_colab.txt.
!pip install -r helpers/requirements_colab.txt

# Setup model

In [None]:
# Setup and instantiate the network model
import torch
from torch import nn
from torch.autograd import Variable
import os
from helpers import dataset as ds
from helpers import datapreparation as dp
import numpy as np
import time

class Generalist(nn.Module):
    """LSTM that maps a sequence of note vectors to per-note probabilities.

    The network is ``nn.LSTM`` -> ``nn.Linear`` -> ``nn.Sigmoid`` and its
    output has the same feature size as its input.  The recurrent state is
    stored in ``self.hidden`` and carried over from one ``forward`` call to
    the next, so callers reset it with ``model.hidden = model.init_hidden()``
    before feeding an unrelated sequence.

    Args:
        input_size: Number of features per time step (also the output size).
        hidden_size: Number of features in the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        batch_size: Batch dimension used when allocating the recurrent state.
    """

    def __init__(self, input_size, hidden_size, num_layers, batch_size):
        super(Generalist, self).__init__()
        self.input_size = input_size
        self.output_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.rnn = nn.LSTM(input_size, hidden_size, num_layers)
        self.notes_decoder = nn.Linear(hidden_size, self.output_size)
        self.out2 = nn.Sigmoid()
        # Must come last: the state is allocated next to the parameters.
        self.hidden = self.init_hidden()

    def init_hidden(self, tag=None):
        """Return a fresh all-zero ``(h_0, c_0)`` pair for the LSTM.

        The tensors are created with the device and dtype of the model's
        parameters, so the model works on CPU as well as on CUDA.

        Args:
            tag: Unused by this model.

        Returns:
            Tuple of two tensors of shape
            ``(num_layers, batch_size, hidden_size)``.
        """
        reference = next(self.parameters())
        shape = (self.num_layers, self.batch_size, self.hidden_size)
        return (reference.new_zeros(shape), reference.new_zeros(shape))

    def forward(self, input_sequence, tag=None):
        """Run the LSTM over ``input_sequence`` and return note probabilities.

        Args:
            input_sequence: Tensor laid out as ``nn.LSTM`` expects with its
                default ``batch_first=False``:
                ``(seq_len, batch_size, input_size)``.
            tag: Unused by this model.

        Returns:
            Tensor of sigmoid activations with the same leading dimensions as
            the input and ``output_size`` features.
        """
        # The stored state may predate a ``.cuda()``/``.to()`` call on the
        # module (e.g. the one created in ``__init__``); align it with the
        # input so the LSTM never sees mixed devices.
        device = input_sequence.device
        state = tuple(h.to(device) for h in self.hidden)
        output, self.hidden = self.rnn(input_sequence, state)
        output = self.notes_decoder(output)
        return self.out2(output)
    
# Load the training piano rolls through the project dataset helper.
dirpath = os.path.join('datasets', 'training', 'piano_roll_fs5')
X = ds.pianoroll_dataset_batch(dirpath)

# The feature width is read off the last axis of the first song's input;
# the remaining network dimensions are fixed constants.
input_size = X[0][0].size(-1)
hidden_size, num_layers, batch_size = 256, 2, 1

# Build the network and move its parameters to the GPU.
model = Generalist(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    batch_size=batch_size,
).cuda()

In [None]:
# Instantiate hyperparameters
# Loss: binary cross-entropy computed on probabilities. nn.BCELoss expects
# inputs in [0, 1], which the model's final nn.Sigmoid layer provides.
# The commented-out lines are alternative losses that are currently disabled.
#loss_func = nn.BCEWithLogitsLoss()
loss_func = nn.BCELoss()
#loss_func = nn.MSELoss()

# Optimizer: Adam over all model parameters with learning rate 1e-3.
# The commented-out lines are alternative optimizers that are currently disabled.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
#optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.1) 
#optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-3)

# Disabled debug check: prints the input/target lengths of every song and the
# summed difference between the last input frame and the second-to-last
# target frame.
#for song_x, tag, song_y in X:
  #print(len(song_x), len(song_y))
  #print((song_x[-1] - song_y[-2]).squeeze(1).cpu().data.numpy().sum())

# Train model

In [None]:
# Sanity check before training: print the untrained model's output for one
# time step (index 42) of the first song.
with torch.no_grad():
    model.hidden = model.init_hidden()
    output = model(X[0][0].cuda()).detach()
    print(output[42])

epochs = 32
chunk_size = 32     # time steps per training window
chunk_overlap = 8   # time steps shared by consecutive windows
chunk_stride = chunk_size - chunk_overlap
if chunk_stride <= 0:
    # A non-positive stride would never advance the window below.
    raise ValueError("chunk_overlap must be smaller than chunk_size")
start = time.time()

for epoch in range(epochs):
    # Accumulate plain floats.  Summing the loss tensors themselves would
    # chain every window's autograd history onto `sum_loss` and keep it alive
    # for the whole epoch.
    sum_loss = 0.0
    chunk_count = 0
    for input_sequence, tag, target_sequence in X:
        i = 0
        length = len(input_sequence)
        # Slide a window of `chunk_size` steps over the song.
        # NOTE(review): the last window of a song can lie entirely inside the
        # previous one (e.g. length == chunk_size); kept as in the original.
        while i < length:
            j = min(i + chunk_size, length)
            input_seq = input_sequence[i:j]
            target_seq = target_sequence[i:j]

            # Step 1. PyTorch accumulates gradients, so clear them before
            # each window.
            model.zero_grad()

            # Step 2. Reset the LSTM state so that this window does not
            # depend on the history of the previous one.
            model.hidden = model.init_hidden()

            # Step 3. Run the forward pass.
            pred_seq = model(input_seq.cuda())

            # Step 4. Compute the loss and gradients, then update the
            # parameters.
            loss = loss_func(pred_seq, target_seq.cuda())
            loss.backward()
            optimizer.step()

            sum_loss += loss.item()
            chunk_count += 1
            i += chunk_stride

    # max(..., 1) avoids a ZeroDivisionError when the dataset is empty.
    print('loss: ' + str(sum_loss / max(chunk_count, 1)))
    print(str(epoch+1) + '/' + str(epochs))#, end='\r')

print("runtime: ", time.time() - start)

# Same sanity check after training, followed by the notes that are active in
# the first song's target sequence at the same time step.
with torch.no_grad():
    model.hidden = model.init_hidden()
    output = model(X[0][0].cuda()).detach()
    print(output[42])
    print(X[0][-1][42] > 0)

# Save/load

In [None]:
# Save only the learned parameters (the state_dict) to the file
# 'GenState256_32_BCE' in the working directory.
torch.save(model.state_dict(), 'GenState256_32_BCE')

In [None]:
# Restore the model parameters from the checkpoint file 'GenState256_16'.
# NOTE(review): this is a different file name from the one written by the
# save cell ('GenState256_32_BCE') — confirm which checkpoint is intended.
# NOTE(review): torch.load unpickles the file; only load checkpoints that come
# from a trusted source.
model.load_state_dict(torch.load('GenState256_16'))

# Test output

In [None]:
# Run the model over the first song's input, starting from a zero state.
with torch.no_grad():
    model.hidden = model.init_hidden()
    output = model(X[0][0].cuda())

# Squeeze axis 1, convert to a transposed NumPy array and binarise: every
# activation above 0.15 counts as an active note.
prediction = output.squeeze(1).cpu().data.numpy().T > 0.15
dp.visualize_piano_roll(prediction)

In [None]:
# Pass the thresholded prediction to the project playback helper.
# NOTE(review): embed_play_v1 lives in helpers/datapreparation; presumably it
# embeds an audio player in the notebook — confirm.
dp.embed_play_v1(prediction)

In [None]:
# Convert the first song's input (the same tensor the model was fed above)
# to a transposed NumPy array and plot it.
original = np.transpose(X[0][0].squeeze(1).cpu().data.numpy())
dp.visualize_piano_roll(original)

In [None]:
# Pass the original piano roll to the project playback helper.
# NOTE(review): embed_play_v1 lives in helpers/datapreparation; presumably it
# embeds an audio player in the notebook — confirm.
dp.embed_play_v1(original)

# Compose music

In [None]:
def compose(model, init_song, init_len, max_len, threshold=0.1):
    """Generate a piano roll by feeding the model its own thresholded output.

    Frames ``0..init_len`` (inclusive) are copied from the seed song; every
    later frame is the previous model output binarised at ``threshold``.  The
    model is run one frame at a time and its recurrent state is reset once,
    before the first frame.

    Args:
        model: Network exposing ``init_hidden()``, a writable ``hidden``
            attribute and a callable forward pass.
        init_song: Indexable whose first element is the seed sequence; frame
            ``i`` is read as ``init_song[0][i]``.
        init_len: Index of the last seed frame to copy, so ``init_len + 1``
            seed frames are used.
        max_len: Index of the last frame to produce, so ``max_len + 1`` frames
            are produced in total.
        threshold: Model outputs greater than or equal to this value become
            1.0, all others 0.0.

    Returns:
        ``np.ndarray`` of the collected frames, stacked and then transposed.

    Raises:
        ValueError: If ``init_len`` is negative while frames are requested
            (there would be no frame to start from).
        IndexError: If the seed sequence holds fewer frames than required.
    """
    seed = None
    if max_len >= 0:
        if init_len < 0:
            raise ValueError(
                "init_len must be >= 0: at least one seed frame is needed")
        seed = init_song[0]
        needed = min(init_len, max_len) + 1
        if len(seed) < needed:
            # Fail before any computation instead of midway through the loop.
            raise IndexError(
                "seed sequence has %d frames but %d are required"
                % (len(seed), needed))

    # Run on whatever device the model lives on.  Models without parameters
    # fall back to CUDA, which is what this function always used before.
    params = model.parameters() if hasattr(model, 'parameters') else ()
    first_param = next(iter(params), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda')

    composition = []
    with torch.no_grad():
        model.hidden = model.init_hidden()
        output = None
        for i in range(max_len + 1):
            if i <= init_len:
                notes = seed[i].unsqueeze(0)
            else:
                notes = output.ge(threshold).float()

            composition.append(notes.squeeze().detach().cpu().numpy())
            # The output of the final frame would never be used.
            if i < max_len:
                output = model(notes.to(device)).detach()

    return np.array(composition).T

# Seed the generator with the first song: frames 0..10 are copied from it and
# the model continues on its own until the roll holds 1001 frames.
composition = compose(model, X[0], 10, 1000)
# Plot the generated roll, then pass it to the project playback helper.
dp.visualize_piano_roll(composition)
dp.embed_play_v1(composition)

In [None]:
# Reset the recurrent state, then hand the model and the first song's input
# (moved to the GPU) to the project helper.
# NOTE(review): gen_music_seconds_smooth is defined in helpers/datapreparation;
# what it returns or displays is not visible from this notebook.
model.hidden = model.init_hidden()
dp.gen_music_seconds_smooth(model, X[0][0].cuda())