In [1]:
import matplotlib
matplotlib.use('Agg')
import numpy as np
import glob
import pypianoroll as ppr
import time
import music21
import os
import torch
import torch.utils.data
from torch import nn, optim
from torch.nn import functional as F
import torch.utils.data as data
from utils.utilsPreprocessing import *
import time
#np.set_printoptions(threshold=np.inf)
#torch.set_printoptions(threshold=50000)

In [2]:
###HYPERPARAMETERS######################
epochs = 500
batch_size = 7
learning_rate = 1e-3
log_interval = 1
hidden_size = 1024
num_layers = 1
num_classes = 61
validate = False #play/show a random sample after each epoch

# Beat resolution
If beat resolution = 1 --> 1 tick = 1/4 note<br>
If beat resolution = 2 --> 1 tick = 1/8 note<br>
If beat resolution = 4 --> 1 tick = 1/16 note<br>
If beat resolution = 8 --> 1 tick = 1/32 note<br>
...



In [3]:
beat_resolution = 4

In [4]:
pathToFiles = "/media/EXTHD/niciData/Datasets/Nottingham/"
midiDatasetTrain = createDatasetLSTM(pathToFiles + "train/*.mid", beat_res = beat_resolution,
                                    force_length=True, force_value=16)
midiDatasetTrain.setMaxLength()
##TO DO
#midiDatasetTrain.setMeanLength()

midiDatasetTest = createDatasetLSTM(pathToFiles + "test/*.mid", beat_res = beat_resolution,
                                   force_length=True, force_value=16)
midiDatasetTest.setMaxLength()

midiDatasetVal = createDatasetLSTM(pathToFiles + "valid/*.mid",
                                  force_length=True, force_value=16)
midiDatasetVal.setMaxLength()

train_loader = torch.utils.data.DataLoader(midiDatasetTrain, batch_size=batch_size, shuffle=False, drop_last=True)
test_loader = torch.utils.data.DataLoader(midiDatasetTest, batch_size=batch_size, shuffle=False, drop_last=True)
val_loader = torch.utils.data.DataLoader(midiDatasetVal, batch_size=1, shuffle=True, drop_last=True)

Longest sequences contains 16 ticks
Longest sequences contains 16 ticks
Longest sequences contains 16 ticks


In [5]:
print("\nThere are {} songs in the training set\n".format(len(midiDatasetTrain)))
print("There are {} songs in the test set\n".format(len(midiDatasetTest)))
print("There are {} songs in the validation set\n".format(len(midiDatasetVal)))


There are 694 songs in the training set

There are 170 songs in the test set

There are 173 songs in the validation set



In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [7]:
class LSTM_notewise(nn.Module):
    def __init__(self, hidden_size=400, num_layers=2, batch_size=1):
        super(LSTM_notewise, self).__init__()
        
        self.input_size = 61
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.num_classes = 61

        
        self.i2h = nn.Linear(self.input_size, self.hidden_size)
        #self.batch_norm = nn.BatchNorm1d(self.hidden_size)
        self.lstm = nn.LSTM(self.hidden_size, self.hidden_size,
                            self.num_layers, batch_first=True,
                            dropout=0)
        self.h2o = nn.Linear(self.hidden_size, self.num_classes)
        
    def initState(self):
        state = torch.zeros(self.num_layers,
                            self.batch_size, 
                            self.hidden_size).double().to(device)
        return state
    
    def forward(self, input, seq_lengths):
        h_t = self.initState()
        c_t = self.initState()
        
        embedded_notes = self.i2h(input)
        #embedded_notes = self.batch_norm(embedded_notes)
        embedded_notes = torch.nn.utils.rnn.pack_padded_sequence(embedded_notes, 
                                                                 seq_lengths, 
                                                                 batch_first=True)    
        out, (h_t, c_t) = self.lstm(embedded_notes, (h_t, c_t))
        out, out_lengths = torch.nn.utils.rnn.pad_packed_sequence(out, batch_first=True)
        out = self.h2o(out)
        neg_out = (1-out)
        
        #print('out', out)
        #print('neg_out', neg_out)
        out = torch.stack((out, neg_out), dim=3)
        out = out.view(-1, 2)

        
        return out

In [8]:
model = LSTM_notewise(hidden_size=hidden_size, num_layers=num_layers, 
                      batch_size=batch_size).double().to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

#initialize lstm weights
from torch.nn import init
init_range = 1.
for layer_p in model.lstm._all_weights:
    for p in layer_p:
        if 'weight' in p:
            init.xavier_uniform_(model.lstm.__getattr__(p), gain=10)

In [9]:
#load model
#from loadModel import loadStateDict
#pathToModel = "../models/LSTM_notewise.pth"

#model = loadStateDict(model, pathToModel)


In [10]:
def train(epoch):
    
    model.train()
    train_loss = 0
    criterion = nn.CrossEntropyLoss()
    for batch_idx, data in enumerate(train_loader):
        optimizer.zero_grad()
        input_lstm, ground_truth, seq_lengths = reorderBatch(data)
        prediction = model(input_lstm.double().to(device), seq_lengths)
        ground_truth = ground_truth.to(device)
        
        #print('inptu_lstm: ', input_lstm.size())
        #print("prediction: ",prediction.size(),"ground_truths: ", ground_truth.size())
        #print('prediction', torch.argmax(prediction[0,:,:]))
        #print('ground_truth', torch.argmax(ground_truth[0,:,:]))
        
        loss = criterion(prediction, ground_truth.contiguous().view(-1).long())
        loss.backward()
        
        print("lstm gradients: ", model.lstm.weight_ih_l0.grad)

        train_loss += loss.item()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        #reshape
        prediction = prediction.view(model.batch_size,-1, num_classes, 2)
        #delete negative outputs
        prediction = prediction[:,:,:,0] 
        print("prediction: ",prediction.size(),"ground_truth: ", ground_truth.size())
        if(batch_idx % log_interval == 0):
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * input_lstm.size(0), len(train_loader.dataset),
                100. * batch_idx / len(train_loader),
                loss.item() / input_lstm.size(0)))
            print('ground_truth', torch.argmax(ground_truth[0], dim=1))
            print('prediction', torch.argmax(prediction[0], dim=1))
            
    print('====> Epoch: {} Average Loss: {:.4f}'.format(
          epoch, train_loss / len(train_loader.dataset)))
        
    return train_loss / len(train_loader.dataset)

def test(epoch):
    model.eval()
    test_loss = 0
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            input_lstm, ground_truth, seq_lengths = reorderBatch(data)
            prediction = model(input_lstm.double().to(device), seq_lengths)
            test_loss += criterion(prediction, ground_truth.view(-1).long().to(device)).item()
            prediction = prediction.view(model.batch_size,-1,num_classes)
            if(False):#i % log_interval == 0):
                print('prediction', torch.argmax(prediction,dim=2))
                print('ground_truth', torch.argmax(ground_truth,dim=2))
    
    print('====> Test set Loss: {:.4f}'.format(test_loss/len(test_loader.dataset)))
    return test_loss/len(test_loader.dataset)

def val(epoch):
    model.eval()
    val_loss = 0
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            input_lstm, ground_truth, seq_lengths = reorderBatch(data)
            prediction = model(input_lstm.double().to(device), seq_lengths)
            val_loss += criterion(prediction, ground_truth.to(device)).item()
            
            break
    print('====> Validation Loss: {:.4f}'.format(val_loss))
    return val_loss
        

In [11]:
import matplotlib.pyplot as plt


train_losses = []
test_losses = []
if validate:
    val_losses = []
best_test_loss = 999
for epoch in range(1, epochs + 1):
    #train
    current_train_loss = (train(epoch))
    train_losses.append(current_train_loss)
    #test
    current_test_loss = test(epoch)
    test_losses.append(current_test_loss)
    #save if model better than best model
    if(current_test_loss < best_test_loss):
        best_test_loss = current_test_loss
        #torch.save(model.state_dict(),'/media/EXTHD/niciData/LSTM_notewise.pth')
    
    #validate
    if validate:
        current_val_loss = val(epoch)
        val_losses.append(current_val_loss)
 
    
plt.plot(train_losses, color='red', label='Train loss')
plt.plot(test_losses, color='orange', label='Test loss')
if validate:
    plt.plot(val_losses, color='yellow', label='Validation loss')
plt.legend()
plt.savefig('LSTM_notewise_Nottingham.png')
#plt.show()



lstm gradients:  tensor([[ 8.0819e-05, -2.7150e-05,  4.6065e-05,  ...,  1.2552e-04,
         -1.9836e-04,  2.9426e-05],
        [ 8.4046e-05,  1.3505e-04,  1.1301e-04,  ...,  1.6413e-04,
          3.5838e-05,  1.6570e-05],
        [-8.0412e-05, -1.7736e-05, -7.4031e-05,  ...,  2.3825e-05,
          5.1795e-05, -1.3786e-05],
        ...,
        [-5.3285e-05,  9.8481e-05, -2.0244e-04,  ..., -2.2879e-04,
          2.4916e-04,  3.4715e-05],
        [ 9.4228e-06, -3.9444e-05,  1.5473e-04,  ...,  1.3631e-04,
         -5.8519e-05, -8.7096e-05],
        [-8.5752e-05,  1.0426e-05,  1.7060e-05,  ..., -5.5685e-05,
          1.4286e-04, -6.6517e-05]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([31, 60, 60, 38, 43, 60, 60, 35, 38, 60, 60, 60, 38, 38, 60], device='cuda:0')
prediction tensor([16, 35, 24, 55, 55, 23, 23, 55, 23, 55, 23, 23, 16, 40, 40], device='cuda:0')
lstm gradients:  tensor([[-8.7534e-06, -2

lstm gradients:  tensor([[ 3.9123e-07, -1.7047e-06,  1.6164e-06,  ...,  5.8197e-07,
         -2.2746e-06,  1.0797e-07],
        [-4.7490e-06,  6.7280e-06, -2.3730e-06,  ..., -2.5603e-06,
          8.1819e-06, -1.1116e-06],
        [-7.1696e-06, -2.3546e-06, -1.0719e-05,  ..., -9.4640e-06,
          7.6076e-06,  2.6490e-06],
        ...,
        [-2.2609e-06,  1.5030e-06, -4.9950e-06,  ..., -6.7135e-06,
          4.1072e-06,  2.1926e-06],
        [-2.1208e-07, -1.0527e-07,  4.7642e-08,  ..., -4.0741e-07,
          2.8511e-07, -1.0502e-07],
        [-1.9082e-06,  1.2174e-06, -2.4236e-06,  ..., -4.3674e-06,
          3.9649e-06,  4.5412e-07]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([33, 60, 60, 60, 26, 60, 60, 33, 38, 60, 60, 38, 37, 60, 60], device='cuda:0')
prediction tensor([ 4, 48, 48, 12, 16, 21, 21, 16, 21, 21, 21, 16, 21, 16, 21], device='cuda:0')
lstm gradients:  tensor([[ 7.9591e-07, -7

lstm gradients:  tensor([[-2.4586e-07, -8.4294e-08,  9.1515e-07,  ..., -2.1931e-07,
         -6.0649e-07, -1.5005e-07],
        [-3.1400e-06,  1.2986e-06,  8.7187e-07,  ..., -1.8193e-06,
          3.4273e-06, -1.2875e-06],
        [-1.7524e-08,  5.7108e-07, -3.6643e-06,  ..., -1.4457e-06,
          2.6396e-06,  9.4960e-07],
        ...,
        [-3.8429e-07,  3.4390e-07, -2.8305e-07,  ..., -9.3757e-07,
         -9.2990e-07,  4.2536e-07],
        [-2.0343e-07,  4.9278e-07, -6.0369e-07,  ..., -2.1537e-07,
          9.8148e-07,  6.9104e-08],
        [ 1.0272e-06, -1.0531e-06,  1.8680e-06,  ...,  2.5294e-06,
         -3.4967e-06, -8.4689e-07]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([31, 31, 60, 36, 36, 36, 60, 40, 40, 60, 60, 36, 36, 36, 60], device='cuda:0')
prediction tensor([54, 54, 48, 54, 16, 21, 21, 21, 21, 21, 16, 16, 16, 16, 16], device='cuda:0')
lstm gradients:  tensor([[-7.7678e-07, -7

lstm gradients:  tensor([[-1.7921e-08, -2.0804e-07,  9.9322e-08,  ..., -5.2761e-08,
         -6.3379e-08, -1.3097e-07],
        [ 1.8206e-06, -2.4912e-06,  2.4416e-06,  ...,  4.5787e-06,
         -4.4383e-06, -3.5893e-07],
        [-5.6997e-07,  4.6348e-07, -1.4257e-06,  ..., -1.6508e-06,
          1.1958e-06,  7.7542e-07],
        ...,
        [-4.1822e-07,  3.5178e-07, -9.4990e-07,  ..., -1.4248e-06,
         -2.9212e-07,  8.9244e-07],
        [-2.9277e-07,  1.2853e-07, -1.4554e-07,  ..., -4.6141e-07,
          2.1446e-07,  8.3924e-08],
        [-4.3777e-07, -1.9967e-07, -1.1380e-07,  ..., -1.1009e-06,
          5.4831e-07, -3.2448e-07]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([38, 60, 60, 35, 31, 60, 60, 31, 31, 60, 60, 38, 38, 38, 38], device='cuda:0')
prediction tensor([11, 48, 55, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 16], device='cuda:0')
lstm gradients:  tensor([[ 5.8142e-07, -4

lstm gradients:  tensor([[-1.6606e-08,  3.0170e-08,  2.6625e-07,  ...,  1.2682e-07,
         -3.4440e-07, -6.2879e-08],
        [ 3.0436e-06, -2.4882e-06,  3.5654e-06,  ...,  4.6642e-06,
         -5.5640e-06, -9.4344e-07],
        [-5.9244e-07,  4.4221e-07, -8.0429e-07,  ..., -4.9734e-07,
          1.2351e-06,  5.2700e-07],
        ...,
        [-2.0476e-07,  3.9236e-07, -3.1597e-07,  ...,  2.2500e-07,
         -1.8325e-07,  3.0789e-07],
        [-8.2763e-08,  2.1440e-07, -2.9854e-07,  ..., -5.6606e-08,
          4.2028e-07,  2.2605e-07],
        [-5.5545e-07,  7.9430e-07, -7.7882e-07,  ..., -7.8684e-07,
          1.2169e-06,  6.7895e-08]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([35, 60, 60, 40, 40, 60, 60, 40, 42, 60, 60, 40, 40, 60, 60], device='cuda:0')
prediction tensor([ 4, 48, 54, 12, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16], device='cuda:0')
lstm gradients:  tensor([[-6.4921e-08, -9

lstm gradients:  tensor([[-4.3753e-07, -1.1040e-07,  5.3745e-08,  ..., -5.7400e-07,
         -2.4125e-07, -4.1867e-08],
        [ 2.2032e-07, -1.0497e-06,  2.3408e-06,  ...,  1.7434e-06,
         -1.8299e-06, -1.1282e-07],
        [ 1.4461e-07,  3.8318e-07, -1.7034e-06,  ..., -1.0924e-06,
          7.6084e-07,  1.0337e-06],
        ...,
        [ 1.5892e-06,  9.5319e-07,  2.3021e-06,  ...,  1.4393e-06,
         -1.4579e-06,  5.8541e-07],
        [-1.0275e-08, -8.6206e-08,  3.1061e-07,  ...,  1.2643e-07,
         -3.3542e-07, -5.7356e-08],
        [-2.6640e-07,  2.8134e-07,  5.3212e-07,  ...,  7.8922e-07,
          1.1234e-07, -2.7690e-07]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([60, 35, 60, 33, 33, 33, 60, 33, 60, 35, 60, 33, 33, 33, 60], device='cuda:0')
prediction tensor([11, 54, 16, 54, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16], device='cuda:0')
lstm gradients:  tensor([[-2.4766e-07, -2

lstm gradients:  tensor([[-2.7695e-07, -9.0684e-08, -2.8069e-07,  ..., -4.4323e-07,
          2.6807e-07,  8.2279e-08],
        [-1.3926e-06,  1.4730e-07, -2.3344e-06,  ..., -3.0280e-06,
          2.2455e-06,  6.7983e-07],
        [-8.1663e-07, -7.6043e-08, -1.1820e-06,  ..., -1.4609e-06,
          1.1726e-06,  2.9185e-07],
        ...,
        [-1.3503e-06,  3.9626e-07, -2.7538e-06,  ..., -2.4459e-06,
          2.0964e-06,  4.0150e-07],
        [ 2.9575e-09, -5.7524e-08,  8.8595e-09,  ...,  9.4939e-08,
         -1.0292e-07,  2.3543e-08],
        [-7.5223e-08, -3.4423e-08, -3.7605e-08,  ..., -4.4974e-07,
          1.3276e-07,  7.4005e-08]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([35, 60, 60, 38, 38, 60, 60, 30, 30, 60, 60, 31, 31, 60, 60], device='cuda:0')
prediction tensor([ 4, 48, 54, 54, 54, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16], device='cuda:0')
lstm gradients:  tensor([[-1.8555e-07,  9

lstm gradients:  tensor([[-2.0657e-07,  3.0726e-08, -4.7790e-07,  ..., -6.4163e-07,
          5.7539e-07, -2.7348e-08],
        [ 2.1735e-06, -1.4489e-06,  4.1985e-06,  ...,  3.3735e-06,
         -6.2676e-06,  2.4522e-07],
        [ 7.1680e-07, -6.3053e-07,  1.0323e-06,  ...,  1.7260e-06,
         -9.5077e-07, -4.3773e-07],
        ...,
        [ 1.3160e-06,  9.3885e-08, -3.1740e-07,  ...,  1.0168e-06,
         -1.6938e-06,  4.3280e-07],
        [-1.0142e-07,  9.9554e-08, -4.3959e-07,  ..., -3.7832e-07,
          3.3213e-07,  1.7588e-08],
        [-1.6259e-07,  9.1104e-08, -2.3116e-07,  ..., -8.0823e-09,
          2.3653e-07, -4.9978e-08]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([60, 60, 60, 60, 60, 60, 60, 31, 60, 34, 60, 60, 60, 31, 60], device='cuda:0')
prediction tensor([ 4, 48, 48, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16], device='cuda:0')
lstm gradients:  tensor([[-1.7958e-07, -4

lstm gradients:  tensor([[-6.4355e-08, -1.1148e-07,  9.4015e-08,  ..., -2.2510e-07,
          2.6380e-07, -1.1029e-07],
        [ 1.5308e-08,  4.2992e-07,  3.0963e-06,  ...,  3.2267e-06,
         -1.3157e-06, -1.4470e-06],
        [ 5.1325e-07, -5.5250e-07,  2.0160e-06,  ...,  1.6728e-06,
         -1.1203e-06, -6.7036e-07],
        ...,
        [ 1.4797e-06,  5.8698e-07,  1.7278e-07,  ..., -5.5324e-07,
         -2.6708e-06,  1.8941e-06],
        [-5.0049e-08,  1.2224e-07, -3.3006e-07,  ..., -1.8728e-07,
          9.2131e-09,  2.4215e-07],
        [ 1.3363e-07, -3.8813e-08,  6.6071e-07,  ...,  3.7675e-07,
         -6.7110e-07, -2.8882e-08]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([38, 60, 60, 40, 38, 38, 60, 33, 38, 60, 60, 40, 38, 38, 60], device='cuda:0')
prediction tensor([ 4, 48, 54, 54, 54, 54, 16, 16, 16, 16, 16, 16, 16, 16, 16], device='cuda:0')
lstm gradients:  tensor([[ 8.2250e-09, -5

lstm gradients:  tensor([[ 5.5309e-07, -2.2367e-07,  7.8577e-07,  ...,  2.9736e-07,
         -1.2132e-06, -3.9534e-07],
        [-1.1521e-06, -1.7495e-06,  1.1487e-06,  ...,  1.4883e-06,
          4.1453e-06, -2.0604e-06],
        [ 4.9834e-08,  4.6857e-07, -6.6227e-07,  ..., -2.6483e-07,
          8.3129e-07,  2.9301e-07],
        ...,
        [-9.0449e-07,  9.7612e-07, -2.2125e-06,  ..., -1.6532e-06,
          3.8092e-07, -1.4255e-07],
        [-1.2144e-07,  2.1071e-07, -2.2547e-07,  ..., -2.7634e-07,
         -5.0537e-09,  3.4173e-07],
        [ 5.9346e-07, -4.4715e-07,  7.5748e-07,  ...,  1.1578e-06,
         -1.0032e-06, -1.2051e-07]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([40, 40, 60, 33, 33, 33, 60, 37, 35, 60, 60, 40, 60, 33, 60], device='cuda:0')
prediction tensor([11, 54, 48, 54, 54, 54, 16, 16, 16, 16, 16, 16, 16, 16, 16], device='cuda:0')
lstm gradients:  tensor([[-1.1188e-08, -4

====> Test set Loss: 0.0064
lstm gradients:  tensor([[-7.0845e-08, -1.7687e-07,  1.1429e-07,  ...,  3.3573e-07,
         -3.9788e-07,  9.8622e-08],
        [ 1.3511e-06, -1.0155e-06,  2.3483e-06,  ...,  2.7054e-06,
         -2.5326e-06, -1.1251e-06],
        [ 2.3431e-07,  5.2082e-07, -7.6876e-07,  ..., -2.4487e-07,
         -2.0888e-07,  7.4139e-07],
        ...,
        [-1.1555e-07, -3.5306e-07,  2.7406e-06,  ...,  1.4115e-06,
         -2.6994e-06, -4.0567e-07],
        [-2.7358e-07,  2.4503e-07, -5.1079e-07,  ..., -2.7321e-07,
          2.5235e-07,  3.8746e-07],
        [ 5.0050e-07, -5.7696e-07,  7.6890e-07,  ...,  1.1199e-06,
         -1.0966e-06, -3.7654e-07]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([31, 60, 60, 38, 43, 60, 60, 35, 38, 60, 60, 60, 38, 38, 60], device='cuda:0')
prediction tensor([ 4, 54, 55, 54, 16, 55, 12, 16, 18, 12, 12, 16, 16, 12, 16], device='cuda:0')
lstm gradient

lstm gradients:  tensor([[ 1.4516e-07, -1.1537e-07,  3.5607e-07,  ...,  6.6871e-08,
         -2.1507e-07,  6.7528e-08],
        [-4.7314e-06,  6.2955e-07, -2.7453e-06,  ..., -5.2078e-06,
          5.2935e-06, -1.4086e-06],
        [-5.2787e-07, -1.6748e-07,  9.2607e-07,  ...,  4.8111e-07,
         -4.3571e-07, -3.7461e-07],
        ...,
        [-3.5996e-07, -3.7236e-07,  3.2263e-07,  ..., -2.6048e-07,
         -4.2418e-07, -5.8413e-07],
        [-7.6046e-07,  5.8733e-07, -8.8766e-07,  ..., -8.9470e-07,
          1.1284e-06,  3.7006e-07],
        [-3.5282e-07, -8.3401e-08, -6.0610e-07,  ...,  5.2517e-07,
         -7.0675e-08, -2.6836e-07]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([60, 38, 60, 40, 60, 37, 60, 37, 60, 37, 60, 40, 60, 45, 60], device='cuda:0')
prediction tensor([ 4, 54, 16, 55, 54, 55, 18, 55, 18, 55, 18, 55, 18, 55, 18], device='cuda:0')
lstm gradients:  tensor([[ 1.4603e-07, -4

lstm gradients:  tensor([[-4.7818e-08,  2.3615e-07, -6.3456e-07,  ..., -5.7259e-07,
          1.0012e-06,  2.0892e-07],
        [ 9.1567e-07, -7.4041e-07, -2.3029e-06,  ..., -4.8400e-07,
          3.9937e-06, -9.4925e-07],
        [ 5.1554e-07, -7.7853e-07,  1.0694e-06,  ...,  1.3392e-06,
          5.5713e-07, -7.7353e-07],
        ...,
        [ 1.5985e-07,  3.2243e-06, -1.7405e-06,  ..., -1.6725e-06,
          1.4563e-06,  2.7582e-06],
        [ 2.7394e-08,  4.8482e-07, -1.1332e-06,  ..., -4.6419e-07,
          1.0065e-06,  5.2051e-07],
        [ 1.9555e-07, -3.2021e-07, -2.6437e-07,  ...,  8.6063e-08,
         -6.6338e-07, -2.8969e-07]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([38, 60, 60, 35, 38, 60, 60, 31, 35, 60, 60, 26, 31, 60, 60], device='cuda:0')
prediction tensor([ 4, 54, 55, 55, 54, 55, 16, 16, 16, 12, 16, 16, 16, 12, 16], device='cuda:0')
lstm gradients:  tensor([[-8.9933e-08,  1

lstm gradients:  tensor([[ 7.4269e-08,  2.2095e-07, -7.3695e-08,  ...,  1.9244e-07,
          1.8173e-07, -1.6096e-08],
        [-6.8547e-07,  2.7035e-07, -9.4226e-07,  ..., -5.5070e-07,
          9.8657e-07,  2.9375e-08],
        [ 7.4016e-07,  8.0825e-08,  6.7898e-07,  ...,  1.1761e-06,
          7.4904e-07, -7.2861e-07],
        ...,
        [ 7.2220e-07,  2.6105e-07,  1.2686e-06,  ..., -2.9609e-07,
         -6.7700e-07,  4.9609e-07],
        [-4.0466e-08, -4.1579e-07,  3.0382e-07,  ...,  1.9473e-08,
         -3.0332e-07, -2.6885e-07],
        [-3.1955e-07,  6.7023e-08, -5.1889e-07,  ..., -5.9206e-07,
          7.1988e-07,  2.0955e-07]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([40, 60, 60, 33, 36, 36, 60, 38, 40, 60, 60, 43, 40, 40, 60], device='cuda:0')
prediction tensor([ 4, 18, 55, 55, 16, 18, 16, 16, 16, 16, 12, 16, 16, 12, 16], device='cuda:0')
lstm gradients:  tensor([[ 7.6744e-07,  1

lstm gradients:  tensor([[-6.4998e-08,  2.1766e-07, -6.6211e-07,  ..., -6.9125e-07,
          5.9422e-07, -4.0025e-07],
        [ 3.5569e-06, -5.4148e-06,  6.3125e-06,  ...,  3.6772e-06,
         -9.6537e-06,  5.6202e-07],
        [-4.1842e-07,  8.7549e-07, -7.2822e-07,  ...,  4.1858e-07,
          9.9254e-07, -9.1315e-08],
        ...,
        [-1.1877e-06, -8.5438e-07,  4.8115e-06,  ...,  4.1726e-07,
         -3.6862e-06, -1.3408e-06],
        [-2.9883e-07,  4.4887e-07, -1.0794e-06,  ..., -7.8837e-07,
          6.7233e-07,  8.7230e-07],
        [ 1.4823e-07, -2.9040e-07,  8.6287e-07,  ...,  6.7388e-07,
         -1.9603e-07, -3.6692e-07]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([45, 45, 60, 40, 40, 40, 60, 40, 40, 40, 60, 38, 60, 40, 60], device='cuda:0')
prediction tensor([ 4, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 16], device='cuda:0')
lstm gradients:  tensor([[ 4.3944e-07,  4

lstm gradients:  tensor([[-4.9360e-07,  6.2171e-07, -1.5219e-06,  ..., -1.5073e-06,
          2.0947e-06,  3.1322e-07],
        [-3.1649e-06,  3.2507e-06, -1.8700e-06,  ..., -3.2859e-07,
          1.3909e-06,  8.9177e-07],
        [-2.9012e-07, -7.4816e-08,  7.6357e-07,  ...,  2.6225e-07,
         -9.9343e-08, -5.3540e-07],
        ...,
        [-5.9208e-07, -9.9369e-07,  2.8914e-06,  ..., -2.1250e-06,
         -6.7773e-07, -4.4170e-07],
        [-2.4702e-07,  8.4535e-08,  2.1869e-07,  ...,  2.7648e-07,
          6.9089e-08, -1.2338e-07],
        [ 3.6093e-07,  2.4482e-10,  4.7836e-07,  ..., -9.1855e-07,
          6.6813e-07, -1.2774e-07]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([42, 60, 60, 43, 38, 60, 35, 35, 36, 60, 60, 38, 35, 60, 31], device='cuda:0')
prediction tensor([11, 54, 55, 55, 54, 12, 16, 16, 16, 18, 12, 16, 16, 18, 12], device='cuda:0')
lstm gradients:  tensor([[-3.4893e-07, -1

lstm gradients:  tensor([[ 2.8221e-07,  3.3052e-07,  3.3543e-07,  ...,  1.6217e-06,
         -6.5587e-07,  2.4055e-07],
        [ 1.2111e-06, -3.8244e-07,  1.1962e-06,  ...,  3.8262e-06,
         -3.5540e-06,  2.4129e-07],
        [ 6.4620e-07, -8.0221e-07,  2.1918e-06,  ...,  2.3674e-06,
         -3.4650e-06, -9.3954e-08],
        ...,
        [ 6.5630e-07, -2.3058e-06,  4.1973e-06,  ...,  1.4014e-07,
         -2.7413e-06, -3.0583e-07],
        [ 1.9300e-07, -1.5189e-07,  5.7459e-08,  ...,  2.1758e-07,
         -3.5204e-07,  2.6539e-07],
        [-1.5160e-07,  4.0724e-07, -7.7535e-07,  ...,  1.0632e-06,
          4.5245e-07, -2.7532e-07]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([60, 43, 60, 45, 45, 45, 60, 42, 42, 42, 60, 38, 38, 38, 60], device='cuda:0')
prediction tensor([ 4, 54, 12, 55, 54, 54, 16, 16, 16, 54, 16, 16, 16, 16, 16], device='cuda:0')
lstm gradients:  tensor([[-5.3151e-07,  2

lstm gradients:  tensor([[ 5.5684e-07,  2.5876e-07, -1.9404e-07,  ...,  4.1612e-07,
         -2.6038e-07,  2.4455e-07],
        [-3.9133e-06,  1.3867e-06, -6.3266e-06,  ..., -8.7780e-06,
          1.0300e-05,  1.4018e-06],
        [-1.4044e-06,  6.3694e-07, -1.2250e-06,  ..., -1.5786e-06,
          2.4335e-06,  5.6704e-07],
        ...,
        [ 3.3492e-07, -3.9463e-07, -2.7767e-06,  ...,  4.7914e-07,
          2.2069e-06,  8.4681e-07],
        [-9.0165e-08,  3.4853e-07, -5.1481e-07,  ...,  3.1057e-08,
          6.9354e-07,  3.7592e-07],
        [-9.4041e-07, -2.1441e-08, -9.7368e-07,  ..., -1.3669e-06,
          1.7778e-06,  3.6276e-07]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([40, 60, 60, 40, 41, 60, 60, 40, 38, 60, 60, 38, 35, 60, 60], device='cuda:0')
prediction tensor([ 4, 16, 16, 54, 16, 18, 16, 16, 16, 16, 16, 16, 16, 16, 16], device='cuda:0')
lstm gradients:  tensor([[-3.3585e-07,  3

lstm gradients:  tensor([[ 5.6871e-07,  3.2814e-07,  1.9637e-08,  ...,  2.5343e-07,
         -1.4365e-07,  1.2590e-07],
        [-2.8290e-07,  1.2613e-06, -3.7524e-06,  ..., -5.4560e-06,
          6.3780e-06,  1.3176e-07],
        [-8.6877e-07, -1.4547e-06,  1.4296e-06,  ...,  1.4094e-06,
         -6.5203e-07, -1.0329e-06],
        ...,
        [ 1.0010e-06,  1.5623e-06, -1.1650e-07,  ..., -1.1222e-06,
         -4.1333e-06,  2.3760e-06],
        [-5.7753e-08,  2.3927e-07, -9.4672e-07,  ..., -5.7502e-07,
          1.1554e-06,  3.0046e-07],
        [ 3.2288e-07, -8.6868e-08, -1.0613e-07,  ...,  7.3770e-07,
         -2.3241e-07,  1.7380e-07]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([40, 60, 60, 38, 37, 60, 60, 37, 33, 60, 60, 33, 38, 60, 60], device='cuda:0')
prediction tensor([11, 54, 16, 16, 20, 54, 16, 16, 54, 12, 16, 16, 54, 12, 16], device='cuda:0')
lstm gradients:  tensor([[ 1.0477e-06, -1

lstm gradients:  tensor([[ 3.7397e-07, -8.9821e-08,  2.4966e-07,  ...,  6.9222e-07,
         -8.7674e-07,  1.8667e-07],
        [ 3.0218e-06, -1.1662e-06,  4.9129e-06,  ...,  7.2998e-06,
         -6.8596e-06, -2.3086e-07],
        [ 3.0340e-07, -1.6716e-07,  3.6373e-07,  ...,  7.7408e-07,
         -9.3625e-07,  1.3993e-07],
        ...,
        [-3.3641e-06, -8.0918e-07, -4.0420e-06,  ..., -1.2739e-06,
          1.3723e-06,  5.6701e-07],
        [-9.9791e-08, -8.1321e-08, -1.5326e-07,  ..., -7.5141e-08,
          5.1008e-08, -7.8053e-08],
        [ 4.1575e-07,  4.0538e-07,  6.7897e-08,  ...,  4.4479e-08,
         -3.6616e-07,  2.3700e-07]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([33, 60, 60, 26, 26, 60, 60, 30, 30, 60, 60, 26, 26, 60, 60], device='cuda:0')
prediction tensor([ 4, 54,  4, 55, 18, 54, 12, 16, 54, 54, 12, 54, 54, 54, 12], device='cuda:0')
lstm gradients:  tensor([[ 1.9222e-08, -2

ground_truth tensor([41, 60, 60, 42, 43, 60, 45, 45, 43, 60, 60, 38, 36, 60, 31], device='cuda:0')
prediction tensor([11,  8,  4, 54, 54, 58, 12, 16, 18, 18, 52, 52, 52, 12, 52], device='cuda:0')
lstm gradients:  tensor([[-1.4305e-06,  2.4002e-06, -1.9805e-06,  ..., -7.9395e-07,
          3.0870e-06,  9.3300e-07],
        [ 4.6053e-06, -5.3314e-06,  2.0463e-06,  ...,  1.9419e-07,
         -5.0988e-06, -2.1079e-06],
        [-3.9542e-07,  7.0916e-07,  1.3184e-07,  ...,  9.6388e-07,
          1.7965e-07,  2.4418e-08],
        ...,
        [ 1.4475e-06, -1.4396e-06,  5.6463e-06,  ...,  5.8356e-06,
         -6.1682e-06, -2.0396e-06],
        [-2.1937e-07, -2.2312e-07,  2.8205e-07,  ...,  1.4522e-07,
         -9.0594e-08, -2.1117e-07],
        [-4.4257e-07,  6.1095e-07, -5.1892e-07,  ..., -4.3855e-07,
          1.0109e-06,  5.0525e-08]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([31, 60, 60, 38, 43, 

lstm gradients:  tensor([[ 8.7374e-07,  3.9386e-07,  4.7053e-07,  ...,  8.3289e-07,
         -5.4870e-07,  1.7760e-07],
        [ 7.1443e-07,  3.8810e-06, -5.6462e-06,  ..., -4.0768e-06,
          6.0612e-06,  1.9766e-06],
        [ 9.5391e-07, -1.6027e-07,  4.3324e-07,  ...,  2.0713e-06,
         -2.0102e-06,  3.2664e-07],
        ...,
        [-2.8188e-07,  1.3000e-06,  1.8190e-06,  ...,  6.0850e-07,
         -2.0110e-06, -7.8103e-07],
        [-1.2760e-07,  3.3337e-07, -6.6228e-07,  ..., -5.2010e-07,
          8.8580e-07,  3.0107e-07],
        [ 1.5211e-07, -2.2246e-07,  4.1582e-07,  ..., -5.3402e-08,
         -7.2079e-07,  1.6233e-07]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([22, 60, 60, 29, 34, 60, 60, 60, 34, 60, 60, 34, 29, 29, 60], device='cuda:0')
prediction tensor([ 4, 18,  4, 55,  0, 18, 16, 16, 18, 18, 16, 16, 18, 18, 16], device='cuda:0')
lstm gradients:  tensor([[ 1.6766e-07, -2

lstm gradients:  tensor([[-7.2037e-07, -5.5759e-07, -2.5508e-07,  ..., -8.6545e-07,
          9.4563e-07, -2.3068e-07],
        [-3.8283e-07,  2.8216e-06, -8.7472e-07,  ...,  7.5133e-07,
          9.6004e-07,  1.7592e-06],
        [-7.8677e-07,  3.9692e-07, -1.4501e-06,  ..., -5.6769e-07,
         -2.2044e-07,  1.1110e-06],
        ...,
        [ 1.6407e-06, -1.9444e-06,  3.1360e-06,  ...,  1.0486e-06,
         -5.1345e-06, -9.0656e-07],
        [ 8.9729e-08,  3.7271e-07, -1.7054e-06,  ..., -1.3082e-06,
          1.0442e-06,  8.9050e-07],
        [-1.1156e-06,  6.4346e-08, -8.1368e-07,  ..., -1.9077e-06,
          2.0272e-06, -6.4491e-08]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([33, 33, 60, 38, 38, 38, 60, 33, 60, 31, 60, 30, 60, 26, 60], device='cuda:0')
prediction tensor([ 4, 54, 16, 55, 20, 55, 52, 55, 16, 55, 18, 55, 16, 55, 18], device='cuda:0')
lstm gradients:  tensor([[-4.8451e-07,  1

lstm gradients:  tensor([[-6.6337e-07,  4.2858e-07, -5.0755e-07,  ..., -8.6273e-07,
          4.1029e-07,  3.6796e-07],
        [ 2.4753e-06, -4.9737e-06,  4.2567e-06,  ...,  6.4213e-06,
         -2.4410e-06, -3.1184e-06],
        [-4.9801e-07, -1.3174e-06,  1.4007e-06,  ..., -5.3811e-07,
         -8.4312e-07,  1.8076e-07],
        ...,
        [-1.9773e-06,  3.7178e-06, -5.9360e-06,  ..., -1.7465e-06,
          2.4877e-06,  2.3775e-06],
        [ 8.1231e-07, -4.6382e-07,  6.2894e-07,  ...,  8.5485e-07,
         -9.2711e-07, -2.0191e-07],
        [ 3.2845e-07, -3.4075e-07, -5.6590e-07,  ...,  9.0493e-07,
         -8.2355e-07, -2.0618e-07]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([28, 28, 60, 28, 28, 28, 60, 30, 30, 30, 60, 28, 28, 28, 60], device='cuda:0')
prediction tensor([ 4, 54,  4,  4, 18, 58, 18, 55, 20, 20, 51, 11, 18, 58, 18], device='cuda:0')
lstm gradients:  tensor([[ 1.2840e-07,  1

lstm gradients:  tensor([[-4.8304e-07, -4.9197e-07,  4.0247e-07,  ..., -1.2777e-06,
          4.7887e-08,  1.5044e-07],
        [-7.3751e-07, -1.7043e-06,  3.4131e-06,  ...,  2.2128e-07,
          1.3606e-06, -1.8527e-06],
        [ 9.0554e-07, -1.5975e-06,  2.6897e-06,  ...,  5.6280e-07,
         -1.5156e-06, -1.9755e-07],
        ...,
        [-7.2822e-07, -1.0329e-06,  7.8401e-07,  ..., -7.4687e-07,
          7.5186e-07, -2.7857e-06],
        [ 4.3225e-07,  1.3662e-06, -1.0526e-06,  ..., -7.6314e-07,
          9.5280e-07,  8.6216e-07],
        [ 1.5963e-07,  6.3840e-07, -9.4960e-07,  ...,  1.1907e-06,
          1.9026e-07,  1.3892e-07]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([35, 60, 60, 38, 38, 38, 60, 37, 60, 38, 60, 40, 40, 40, 60], device='cuda:0')
prediction tensor([ 4, 55,  4, 55, 51, 12, 11, 55, 18, 12, 12, 11, 18, 11, 12], device='cuda:0')
lstm gradients:  tensor([[ 5.5917e-08,  6

lstm gradients:  tensor([[-4.8176e-07,  1.0695e-07, -6.8181e-07,  ..., -1.1170e-06,
          1.4128e-06, -1.2588e-07],
        [ 4.3213e-06, -2.1371e-06,  6.9570e-06,  ...,  4.3757e-06,
         -3.9126e-06, -9.5562e-07],
        [-2.2051e-06, -3.0320e-07, -1.1367e-06,  ..., -3.0978e-06,
          2.9311e-06, -6.2137e-07],
        ...,
        [ 3.2899e-08,  1.5333e-06, -4.2594e-06,  ..., -3.2903e-06,
          2.3999e-06,  7.4019e-07],
        [ 1.0975e-07, -2.7174e-07,  1.0328e-06,  ...,  6.4487e-07,
         -1.3670e-06, -2.3751e-07],
        [ 5.9389e-07, -2.9293e-07,  5.4615e-07,  ...,  1.2914e-06,
         -1.7171e-06, -1.6564e-07]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([31, 31, 60, 30, 30, 30, 30, 30, 30, 30, 60, 26, 26, 26, 60], device='cuda:0')
prediction tensor([54, 54, 55, 55, 55, 55, 16, 16, 16, 55, 16, 55, 16, 55, 16], device='cuda:0')
lstm gradients:  tensor([[ 1.5946e-07,  1

lstm gradients:  tensor([[ 3.7912e-07,  7.6954e-08,  3.6066e-07,  ...,  6.1020e-07,
         -6.0764e-07, -1.2608e-07],
        [ 5.7318e-06, -1.5198e-06,  8.2932e-06,  ...,  1.2577e-05,
         -9.5127e-06, -2.4341e-06],
        [ 7.1338e-08,  8.1296e-07,  2.8072e-07,  ...,  1.4660e-06,
          2.7863e-07,  5.2776e-07],
        ...,
        [-9.5064e-07,  2.7839e-06, -1.2430e-06,  ..., -4.1412e-06,
          4.2798e-06,  1.6063e-07],
        [ 7.2027e-07, -4.2871e-07, -2.8296e-07,  ..., -4.7438e-07,
         -2.5716e-07, -1.6288e-07],
        [-1.9615e-06,  1.0089e-06, -4.8352e-06,  ..., -6.3028e-06,
          4.3391e-06,  1.1165e-06]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([38, 60, 60, 30, 30, 60, 60, 31, 31, 60, 60, 33, 35, 60, 60], device='cuda:0')
prediction tensor([ 4, 54, 55, 55, 55, 18, 16, 16, 16, 18, 16, 16, 16, 18, 16], device='cuda:0')
lstm gradients:  tensor([[-8.0756e-08, -2

lstm gradients:  tensor([[ 1.6246e-06,  7.6421e-07,  5.0113e-07,  ...,  2.0325e-06,
         -5.5834e-07, -1.8630e-07],
        [ 2.9227e-06, -2.6960e-06,  6.9009e-06,  ...,  1.1752e-05,
         -1.2017e-05, -9.2276e-07],
        [ 5.1029e-08, -2.7251e-08,  3.4625e-07,  ...,  1.6246e-07,
          9.2069e-07, -5.3790e-07],
        ...,
        [ 2.2233e-06, -8.2143e-07,  2.4063e-06,  ...,  7.4569e-06,
         -7.7073e-06,  2.0097e-06],
        [-1.8013e-06,  9.7677e-07, -2.7771e-06,  ..., -3.8375e-06,
          5.2003e-06, -6.6662e-07],
        [-1.5044e-06, -1.2986e-07, -1.2904e-06,  ..., -2.8278e-06,
          3.5366e-06, -5.1850e-07]], device='cuda:0', dtype=torch.float64)
prediction:  torch.Size([7, 15, 61]) ground_truth:  torch.Size([7, 15, 61])
ground_truth tensor([60, 28, 28, 60, 33, 33, 60, 60, 60, 60, 60, 60, 33, 33, 60], device='cuda:0')
prediction tensor([ 4, 18, 18, 16, 16,  0, 16, 55, 16,  0, 18, 16, 16,  0, 16], device='cuda:0')
lstm gradients:  tensor([[-1.3969e-07,  1

KeyboardInterrupt: 

# Generate

In [None]:
np.set_printoptions(threshold=np.inf, suppress=1)


In [None]:

model.eval()
model.batch_size=1
with torch.no_grad():
    #get unseeen sample from validation set
    for data in val_loader:
        input_lstm, ground_truth, seq_lengths = reorderBatch(data)
        print(input_lstm.size())
        prediction = model(input_lstm.double().to(device), seq_lengths)
        print(prediction.size())
        


        prediction = prediction.squeeze(0).cpu().numpy()
        #batch size must be 1 for generation
        prediction = prediction[:,0:1].reshape(input_lstm.size(1),input_lstm.size(2))
        
        print(prediction.shape)

        #normalize to [0,1]
        pred_max = np.max(prediction)
        pred_min = np.min(prediction)
        prediction = (prediction - pred_min) / (pred_max - pred_min)
        #print(prediction)
        
        prediction[prediction < 0.7] = 0
        
        prediction = debinarizeMidi(prediction, prediction=True)
        prediction = addCuttedOctaves(prediction)
        #print(prediction)
        pianorollMatrixToTempMidi(prediction, show=True, showPlayer=True, autoplay=True,
                                 path='../temp/temp.mid')
        

print('')