In [1]:
import matplotlib
matplotlib.use('Agg')
import sys
sys.path.append('/home/nici/workspace/tss18-robotsinmusicalimprovisation/')
import numpy as np
import glob
import pypianoroll as ppr
import time
import music21
import os
import torch
import torch.utils.data
from torch import nn, optim
from torch.nn import functional as F
from utils.utils import *
#np.set_printoptions(threshold=np.inf)
#torch.set_printoptions(threshold=50000)

In [2]:
############HYPERPARAMS#####################
epochs = 100
learning_rate = 1e-3
batch_size = 100
seq_length = 8
log_interval = 100 # Log/show loss per batch
input_size = 100
############LSTM PARAMS#####################
hidden_size = 128
lstm_layers = 2
############################################
############################################
batch_loader = batch_size*seq_length

In [6]:
#load variational autoencoder
from utils.VAE import VAE
from loadModel import loadModel, loadStateDict
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

###VARIATIONAL CONV AUTOENCODER############
autoencoder_model = VAE()
path_to_model = '../pretrained/YamahaPC2002_VAE_Reconstruct_NoTW_20Epochs.model'
###########################################

autoencoder_model = loadModel(autoencoder_model, path_to_model, dataParallelModel=False)
#autoencoder_model = loadStateDict(autoencoder_model, path_to_model)
autoencoder_model = autoencoder_model.to(device)


--------GPU model restored--------



In [7]:
#load dataset from npz
data = np.load('../../../Uni/WikifoniaPartlyNoTranspose.npz')
train_dataset = data['train']
test_dataset = data['test']
data.close()

print(train_dataset.shape)
print(test_dataset.shape)

(4056, 1, 96, 60)
(1034, 1, 96, 60)


In [8]:
#train_dataset = train_dataset[0:1000]
train_dataset = torch.from_numpy(train_dataset)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_loader, shuffle=False, drop_last=True)

#test_dataset = test_dataset[0:100]
test_dataset = torch.from_numpy(test_dataset)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_loader, shuffle=False, drop_last=True)

print(train_dataset.size())
print(test_dataset.size())

torch.Size([4056, 1, 96, 60])
torch.Size([1034, 1, 96, 60])


In [10]:
class LSTM_Many2Many(nn.Module):
    def __init__(self, batch_size=7, lstm_layers=2, hidden_size=32, seq_length=7, input_size=100):
        super(LSTM_Many2Many, self).__init__()
        
        self.batch_size = batch_size
        self.hidden_size = hidden_size
        self.lstm_layers = lstm_layers
        self.seq_length = seq_length
        self.input_size = input_size
        
        ###LSTM###########
        self.lstm = nn.LSTM(input_size=self.hidden_size, hidden_size=self.hidden_size,
                            num_layers=self.lstm_layers, batch_first=True, dropout=0.3)
        ##################
        
        ###LINEAR LAYERS###
        self.i2h = nn.Linear(self.input_size, self.hidden_size)
        self.h2o = nn.Linear(self.hidden_size,self.input_size)
        ###################

        ###LSTMCells######
        #self.lstmC1 = nn.LSTMCell(input_size=self.input_size, hidden_size=self.hidden_size, bias=True)
        #self.drop1 = nn.Dropout(p=0.1)
        #self.lstmC2 = nn.LSTMCell(input_size=self.hidden_size, hidden_size=self.hidden_size, bias=True)
        #self.drop2 = nn.Dropout(p=0.2)
        #self.lstmC3 = nn.LSTMCell(input_size=self.hidden_size, hidden_size=self.hidden_size, bias=True)
        #################      
    
    def reorder_batch(self, embedded_seqs):
        return torch.stack(torch.chunk(embedded_seqs, int(self.batch_size/self.seq_length)))
         
    def hidden_init(self):
        return torch.zeros(self.lstm_layers, int(self.batch_size/self.seq_length), 
                           self.hidden_size, dtype=torch.double).to(device)

    def forward(self, embed, future = 0):#, lenghts):
        h_t0 = self.hidden_init()
        c_t0 = self.hidden_init()
        
        lstm_input = self.i2h(embed)
        output, (h_t1, c_t1) = self.lstm(lstm_input, (h_t0, c_t0))
        output = self.h2o(output[:,:,:])
        
        """
        (h_t, c_t),(h2_t, c2_t),(h3_t,c3_t)= self.hiddenInitLSTMCell()

        outputs = []
        for i in range(0,self.seq_length-1):
            h_t, c_t = self.lstmC1(embed[:,i,:],(h_t, c_t))
            h2_t, c2_t = self.lstmC2(h_t,(h2_t, c2_t))
            h3_t, c3_t = self.lstmC3(h2_t,(h3_t, c3_t))
            output = self.fc(h3_t)
            #outputs += [output]
            
        
        for i in range(future):
            h_t, c_t = self.lstmC1(output,(h_t, c_t))
            h2_t, c2_t = self.lstmC2(h_t, (h2_t, c2_t))
            h3_t, c3_t = self.lstmC3(h2_t,(h3_t, c3_t))
            output = self.fc(h3_t)
            #outputs += [output]
        
        #outputs = torch.stack(outputs, 1)
        """
        return embed, output
    

model = LSTM_Many2Many(batch_size=batch_loader, seq_length=seq_length, 
             input_size=input_size, hidden_size=hidden_size,
             lstm_layers = lstm_layers).double().to(device)

optimizer = optim.Adam(model.parameters(), lr=learning_rate)
#optimizer = optim.RMSprop(model.parameters(),lr=learning_rate, momentum=0.9)


def train(epoch):
    model.train()
    train_loss = 0
    criterion = nn.MSELoss()
    half_seq_length = int(model.seq_length/2)
    for batch_idx, data in enumerate(train_loader):
        optimizer.zero_grad()
        #float byte tensor
        data = data.float().to(device)
        #embed data with autoencoder
        with torch.no_grad():
            mu, logvar = autoencoder_model.encoder(data)
        
        #prepare for input lstm
        embedding = model.reorder_batch(mu)
        print("embedding.size() {}".format(embedding.size()))
        embedding = embedding.double()
        
        #Normalize to mean 0 and std 1
        #mean_batch = torch.mean(embedding)
        #std_batch = torch.std(embedding)
        #embedding_norm = (embedding - mean_batch) / std_batch
        
        g_truth = embedding[:,half_seq_length:,:]
        input_lstm = embedding[:,:half_seq_length,:]
        _ , output_lstm = model(input_lstm, future = 0)

        loss = criterion(output_lstm, g_truth)
        loss.backward()
        train_loss += loss.item()
        
        optimizer.step()
        if(batch_idx % log_interval == 0):
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, 
                batch_idx * len(data), 
                len(train_loader.dataset),
                100. * batch_idx / len(train_loader),
                loss.item()/(half_seq_length)))
#             print('ground_truth');print(g_truth.size());print(g_truth[:2,15:30])
#             print('output_lstm');print(output_lstm.size());print(output_lstm[:2,15:30])
    
    # average train loss
    train_loss /= (batch_idx+1)*(half_seq_length)
    print('====> Epoch: {} Average Loss: {:.4f}'.format(epoch, train_loss))
    return train_loss
    
def test(epoch):
    model.eval()
    test_loss = 0
    criterion = nn.MSELoss()
    half_seq_length = int(model.seq_length/2)
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            data = data.float().to(device)
            mu, logvar = autoencoder_model.encoder(data)
            
            #prepare for input lstm
            embedding = model.reorder_batch(mu)
            embedding = embedding.double()

            #Normalize to mean 0 and std 1
            #mean_batch = torch.mean(embedding)
            #std_batch = torch.std(embedding)
            #embedding_norm = (embedding - mean_batch) / std_batch

            g_truth = embedding[:,half_seq_length:,:]
            input_lstm = embedding[:,:half_seq_length,:]
            _ , output_lstm = model(input_lstm, future = 0)

            test_loss += criterion(output_lstm, g_truth).item()

    # average test loss
    test_loss /= (i+1)*(half_seq_length)
#     print('ground_truth');print(g_truth.size());print(g_truth[:,15:30])
#     print('output_lstm');print(output_lstm.size());print(output_lstm[:,15:30])

    print('====> Test set Loss: {:.4f}'.format(test_loss))
    print('')
    return test_loss

In [11]:
# %matplotlib inline
import matplotlib.pyplot as plt

train_losses = []
test_losses = []
best_test_loss = np.inf
plot_save_path = '../plots/LSTM_NEW_WikifoniaTP12_'+str(hidden_size)+'hidden_' + str(epochs) +'epoch_Many2Many.png'
# plot_save_path = '../plots/LSTM_YamahaPCNoTP_'+str(hidden_size)+'hidden' + str(epochs) +'epoch_Many2Many.png'
# plot_save_path = '../plots/LSTM_YamahaPCTP60_'+str(hidden_size)+'hidden' + str(epochs) +'epoch_Many2Many.png'

for epoch in range(1, epochs + 1):
    train_losses.append(train(epoch))
    
    current_test_loss = test(epoch)
    test_losses.append(current_test_loss)
#     if(current_test_loss < best_test_loss):
#         best_test_loss = current_test_loss
#         torch.save(model,'/media/EXTHD/niciData/models/LSTM_WikifoniaTP12_' + str(hidden_size) + 'hidden_'+ str(epochs) + 'epochs_Many2Many_.model')
    if(epoch % 50 == 0):
        # for plots during training
        plt.plot(train_losses, color='red', label='Train Loss')
        plt.plot(test_losses, color='orange', label='Test Loss')
        plt.savefig(plot_save_path)
        # plt.show()   

plt.legend()
plt.savefig(plot_save_path)       



embedding.size() torch.Size([100, 8, 100])
embedding.size() torch.Size([100, 8, 100])
embedding.size() torch.Size([100, 8, 100])


KeyboardInterrupt: 