In [1]:
# import numpy as np
import glob
import pypianoroll as ppr
import time
import music21
import os
import torch
import torch.utils.data
from torch import nn, optim
from torch.nn import functional as F
from utils.utilsPreprocessing import *
#np.set_printoptions(threshold=np.inf)
torch.set_printoptions(threshold=50000)

In [14]:
##################################
#HYPERPARAMS
##################################
epochs = 5
learning_rate = 1e-3
batch_size= 14
log_interval = 1  #Log/show loss per batch

# Load MIDI files from npz

In [15]:
data = np.load('../YamahaPianoCompetition2002NoTranspose.npz')

midiDatasetTrain = data['train']
midiDatasetTest = data['test']

data.close()

"""
print("Training set: ({}, {}, {}, {})".format(midiDatasetTrain.size()[0],
                                                midiDatasetTrain.size()[1],
                                                midiDatasetTrain.size()[2],
                                                midiDatasetTrain.size()[3]))
print("Test set: ({}, {}, {}, {})".format(midiDatasetTest.size()[0],
                                                midiDatasetTest.size()[1],
                                                midiDatasetTest.size()[2],
                                                midiDatasetTest.size()[3]))
"""

print("Training set: {}".format(midiDatasetTrain.shape))
print("Test set: {}".format(midiDatasetTest.shape))

Training set: (39782, 1, 96, 60)
Test set: (9691, 1, 96, 60)


In [16]:
fullPitch = 128
_, _, length, reducedPitch = midiDatasetTrain.shape

In [17]:
from utils.CDVAE import CDVAE
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

autoencoderModel = CDVAE()
#LOAD MODEL
pathToModel = 'model/YamahaPianoCompetition2002Transposedby60_10Epochs.model'

try:
    #LOAD TRAINED MODEL INTO GPU
    if(torch.cuda.is_available()):
        autoencoderModel = torch.load(pathToModel)
        
    #LOAD MODEL TRAINED ON GPU INTO CPU
    else:
        autoencoderModel = torch.load(pathToModel, map_location=lambda storage, loc: storage)
    print("\n--------model restored--------\n")
except:
    print("\n--------no saved model found--------\n")
autoencoderModel = autoencoderModel.to(device)


--------model restored--------



# CDVAE

In [18]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [19]:
midiDatasetTrain = torch.from_numpy(midiDatasetTrain)
trainLoader = torch.utils.data.DataLoader(midiDatasetTrain, batch_size=batch_size, shuffle=False, drop_last=True)

midiDatasetTest = torch.from_numpy(midiDatasetTest)
testLoader = torch.utils.data.DataLoader(midiDatasetTest, batch_size=batch_size, shuffle=False, drop_last=True)

In [75]:
class LSTM(nn.Module):
    def __init__(self, batch_size=7, lstmLayers=2, hiddenSize=100):
        super(LSTM, self).__init__()
        
        self.batch_size = batch_size
        self.hidden_size = hiddenSize
        self.lstm_layers = lstmLayers
        ###LSTM###########
        self.embedding = nn.Embedding(100,1)
        
        self.lstm = nn.LSTM(input_size=100, hidden_size=self.hidden_size,
                            num_layers=self.lstm_layers, batch_first=True, dropout=0.5)
        ##################
        ###LSTMCells######
        self.lstmC1 = nn.LSTMCell(input_size=100, hidden_size=self.hidden_size, bias=True)
        self.drop1 = nn.Dropout(p=0.2)
        self.lstmC2 = nn.LSTMCell(input_size=400, hidden_size=self.hidden_size, bias=True)
        self.drop2 = nn.Dropout(p=0.2)
        self.lstmC3 = nn.LSTMCell(input_size=400, hidden_size=self.hidden_size, bias=True)
        #################
        
        self.fc = nn.Linear(self.hidden_size,100)
        self.eluFC = nn.ELU()
    
    def splitEmbedding(self,embed):
        if(embed.size()[0]>7):
            embedTemp = torch.chunk(embed, int(self.batch_size/7),dim=0)
            embed7s = embedTemp[0].unsqueeze(0)
            for emb in embedTemp[1:]:
                #print("inloop");print(emb.unsqueeze(1).size())
                embed7s = torch.cat((embed7s, emb.unsqueeze(0)),dim=0)
                #print("afterconcat");print(embed7s.size())
        else:
            embed7s = embed.unsqueeze(0)
        return embed7s
    
    def hiddenInitLSTM(self,embed):
        hiddenState = torch.zeros(self.lstm_layers,int(embed.size()[0]),self.hidden_size).to(device)
        cellState = torch.zeros(self.lstm_layers,int(embed.size()[0]),self.hidden_size).to(device)
        return hiddenState,cellState
    
    def hiddenInitLSTMCell(self,embed):
        hiddenState = torch.zeros(int(embed.size()[0]/7),self.hidden_size).to(device)
        cellState = torch.zeros(int(embed.size()[0]/7),self.hidden_size).to(device)
        hS2 = torch.zeros(int(embed.size()[0]/7),self.hidden_size).to(device)
        cS2 = torch.zeros(int(embed.size()[0]/7),self.hidden_size).to(device)
        
        return (hiddenState, cellState), (hS2, cS2)

    def forward(self, embed):

        h_t, c_t = self.hiddenInitLSTM(embed)
        #(h_t, c_t),(h2_t, c2_t) = self.hiddenInitLSTMCell(embed)
        #print(h_t.size())
        ###LSTMCells######
        #print(embed.size())
        if(embed.size()[0]==7):
            embed = embed.unsqueeze(1)
        #print(embed.size())
        """
        output = []
        for i in range(7):
            h_t, c_t = self.lstmC1(embed[i],(h_t,c_t))
            #print(h_t.size())
            #h_t = self.drop1(h_t)
            #h2_t, c2_t = self.lstmC2(h_t,(h2_t,c2_t))
            
            output.append(h_t)
        #h_t = nn.Dropout(h_t)
        #h_t, c_t = self.lstmC2(h_t)
        lstmOut = torch.Tensor(7,100).to(device)
        lstmOut = torch.cat(output)
        
        #print("lstmout");print(lstmOut.size())
        """
        print(embed.size())
        ###LSTM###########
        print(embed[0])
        embed = self.embedding(embed.long())
        print(embed[0].squeeze(2))
        lstmOut, (h_t, c_t) = self.lstm(embed,(h_t, c_t))
        print('embedding');print(embed[0,1,:20]);print('lstmOut');print(lstmOut[0,0,:20])
        ##################
        
        lstmOut = self.fc(lstmOut)
        
        #print(embed)
        #print(lstmOut)
        
        #lstmOut = self.eluFC(lstmOut)
        if(embed.size()[0]==7):
            embed = embed.squeeze(1)
        return embed, lstmOut

    

model = LSTM(batch_size=batch_size).to(device)

#optimizer = optim.Adam(model.parameters(), lr=learning_rate)
optimizer = optim.RMSprop(model.parameters(),lr=learning_rate, weight_decay=0.9)

def loss_function(embed, lstmOut):
    
    cosLSTM = nn.CosineSimilarity(dim=1, eps=1e-8) 
    batch_sizeMin1 = embed.size()[0]-1
    MSELoss = nn.MSELoss(reduction='sum')
    #BATCHSIZE 7
    ###WRONG SINCE LOSS CHANGED TO COMPARE EVERY
    ###PREDICTED SEQUENCE WITH THE NEXT
    if(embed.size()[0]==7):
        #print("loss");print(embed.size());print(lstmOut.size())
        cosSimLSTM = torch.sum(cosLSTM(embed[1:],lstmOut[:-1]))
        totalLoss = batch_sizeMin1 - cosSimLSTM
     
    #BATCHSIZE > 7
    else:
        print("LOSS")
        print(embed.size());print(lstmOut.size())
        """
        cosSimLSTM = 0
        for emb, lOut in zip(embed,lstmOut):
            print(emb[1:].size());print(lOut[:-1].size())
            print(cosLSTM(emb[1:],lOut[:-1]))
            cosSimLSTM += torch.sum(cosLSTM(emb[1:],lOut[:-1]))
        print(cosSimLSTM)
        numberCosSims = embed.size()[0]*(embed.size()[1]-1)
        #print(numberCosSims)
        totalLoss = numberCosSims - cosSimLSTM
        """

        mseLSTM = MSELoss(lstmOut[:,:-1,:],embed[:,1:,:])
        totalLoss = mseLSTM / (embed.size()[0]*(embed.size()[1]-1))
        
        
    return totalLoss
        

def train(epoch):
    model.train()
    trainLoss = 0

    for batch_idx, data in enumerate(trainLoader):
        #print(batch_idx)
        data = data.float().to(device)
        optimizer.zero_grad()
        embedding = autoencoderModel.encoder(data)
        embedding = model.splitEmbedding(embedding)
        #print(embedding.size())
        embedding, lstmOut = model(embedding)
        loss = loss_function(embedding, lstmOut)
        
        ###LSTM###############
        #reconPrediction = autoencoderModel.decoder(lstmOut[:,-1,:])
        ######################
        ###LSTMCell###########
        #reconPrediction = autoencoderModel.decoder(lstmOut)
        ######################

        loss.backward()
        trainLoss += loss.item()
        optimizer.step()
        if(batch_idx % log_interval == 0):
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(trainLoader.dataset),
                100. * batch_idx / len(trainLoader),
                loss.item() / (len(data)-(model.batch_size/7))))
        #if(batch_idx==1):
        #   break
    print('====> Epoch: {} Average Loss: {:.4f}'.format(
          epoch, trainLoss / (len(trainLoader.dataset)-batch_idx*(model.batch_size/7))))

def test(epoch):
    model.eval()
    testLoss = 0
    with torch.no_grad():
        for i, data in enumerate(testLoader):
            data = data.float().to(device)
            embedding = autoencoderModel.encoder(data)
            #embedding = model.splitEmbedding(embedding)
            embedding, lstmOut = model(embedding)
            loss = loss_function(embedding, lstmOut)
            ###LSTM#########
            #reconPrediction = autoencoderModel.decoder(lstmOut[:,-1,:])
            ################
            ###LSTMCell#####
            reconPrediction = autoencoderModel.decoder(lstmOut)
            ################
            testLoss += loss_function(embedding, lstmOut).item()
            
            #if(i==1):
            #    break
    testLoss /= (len(testLoader.dataset)-i*(model.batch_size/7))

    print('====> Test set Loss: {:.4f}'.format(testLoss))

In [76]:
"""
#LOAD MODEL
pathToModel = '../models/WikifoniaNoTranpose_10Epochs_LSTM_noTW_dropout50.model'

try:
    #LOAD TRAINED MODEL INTO GPU
    if(torch.cuda.is_available()):
        model = torch.load(pathToModel)
        
    #LOAD MODEL TRAINED ON GPU INTO CPU
    else:
        model = torch.load(pathToModel, map_location=lambda storage, loc: storage)
    print("\n--------model restored--------\n")
except:
    print("\n--------no saved model found--------\n")
"""
print('')




In [77]:
for epoch in range(1, epochs + 1):
    train(epoch)
    test(epoch)

torch.Size([2, 7, 100])
tensor([[2.5448, 1.9012, 2.0805, 1.3013, 1.6549, 2.0362, 1.6548, 2.6756, 1.4527,
         1.5936, 1.8229, 1.4256, 1.0402, 1.7841, 0.3481, 2.0761, 2.0810, 1.5436,
         1.5320, 2.9451, 0.7784, 1.7424, 1.4497, 1.5300, 2.1819, 1.4154, 2.1641,
         1.4190, 0.7857, 1.6166, 1.7055, 2.5857, 1.7286, 1.9286, 1.5975, 1.8182,
         1.0389, 2.1091, 1.5535, 2.1466, 2.2400, 2.0019, 2.5337, 0.5830, 1.9268,
         2.3171, 0.7730, 1.9442, 1.7191, 0.4469, 1.8385, 1.3577, 2.2958, 2.1344,
         1.3279, 2.0894, 1.8719, 1.2606, 2.3819, 1.4657, 1.5775, 2.1000, 1.7351,
         1.2304, 0.6315, 1.2131, 2.2732, 1.8445, 1.7833, 1.2896, 1.9840, 1.3908,
         2.4299, 1.8995, 2.3911, 0.8352, 1.7965, 1.9803, 1.9781, 2.4562, 2.2378,
         1.3083, 1.1804, 1.4861, 1.5230, 2.0371, 2.1190, 2.2801, 1.2843, 1.2626,
         1.3010, 2.0785, 1.0952, 2.0311, 1.5465, 0.7218, 2.1923, 1.4647, 2.4173,
         0.7257],
        [2.5621, 2.0696, 2.4994, 1.3108, 1.3814, 1.3434, 1.5317, 2.

RuntimeError: input must have 3 dimensions, got 4

In [None]:
test(1)

In [None]:
#torch.save(model,'/media/EXTHD/niciData/models/YamahaPianoComp2002_5Epochs_LSTM_noTW.model')

In [None]:
np.set_printoptions(precision=2, suppress=True, threshold=1)


In [None]:
playSeq = 0
pathToSampleSeq = "../DougMcKenzieFiles/train/Mad About the Boy.mid"
if(model.train()):
    model.eval()
if(autoencoderModel.train()):
    autoencoderModel.eval()

###PREDICT 8th SEQUENCE
with torch.no_grad():
    
    sampleNp1 = getSlicedPianorollMatrixNp(pathToSampleSeq)
    sampleNp1 = deleteZeroMatrices(sampleNp1)
    sample = np.expand_dims(sampleNp1[0,:,36:-32],axis=0)
    print(sample.shape)
    for i, sampleNp in enumerate(sampleNp1[playSeq:playSeq+6]):
        #print(sampleNp.shape)
        if(np.any(sampleNp)):
            sampleNp = sampleNp[:,36:-32]
            sampleNp = np.expand_dims(sampleNp,axis=0)
            sample = np.concatenate((sample,sampleNp),axis=0)
    samplePlay = sample[0,:,:]
    for s in sample[1:]:
        samplePlay = np.concatenate((samplePlay,s),axis=0)
    samplePlay = addCuttedOctaves(samplePlay)
    print(samplePlay.shape)
    sample = torch.from_numpy(sample).float().to(device)
    sample = torch.unsqueeze(sample,1)
    print(sample.size())
    embed = autoencoderModel.encoder(sample)
    #embed = model.splitEmbedding(embed)
    embed, lstmOut = model(embed)
    print(lstmOut.size())
    #pred = autoencoderModel.decoder(lstmOut[0,:,:])
    pred = autoencoderModel.decoder(lstmOut)
    prediction = pred.squeeze(0).squeeze(0).cpu().numpy()
    predict = np.squeeze(prediction, axis=1)
    prediction = predict[0,:,:]
    print(prediction.shape)
    for pred in predict[1:]:
        prediction = np.concatenate((prediction, pred), axis =0)
    
    print(prediction[:,:])
    #print(np.sum(sampleNp.numpy(), axis=1))
    
    #NORMALIZE PREDICTIONS
    #reconstruction /= np.abs(np.max(reconstruction))
    prediction /= np.abs(np.max(prediction))
    #print(prediction)

    #CHECK MIDI ACTIVATIONS IN PREDICTION TO INCLUDE RESTS
    #reconstruction[reconstruction < 0.3] = 0
    prediction[prediction < 0.8] = 0



    ###MONOPHONIC OUTPUT MATRIX POLOYPHONIC POSSIBLE WITH ACTIVATION THRESHOLD###
    #score = music21.converter.parse(
    #'WikifoniaServer/samples/The-Doors---Don\'t-you-love-her-Madly?.mid')
    #score.show()

    samplePlay = debinarizeMidi(samplePlay, prediction=False)
    samplePlay = addCuttedOctaves(samplePlay)
    #reconstruction = debinarizeMidi(reconstruction, prediction=True)
    #reconstruction = addCuttedOctaves(reconstruction)
    prediction = debinarizeMidi(prediction, prediction=True)
    prediction = addCuttedOctaves(prediction)
    print("INPUT")
    print(samplePlay.shape)
    pianorollMatrixToTempMidi(samplePlay, show=True,showPlayer=True,autoplay=False)
    #print("RECONSTRUCTION")
    #pianorollMatrixToTempMidi(reconstruction, show=True,
    #                            showPlayer=True,autoplay=True, prediction=True)
    print("PREDICTION")
    pianorollMatrixToTempMidi(prediction, prediction=True, 
                              show=True,showPlayer=True,autoplay=True)        
    print("\n\n")
            

print('')