In [1]:
# import numpy as np
import glob
import pypianoroll as ppr
import time
import music21
import os
import torch
import torch.utils.data
from torch import nn, optim
from torch.nn import functional as F
from utils.utilsPreprocessing import *
#np.set_printoptions(threshold=np.inf)
#torch.set_printoptions(threshold=50000)

In [2]:
##################################
#HYPERPARAMS
##################################
epochs = 1
learning_rate = 1e-3
batch_size= 98
log_interval = 1  #Log/show loss per batch

# Load MIDI files from npz

In [3]:
data = np.load('/Volumes/EXT/DATASETS/YamahaPianoCompetition2002NoTranspose.npz')

midiDatasetTrain = data['train']
midiDatasetTest = data['test']

data.close()

"""
print("Training set: ({}, {}, {}, {})".format(midiDatasetTrain.size()[0],
                                                midiDatasetTrain.size()[1],
                                                midiDatasetTrain.size()[2],
                                                midiDatasetTrain.size()[3]))
print("Test set: ({}, {}, {}, {})".format(midiDatasetTest.size()[0],
                                                midiDatasetTest.size()[1],
                                                midiDatasetTest.size()[2],
                                                midiDatasetTest.size()[3]))
"""

print("Training set: {}".format(midiDatasetTrain.shape))
print("Test set: {}".format(midiDatasetTest.shape))

Training set: (39782, 1, 96, 60)
Test set: (9691, 1, 96, 60)


In [4]:
fullPitch = 128
_, _, length, reducedPitch = midiDatasetTrain.shape

In [5]:
#MODEL FOR TRANSFER LEARNING
from utils.CDVAE import CDVAE
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

weightModel = CDVAE().to(device)

print(weightModel.encode1[0].weight.size())

torch.Size([100, 1, 16, 5])


# CDVAE

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [7]:
midiDatasetTrain = torch.from_numpy(midiDatasetTrain)
trainLoader = torch.utils.data.DataLoader(midiDatasetTrain, batch_size=batch_size, shuffle=False, drop_last=True)

midiDatasetTest = torch.from_numpy(midiDatasetTest)
testLoader = torch.utils.data.DataLoader(midiDatasetTest, batch_size=batch_size, shuffle=False, drop_last=True)

In [8]:
class CDVAE(nn.Module):
    def __init__(self, batch_size=7, tie_weights=True):
        super(CDVAE, self).__init__()
        
        self.batch_size = batch_size
        
        ###LSTM###
        self.lstm = nn.LSTM(input_size=100, hidden_size=400,
                            num_layers=3, batch_first=True, dropout=0.3)
        self.fc = nn.Linear(400,100)
        self.eluFC = nn.ELU()
    
    def encoder(self, hEnc):
        #print("ENOCDER")
        i=0
        for j in range(4):
            hEnc = F.conv2d(hEnc,
                            weight=weightModel.encode1[i].weight,
                            bias=weightModel.encode1[i].bias, 
                            stride=weightModel.encode1[i].stride,
                            padding=0)
            i+=1
            hEnc = F.batch_norm(hEnc, 
                                running_mean=weightModel.encode1[i].running_mean, 
                                running_var=weightModel.encode1[i].running_var, 
                                weight=weightModel.encode1[i].weight,
                                bias=weightModel.encode1[i].bias,
                                training=self.training)
            hEnc = F.elu(hEnc)            
            i+=2
        hEnc = torch.squeeze(hEnc,3).view(-1,800*3)
        i=0
        for j in range(3):
            hEnc = F.linear(hEnc,weightModel.encode2[i].weight,weightModel.encode2[i].bias)
            i+=1
            hEnc = F.batch_norm(hEnc, 
                                running_mean=weightModel.encode2[i].running_mean, 
                                running_var=weightModel.encode2[i].running_var, 
                                weight=weightModel.encode2[i].weight,
                                bias=weightModel.encode2[i].bias,
                                training=self.training)
            hEnc = F.elu(hEnc)        
            i+=2
        return hEnc

    def decoder(self, hDec):
        #print("DECODER")
        i=0
        for j in range(3):
            hDec = F.linear(hDec, weightModel.decode1[i].weight, weightModel.decode1[i].bias)
            i+=1
            hDec = F.batch_norm(hDec, 
                                running_mean=weightModel.decode1[i].running_mean, 
                                running_var=weightModel.decode1[i].running_var, 
                                weight=weightModel.decode1[i].weight,
                                bias=weightModel.decode1[i].bias,
                                training=self.training)        
            hDec = F.elu(hDec)
            i+=2
        hDec = hDec.view(hDec.size()[0],800,-1).unsqueeze(2)
        i=0
        for j in range(4):
            hDec = F.conv_transpose2d(hDec, 
                                      weight=weightModel.decode2[i].weight, 
                                      bias=weightModel.decode2[i].bias, 
                                      stride=weightModel.decode2[i].stride, 
                                      padding=0)
            i+=1
            hDec = F.batch_norm(hDec, 
                                running_mean=weightModel.decode2[i].running_mean, 
                                running_var=weightModel.decode2[i].running_var, 
                                weight=weightModel.decode2[i].weight,
                                bias=weightModel.decode2[i].bias,
                                training=self.training) 
            hDec = F.elu(hDec)
            i+=2
        return hDec


    def forward(self, x):
        embed = self.encoder(x)
        
        ####MOVE TO HIDDEN_INIT
        h_t = torch.zeros(3,int(embed.size()[0]/7),400).to(device)
        c_t = torch.zeros(3,int(embed.size()[0]/7),400).to(device)        
        ###HIDDEN INIT END
        
        #IF FOR TESTING UNKNOWN SEQUENCES
        if(embed.size()[0]>7):
            embedTemp = torch.chunk(embed, int(self.batch_size/7),dim=0)
            embed7s = embedTemp[0].unsqueeze(0)
            for emb in embedTemp[1:]:
                #print("inloop");print(emb.unsqueeze(1).size())
                embed7s = torch.cat((embed7s, emb.unsqueeze(0)),dim=0)
                #print("afterconcat");print(embed7s.size())
        else:
            embed7s = embed.unsqueeze(0)
            
        lstmOut, (h_t, c_t) = self.lstm(embed7s,(h_t, c_t))
        lstmOut = self.fc(lstmOut)
        lstmOut = self.eluFC(lstmOut)
        #print(lstmOut.size())
        
        recon_lstm = lstmOut[0,:,:]
        #print(recon_lstm.size())
        for output in lstmOut[1:]:
            recon_lstm = torch.cat((recon_lstm,output),dim=0)
        #print(recon_lstm.size())
        return embed, lstmOut, self.decoder(recon_lstm)

    

model = CDVAE(batch_size=batch_size).to(device)

#optimizer = optim.Adam(model.parameters(), lr=learning_rate)
optimizer = optim.RMSprop(model.parameters(),lr=learning_rate, weight_decay=1e-1)

def loss_function(x, recon_lstm, embed, lstmOut):
    
    cosLSTM = nn.CosineSimilarity(dim=0, eps=1e-8) 
    batch_sizeMin1 = x.size()[0]-1
    
    #BATCHSIZE 7
    ###WRONG SINCE LOSS CHANGED TO COMPARE EVERY
    ###PREDICTED SEQUENCE WITH THE NEXT
    if(embed.size()[0]==7):
        #print("batchsize = 7 ?")
        cosSimLSTM = cosLSTM(lstmOut.squeeze(1)[-1], embedNext[0])
     
    #BATCHSIZE > 7
    else:
        cosSimLSTM = 0
        k=1
        for batchOut in lstmOut:
            for out in batchOut:
                #print(k)
                cosSimLSTM += cosLSTM(out,embed[k])
                k+=1
                if(k==embed.size()[0]-1):
                    break
        ###TAKE 1st sequence of next batch and compare it to last LSTM output           
        #cosSimLSTM += cosLSTM(lstmOut[-1,-1,:],embedNext[0])
        cosSimLSTM = batch_sizeMin1-cosSimLSTM
        
        ###RECONSTRUCTION LOS ON PREDICTIONS
        #cos = nn.CosineSimilarity(dim=1, eps=1e-8) 
        #cosSim = torch.sum(cos(x[1:].view(batch_sizeMin1,-1),
        #                       recon_lstm[:-1].view(batch_sizeMin1,-1)))
        #cosSim = batch_sizeMin1-cosSim
        ###RECONSTRUCTION LOSS END
        
        totalLoss = cosSimLSTM# + cosSim
        
    return totalLoss
        

def train(epoch):
    model.train()
    trainLoss = 0

    for batch_idx, data in enumerate(trainLoader):
        #print(batch_idx)
        data = data.float().to(device)
        optimizer.zero_grad()
        embedding, lstmOut, reconPrediction = model(data)
        #nextBatch = next(iter(trainLoader)).float().to(device)
        #with torch.no_grad():
        #    embeddingNext, _, _ = model(nextBatch)
        #print(nextBatch.size())
        loss = loss_function(data, reconPrediction, embedding, lstmOut)
        loss.backward()
        trainLoss += loss.item()
        optimizer.step()
        if(batch_idx % log_interval == 0):
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(trainLoader.dataset),
                100. * batch_idx / len(trainLoader),
                loss.item() / len(data)))
        #if(batch_idx==1):
        #   break
    print('====> Epoch: {} Average Loss: {:.4f}'.format(
          epoch, trainLoss / (len(trainLoader.dataset)-batch_idx)))

def test(epoch):
    model.eval()
    testLoss = 0
    with torch.no_grad():
        for i, data in enumerate(testLoader):
            data = data.float().to(device)
            embedding, lstmOut, reconPrediction = model(data)
            #nextBatch = next(iter(testLoader)).float().to(device)
            #embeddingNext, _, _= model(nextBatch)
            testLoss += loss_function(data, reconPrediction, embedding, lstmOut).item()
            
            #if(i==1):
            #    break
    testLoss /= (len(testLoader.dataset)-i)

    print('====> Test set Loss: {:.4f}'.format(testLoss))

In [9]:
"""
#LOAD MODEL
pathToModel = '../models/WikifoniaNoTranpose_10Epochs_LSTM_noTW_dropout50.model'

try:
    #LOAD TRAINED MODEL INTO GPU
    if(torch.cuda.is_available()):
        model = torch.load(pathToModel)
        
    #LOAD MODEL TRAINED ON GPU INTO CPU
    else:
        model = torch.load(pathToModel, map_location=lambda storage, loc: storage)
    print("\n--------model restored--------\n")
except:
    print("\n--------no saved model found--------\n")
"""
print('')




In [10]:
for epoch in range(1, epochs + 1):
    train(epoch)
    test(epoch)





====> Epoch: 1 Average Loss: 0.8492
====> Test set Loss: 0.8486


In [None]:
#torch.save(model,'/media/EXTHD/niciData/models/YamahaPianoComp2002_5Epochs_LSTM_noTW.model')

In [None]:
#np.set_printoptions(precision=2, suppress=True, threshold=np.inf)


In [None]:
playSeq = 0
pathToSampleSeq = "/Volumes/EXT/DATASETS/WikifoniaServer/test/Charlie-Tobias,-Nat-Simon---No-Can-Do.mid"
if(model.train()):
    model.eval()

###PREDICT 8th SEQUENCE
with torch.no_grad():
    
    sampleNp1 = getSlicedPianorollMatrixNp(pathToSampleSeq)
    sampleNp1 = deleteZeroMatrices(sampleNp1)
    sample = np.expand_dims(sampleNp1[0,:,36:-32],axis=0)
    print(sample.shape)
    for i, sampleNp in enumerate(sampleNp1[playSeq:playSeq+6]):
        print(sampleNp.shape)
        if(np.any(sampleNp)):
            sampleNp = sampleNp[:,36:-32]
            sampleNp = np.expand_dims(sampleNp,axis=0)
            sample = np.concatenate((sample,sampleNp),axis=0)
    samplePlay = sample[0,:,:]
    for s in sample[1:]:
        samplePlay = np.concatenate((samplePlay,s),axis=0)
    samplePlay = addCuttedOctaves(samplePlay)
    print(samplePlay.shape)
    sample = torch.from_numpy(sample).float().to(device)
    sample = torch.unsqueeze(sample,1)
    print(sample.size())
    _,_, pred = model(sample)
    #reconstruction = recon.squeeze(0).squeeze(0).cpu().numpy()
    prediction = pred.squeeze(0).squeeze(0).cpu().numpy()

    #print(sampleNp[:,:])
    #print(prediction[:,:])
    #print(np.sum(sampleNp.numpy(), axis=1))

    #NORMALIZE PREDICTIONS
    #reconstruction /= np.abs(np.max(reconstruction))
    prediction /= np.abs(np.max(prediction))
    #print(prediction)

    #CHECK MIDI ACTIVATIONS IN PREDICTION TO INCLUDE RESTS
    #reconstruction[reconstruction < 0.3] = 0
    prediction[prediction < 0.65] = 0



    ###MONOPHONIC OUTPUT MATRIX POLOYPHONIC POSSIBLE WITH ACTIVATION THRESHOLD###
    #score = music21.converter.parse('WikifoniaServer/samples/The-Doors---Don\'t-you-love-her-Madly?.mid')
    #score.show()

    samplePlay = debinarizeMidi(samplePlay, prediction=False)
    samplePlay = addCuttedOctaves(samplePlay)
    #reconstruction = debinarizeMidi(reconstruction, prediction=True)
    #reconstruction = addCuttedOctaves(reconstruction)
    prediction = debinarizeMidi(prediction, prediction=True)
    prediction = addCuttedOctaves(prediction)

    #print(np.argmax(samplePlay, axis=1))
    #print('')
    #print(np.argmax(prediction, axis=1))
    print("INPUT")
    print(samplePlay.shape)
    pianorollMatrixToTempMidi(samplePlay, show=True,showPlayer=True,autoplay=False)
    #print("RECONSTRUCTION")
    #pianorollMatrixToTempMidi(reconstruction, show=True,
    #                            showPlayer=True,autoplay=True, prediction=True)
    print("PREDICTION")
    pianorollMatrixToTempMidi(prediction, prediction=False, 
                              show=True,showPlayer=True,autoplay=True)        
    print("\n\n")
            

print('')