In [1]:
##*************************Task 1*********************#
#
#   (Class "Critic" should be a subclass of the class CriticBase. You must use the exact class name.)
#   You should implement a multi-layer (2 or 3 layers) LSTM model in this class.
#   The Model (the score function) takes a sequence of envents as input and outputs a score judging
#   whether the piano music corresponding to the sequence is good music or bad music.
#   A function to generate random music is provided in the "midi2seq.py".
#   Use the function to create a collection of random piano plays as examples of bad music.
#   Use the piano plays in the downloaded data as example of good music.
#   (You don't need to use all the downloaded data. A sufficiently large subset will be enough.)
#   Train the model in this class using both the good and the bad examples.

In [2]:
from midi2seq import process_midi_seq, random_piano
import glob
good_music_midi = process_midi_seq(datadir='.', n=5000, maxlen=100, rd_seed=1000)

bad_music_midi = [random_piano(n=5000) for _ in range(good_music_midi.shape[1])]
bad_music_midi = process_midi_seq(all_midis=bad_music_midi, datadir='.', n=5000, maxlen=100)

In [3]:
good_music_midi.shape, bad_music_midi.shape

((5706, 101), (5567, 101))

In [15]:
import numpy as np
with open('good_music_midi.npy', 'wb') as f:
    np.save(f, good_music_midi)

with open('bad_music_midi.npy', 'wb') as f:
    np.save(f, bad_music_midi)

In [13]:
import torch
import torch.nn as nn

num_embeddings = 382
embedding_dim = 100
embedding = nn.Embedding(num_embeddings, embedding_dim)

good_music_midi = torch.tensor(good_music_midi, dtype=torch.long)
print(good_music_midi.shape)

embedded = embedding(good_music_midi)
print(embedded.shape)

torch.Size([5706, 101])
torch.Size([5706, 101, 100])


In [14]:
rnn_size = 64
n_labels = 2

rnn = nn.LSTM(input_size=embedding_dim, hidden_size=rnn_size, bidirectional=True, num_layers=3)
top_layer = nn.Linear(2*rnn_size, n_labels)

rnn_out, _ = rnn(embedded)
print(rnn_out.shape)

out = top_layer(rnn_out[:, -1, :])
print(out.shape)

torch.Size([5706, 101, 128])
torch.Size([5706, 2])


In [15]:
out

tensor([[ 0.0273,  0.0025],
        [ 0.0378,  0.0040],
        [ 0.0421,  0.0045],
        ...,
        [ 0.0430, -0.0043],
        [ 0.0462, -0.0053],
        [ 0.0474,  0.0016]], grad_fn=<AddmmBackward0>)

In [16]:
# Critic model
# n x max_len -> embedding -> n x nax_len x max_len - 1 -> LSTM (with hidden size=3)-> n x 2
class LSTMCritic(nn.Module):
    def __init__(self, num_embeddings=382, embedding_dim=100, hidden_dim=128, num_layers=3, n_classes=2):
        super(LSTMCritic, self).__init__()
        self.num_embeddings = num_embeddings # number of unique words in the vocabulary
        self.embedding_dim = embedding_dim #
        self.hidden_dim = hidden_dim # Hidden dimension
        self.num_layers = num_layers # Number of LSTM layers
        self.device = torch.device("cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu"))
        
        self.embedding = nn.Embedding(self.num_embeddings, self.embedding_dim).to(self.device)
        self.lstm = nn.LSTM(self.embedding_dim, self.hidden_dim, num_layers=self.num_layers, batch_first=True, dropout=0.2).to(self.device)
        self.fc = nn.Linear(self.hidden_dim, n_classes).to(self.device)
        

    def forward(self, x):
        # Embedding layer
        x = self.embedding(x).to(self.device)
        # LSTM forward pass
        batch_size = x.size(0)
        # Initialize hidden and cell states with zeros
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim).to(self.device)
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim).to(self.device)
        lstm_out, _ = self.lstm(x, (h0, c0))
        # Decode the hidden state of the last time step
        out = lstm_out[:, -1, :]
        # Linear layer
        out = self.fc(out)
        return out
    
model = LSTMCritic()
pred = model(good_music_midi.to(model.device))

RuntimeError: MPS backend out of memory (MPS allocated: 14.68 GB, other allocations: 6.58 GB, max allowed: 18.13 GB). Tried to allocate 44.75 KB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).

# Post - Tuesday Oct 10, 2023

In [18]:
# Importing libraries
import numpy as np
import os, sys, time, datetime, pickle, copy, random, glob, logging

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data.sampler import SubsetRandomSampler

from midi2seq import piano2seq, random_piano, process_midi_seq
from torch.utils.data import DataLoader, TensorDataset
from model_base import ComposerBase, CriticBase

from google_drive_downloader import GoogleDriveDownloader as gdd

In [25]:
## Training data
class MidiDataProcessor:

    def __init__(self, data_directory, maxlen=100, test_size=0.2, random_state=42, batch_size=32):
        self.data_directory = data_directory
        self.maxlen = maxlen
        self.test_size = test_size
        self.random_state = random_state
        self.batch_size = batch_size

    def __get__(self, idx):
        return self.all_data[idx], self.all_labels[idx]

    def prepare_data(self):
        device = torch.device("cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu"))
        all_midis = glob.glob(f'{self.data_directory}/maestro-v1.0.0/**/*.midi')

        good_music_midi = process_midi_seq(all_midis=all_midis, datadir=self.data_directory, n=10000, maxlen=self.maxlen)
        bad_music_midi = [random_piano(n=self.maxlen) for _ in range(len(all_midis))]
        bad_music_midi = process_midi_seq(all_midis=bad_music_midi, datadir=self.data_directory, n=10000, maxlen=self.maxlen)

        good_music = torch.tensor(good_music_midi, dtype=torch.float32)
        bad_music = torch.tensor(bad_music_midi, dtype=torch.float32)

        good_labels = torch.ones((len(good_music), 1))
        bad_labels = torch.zeros((len(bad_music), 1))

        self.all_data = torch.cat([good_music, bad_music], dim=0)
        self.all_labels = torch.cat([good_labels, bad_labels], dim=0)

        features_train, features_test, label_train, label_test = train_test_split(
                                                                                    self.all_data, self.all_labels,
                                                                                    test_size=self.test_size,
                                                                                    random_state=self.random_state,
                                                                                    shuffle=True)
        

        def convert_labels(labels):
            converted = torch.zeros(labels.size(0), 2)
            converted[labels.view(-1) == 1, 0] = 1
            converted[labels.view(-1) == 0, 1] = 1
            return converted
        
        # label_train = convert_labels(label_train)
        # label_test = convert_labels(label_test)

        features_train = torch.Tensor(features_train).to(device)
        features_test = torch.Tensor(features_test).to(device)

        label_train = torch.Tensor(label_train).to(device)
        label_test = torch.Tensor(label_test).to(device)

        train_dataset = TensorDataset(features_train, label_train)
        test_dataset = TensorDataset(features_test, label_test)

        self.train_loader = DataLoader(train_dataset, shuffle=True, batch_size=self.batch_size)
        self.test_loader = DataLoader(test_dataset, shuffle=True, batch_size=self.batch_size)

        return self.train_loader, self.test_loader

    def __repr__(self):
        return f'MidiDataProcessor(data_directory={self.data_directory!r}, maxlen={self.maxlen}, test_size={self.test_size}, random_state={self.random_state}, batch_size={self.batch_size}, train_loader size={len(self.train_loader.dataset)}, test_loader size={len(self.test_loader.dataset)})'


In [26]:
processor = MidiDataProcessor(data_directory='.')
train_loader, test_loader = processor.prepare_data()

In [27]:
processor

MidiDataProcessor(data_directory='.', maxlen=100, test_size=0.2, random_state=42, batch_size=32, train_loader size=16140, test_loader size=4035)

In [28]:
train_loader.dataset[0]

(tensor([256., 190., 285., 256., 366.,  38., 306., 256., 370., 118., 264., 246.,
         285., 256., 364.,   8., 257., 220., 266., 136., 282., 256., 371., 111.,
         280., 256., 217., 259., 256., 227., 258., 256., 166., 264., 364.,  67.,
         269., 256., 195., 281., 256., 369.,  60., 298., 256., 366., 100., 267.,
         367.,  26., 239., 264., 370.,  46., 312., 256., 367.,  56., 269., 256.,
         184., 268., 256., 154., 271., 256., 367.,  31., 281., 256., 174., 259.,
         256., 188., 281., 256., 159., 263., 361.,  54., 272., 256., 362.,  59.,
         280., 256., 182., 260., 256., 228., 257., 370.,  63., 262., 191., 266.,
         363.,  53., 257., 365.,   6.], device='mps:0'),
 tensor([0.], device='mps:0'))

In [30]:
def convert_labels(labels):
    converted = torch.zeros(labels.size(0), 2)
    converted[labels.view(-1) == 1, 0] = 1
    converted[labels.view(-1) == 0, 1] = 1
    return converted

In [32]:
for i, (data, label) in enumerate(train_loader):
    print(data.shape, convert_labels(label).shape)
    break

torch.Size([32, 101]) torch.Size([32, 2])


In [34]:
convert_labels(label[:10]), label[:10]

(tensor([[0., 1.],
         [0., 1.],
         [1., 0.],
         [0., 1.],
         [0., 1.],
         [1., 0.],
         [1., 0.],
         [0., 1.],
         [0., 1.],
         [0., 1.]]),
 tensor([[0.],
         [0.],
         [1.],
         [0.],
         [0.],
         [1.],
         [1.],
         [0.],
         [0.],
         [0.]], device='mps:0'))

In [13]:
train_loader.dataset[0][0].min()

tensor(17., device='mps:0')

In [55]:
# Critic model
# n x max_len -> embedding -> n x nax_len x max_len - 1 -> LSTM (with hidden size=3)-> n x 2
# n x max_len -> embedding -> n x nax_len x max_len - 1 -> LSTM (with hidden size=3)-> n x 2
class LSTMCritic(nn.Module):
    def __init__(self, num_embeddings=382, embedding_dim=100, hidden_dim=128, num_layers=3, n_classes=2):
        super(LSTMCritic, self).__init__()
        self.num_embeddings = num_embeddings # number of unique words in the vocabulary
        self.embedding_dim = embedding_dim #
        self.hidden_dim = hidden_dim # Hidden dimension
        self.num_layers = num_layers # Number of LSTM layers
        self.device = torch.device("cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu"))
        
        self.embedding = nn.Embedding(self.num_embeddings, self.embedding_dim).to(self.device)
        self.lstm = nn.LSTM(self.embedding_dim, self.hidden_dim, num_layers=self.num_layers, batch_first=True, dropout=0.2).to(self.device)
        self.fc = nn.Linear(self.hidden_dim, n_classes).to(self.device)
        

    def forward(self, x):
        # Embedding layer
        x = self.embedding(x).to(self.device)
        # LSTM forward pass
        batch_size = x.size(0)
        # Initialize hidden and cell states with zeros
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim).to(self.device)
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim).to(self.device)
        lstm_out, _ = self.lstm(x, (h0, c0))
        # Decode the hidden state of the last time step
        out = lstm_out[:, -1, :]
        # Linear layer
        out = self.fc(out)
        return out

In [63]:
class AccumulationMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.value = 0.0
        self.avg = 0.0
        self.sum = 0
        self.count = 0.0

    def update(self, value, n=1):
        self.value = value
        self.sum += value * n
        self.count += n
        self.avg = self.sum / self.count
        self.sqrt = self.value ** 0.5
        self.rmse = self.avg ** 0.5

class EarlyStopping:
    def __init__(self, tolerance=5, min_delta=0):

        self.tolerance = tolerance
        self.min_delta = min_delta
        self.counter = 0
        self.early_stop = False

    def __call__(self, train_loss, validation_loss):
        if (validation_loss - train_loss) > self.min_delta:
            self.counter +=1
            if self.counter >= self.tolerance:  
                self.early_stop = True

In [91]:
# Critic class
class Critic(CriticBase):
    def __init__(self, load_trained=False):
        '''
        :param load_trained
            If load_trained is True, load a trained model from a file.
            Should include code to download the file from Google drive if necessary.
            else, construct the model
        '''    
    
        self.load_trained = load_trained
        self.device = torch.device("cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu"))
        self.model = LSTMCritic(vocab_size=500, hidden_dim=128, num_layers=3, n_classes=2).to(self.device)
        self.criterion = nn.CrossEntropyLoss()
        if self.load_trained:
            logging.info('load model from file ...')
            gdd.download_file_from_google_drive(file_id='18YkTrsqa0dWCVC4PpE2_7q8nN3jxdzhD',
                                    dest_path='./critic.pth',
                                    unzip=True)
            self.model = torch.load('critic.pth')
            self.model.eval()

    def score(self, x):
        '''
        Compute the score of a music sequence
        :param x: a music sequence
        :return: the score between 0 and 1 that reflects the quality of the music: the closer to 1, the better
        '''
        with torch.set_grad_enabled(False):
            logging.info('Compute score ...')
        
            outputs = self.model(x.to(self.device))
            outputs = torch.argmax(outputs, dim=1)
            outputs ^= 1 # index 0 is good and index 1 is bad 
                    
        return outputs  
    
    def validate(self, val_loader, model):
        """Evaluate the network on the entire validation set."""

        loss_accum = AccumulationMeter()
        model.eval()
        with torch.set_grad_enabled(False):

            for i, (feature, label) in enumerate(val_loader):
                feature, label = feature.to(self.device).long(), label.to(self.device)

                outputs = self.model(feature)

                loss = self.criterion(outputs, label)
                loss_accum.update(loss.item(), label.size(0))
 

        return loss_accum.rmse

    def train(self, x, epochs=10, lr=1e-5):
        '''
        Train the model on one batch of data
        :param x: train data. For critic training, x will be a tuple of two tensors (data, label). expect a batch of dataloader
        :return: (mean) loss of the model on the batch
        '''
            
        optimizer = optim.Adam(self.model.parameters(), lr=lr)
        loss_accum_train = AccumulationMeter()

        # split data for K-fold cross validation to avoid overfitting
        indices = list(range(len(x.dataset)))
        kf = KFold(n_splits=5, shuffle=True)
        cv_index = 0
        index_list_train = []
        index_list_valid = []
        for train_indices, valid_indices in kf.split(indices):
            index_list_train.append(train_indices)
            index_list_valid.append(valid_indices)

            train_sampler = SubsetRandomSampler(train_indices)
            valid_sampler = SubsetRandomSampler(valid_indices)

            train_loader = DataLoader(x.dataset, batch_size=32,
                                                       sampler=train_sampler,
                                                       shuffle=False)
            val_loader = DataLoader(x.dataset, batch_size=32,
                                                     sampler=valid_sampler,
                                                     shuffle=False)

            logging.info('Start training ...')
            self.model.train()
            early_stopping = EarlyStopping(tolerance=5, min_delta=10)
            epoch_train_loss = []
            epoch_validate_loss = []
            for epoch in range(epochs):
                for idx, (feature, label) in enumerate(train_loader):
                    feature, label = feature.to(self.device).long(), label.to(self.device)

                    outputs = self.model(feature)
                    loss = self.criterion(outputs, label)
                    #total_loss += loss.item()
                    loss_accum_train.update(loss.item(), label.size(0))
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                # training loss
                epoch_train_loss.append(loss_accum_train.avg)
                
                # validation loss
                val_loss_avg = self.validate(val_loader, self.model)
                epoch_validate_loss.append(val_loss_avg)

                print(f"Epoch {epoch+1}/{epochs}, Train Loss: {loss_accum_train.avg}, Val Loss: {val_loss_avg}")

                # early stopping
                early_stopping(loss_accum_train.avg, val_loss_avg)
                if early_stopping.early_stop:
                    print("We are at epoch:", epoch)
                    logging.info("Finished training ...Model saved")
                    torch.save(self.model, 'critic.pth') 
                    break
        
            cv_index += 1   # increment cv index
        
        return loss_accum_train.avg

In [92]:
rt = Critic(load_trained=False)
rt.train(train_loader, epochs=100)

INFO:Start training ...


Epoch 1/100, Train Loss: 0.6917002614877992, Val Loss: 0.8294968625707562
Epoch 2/100, Train Loss: 0.6691033990720859, Val Loss: 0.6616604525014153
Epoch 3/100, Train Loss: 0.4930773457685144, Val Loss: 0.26094386346398285
Epoch 4/100, Train Loss: 0.3847082929710066, Val Loss: 0.23794809452955532
Epoch 5/100, Train Loss: 0.31714117089455124, Val Loss: 0.22171827681297912
Epoch 6/100, Train Loss: 0.2714204840186782, Val Loss: 0.21149176010041
Epoch 7/100, Train Loss: 0.23789563396844443, Val Loss: 0.20193328822853523
Epoch 8/100, Train Loss: 0.212300402979822, Val Loss: 0.19181441002380759
Epoch 9/100, Train Loss: 0.19182193755770774, Val Loss: 0.18392870302532874
Epoch 10/100, Train Loss: 0.17528449735721488, Val Loss: 0.1728595726870176
Epoch 11/100, Train Loss: 0.16144992917491977, Val Loss: 0.16551293523815055
Epoch 12/100, Train Loss: 0.14978490334799432, Val Loss: 0.17387458425859195
Epoch 13/100, Train Loss: 0.1397782665165306, Val Loss: 0.1522921362799879
Epoch 14/100, Train Los

KeyboardInterrupt: 

In [84]:
len(train_loader.dataset)

12907