In [19]:
from hw1 import Composer
from midi2seq import process_midi_seq, seq2piano, random_piano, piano2seq, segment
import torch
from torch.utils.data import DataLoader, TensorDataset, Dataset 
import torch.nn as nn
import numpy as np
import random
from sklearn.preprocessing import MinMaxScaler
from sklearn.compose import ColumnTransformer
import os

In [2]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'mps:0' if torch.backends.mps.is_available() else 'cpu')
print('Using device:', device)

Using device: mps:0


In [3]:
sequence = process_midi_seq(maxlen=50, n=15000, shuffle_seed=3) #fixed shuffle_seed for debugging purpose and get fixed labels
print(sequence.shape)

notes = np.unique(sequence)
print(f'number of unique notes are {len(notes)} notes')

scaler = MinMaxScaler(feature_range=(0,1))

# Fitting scaler with the complete space and transforming the whole dataset on the scaler
normalized_sequence = scaler.fit_transform(sequence.reshape((-1,1))).reshape(sequence.shape)
print(f'max feature is {scaler.data_max_}')
print(f'min feature is {scaler.data_min_}')

normalized_notes = np.unique(normalized_sequence)
print(f'number of unique notes after normalization are {len(normalized_notes)}')

(15734, 51)
number of unique notes are 302 notes
max feature is [381.]
min feature is [21.]
number of unique notes after normalization are 302


In [4]:
X_train = normalized_sequence[:,:-1]
X_train = X_train.reshape((-1,X_train.shape[1],1))

Y_train = sequence[:,-1]
Y_train = Y_train.reshape((-1,1))

X_train = torch.tensor(X_train).float()
Y_train = torch.tensor(Y_train).float()

X_train.shape, Y_train.shape

(torch.Size([15734, 50, 1]), torch.Size([15734, 1]))

In [5]:
class MidiComposerDataset(Dataset):
    def __init__(self,labels, x_sequence, y_next):
        self.x_sequence = x_sequence
        self.y_next = y_next
        self.labels = labels

    def __len__(self):
        return len(self.y_next)

    def one_hot_encode(self, note):
        return torch.tensor(note == self.labels).float()
        
    def __getitem__(self, idx):
        action = self.y_next[idx][0].item()
        encode_action = self.one_hot_encode(action)
        return dict(
            sequence = self.x_sequence[idx],
            action = encode_action
        )

In [6]:
train_dataset = MidiComposerDataset(notes, X_train, Y_train)

In [7]:
BATCH_SIZE = 100

train_loader = DataLoader(train_dataset,batch_size = BATCH_SIZE, shuffle=True)

In [8]:
for _, batch in enumerate(train_loader):
    sequence_batch , action_batch = batch['sequence'].to(device) , batch['action'].to(device) 
    print(sequence_batch.shape, action_batch.shape)
    break

torch.Size([100, 50, 1]) torch.Size([100, 302])


In [9]:
class ComposerModel(nn.Module):
    def __init__(self, n_classes, n_input=1, n_hidden=256, n_layers=2):
        super().__init__()
        self.num_stacked_layers = n_layers
        self.hidden_size = n_hidden
        
        self.lstm = nn.LSTM(input_size=n_input, hidden_size=n_hidden, num_layers=n_layers, batch_first=True, dropout=0.2)
        self.dropout = nn.Dropout(0.2)
        # Output layer
        self.linear = nn.Linear(n_hidden, n_classes)

    def forward(self, x):
        batch_size = x.size(0)

        h0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size).to(device)
        c0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size).to(device)
        
        lstm_out, _ = self.lstm(x, (h0, c0))
        # take only the last output
        out = lstm_out[:, -1, :]
        # produce output
        out = self.linear(self.dropout(out))
        return out

In [10]:
classes = len(notes)
model = ComposerModel(classes,1,256, 2)
model.to(device)

ComposerModel(
  (lstm): LSTM(1, 256, num_layers=2, batch_first=True, dropout=0.2)
  (dropout): Dropout(p=0.2, inplace=False)
  (linear): Linear(in_features=256, out_features=302, bias=True)
)

In [11]:
learning_rate = 0.0001
loss_function = nn.CrossEntropyLoss(reduction="sum")
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [12]:
def train_one_epoch():
    model.train(True)
    print(f'Epoch: {epoch + 1}')
    running_loss = 0.0
    
    for batch_index, batch in enumerate(train_loader):
        sequence_batch , action_batch = batch['sequence'].to(device) , batch['action'].to(device)
        
        output = model(sequence_batch)
        loss = loss_function(output, action_batch)
        running_loss += loss.item()
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_index % 100 == 99:  # print every 100 batches
            avg_loss_across_batches = running_loss / 100
            print('Batch {0}, Loss: {1:.3f}'.format(batch_index+1,
                                                    avg_loss_across_batches))
            running_loss = 0.0
    print()

In [13]:
num_epochs = 2000
for epoch in range(num_epochs):
    train_one_epoch()
torch.save(model, "composer.pth")

Epoch: 1
Batch 100, Loss: 496.899

Epoch: 2
Batch 100, Loss: 441.254

Epoch: 3
Batch 100, Loss: 441.427

Epoch: 4
Batch 100, Loss: 441.138

Epoch: 5
Batch 100, Loss: 441.203

Epoch: 6
Batch 100, Loss: 439.871

Epoch: 7
Batch 100, Loss: 439.950

Epoch: 8
Batch 100, Loss: 439.925

Epoch: 9
Batch 100, Loss: 439.993

Epoch: 10
Batch 100, Loss: 439.406

Epoch: 11
Batch 100, Loss: 439.676

Epoch: 12
Batch 100, Loss: 436.365

Epoch: 13
Batch 100, Loss: 435.949

Epoch: 14
Batch 100, Loss: 433.751

Epoch: 15
Batch 100, Loss: 429.694

Epoch: 16
Batch 100, Loss: 424.822

Epoch: 17
Batch 100, Loss: 418.911

Epoch: 18
Batch 100, Loss: 412.427

Epoch: 19
Batch 100, Loss: 409.331

Epoch: 20
Batch 100, Loss: 402.700

Epoch: 21
Batch 100, Loss: 398.261

Epoch: 22
Batch 100, Loss: 396.260

Epoch: 23
Batch 100, Loss: 391.730

Epoch: 24
Batch 100, Loss: 387.566

Epoch: 25
Batch 100, Loss: 385.296

Epoch: 26
Batch 100, Loss: 383.439

Epoch: 27
Batch 100, Loss: 383.461

Epoch: 28
Batch 100, Loss: 381.247

E

In [14]:
state = {'epoch': num_epochs + 1, 'state_dict': model.state_dict(),
             'optimizer': optimizer.state_dict(), 'losslogger': None}
torch.save(state, "composer_checkpoint.pth.tar")

In [17]:
def load_checkpoint(model, optimizer, losslogger=None, filename='composer_checkpoint.pth.tar'):
    # Note: Input model & optimizer should be pre-defined.  This routine only updates their states.
    start_epoch = 0
    if os.path.isfile(filename):
        print("=> loading checkpoint '{}'".format(filename))
        checkpoint = torch.load(filename)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        losslogger = checkpoint['losslogger']
        print("=> loaded checkpoint '{}' (epoch {})"
                  .format(filename, checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(filename))

    return model, optimizer, start_epoch, losslogger

In [21]:
model, optimizer, start_epoch, losslogger = load_checkpoint(model, optimizer)
model = model.to(device)
# now individually transfer the optimizer parts...
for state in optimizer.state.values():
    for k, v in state.items():
        if isinstance(v, torch.Tensor):
            state[k] = v.to(device)

=> loading checkpoint 'composer_checkpoint.pth.tar'
=> loaded checkpoint 'composer_checkpoint.pth.tar' (epoch 2001)


In [178]:
with torch.no_grad():
    rint = random.randint(0,sequence.shape[0]-1)
    prompt_sequence = train_dataset.__getitem__(rint)['sequence']

    prompt_sequence = prompt_sequence.reshape((-1,prompt_sequence.shape[0],1))

    for i in range(1000):
        output = model(prompt_sequence.to(device))
        predicted_index = int(torch.argmax(output, dim=1))
        predicted_note = training_notes[predicted_index]
        print(f'predicted in sequence note is {predicted_note}')

        # append the new note into the prompt for the next iteration
        print(prompt_sequence)
        print(output[0][predicted_index])
        #z = torch.zeros((10,1))
        #torch.cat((your_tensor,z),1)
        
        prompt_sequence.append(predicted_note)
        # new prompt sequence of length 50
        prompt_sequence = prompt_sequence[1:]
        print(len(prompt_sequence))

predicted in sequence note is 184
tensor([[[0.9747],
         [0.1681],
         [0.6629],
         [0.6555],
         [0.4607],
         [0.4719],
         [0.6657],
         [0.6544],
         [0.9720],
         [0.1127],
         [0.6667],
         [0.6555],
         [0.4790],
         [0.6639],
         [0.6527],
         [0.5281],
         [0.6535],
         [0.9690],
         [0.1096],
         [0.9663],
         [0.1718],
         [0.9635],
         [0.1011],
         [0.6629],
         [0.6554],
         [0.4607],
         [0.4706],
         [0.6657],
         [0.6555],
         [0.9719],
         [0.1208],
         [0.6572],
         [0.9745],
         [0.1681],
         [0.6507],
         [0.5282],
         [0.6583],
         [0.6555],
         [0.4790],
         [0.6639],
         [0.6573],
         [0.9690],
         [0.0986],
         [0.9691],
         [0.1573],
         [0.9662],
         [0.1124],
         [0.6685],
         [0.6545],
         [0.5226]]])
tensor(0.1069)

AttributeError: 'Tensor' object has no attribute 'append'