In [6]:
import sys
sys.path.insert(0, '../')


In [7]:
import os
import numpy as np
from bert.load_midi_bert import load_model
from finetune.finetune_dataset import FinetuneDataset
from torch.utils.data import DataLoader
from finetune.finetune_trainer import FinetuneTrainer


In [8]:
dict_path = '../bert/CP.pkl'
ckpt_path = '../bert/pretrain_model.ckpt'


In [9]:
def load_data(dataset):
    data_root = '../data/'

    if dataset not in ['pop909', 'pianist8', 'emopia']:
        print(f'Dataset {dataset} not supported')
        exit(1)

    X_train = np.load(os.path.join(data_root, f'{dataset}/{dataset}_train.npy'), allow_pickle=True)
    X_val = np.load(os.path.join(data_root, f'{dataset}/{dataset}_valid.npy'), allow_pickle=True)
    X_test = np.load(os.path.join(data_root, f'{dataset}/{dataset}_test.npy'), allow_pickle=True)

    print('X_train: {}, X_valid: {}, X_test: {}'.format(X_train.shape, X_val.shape, X_test.shape))

    if dataset == 'pop909':
        y_train = np.load(os.path.join(data_root, f'{dataset}/{dataset}_train_ans.npy'), allow_pickle=True)
        y_val = np.load(os.path.join(data_root, f'{dataset}/{dataset}_valid_ans.npy'), allow_pickle=True)
        y_test = np.load(os.path.join(data_root, f'{dataset}/{dataset}_test_ans.npy'), allow_pickle=True)
    else:
        y_train = np.load(os.path.join(data_root, f'{dataset}/{dataset}_train_ans.npy'), allow_pickle=True)
        y_val = np.load(os.path.join(data_root, f'{dataset}/{dataset}_valid_ans.npy'), allow_pickle=True)
        y_test = np.load(os.path.join(data_root, f'{dataset}/{dataset}_test_ans.npy'), allow_pickle=True)

    print('y_train: {}, y_valid: {}, y_test: {}'.format(y_train.shape, y_val.shape, y_test.shape))

    return X_train, X_val, X_test, y_train, y_val, y_test


In [10]:
batch_size = 12
num_workers = 5
index_layer = -1
lr = 2e-5
class_num = 3
hs = 768
cpu = False
cuda_devices = [0,1,2,3]
seq_class = True # composer, emotion, False = velocity, melody
task = 'composer'
epochs = 10


In [12]:
midibert = load_model(dict_path, ckpt_path)

X_train, X_val, X_test, y_train, y_val, y_test = load_data('pianist8')
trainset = FinetuneDataset(X=X_train, y=y_train)
validset = FinetuneDataset(X=X_val, y=y_val)
testset = FinetuneDataset(X=X_test, y=y_test)
train_loader = DataLoader(trainset, batch_size=batch_size, num_workers=num_workers, shuffle=True)
print("len of train_loader",len(train_loader))
valid_loader = DataLoader(validset, batch_size=batch_size, num_workers=num_workers)
print("len of valid_loader",len(valid_loader))
test_loader = DataLoader(testset, batch_size=batch_size, num_workers=num_workers)
print("len of valid_loader",len(test_loader))

print("\nCreating Finetune Trainer using index layer", index_layer)
trainer = FinetuneTrainer(midibert, train_loader, valid_loader, test_loader, index_layer, lr, class_num,
                        hs, y_test.shape, cpu, cuda_devices, None, seq_class)


Loading Dictionary
X_train: (1186, 512, 4), X_valid: (156, 512, 4), X_test: (126, 512, 4)
y_train: (1186,), y_valid: (156,), y_test: (126,)
len of train_loader 99
len of valid_loader 13
len of valid_loader 11

Creating Finetune Trainer using index layer -1
   device: cuda
init a fine-tune model, sequence-level task? True




In [13]:
print("\nTraining Start")
save_dir = os.path.join('result/finetune/', task)
os.makedirs(save_dir, exist_ok=True)
filename = os.path.join(save_dir, f'{task} model.ckpt')
print("   save model at {}".format(filename))

best_acc, best_epoch = 0, 0
bad_cnt = 0

#    train_accs, valid_accs = [], []
with open(os.path.join(save_dir, 'log'), 'a') as outfile:
    #outfile.write("Loading pre-trained model from " + best_mdl.split('/')[-1] + '\n')
    for epoch in range(epochs):
        train_loss, train_acc = trainer.train()
        valid_loss, valid_acc = trainer.valid()
        test_loss, test_acc, _ = trainer.test()

        is_best = valid_acc >= best_acc
        best_acc = max(valid_acc, best_acc)

        if is_best:
            bad_cnt, best_epoch = 0, epoch
        else:
            bad_cnt += 1

        print('epoch: {}/{} | Train Loss: {} | Train acc: {} | Valid Loss: {} | Valid acc: {} | Test loss: {} | Test acc: {}'.format(
            epoch+1, epochs, train_loss, train_acc, valid_loss, valid_acc, test_loss, test_acc))

#            train_accs.append(train_acc)
#            valid_accs.append(valid_acc)
        trainer.save_checkpoint(epoch, train_acc, valid_acc,
                                valid_loss, train_loss, is_best, filename)


        outfile.write('Epoch {}: train_loss={}, valid_loss={}, test_loss={}, train_acc={}, valid_acc={}, test_acc={}\n'.format(
            epoch+1, train_loss, valid_loss, test_loss, train_acc, valid_acc, test_acc))

        if bad_cnt > 3:
            print('valid acc not improving for 3 epochs')
            break



Training Start
   save model at result/finetune/composer/composer model.ckpt


  0%|          | 0/99 [00:00<?, ?it/s]../aten/src/ATen/native/cuda/Loss.cu:186: nll_loss_forward_no_reduce_cuda_kernel: block: [0,0,0], thread: [0,0,0] Assertion `cur_target >= 0 && cur_target < n_classes` failed.
../aten/src/ATen/native/cuda/Loss.cu:186: nll_loss_forward_no_reduce_cuda_kernel: block: [0,0,0], thread: [2,0,0] Assertion `cur_target >= 0 && cur_target < n_classes` failed.
../aten/src/ATen/native/cuda/Loss.cu:186: nll_loss_forward_no_reduce_cuda_kernel: block: [0,0,0], thread: [3,0,0] Assertion `cur_target >= 0 && cur_target < n_classes` failed.
../aten/src/ATen/native/cuda/Loss.cu:186: nll_loss_forward_no_reduce_cuda_kernel: block: [0,0,0], thread: [4,0,0] Assertion `cur_target >= 0 && cur_target < n_classes` failed.
../aten/src/ATen/native/cuda/Loss.cu:186: nll_loss_forward_no_reduce_cuda_kernel: block: [0,0,0], thread: [5,0,0] Assertion `cur_target >= 0 && cur_target < n_classes` failed.
../aten/src/ATen/native/cuda/Loss.cu:186: nll_loss_forward_no_reduce_cuda_kernel: 

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
