In [6]:
from dataloader import MIDI_Loader,MIDI_Render
import numpy as np
import os
import shutil
import random
from model_seq2seq import AutoDropoutNN
import torch
import sklearn.utils
from torch.nn import functional as F

pitch_num = 130
chord_num = 25
rest_pitch = 128
hold_pitch = 129
none_chord = 24
recog_level = "Mm"
train_path = "../dataset/Nottingham/train/"
validate_path = "../dataset/Nottingham/validate/"
test_path = "../dataset/Nottingham/test/"
min_step = 0.03125
total_len = 640 # 20.0 s
known_len = 320 # 10.0 s
shift_len = 128 # 5.0 s
total_epoch = 2
batch_size = 64
learning_rate = 1e-3


In [7]:
def split_dataset(directory, rate = [0.6,0.8,1.0]):
    path = os.listdir(directory)
    random.shuffle(path)
    nums = len(path)
    train_files = path[:int(nums * rate[0])]
    vali_files = path[int(nums * rate[0]):int(nums * rate[1])]
    test_files = path[int(nums * rate[1]):int(nums * rate[2])]
    for i in train_files:
        shutil.copyfile(directory + i, "dataset/Nottingham/train/" + i)
        print("copy %s success!\n" %i)
    for i in vali_files:
        shutil.copyfile(directory + i, "dataset/Nottingham/validate/" + i)
        print("copy %s success!\n" %i)
    for i in test_files:
        shutil.copyfile(directory + i, "dataset/Nottingham/test/" + i)
        print("copy %s success!\n" %i)

def alignment_data(data):
    new_data = []
    alignment_num = 0
    for e in data:
        delta = len(e["notes"]) - len(e["chord_seq"])
        if delta < 0:
            if e["notes"] == []:
                continue
            if e["notes"][-1] == rest_pitch or e["notes"][-1] == hold_pitch:
                q = e["notes"][-1]
                alignment_num += 1
                for i in range(-delta):
                    e["notes"].append(q)
            elif 0 <= e["notes"][-1] <= 127:
                q = hold_pitch
                alignment_num += 1
                for i in range(-delta):
                    e["notes"].append(q)
        elif delta > 0:
            if e["chord_seq"] == []:
                continue
            q = e["chord_seq"][-1]
            alignment_num += 1
            for i in range(delta):
                e["chord_seq"].append(q)
        new_data.append(e)
    print("finished %d data, %d data need aligment" %(len(data),alignment_num),flush = True)
    return new_data

def split_data(data, fix_len = 640, shift_len = 128):
    new_data = []
    print("begin split_data",flush = True)
    for i, d in enumerate(data):
        if i % 500 == 0:
            print("finish %d data" %i)
        mi = d["notes"]
        ci = d["chord_seq"]
        sta_pos = 0
        while ci[sta_pos] == none_chord:
            sta_pos += 1
        for j in range(sta_pos, len(ci) - 2 * fix_len, shift_len):
            split_sta = j
            split_flag = False
            while 1 == 1:
                if split_sta >= j + shift_len:
                    break
                if (ci[split_sta] != none_chord and 
                    ci[split_sta] != ci[split_sta - 1] and 
                    mi[split_sta] != hold_pitch and 
                    mi[split_sta] != rest_pitch):
                    split_flag = True
                    break
                split_sta += 1
            if not split_flag:
                continue
            split_end = -1
            for k in range(split_sta + fix_len - shift_len , split_sta + fix_len):
                if ((mi[k] == hold_pitch or mi[k] == rest_pitch) and 
                    mi[k + 1] != hold_pitch and
                    ci[k] != ci[k + 1]):
                    split_end = k
            if split_end == -1:
                continue
            split_end += 1
            n_m = d["notes"][split_sta:split_end]
            n_c = d["chord_seq"][split_sta:split_end]
            if fix_len - split_end + split_sta > 0:
                for i in range(fix_len - split_end + split_sta):
                    n_m.append(rest_pitch)
                    n_c.append(none_chord)
            new_data.append({"notes": n_m, "chords": n_c})
    print("finished %d data, %d split data get" %(len(data),len(new_data)),flush = True)
    return new_data
    
def make_one_hot_data(train_data):
    print("convert data to one-hot...",flush = True)
    train_size = min(len(train_data),3000)

    train_x = np.zeros((train_size,total_len,pitch_num + chord_num), dtype = np.int32)
    train_gd = np.zeros((train_size,total_len), dtype = np.int32)
    train_cond = np.zeros((train_size,total_len, chord_num), dtype = np.int32)


    # process with bi-directional issue

    for i,data in enumerate(train_data):
        if i >= train_size:
            break
        mi = data["notes"]
        ci = data["chords"]
        prev = rest_pitch
        for j, value in enumerate(mi):
            if j < known_len:
                if value != hold_pitch:
                    prev = value
                if value == hold_pitch and mi[j + 1] != hold_pitch:
                    train_x[i,j,prev] = 1
                elif j + 1 == known_len and value == hold_pitch:
                    train_x[i,j,prev] = 1
                else:
                    train_x[i,j,value] = 1
        for j, value in enumerate(ci):
            train_x[i,j, value + pitch_num] = 1
            train_cond[i,j,value] = 1
        prev = rest_pitch
        for j, value in enumerate(mi):
            if value != hold_pitch:
                prev = value
            if j + 1 == len(mi):
                train_gd[i,j] = prev
            elif value == hold_pitch and mi[j + 1] != hold_pitch:
                train_gd[i,j] = prev
            else:
                train_gd[i,j] = value
    train_gd = train_gd[:,known_len::]
    train_cond = train_cond[:,known_len::]
    print("convert success！",flush = True)
    return [train_x,train_gd,train_cond]


In [8]:
# def train():
   
# # load data from three folders
# train_loader = MIDI_Loader(datasetName = "Nottingham", minStep = min_step)
# validate_loader = MIDI_Loader(datasetName = "Nottingham", minStep = min_step)
# test_loader = MIDI_Loader(datasetName = "Nottingham", minStep = min_step)

# train_loader.load(directory = train_path)
# validate_loader.load(directory = validate_path)
# test_loader.load(directory = test_path)

# train_loader.getChordSeq()
# validate_loader.getChordSeq()
# test_loader.getChordSeq()

# train_loader.getNoteSeq()
# validate_loader.getNoteSeq()
# test_loader.getNoteSeq()

# train_data = train_loader.dataAugment()
# validate_data = validate_loader.dataAugment()
# test_data = test_loader.dataAugment()

# # process data to explicit structure - ont hot vectors

# # aligment the data
# train_data = alignment_data(train_data)
# validate_data = alignment_data(validate_data)
# test_data = alignment_data(test_data)

# # split the data
# train_data = split_data(train_data,fix_len = total_len,shift_len = shift_len)
# validate_data = split_data(validate_data,fix_len = total_len,shift_len = shift_len)
# test_data = split_data(test_data,fix_len = total_len,shift_len = shift_len)

# tr = np.asarray(train_data)
# va = np.asarray(validate_data)
# te = np.asarray(test_data)

# print("start to save npy")
# np.save("train_data.npy",tr)
# np.save("validate_data.npy",va)
# np.save("test_data.npy",te)
# print("finish saving npy")
# # render the data to files
# # render = MIDI_Render(datasetName = "Nottingham",minStep= min_step)
# # for i,v in enumerate(train_data):
# #     if i > 2000:
# #         break
# #     render.data2midi(data = v, recogLevel = "Mm", output = "splited/train/" + str(i) + ".mid")
# # for i,v in enumerate(test_data):
# #     if i > 2000:
# #         break
# #     render.data2midi(data = v, recogLevel = "Mm", output = "splited/test/" + str(i) + ".mid")
# # for i,v in enumerate(validate_data):
# #     if i > 2000:
# #         break
# #     render.data2midi(data = v, recogLevel = "Mm", output = "splited/validate/" + str(i) + ".mid")
  
# # process train_x train_gd validate_x validate_gd
# # convert sequence data to one-hot vectors
train_data = np.load("train_data.npy")
test_data = np.load("test_data.npy")
validate_data = np.load("validate_data.npy")

train_x,train_gd,train_cond = make_one_hot_data(train_data)
test_x,test_gd,test_cond = make_one_hot_data(test_data)
validate_x,validate_gd,validate_cond = make_one_hot_data(validate_data)

print(train_x.shape)
print(train_gd.shape)
print(train_cond.shape)


convert data to one-hot...
convert success！
convert data to one-hot...
convert success！
convert data to one-hot...
convert success！
(3000, 640, 155)
(3000, 320)
(3000, 320, 25)


In [9]:
# train
model = AutoDropoutNN(input_dims = pitch_num + chord_num,
        hidden_dims = 2 * (pitch_num + chord_num),output_dims = pitch_num,time_steps = total_len,output_len = total_len - known_len)

optimizer = torch.optim.RMSprop(model.parameters(), lr = learning_rate)
if torch.cuda.is_available():
    print("Using:", torch.cuda.get_device_name(torch.cuda.current_device()),flush = True)
    model.cuda()
else:
    print("Using CPU",flush = True)

# from torchsummary import summary

# summary(model, input_size=(640, 155))
    


Using: Tesla V100-SXM2-16GB


In [10]:
model.train()
for epoch in range(1):
    print("epoch: %d\n_________________________________" % epoch,flush = True)
    train_x, train_gd,train_cond = sklearn.utils.shuffle(train_x, train_gd,train_cond)
    train_batches_x = np.split(train_x,
                        range(batch_size, train_x.shape[0] // batch_size * batch_size, batch_size))
    train_batches_gd = np.split(train_gd,
                        range(batch_size, train_gd.shape[0] // batch_size * batch_size, batch_size))
    train_batches_cond = np.split(train_cond,
                        range(batch_size, train_cond.shape[0] // batch_size * batch_size, batch_size))
    validate_x, validate_gd,valitdate_cond = sklearn.utils.shuffle(validate_x, validate_gd,validate_cond)
    validate_batches_x = np.split(validate_x,
                        range(batch_size, validate_x.shape[0] // batch_size * batch_size, batch_size))
    validate_batches_gd = np.split(validate_gd,
                        range(batch_size, validate_gd.shape[0] // batch_size * batch_size, batch_size))
    validate_batches_cond = np.split(validate_cond,
                        range(batch_size, validate_cond.shape[0] // batch_size * batch_size, batch_size))
    for i in range(len(train_batches_x)):
        x = torch.from_numpy(train_batches_x[i]).float()
        gd = torch.from_numpy(train_batches_gd[i]).float()
        cond = torch.from_numpy(train_batches_cond[i]).float()
        j = i % len(validate_batches_x)
        v_x = torch.from_numpy(validate_batches_x[j]).float()
        v_gd = torch.from_numpy(validate_batches_gd[j]).float()
        v_cond = torch.from_numpy(validate_batches_cond[j]).float()
        if torch.cuda.is_available():
            x = x.cuda()
            gd = gd.cuda()
            cond = cond.cuda()
            v_x = v_x.cuda()
            v_gd = v_gd.cuda()
            v_cond = v_cond.cuda()
        optimizer.zero_grad()
        x_out = model(x,cond)
        loss = F.cross_entropy(x_out.view(-1,x_out.size(-1)), gd.view(-1).long())
        loss.backward()
        optimizer.step()
        v_loss = 0.0
        with torch.no_grad():
            v_x_out = model(v_x,v_cond)
            v_loss = F.cross_entropy(v_x_out.view(-1,x_out.size(-1)), v_gd.view(-1).long())
        print("batch %d loss: %.5f | val loss %.5f"  % (i,loss.item(),v_loss.item()), flush = True)
# torch.save(model.cpu().state_dict(), "test_model.pt")
# model.cuda()

epoch: 0
_________________________________
batch 0 loss: 4.87199 | val loss 4.89518
batch 1 loss: 4.98166 | val loss 3.46257
batch 2 loss: 3.37087 | val loss 2.04057
batch 3 loss: 1.98693 | val loss 1.81818
batch 4 loss: 1.42391 | val loss 3.53821
batch 5 loss: 3.09697 | val loss 2.53381
batch 6 loss: 2.65128 | val loss 1.94953
batch 7 loss: 1.65094 | val loss 1.65667
batch 8 loss: 1.46258 | val loss 1.32327
batch 9 loss: 1.29713 | val loss 1.31299
batch 10 loss: 1.16004 | val loss 1.30480
batch 11 loss: 1.19306 | val loss 1.27501
batch 12 loss: 1.19317 | val loss 1.28087
batch 13 loss: 1.12348 | val loss 1.25961
batch 14 loss: 1.12753 | val loss 1.30602
batch 15 loss: 1.23347 | val loss 1.37739
batch 16 loss: 1.17292 | val loss 1.43294
batch 17 loss: 1.18721 | val loss 1.28859
batch 18 loss: 1.24710 | val loss 1.27248
batch 19 loss: 1.16869 | val loss 1.23511
batch 20 loss: 1.16697 | val loss 1.26832
batch 21 loss: 1.13936 | val loss 1.23499
batch 22 loss: 1.13949 | val loss 1.34147
b