***
*Project:* Expressive Piano Generation

*Author:* Jingwei Liu (Computer Music Ph.D., UC San Diego)
***

# <span style="background-color:darkorange; color:white; padding:2px 6px">Training</span> 


# Expressive Piano Performance Model Training

In [1]:
import py_midicsv as pm
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import torch
from torch import nn, optim
import os
torch.cuda.is_available()

True

In [2]:
# Device used for every model and tensor in this notebook. Prefer the GPU, but
# fall back to the CPU so the notebook still runs on machines without CUDA.
dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Multi-arguments Sequential Model

The temporal structure of a performance is captured by a sequential model, and the five arguments of each note event are inherently interdependent. To model these interdependencies, we decompose the temporal predictor into 5 separate LSTMs, each of which takes as additional input the arguments already predicted for the same event.

<img src="Pictures/LSTM_5.jpg" style="width:800px">
<caption><center> Figure 1. A way to Capture Interdependency among Arguments in a Multi-argument Sequential Model.</center></caption>

## Categorical Index of Multi-arguments

- $x_n$ -- MIDI note number minus $21$. Original range $[21,108]$, note index range $[0,87]$.

- $x_t$ -- Time shift in milliseconds categorized as Fig.1, index range $[0,104]$.

- $x_d$ -- Duration in milliseconds categorized as Fig.1, index range $[0,119]$.

- $x_v$ -- Velocity difference between two subsequent notes. Categorized as Fig.1, index range $[0,46]$

- $x_p$ -- Sustain pedal status on/off, index range $[0,1]$

In [5]:
class LSTM(nn.Module):
    """
    Stacked LSTM followed by a linear layer applied to the last time step.

    Arguments:
    input_len -- size of the feature vector at each time step
    hidden_size -- number of hidden units in every LSTM layer
    num_class -- number of logits produced by the output layer
    num_layers -- number of stacked LSTM layers
    """

    def __init__(self, input_len, hidden_size, num_class, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # The attribute names "LSTM" and "output_layer" are part of the
        # state_dict keys, so renaming them would break saved checkpoints.
        self.LSTM = nn.LSTM(input_len, hidden_size, num_layers, batch_first=True, device=dev)
        self.output_layer = nn.Linear(hidden_size, num_class, device=dev)

    def forward(self, X):
        """
        Arguments:
        X -- input of shape (batch_size, seq_len, input_len)

        Returns:
        out -- logits of shape (batch_size, num_class), computed from the
               LSTM output at the last time step only
        """
        # nn.LSTM starts from all-zero hidden and cell states when none are
        # passed, created with the input's own device and dtype. Relying on
        # that default keeps forward() working after the module is moved or
        # cast, instead of pinning the states to float32 on the global device.
        out, _ = self.LSTM(X)
        return self.output_layer(out[:, -1, :])

In [3]:
def get_batch(X, pos, batch_size, seq_len):
    """
    Build one batch of overlapping windows from a single file.

    Row i of the batch is the window X[pos+i : pos+i+seq_len]. When fewer than
    batch_size such windows remain, the leading rows are taken from the end of
    the file, the remaining rows are windows starting near a randomly drawn
    offset, and the returned position is file_len to signal that the file is
    exhausted.

    Arguments:
    X -- input of shape (file_len, input_len)
    pos -- position to start batch
    batch_size -- number of windows in the batch
    seq_len -- length of each window

    Returns:
    batch -- the training batch of shape (batch_size, seq_len, input_len)
    pos -- start position for the next batch (file_len once the file is used up)

    Raises:
    ValueError -- if not even one window fits at pos (pos + seq_len > file_len),
                  or, from np.random.randint, if the file is too short to draw
                  the random offset (file_len - batch_size - seq_len <= 0)
    """

    file_len = X.shape[0]
    input_len = X.shape[1]
    if pos + seq_len > file_len:
        # Previously this case fell through and returned an all-zero batch with
        # pos unchanged, which silently trains on garbage and never advances.
        raise ValueError(
            "no window of length {} fits at position {} in a file of length {}".format(
                seq_len, pos, file_len
            )
        )

    batch = torch.zeros(batch_size, seq_len, input_len, device=dev)
    if pos + seq_len + batch_size <= file_len:
        # Enough data left for a full batch of consecutive windows.
        for i in range(batch_size):
            batch[i,:,:] = X[pos+i:pos+seq_len+i,:]
        return batch, pos + batch_size

    # Tail of the file: `num` rows cannot be filled with consecutive windows.
    num = pos + seq_len + batch_size - file_len
    a = np.random.randint(file_len-batch_size-seq_len)
    for i in range(batch_size-num):
        batch[i,:,:] = X[pos+i:pos+seq_len+i,:]
    # Top the batch up with windows starting at the random offset a (+ row index).
    for i in range(batch_size-num,batch_size):
        batch[i,:,:] = X[a+i:a+seq_len+i,:]
    return batch, file_len

In [4]:
# Training hyperparameters shared by all five models.
batch_size = 100
hidden_size = 150
num_layers = 2

# Number of categories of each argument (= output size of the matching model).
num_class1 = 88   # note
num_class2 = 105  # time shift
num_class3 = 120  # duration
num_class4 = 47   # velocity
num_class5 = 2    # pedal

# Input width of each model. Model 1 sees the full encoding of the current
# event; every later model additionally sees the one-hot blocks of the
# next-event arguments predicted by the models before it.
input_len1 = num_class1 + num_class2 + num_class3 + num_class4 + num_class5  # 362
input_len2 = input_len1 + num_class1  # 450: + next note
input_len3 = input_len2 + num_class2  # 555: + next time shift
input_len4 = input_len3 + num_class3  # 675: + next duration
input_len5 = input_len4 + num_class4  # 722: + next velocity

### Model 1

In [29]:
# Rebuild the note predictor and resume from the checkpoint file "model1".
# map_location places the stored tensors on `dev`, so a checkpoint written on
# a GPU can also be loaded when `dev` is a different device.
model1 = LSTM(input_len1, hidden_size, num_class1, num_layers)
model1.load_state_dict(torch.load("model1", map_location=dev, weights_only=True))
print(model1)

LSTM(
  (LSTM): LSTM(362, 150, num_layers=2, batch_first=True)
  (output_layer): Linear(in_features=150, out_features=88, bias=True)
)


In [30]:
# Cross-entropy between the predicted logits and the target class distribution.
loss_fun = nn.CrossEntropyLoss()
# Disabled alternative optimizer, kept for reference:
# optimizer1 = optim.Adam(model1.parameters())
# Plain SGD over model1's parameters with a fixed learning rate of 0.1.
optimizer1 = optim.SGD(model1.parameters(),lr = 0.1)

In [31]:
# Base window length. The training loop increments seq_len before each round,
# so the first round trains on windows of length 51.
seq_len = 50
# epoch = 5  (disabled: the training loop picks the epoch count per round)

In [1]:
# Train model 1 (next-note predictor). Each round lengthens the window by one
# step and makes a few passes over every CSV file of the dataset.
for g in range(3):
    seq_len += 1
    print("Sequence length: ", seq_len)
    # Three passes when the length is a multiple of 10, otherwise 1 or 2 at random.
    if seq_len%10 == 0:
        epoch = 3
    else:
        epoch = np.random.choice([1,2])
    print("Epoch: ", epoch)
    for e in range(epoch):
        # Raw string: the backslashes are path separators, not escape sequences.
        txt_folder = Path(r'D:\deep music\data\e_piano_index\selected').rglob('*.csv')
        count = 0
        Loss = 0
        print("Epoch", e,":")
        for x in txt_folder:
            data_name = os.path.splitext(x)[0]  # path without extension, used in the progress log
            Index = pd.read_csv(x, sep=",")

            # Column offsets into the multi-hot vector:
            # [0,362) current event (note, time shift, duration, velocity, pedal),
            # [362,450) note of the next event (the prediction target).
            numm = 6
            indx = np.zeros((Index.shape[0],numm),dtype = int)
            indx[:,0] = Index['Note_index']
            indx[:,1] = Index['Time_Shift_index'] + 88
            indx[:,2] = Index['Duration_index'] + 193
            indx[:,3] = Index['Velocity_index'] + 313
            indx[:,4] = Index['Pedal'] + 360
            indx[:-1,5] = Index['Note_index'][1:] + 362
            indx = indx[:-1,:]  # the last event has no successor

            file_len = indx.shape[0]
            # Skip files too short for get_batch to draw its random top-up windows.
            if file_len - batch_size - seq_len <= 0:
                continue

            # process inputs: set all active columns of every row in one indexed write
            hot = torch.as_tensor(indx, dtype=torch.long, device=dev)
            rows = torch.arange(file_len, device=dev).unsqueeze(1)
            file_input = torch.zeros(file_len, 450, device=dev)
            file_input[rows, hot] = 1

            pos = 0
            while pos <= file_len - seq_len:
                batch, pos = get_batch(file_input, pos, batch_size, seq_len)

                # Input: current-event columns of the whole window.
                # Target: next-note columns at the last step of the window.
                output1 = model1(batch[:,:,:362])
                loss1 = loss_fun(output1, batch[:,-1,362:])

                optimizer1.zero_grad()
                loss1.backward()
                optimizer1.step()

                count += 1
                Loss += loss1.item()

                # Report the running mean loss every 10000 batches, then reset it.
                if count == 10000:
                    print(data_name)
                    print(f"Loss: {Loss/10000:>4f}")
                    Loss = 0
                    count = 0

In [33]:
# Write model1's parameters (its state_dict) to the file "model1".
torch.save(model1.state_dict(), "model1")

In [34]:
# Ask PyTorch to release GPU memory it has cached but is no longer using.
torch.cuda.empty_cache()

### Model 2

In [8]:
# Rebuild the time-shift predictor and resume from the checkpoint file "model2".
# map_location places the stored tensors on `dev`, so a checkpoint written on
# a GPU can also be loaded when `dev` is a different device.
model2 = LSTM(input_len2, hidden_size, num_class2, num_layers)
model2.load_state_dict(torch.load("model2", map_location=dev, weights_only=True))
print(model2)

LSTM(
  (LSTM): LSTM(450, 150, num_layers=2, batch_first=True)
  (output_layer): Linear(in_features=150, out_features=105, bias=True)
)


In [9]:
# Cross-entropy between the predicted logits and the target class distribution.
loss_fun = nn.CrossEntropyLoss()
# Plain SGD over model2's parameters with a fixed learning rate of 0.1.
optimizer2 = optim.SGD(model2.parameters(),lr = 0.1)

In [35]:
# Base window length. The training loop increments seq_len before each round,
# so the first round trains on windows of length 51.
seq_len = 50
# epoch = 4  (disabled: the training loop picks the epoch count per round)

In [2]:
# Train model 2 (next-time-shift predictor). Each round lengthens the window by
# one step and makes a few passes over every CSV file of the dataset.
for g in range(3):
    seq_len += 1
    print("Sequence length: ", seq_len)
    # Three passes when the length is a multiple of 10, otherwise 1 (p=0.65) or 2 (p=0.35).
    if seq_len%10 == 0:
        epoch = 3
    else:
        epoch = np.random.choice([1,2],p=[0.65,0.35])
    print("Epoch: ", epoch)
    for e in range(epoch):
        # Raw string: the backslashes are path separators, not escape sequences.
        txt_folder = Path(r'D:\deep music\data\e_piano_index\selected').rglob('*.csv')
        count = 0
        Loss = 0
        print("Epoch", e,":")
        for x in txt_folder:
            data_name = os.path.splitext(x)[0]  # path without extension, used in the progress log
            Index = pd.read_csv(x, sep=",")

            # Column offsets into the multi-hot vector:
            # [0,362) current event, [362,450) next note,
            # [450,555) time shift of the next event (the prediction target).
            numm = 7
            indx = np.zeros((Index.shape[0],numm),dtype = int)
            indx[:,0] = Index['Note_index']
            indx[:,1] = Index['Time_Shift_index'] + 88
            indx[:,2] = Index['Duration_index'] + 193
            indx[:,3] = Index['Velocity_index'] + 313
            indx[:,4] = Index['Pedal'] + 360
            indx[:-1,5] = Index['Note_index'][1:] + 362
            indx[:-1,6] = Index['Time_Shift_index'][1:] + 450
            indx = indx[:-1,:]  # the last event has no successor

            file_len = indx.shape[0]
            # Skip files too short for get_batch to draw its random top-up windows.
            if file_len - batch_size - seq_len <= 0:
                continue

            # process inputs: set all active columns of every row in one indexed write
            hot = torch.as_tensor(indx, dtype=torch.long, device=dev)
            rows = torch.arange(file_len, device=dev).unsqueeze(1)
            file_input = torch.zeros(file_len, 555, device=dev)
            file_input[rows, hot] = 1

            pos = 0
            while pos <= file_len - seq_len:
                batch, pos = get_batch(file_input, pos, batch_size, seq_len)

                # Input: current event plus next note over the whole window.
                # Target: next-time-shift columns at the last step of the window.
                output2 = model2(batch[:,:,:450])
                loss2 = loss_fun(output2, batch[:,-1,450:])

                optimizer2.zero_grad()
                loss2.backward()
                optimizer2.step()

                count += 1
                Loss += loss2.item()
                # Report the running mean loss every 10000 batches, then reset it.
                if count == 10000:
                    print(data_name)
                    print(f"Loss: {Loss/10000:>4f}")
                    Loss = 0
                    count = 0

In [37]:
# Write model2's parameters (its state_dict) to the file "model2".
torch.save(model2.state_dict(), "model2")

In [38]:
# Ask PyTorch to release GPU memory it has cached but is no longer using.
torch.cuda.empty_cache()

### Model 3

In [6]:
# Rebuild the duration predictor and resume from the checkpoint file "model3".
# map_location places the stored tensors on `dev`, so a checkpoint written on
# a GPU can also be loaded when `dev` is a different device.
model3 = LSTM(input_len3, hidden_size, num_class3, num_layers)
model3.load_state_dict(torch.load("model3", map_location=dev, weights_only=True))
print(model3)

LSTM(
  (LSTM): LSTM(555, 150, num_layers=2, batch_first=True)
  (output_layer): Linear(in_features=150, out_features=120, bias=True)
)


In [7]:
# Cross-entropy between the predicted logits and the target class distribution.
loss_fun = nn.CrossEntropyLoss()
# Plain SGD over model3's parameters with a fixed learning rate of 0.1.
optimizer3 = optim.SGD(model3.parameters(),lr = 0.1)

In [8]:
# Base window length. The training loop increments seq_len before each round,
# so the first round trains on windows of length 51.
seq_len = 50
# epoch = 3  (disabled: the training loop picks the epoch count per round)

In [3]:
# Train model 3 (next-duration predictor). Each round lengthens the window by
# one step and makes a few passes over every CSV file of the dataset.
for g in range(3):
    seq_len += 1
    print("Sequence length: ", seq_len)
    # Three passes when the length is a multiple of 10, otherwise 1 (p=0.65) or 2 (p=0.35).
    if seq_len%10 == 0:
        epoch = 3
    else:
        epoch = np.random.choice([1,2],p=[0.65,0.35])
    print("Epoch: ", epoch)
    for e in range(epoch):
        # Raw string: the backslashes are path separators, not escape sequences.
        txt_folder = Path(r'D:\deep music\data\e_piano_index\selected').rglob('*.csv')
        count = 0
        Loss = 0
        print("Epoch", e,":")
        for x in txt_folder:
            data_name = os.path.splitext(x)[0]  # path without extension, used in the progress log
            Index = pd.read_csv(x, sep=",")

            # Column offsets into the multi-hot vector:
            # [0,362) current event, [362,450) next note, [450,555) next time shift,
            # [555,675) duration of the next event (the prediction target).
            numm = 8
            indx = np.zeros((Index.shape[0],numm),dtype = int)
            indx[:,0] = Index['Note_index']
            indx[:,1] = Index['Time_Shift_index'] + 88
            indx[:,2] = Index['Duration_index'] + 193
            indx[:,3] = Index['Velocity_index'] + 313
            indx[:,4] = Index['Pedal'] + 360
            indx[:-1,5] = Index['Note_index'][1:] + 362
            indx[:-1,6] = Index['Time_Shift_index'][1:] + 450
            indx[:-1,7] = Index['Duration_index'][1:] + 555
            indx = indx[:-1,:]  # the last event has no successor

            file_len = indx.shape[0]
            # Skip files too short for get_batch to draw its random top-up windows.
            if file_len - batch_size - seq_len <= 0:
                continue

            # process inputs: set all active columns of every row in one indexed write
            hot = torch.as_tensor(indx, dtype=torch.long, device=dev)
            rows = torch.arange(file_len, device=dev).unsqueeze(1)
            file_input = torch.zeros(file_len, 675, device=dev)
            file_input[rows, hot] = 1

            pos = 0
            while pos <= file_len - seq_len:
                batch, pos = get_batch(file_input, pos, batch_size, seq_len)

                # Input: current event plus next note and time shift over the whole window.
                # Target: next-duration columns at the last step of the window.
                output3 = model3(batch[:,:,:555])
                loss3 = loss_fun(output3, batch[:,-1,555:])

                optimizer3.zero_grad()
                loss3.backward()
                optimizer3.step()

                count += 1
                Loss += loss3.item()
                # Report the running mean loss every 10000 batches, then reset it.
                if count == 10000:
                    print(data_name)
                    print(f"Loss: {Loss/10000:>4f}")
                    Loss = 0
                    count = 0

3.384670

In [None]:
# Write model3's parameters (its state_dict) to the file "model3".
torch.save(model3.state_dict(), "model3")

In [None]:
# Ask PyTorch to release GPU memory it has cached but is no longer using.
torch.cuda.empty_cache()

### Model 4

In [None]:
# Rebuild the velocity predictor and resume from the checkpoint file "model4".
# map_location places the stored tensors on `dev`, so a checkpoint written on
# a GPU can also be loaded when `dev` is a different device.
model4 = LSTM(input_len4, hidden_size, num_class4, num_layers)
model4.load_state_dict(torch.load("model4", map_location=dev, weights_only=True))
print(model4)

In [None]:
# Cross-entropy between the predicted logits and the target class distribution.
loss_fun = nn.CrossEntropyLoss()
# Plain SGD over model4's parameters with a fixed learning rate of 0.1.
optimizer4 = optim.SGD(model4.parameters(),lr = 0.1)

In [None]:
# Base window length. The training loop increments seq_len before each round,
# so the first round trains on windows of length 51.
seq_len = 50
# epoch = 4  (disabled: the training loop picks the epoch count per round)

In [None]:
# Train model 4 (next-velocity predictor). Each round lengthens the window by
# one step and makes a few passes over every CSV file of the dataset.
for g in range(3):
    seq_len += 1
    print("Sequence length: ", seq_len)
    # Three passes when the length is a multiple of 10, otherwise 1 (p=0.65) or 2 (p=0.35).
    if seq_len%10 == 0:
        epoch = 3
    else:
        epoch = np.random.choice([1,2],p=[0.65,0.35])
    print("Epoch: ", epoch)
    for e in range(epoch):
        # Raw string: the backslashes are path separators, not escape sequences.
        txt_folder = Path(r'D:\deep music\data\e_piano_index\selected').rglob('*.csv')
        count = 0
        Loss = 0
        print("Epoch", e,":")
        for x in txt_folder:
            data_name = os.path.splitext(x)[0]  # path without extension, used in the progress log
            Index = pd.read_csv(x, sep=",")

            # Column offsets into the multi-hot vector:
            # [0,362) current event, [362,450) next note, [450,555) next time shift,
            # [555,675) next duration,
            # [675,722) velocity of the next event (the prediction target).
            numm = 9
            indx = np.zeros((Index.shape[0],numm),dtype = int)
            indx[:,0] = Index['Note_index']
            indx[:,1] = Index['Time_Shift_index'] + 88
            indx[:,2] = Index['Duration_index'] + 193
            indx[:,3] = Index['Velocity_index'] + 313
            indx[:,4] = Index['Pedal'] + 360
            indx[:-1,5] = Index['Note_index'][1:] + 362
            indx[:-1,6] = Index['Time_Shift_index'][1:] + 450
            indx[:-1,7] = Index['Duration_index'][1:] + 555
            indx[:-1,8] = Index['Velocity_index'][1:] + 675
            indx = indx[:-1,:]  # the last event has no successor

            file_len = indx.shape[0]
            # Skip files too short for get_batch to draw its random top-up windows.
            if file_len - batch_size - seq_len <= 0:
                continue

            # process inputs: set all active columns of every row in one indexed write
            hot = torch.as_tensor(indx, dtype=torch.long, device=dev)
            rows = torch.arange(file_len, device=dev).unsqueeze(1)
            file_input = torch.zeros(file_len, 722, device=dev)
            file_input[rows, hot] = 1

            pos = 0
            while pos <= file_len - seq_len:
                batch, pos = get_batch(file_input, pos, batch_size, seq_len)

                # Input: current event plus next note, time shift and duration over the window.
                # Target: next-velocity columns at the last step of the window.
                output4 = model4(batch[:,:,:675])
                loss4 = loss_fun(output4, batch[:,-1,675:])

                optimizer4.zero_grad()
                loss4.backward()
                optimizer4.step()

                count += 1
                Loss += loss4.item()
                # Report the running mean loss every 10000 batches, then reset it.
                if count == 10000:
                    print(data_name)
                    print(f"Loss: {Loss/10000:>4f}")
                    Loss = 0
                    count = 0

2.891708

In [None]:
# Write model4's parameters (its state_dict) to the file "model4".
torch.save(model4.state_dict(), "model4")

In [None]:
# Ask PyTorch to release GPU memory it has cached but is no longer using.
torch.cuda.empty_cache()

### Model 5

In [None]:
# Rebuild the pedal predictor and resume from the checkpoint file "model5".
# map_location places the stored tensors on `dev`, so a checkpoint written on
# a GPU can also be loaded when `dev` is a different device.
model5 = LSTM(input_len5, hidden_size, num_class5, num_layers)
model5.load_state_dict(torch.load("model5", map_location=dev, weights_only=True))
print(model5)

In [None]:
# Cross-entropy between the predicted logits and the target class distribution.
loss_fun = nn.CrossEntropyLoss()
# Plain SGD over model5's parameters with a fixed learning rate of 0.1.
optimizer5 = optim.SGD(model5.parameters(),lr = 0.1)

In [None]:
# Base window length. The training loop increments seq_len before each round,
# so the first round trains on windows of length 51.
seq_len = 50
# epoch = 3  (disabled: the training loop picks the epoch count per round)

In [None]:
# Train model 5 (next-pedal predictor). Each round lengthens the window by one
# step and makes a few passes over every CSV file of the dataset.
for g in range(3):
    seq_len += 1
    print("Sequence length: ", seq_len)
    # Three passes when the length is a multiple of 10, otherwise 1 (p=0.65) or 2 (p=0.35).
    if seq_len%10 == 0:
        epoch = 3
    else:
        epoch = np.random.choice([1,2],p=[0.65,0.35])
    print("Epoch: ", epoch)
    for e in range(epoch):
        # Raw string: the backslashes are path separators, not escape sequences.
        txt_folder = Path(r'D:\deep music\data\e_piano_index\selected').rglob('*.csv')
        count = 0
        Loss = 0
        print("Epoch", e,":")
        for x in txt_folder:
            data_name = os.path.splitext(x)[0]  # path without extension, used in the progress log
            Index = pd.read_csv(x, sep=",")

            # Column offsets into the multi-hot vector:
            # [0,362) current event, [362,450) next note, [450,555) next time shift,
            # [555,675) next duration, [675,722) next velocity,
            # [722,724) pedal state of the next event (the prediction target).
            numm = 10
            indx = np.zeros((Index.shape[0],numm),dtype = int)
            indx[:,0] = Index['Note_index']
            indx[:,1] = Index['Time_Shift_index'] + 88
            indx[:,2] = Index['Duration_index'] + 193
            indx[:,3] = Index['Velocity_index'] + 313
            indx[:,4] = Index['Pedal'] + 360
            indx[:-1,5] = Index['Note_index'][1:] + 362
            indx[:-1,6] = Index['Time_Shift_index'][1:] + 450
            indx[:-1,7] = Index['Duration_index'][1:] + 555
            indx[:-1,8] = Index['Velocity_index'][1:] + 675
            indx[:-1,9] = Index['Pedal'][1:] + 722
            indx = indx[:-1,:]  # the last event has no successor

            file_len = indx.shape[0]
            # Skip files too short for get_batch to draw its random top-up windows.
            if file_len - batch_size - seq_len <= 0:
                continue

            # process inputs: set all active columns of every row in one indexed write
            hot = torch.as_tensor(indx, dtype=torch.long, device=dev)
            rows = torch.arange(file_len, device=dev).unsqueeze(1)
            file_input = torch.zeros(file_len, 724, device=dev)
            file_input[rows, hot] = 1

            pos = 0
            while pos <= file_len - seq_len:
                batch, pos = get_batch(file_input, pos, batch_size, seq_len)

                # Input: current event plus all other next-event arguments over the window.
                # Target: next-pedal columns at the last step of the window.
                output5 = model5(batch[:,:,:722])
                loss5 = loss_fun(output5, batch[:,-1,722:])

                optimizer5.zero_grad()
                loss5.backward()
                optimizer5.step()

                count += 1
                Loss += loss5.item()
                # Report the running mean loss every 10000 batches, then reset it.
                if count == 10000:
                    print(data_name)
                    print(f"Loss: {Loss/10000:>4f}")
                    Loss = 0
                    count = 0

0.112460

In [None]:
# Write model5's parameters (its state_dict) to the file "model5".
torch.save(model5.state_dict(), "model5")

In [None]:
# Ask PyTorch to release GPU memory it has cached but is no longer using.
torch.cuda.empty_cache()