# Data preparation

In [None]:
import numpy as np
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.optim as optim

normalization = 'minmax'

In [None]:
#First we list all the files
scorelist=os.listdir('SymbTr-master/txt')
#Filter out the ones that are not .txt
scorelist = [k for k in scorelist if '.txt' in k]
scorelist.sort()
makamscore_dir = list()
#Nest by makam
for score in scorelist:
    makam = {'makam':score.split('--')[0], 'scores':[k for k in scorelist if score.split('--')[0] == k.split('--')[0]]}
    if makam not in makamscore_dir:
        makamscore_dir.append(makam)
#Sort makams by number of scores
makamscore_dir.sort(key=lambda k:len(k['scores']), reverse=True)

In [None]:
#We take the three best respresented makams
makamscore_dir = makamscore_dir[0:3]

In [None]:
def processScore(file):
    #Given a .txt, provides a numpy array with [note, duration]
    columns = ['Sira', 'Kod', 'Nota53', 'NotaAE', 'Koma53', 'KomaAE', 
         'Pay', 'Payda', 'Ms', 'LNS', 'Bas', 'Soz1', 'Offset']
    df = pd.DataFrame(columns = columns)
    with open('SymbTr-master/txt/'+file) as f: 
        for n, line in enumerate(f):
            if n > 0:
                auxdf = pd.DataFrame(data = line.strip().split("\t")).T
                auxdf.columns = columns
                df = df.append(auxdf)
                
    offsets=df[df['Kod'].isin(['9', '10'])]['Offset'].values.astype(np.float)
    notes=df[df['Kod'].isin(['9', '10'])]['Koma53'].values.astype(np.float)
    notes[notes == -1] = 242 #We substitute silence note value for min-1 in the whole dataset
    offsets=np.hstack((offsets, offsets[-1]+1.0))
    durations =  (offsets[1:]-offsets[0:-1])
    notedur = np.vstack((notes, durations))
    return notedur

In [None]:
#We will stack the whole dataset in order to obtain note statics for normalization
whole_dataset = np.zeros((2,0))
for makam in makamscore_dir:
    scorelist = makam['scores']
    for i,score in enumerate(scorelist):
        x = processScore(score)
        whole_dataset = np.hstack((whole_dataset, x))

In [None]:
notemin = np.min(whole_dataset, axis=1)
notemean = np.mean(whole_dataset, axis=1)
notemax = np.max(whole_dataset, axis=1)
notestd = np.std(whole_dataset, axis=1)

In [None]:
np.save('makamscore_dir.npy', makamscore_dir)
np.save('notemin.npy', notemin)
np.save('notemean.npy', notemean)
np.save('notestd.npy', notestd)
np.save('notemax.npy', notemax)

# Training

In [1]:
import numpy as np
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.optim as optim

normalization = 'minmax'
makamscore_dir = np.load('makamscore_dir.npy')
notemin = np.load('notemin.npy')
notestd = np.load('notestd.npy')
notemean = np.load('notemean.npy')
notemax = np.load('notemax.npy')

def processScore(file):
    #Given a .txt, provides a numpy array with [note, duration]
    columns = ['Sira', 'Kod', 'Nota53', 'NotaAE', 'Koma53', 'KomaAE', 
         'Pay', 'Payda', 'Ms', 'LNS', 'Bas', 'Soz1', 'Offset']
    df = pd.DataFrame(columns = columns)
    with open('SymbTr-master/txt/'+file) as f: 
        for n, line in enumerate(f):
            if n > 0:
                auxdf = pd.DataFrame(data = line.strip().split("\t")).T
                auxdf.columns = columns
                df = df.append(auxdf)
                
    offsets=df[df['Kod'].isin(['9', '10'])]['Offset'].values.astype(np.float)
    notes=df[df['Kod'].isin(['9', '10'])]['Koma53'].values.astype(np.float)
    notes[notes == -1] = 242 #We substitute silence note value for min-1 in the whole dataset
    offsets=np.hstack((offsets, offsets[-1]+1.0))
    durations =  (offsets[1:]-offsets[0:-1])
    notedur = np.vstack((notes, durations))
    return notedur

class LSTM(nn.Module):

    def __init__(self, input_dim, hidden_dim, batch_size, output_dim=1,
                    num_layers=2):
        super(LSTM, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.num_layers = num_layers

        # Define the LSTM layer
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers)

        # Define the output layer
        self.linear = nn.Linear(self.hidden_dim, output_dim)

    def init_hidden(self):
        # This is what we'll initialise our hidden state as
        return (torch.zeros(self.num_layers, self.batch_size, self.hidden_dim),
                torch.zeros(self.num_layers, self.batch_size, self.hidden_dim))

    def forward(self, input):
        # Forward pass through LSTM layer
        # shape of lstm_out: [input_size, batch_size, hidden_dim]
        # shape of self.hidden: (a, b), where a and b both 
        # have shape (num_layers, batch_size, hidden_dim).
        lstm_out, self.hidden = self.lstm(input.view(len(input), self.batch_size, -1))
        
        # Only take the output from the final timetep
        y_pred = self.linear(lstm_out[-1].view(self.batch_size, -1))
        return y_pred.view(-1)

model = LSTM(input_dim=2, hidden_dim=1024, batch_size=1, output_dim=2, num_layers=1)

def trainMAKAM(scorelist):
    scoreind = np.zeros(len(scorelist)+1)
    makam = np.zeros((2,0))
    for i,score in enumerate(scorelist):
        x = processScore(score)
        makam = np.hstack((makam, x))
        scoreind[i]=x.shape[1]

    scoreind[-1]=makam.shape[1]
    reset = np.zeros(len(scorelist)+1)
    for i in range(len(scoreind)):
        reset[i]=np.sum(scoreind[:i])

    #Normalize the sequences
    if normalization == 'minmax':
        makam = (makam - np.swapaxes(np.tile(notemin, (makam.shape[1],1)), 0,1)) / (np.swapaxes(np.tile(notemax, (makam.shape[1],1)), 0,1) - np.swapaxes(np.tile(notemin, (makam.shape[1],1)), 0,1))
    elif normalization == 'standarize':
        makam = (makam - np.swapaxes(np.tile(notemean, (makam.shape[1],1)), 0,1)) / np.swapaxes(np.tile(notestd, (makam.shape[1],1)), 0,1)

    makam = np.expand_dims(np.swapaxes(makam,0,1),1)

    inputs = torch.from_numpy(makam[:-1,:,:])
    targets = torch.from_numpy(makam[1:,:,:])

    model.double()
    if torch.cuda.is_available():
        model.cuda()
        inputs = inputs.cuda()
        targets = targets.cuda()
        criterion = nn.MSELoss().cuda()
        optimizer = optim.Adam(model.parameters(), lr=0.0001)
    else:
        criterion = nn.MSELoss()

    epochs = 25
    running_loss = 0.0
    best = 100.0


    for epoch in range(epochs):
        print('Epoch '+str(epoch+1)+' starts. Average MSE:')
        running_loss = 0.0
        for k in range(len(inputs)):
            if k in reset:
                model.hidden = model.init_hidden()
            optimizer.zero_grad()
            outputs = model(inputs[k])
            loss = criterion(outputs, targets[k])
            loss.backward()
            running_loss += loss.item()
            optimizer.step()
        print(running_loss/len(inputs))
        if running_loss/len(inputs) < best:
            best = running_loss/len(inputs)
            torch.save(model.state_dict(), scorelist[0].split('--')[0]+'.pt')
    return model

## Makam-wise 2-layer LSTM training

hicaz = trainMAKAM(makamscore_dir[0]['scores'])
nihavent = trainMAKAM(makamscore_dir[1]['scores'])
ussak = trainMAKAM(makamscore_dir[2]['scores'])

Epoch 1 starts. Average MSE:
0.006928244511893923
Epoch 2 starts. Average MSE:
0.006102367428130353
Epoch 3 starts. Average MSE:
0.005750374587720355
Epoch 4 starts. Average MSE:
0.005727278195399143
Epoch 5 starts. Average MSE:
0.005713071831135281
Epoch 6 starts. Average MSE:
0.005702111993855211
Epoch 7 starts. Average MSE:
0.0056929902368233155
Epoch 8 starts. Average MSE:
0.005684863562891579
Epoch 9 starts. Average MSE:
0.005677533359715123
Epoch 10 starts. Average MSE:
0.005671108606464463
Epoch 11 starts. Average MSE:
0.0056656379085098536
Epoch 12 starts. Average MSE:
0.005659839918066617
Epoch 13 starts. Average MSE:
0.00565305516198291
Epoch 14 starts. Average MSE:
0.005646673877345821
Epoch 15 starts. Average MSE:
0.005641671425516165
Epoch 16 starts. Average MSE:
0.005638072491529734
Epoch 17 starts. Average MSE:
0.005635434394821619
Epoch 18 starts. Average MSE:
0.005633413258031596
Epoch 19 starts. Average MSE:
0.005631768142385289
Epoch 20 starts. Average MSE:
0.0056303

# Evaluation of similarity with Euclidean Distance of parameters

In [150]:
def LSTM_distance(model1, model2):

    model.load_state_dict(torch.load(model1+'.pt'))
    params1 = model.lstm.all_weights.copy()
    cpu_param1 = list()
    for param in params1[0]:
        cpu_param1.append(param.cpu().detach().numpy())

    model.load_state_dict(torch.load(model2+'.pt'))
    params2 = model.lstm.all_weights.copy()
    cpu_param2 = list()
    for param in params2[0]:
        cpu_param2.append(param.cpu().detach().numpy())
    distance = 0.0
    for p in range(len(cpu_param1)):
        distance += np.sqrt(np.sum((cpu_param1[p]-cpu_param2[p])**2))
    return distance

In [151]:
LSTM_distance('hicaz', 'nihavent')

51.03200432247304

In [152]:
LSTM_distance('hicaz', 'ussak')

79.8481377025653

In [155]:
LSTM_distance('ussak', 'nihavent')

38.686150534999015