In [None]:
import os,sys,signal
import math

import pickle
import numpy as np                                       # fast vectors and matrices
import matplotlib.pyplot as plt                          # plotting

sys.path.insert(0, '../')
import musicnet

from time import time

from sklearn.metrics import average_precision_score

os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID'   # see issue #152
os.environ['CUDA_VISIBLE_DEVICES']='0'

import torch
from torch.nn.functional import conv1d, mse_loss
import torch.nn.functional as F
import torch.nn as nn

# Select the compute device. `device` must always be bound because later cells
# call `model.to(device)` / `tensor.to(device)`; previously it was only defined
# when CUDA was available, so a CPU-only machine hit a NameError.
if torch.cuda.is_available():
    device = "cuda:0"
    # Make bare `torch.Tensor(...)` allocations land on the GPU as well.
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
    device = "cpu"

In [None]:
# Network Parameters
# Training schedule and dataset sizes.
epochs = 1000          # number of iterations of the outer training loop
train_size = 100000    # passed as `epoch_size` to the training MusicNet dataset
test_size = 50000      # passed as `epoch_size` to the test MusicNet dataset

# SGD optimizer settings.
lr = 2e-5
momentum = .95

# Dataset / loader options.
pitch_shift = 0        # forwarded to the training dataset only
jitter = 0.            # forwarded to the training dataset only
num_workers = 10       # DataLoader worker processes
sequence = 1           # forwarded to musicnet.MusicNet(sequence=...); meaning defined there

# lvl1 convolutions are shared between regions
m = 128                # output width of the last linear layer / number of label columns
k = 512              # lvl1 nodes (output channels of each first-level convolution)
n_fft = 4096              # lvl1 receptive field (kernel size of the first-level convolutions)
window = 16384 # audio window length handed to MusicNet; `regions` below assumes model inputs of this length
stride = 512           # hop (stride) of the first-level convolutions
batch_size = 100

# Number of positions the first-level convolution produces over one window
# (25 with the values above); the first linear layer takes regions*k inputs.
regions = 1 + (window - n_fft)//stride

def worker_init(args):
    """Per-worker initialiser handed to the DataLoader as `worker_init_fn`.

    `args` is whatever the DataLoader passes in (the worker id, per the
    PyTorch docs); it is not used.
    """
    # Workers ignore Ctrl-C so that only the parent process reacts to it.
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    # Mix the process id with the wall-clock second to get a roughly
    # distinct NumPy seed in every worker.
    seed = os.getpid() ^ int(time())
    np.random.seed(seed)
# Keyword arguments shared by the train and test DataLoaders (unpacked with **kwargs below).
kwargs = {'num_workers': num_workers, 'pin_memory': True, 'worker_init_fn': worker_init}

In [None]:
# function for returning scientific notation in a plot
def fmt(x, pos):
    """Render `x` as a LaTeX string of the form ``$a \\times 10^{b}$``.

    `x` is rounded to a single significant digit. `pos` is accepted but not
    used (presumably so the function fits matplotlib's (value, position)
    tick-formatter signature).
    """
    mantissa, _, exponent = f'{x:.0e}'.partition('e')
    # int() drops the sign padding and leading zeros, e.g. '+03' -> 3
    return fr'${mantissa} \times 10^{{{int(exponent)}}}$'

In [4]:
# Container for everything saved at the end of the run: per-epoch loss and
# average-precision histories plus the hyper-parameters that produced them.
result_dict = {
    'loss_history_train': [],
    'avgp_history_train': [],
    'loss_history_test': [],
    'avgp_history_test': [],
    'parameters': {
        'train_size': train_size,
        'test_size': test_size,
        'lr': lr,
        'pitch_shift': pitch_shift,
        'jitter': jitter,
    },
}

# Preparing Dataset

In [5]:
# Build the MusicNet train/test datasets (timed) and wrap them in DataLoaders.
start = time()
root = './data/'
# pitch_shift and jitter are passed to the training set only.
train_set = musicnet.MusicNet(
    root=root, train=True, download=True, refresh_cache=False, mmap=False,
    window=window, sequence=sequence, epoch_size=train_size,
    pitch_shift=pitch_shift, jitter=jitter,
)
test_set = musicnet.MusicNet(
    root=root, train=False, download=True, refresh_cache=False, mmap=False,
    window=window, sequence=sequence, epoch_size=test_size,
)
print("Data loaded, time used = {:2.2f} seconds".format(time()-start))

# Both loaders share the batch size and the worker settings in `kwargs`.
train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=batch_size, **kwargs)
test_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=batch_size, **kwargs)

Data loaded, time used = 27.82 seconds


# Defining Models

In [6]:
Loss = torch.nn.MSELoss()
def L(yhatvar,y):
    """Half the squared error summed over the label dimension, averaged over the rest.

    MSELoss averages over every element, so multiplying by the label width
    turns the per-element mean into a per-example sum. The width is read from
    `y` instead of being hard-coded to 128, so the scaling stays correct if the
    number of outputs changes; for 128-wide labels the result is unchanged.

    Args:
        yhatvar: model predictions, same shape as `y`.
        y: target labels; the last dimension is the label dimension.

    Returns:
        A scalar tensor holding the loss.
    """
    return Loss(yhatvar,y) * y.shape[-1]/2

In [7]:
class Model(torch.nn.Module):
    """Two linear layers on top of a learnable, spectrogram-like conv front end.

    Two Conv1d banks (named sin/cos) with k filters of length n_fft and hop
    `stride` are applied to the raw audio; their squared outputs are summed,
    log-compressed, flattened and fed through linear1 -> ReLU -> linear2.
    All weights, including the conv filters, start from small Gaussian noise.

    Relies on the module-level constants regions, k, m, n_fft and stride.
    """

    def __init__(self, avg=.9998):
        super().__init__()
        # Layers are created in this order so the module repr and parameter
        # ordering stay as they were.
        self.linear1 = torch.nn.Linear(regions*k, k, bias=False)
        self.linear2 = torch.nn.Linear(k, m, bias=False)
        self.conv_sin = torch.nn.Conv1d(1, k, n_fft, stride)
        self.conv_cos = torch.nn.Conv1d(1, k, n_fft, stride)

        # Gaussian init for every weight matrix: the two filter banks first,
        # then the linear layers (conv biases keep their default init).
        init_std = 1e-2
        for layer in (self.conv_sin, self.conv_cos, self.linear1, self.linear2):
            torch.nn.init.normal_(layer.weight, std=init_std)

        # Stored on the instance but not used anywhere in this class.
        self.avg = avg

    def forward(self,x):
        """Map a batch of audio windows `x` (indexed as batch x samples) to m outputs."""
        n_batch = x.size(0)
        audio = x[:, None, :]  # insert the single input-channel axis Conv1d expects
        # Squared responses of the two filter banks, summed: an STFT-style power map.
        power = self.conv_sin(audio).pow(2) + self.conv_cos(audio).pow(2)
        # Small offset keeps the logarithm finite where the power is zero.
        log_power = torch.log(power + 10e-8)
        hidden = self.linear1(log_power.view(n_batch, regions*k))
        return self.linear2(torch.relu(hidden))

In [8]:
# Instantiate the network and move its parameters to the selected device.
# The bare `model.to(device)` expression also makes the cell display the module layout.
model = Model()
model.to(device)

Model(
  (linear1): Linear(in_features=12800, out_features=512, bias=False)
  (linear2): Linear(in_features=512, out_features=128, bias=False)
  (conv_sin): Conv1d(1, 512, kernel_size=(4096,), stride=(512,))
  (conv_cos): Conv1d(1, 512, kernel_size=(4096,), stride=(512,))
)

In [9]:
# Train with SGD + momentum. Every epoch runs one pass over train_loader
# (with weight updates) and one pass over test_loader (evaluation only), then
# appends the mean loss and the average-precision score of each pass to
# result_dict and prints one table row. Ctrl-C stops training cleanly.
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)
# optimizer = SWA(base_opt, swa_start=0, swa_freq=1, swa_lr=0.000001)

try:
    with train_set, test_set:
        print("epoch\ttrain loss\ttest loss\ttrain avg\ttest avg\ttime\tutime")
        for e in range(epochs):
            # Buffers collecting every label / prediction of the epoch so that
            # average precision can be computed once over the whole pass.
            yground = torch.Tensor(batch_size*len(train_loader), m)
            yhat = torch.Tensor(batch_size*len(train_loader), m)
            avgp, loss_e = 0.,0
            n_seen = 0  # rows of the buffers actually filled so far
            t = time()
            for i, (x,y) in enumerate(train_loader):
                print(f"training {i}/{len(train_loader)} batches", end = '\r')
                optimizer.zero_grad()
                
                # move the batch to the compute device
                x = x.to(device, non_blocking=True)
                y = y.to(device, non_blocking=True)
                yhatvar = model(x)
                loss = L(yhatvar,y)
                loss.backward()
                loss_e += loss.item() #getting the number
                
                # Index by the real batch length so a short final batch
                # neither breaks the assignment nor leaves gaps.
                n_batch = y.shape[0]
                yground[n_seen:n_seen+n_batch] = y.detach()
                yhat[n_seen:n_seen+n_batch] = yhatvar.detach()
                n_seen += n_batch
                
                optimizer.step()
            # Score only the rows that were written (the buffers start uninitialised).
            avgp = average_precision_score(yground[:n_seen].cpu().flatten(),
                                           yhat[:n_seen].cpu().flatten())
            
            result_dict['loss_history_train'].append(loss_e/len(train_loader))
            result_dict['avgp_history_train'].append(avgp)   
            t1 = time()  # start of the evaluation pass (reported as "utime")
            avgp, loss_e = 0.,0.           
#             optimizer.swap_swa_sgd() # change to average weight
            
            # For testing
            yground = torch.Tensor(batch_size*len(test_loader), m)
            yhat = torch.Tensor(batch_size*len(test_loader), m)
            n_seen = 0
            
            # No gradients are needed for evaluation; skipping the autograd
            # graph saves memory and time without changing the numbers.
            with torch.no_grad():
                for i, (x_test,y_test) in enumerate(test_loader):
                    print(f"testing {i}/{len(test_loader)} batches", end = '\r')
                    x_test = x_test.to(device)
                    y_test = y_test.to(device)
                    yhatvar = model(x_test)
                    loss_e += L(yhatvar, y_test).item() #getting the number

                    n_batch = y_test.shape[0]
                    yground[n_seen:n_seen+n_batch] = y_test
                    yhat[n_seen:n_seen+n_batch] = yhatvar
                    n_seen += n_batch
            avgp = average_precision_score(yground[:n_seen].cpu().flatten(),
                                           yhat[:n_seen].cpu().flatten())
            result_dict['loss_history_test'].append(loss_e/len(test_loader))
            result_dict['avgp_history_test'].append(avgp)
            print('{}\t{:2f}\t{:2f}\t{:2f}\t{:2f}\t{:2.1f}\t{:2.1f}'.\
                  format(e,
                         result_dict['loss_history_train'][-1],result_dict['loss_history_test'][-1],
                         result_dict['avgp_history_train'][-1],result_dict['avgp_history_test'][-1],
                         time()-t, time()-t1))


        
except KeyboardInterrupt:
    print('Graceful Exit')
else:
    print("Finsihed")
    

epoch	train loss	test loss	train avg	test avg	time	utime
0	1.761913	1.078039	0.049511	0.067654	28.6	7.8
1	1.366435	1.072857	0.083080	0.069943	28.5	7.9
2	1.365198	1.074476	0.084765	0.071582	28.8	7.9
3	1.360542	1.069676	0.085725	0.071512	28.7	8.0
4	1.361088	1.074614	0.085546	0.073133	28.4	7.7
5	1.364837	1.074010	0.086391	0.073253	28.6	7.8
6	1.364379	1.081167	0.086950	0.072815	28.6	7.8
7	1.361679	1.069786	0.087453	0.074926	28.5	7.8
8	1.362483	1.073195	0.087209	0.075277	28.7	7.9
9	1.361500	1.076286	0.087628	0.076604	28.5	7.8
10	1.357399	1.070351	0.088404	0.078342	28.6	7.9
11	1.360554	1.071484	0.089049	0.078064	28.6	7.9
12	1.358115	1.071623	0.089568	0.079807	28.6	7.8
13	1.358138	1.064683	0.090061	0.077673	28.7	7.9
14	1.358732	1.068017	0.090356	0.081379	28.6	7.8
15	1.368332	1.069849	0.091499	0.082560	28.6	7.8
16	1.362494	1.065087	0.091122	0.082222	28.8	8.0
17	1.362759	1.067980	0.092220	0.083490	28.6	7.9
18	1.355750	1.068085	0.092439	0.083102	28.8	8.0
19	1.363081	1.071992	0.093062	0.084347	28

335	1.237732	0.966733	0.252573	0.254695	29.5	8.4
336	1.239716	0.967550	0.252097	0.257733	29.5	8.3
337	1.239255	0.968267	0.253647	0.256567	29.8	8.7
338	1.233602	0.963803	0.254154	0.257338	29.3	8.3
339	1.237344	0.967190	0.254975	0.256915	29.4	8.3
340	1.234439	0.959443	0.255727	0.260228	29.6	8.4
341	1.239926	0.966092	0.256100	0.258122	29.4	8.3
342	1.233452	0.965132	0.256473	0.260345	29.4	8.3
343	1.237377	0.962409	0.256871	0.258238	29.6	8.3
344	1.234252	0.963894	0.255949	0.265071	29.6	8.5
345	1.234053	0.964531	0.258708	0.262080	29.4	8.3
346	1.231629	0.962873	0.258523	0.266069	29.3	8.3
347	1.233190	0.962914	0.260094	0.262052	29.3	8.2
348	1.239911	0.968324	0.257850	0.260808	29.4	8.3
349	1.230226	0.961477	0.257156	0.257772	29.2	8.2
350	1.234119	0.966540	0.258339	0.263287	29.1	8.2
351	1.233831	0.968781	0.259579	0.261222	29.4	8.3
352	1.234844	0.961101	0.260832	0.261442	29.2	8.2
353	1.236487	0.961154	0.260596	0.259022	29.3	8.3
354	1.234894	0.962105	0.260856	0.263164	29.2	8.2
355	1.230756	0.96570

669	1.157969	0.911835	0.337320	0.338523	29.1	8.1
670	1.161100	0.916090	0.337254	0.336528	29.2	8.2
671	1.158904	0.916615	0.335549	0.329175	29.2	8.2
672	1.157861	0.913182	0.336402	0.334739	29.1	8.1
673	1.156761	0.915839	0.337379	0.329788	29.5	8.5
674	1.156747	0.910740	0.335318	0.335051	29.3	8.2
675	1.159354	0.911651	0.337308	0.332005	29.1	8.2
676	1.158576	0.910943	0.336734	0.334590	29.2	8.2
677	1.163706	0.913376	0.335617	0.332102	29.1	8.2
678	1.159726	0.914387	0.337275	0.331038	29.2	8.2
679	1.159045	0.913944	0.337767	0.338397	29.2	8.2
680	1.157264	0.915543	0.339192	0.333184	29.5	8.5
681	1.157328	0.917006	0.337797	0.333619	29.0	8.2
682	1.161294	0.914255	0.337332	0.332943	29.1	8.1
683	1.158329	0.913159	0.338382	0.328396	29.3	8.3
684	1.158898	0.910725	0.336293	0.334530	29.3	8.3
685	1.157818	0.910535	0.337453	0.332800	29.5	8.5
686	1.159682	0.913349	0.336824	0.330873	29.0	8.1
687	1.157363	0.913253	0.335933	0.334920	29.2	8.2
688	1.162718	0.910349	0.338755	0.333221	29.3	8.2
689	1.158268	0.90990

In [None]:
# Plot the per-epoch training history: loss on the left, average precision on
# the right, each with a train and a test curve.
fig, ax = plt.subplots(1, 2, figsize=(10,4))

ax[0].plot(result_dict['loss_history_train'])
ax[0].plot(result_dict['loss_history_test'])
ax[0].legend(['train', 'test'])
ax[0].set_title('Loss', size=20)
ax[0].set_xlabel('epoch')

# The avgp histories hold average_precision_score values, not accuracy, so the
# panel is titled accordingly. The y-axis is left to autoscale: the former
# fixed limits (0.3, 0.80) cut off most of the logged curve, which starts
# around 0.05.
ax[1].plot(result_dict['avgp_history_train'])
ax[1].plot(result_dict['avgp_history_test'])
ax[1].legend(['train', 'test'])
ax[1].set_title('Average precision', size=20)
ax[1].set_xlabel('epoch')

# Mir_Eval stats

In [12]:
# Per-recording evaluation over the test set. This cell prints only the header;
# the table rows are presumably printed inside musicnet.get_mir_accuracy — confirm.
print('AvgP\tP\tR\tAcc\tETot\tESub\tEmiss\tEfa')
# Running sums of the accuracy and total-error values returned for each
# recording; the next cell divides them by the number of recordings.
Accavg = 0
Etotavg = 0
for songid in test_set.rec_ids:
    # Predicted and ground-truth piano rolls for one recording.
    Y_pred, Y_true = musicnet.get_piano_roll(songid, test_set, model,
                                             window=window, m=m, stride=-1)
    # NOTE(review): Yhatpred is never read in this cell — get_mir_accuracy is
    # given the raw Y_pred. Check whether it thresholds internally or whether
    # Yhatpred was meant to be passed instead.
    Yhatpred = Y_pred > 0.4
    _,_,_,Acc,Etot = musicnet.get_mir_accuracy(Y_pred, Y_true, m=m)
    Accavg += Acc
    Etotavg += Etot

AvgP	P	R	Acc	ETot	ESub	Emiss	Efa
29.72	54.36	10.30	0.09	0.90	0.08	0.82	0.01
40.67	65.72	13.02	0.12	0.88	0.06	0.81	0.01
25.29	54.64	2.96	0.03	0.98	0.02	0.95	0.01
39.70	55.37	21.80	0.19	0.88	0.07	0.71	0.10
20.41	65.79	1.40	0.01	0.99	0.00	0.98	0.01
16.80	40.00	3.30	0.03	0.98	0.04	0.93	0.01
33.82	49.87	8.20	0.08	0.93	0.07	0.85	0.01
40.90	59.02	24.36	0.21	0.79	0.13	0.62	0.04
51.44	64.98	24.35	0.22	0.80	0.08	0.67	0.05
36.36	50.48	17.46	0.15	0.87	0.13	0.69	0.04


In [13]:
# Mean accuracy and mean total error over all test recordings, shown as percentages.
print('Average Accuracy: \t{:2.2f}\nAverage Error: \t\t{:2.2f}'
      .format(Accavg/len(test_set.rec_ids)*100, Etotavg/len(test_set.rec_ids)*100))

Average Accuracy: 	11.25
Average Error: 		89.98


# Saving weights and results

In [15]:
# Persist the trained weights (state_dict) and the pickled training history.
# The output directories are created first so that a missing folder cannot
# make the save fail after a long training run.
os.makedirs('./weights', exist_ok=True)
os.makedirs('./result_dict', exist_ok=True)
torch.save(model.state_dict(), './weights/three-layer-e2e-sin+cos')
with open('./result_dict/three-layer-e2e-sin+cos', 'wb') as f:
    pickle.dump(result_dict, f)