In [1]:
import os,sys,signal, copy
import math

import pickle
import numpy as np                                       # fast vectors and matrices
import matplotlib.pyplot as plt                          # plotting
from scipy.fftpack import fft
from scipy.signal.windows import hann

import musicnet

from time import time

sys.path.insert(0,'lib/')
import config
import diagnosticsP3
# import base_model

from sklearn.metrics import average_precision_score

os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID'   # see issue #152
os.environ['CUDA_VISIBLE_DEVICES']='0'

import torch
from torch.nn.functional import conv1d, mse_loss
from torchcontrib.optim import SWA

%matplotlib inline

In [2]:
# Hyper-parameters shared by the dataset, the filter bank and the model below.
# lvl1 convolutions are shared between regions
m = 128              # output width of the model's linear layer (presumably one score per MIDI note -- confirm against musicnet labels)
k = 500              # frequency bins: number of sine/cosine kernels requested from create_filters
n_fft = 4096         # length (in samples) of every Fourier kernel, i.e. the analysis window size
window = 16384 # audio samples per example; passed as `window` to musicnet.MusicNet
stride = 512         # hop (in samples) between consecutive kernel positions; used as the conv1d stride
batch_size = 500     # examples per DataLoader batch
# Number of kernel positions that fit in one example: 1 + (16384 - 4096)//512 = 25.
regions = 1 + (window - n_fft)//stride

def worker_init(args):
    """Per-worker setup hook handed to the DataLoader as ``worker_init_fn``.

    ``args`` is whatever the DataLoader passes to the hook; it is not used.
    """
    # Workers ignore Ctrl-C so that only the parent process reacts to it.
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    # Mix the process id with the current second to get a roughly unique
    # NumPy seed in every worker process.
    pid = os.getpid()
    now = int(time())
    np.random.seed(pid ^ now)
# Extra keyword arguments forwarded to both DataLoaders: 20 worker processes,
# pinned host memory, and worker_init as the per-worker setup hook.
kwargs = {'num_workers': 20, 'pin_memory': True, 'worker_init_fn': worker_init}

# Dataset root directory, relative to the notebook's working directory.
root = '../'
# Training split; every example is `window` samples long.
# (pitch_shift / jitter augmentation is currently switched off.)
train_set = musicnet.MusicNet(root=root, train=True, download=True, window=window)#, pitch_shift=5, jitter=.1)
# Test split; epoch_size presumably fixes the number of windows drawn per epoch -- confirm in musicnet.
test_set = musicnet.MusicNet(root=root, train=False, window=window, epoch_size=50000)

In [3]:
# Batch both splits with the same batch size and worker settings (see kwargs).
# No shuffle/sampler argument is passed, so iteration order is whatever the dataset yields.
train_loader = torch.utils.data.DataLoader(dataset=train_set,batch_size=batch_size,**kwargs)
test_loader = torch.utils.data.DataLoader(dataset=test_set,batch_size=batch_size,**kwargs)

In [4]:
def create_filters(n_fft, freq_bins=None, low=50,high=6000, mode="stft"):
    """Build sine and cosine Fourier kernels as NumPy arrays.

    Kernel ``b`` holds ``sin`` / ``cos`` of ``2*pi*b*s/n_fft`` for sample
    index ``s = 0 .. n_fft-1``, optionally multiplied by a periodic Hann
    window. The arrays have the ``(out_channels, in_channels, length)``
    layout expected for ``conv1d`` weights.

    Parameters
    ----------
    n_fft : int
        Length of every kernel in samples.
    freq_bins : int, optional
        Number of kernels (bins ``0 .. freq_bins-1``). Defaults to
        ``n_fft//2 + 1``.
    low, high :
        Accepted for interface compatibility; currently unused.
    mode : {"stft", "fft"}
        ``"stft"`` applies a periodic Hann window, ``"fft"`` applies none.

    Returns
    -------
    (wsin, wcos) : tuple of np.ndarray
        Two ``float32`` arrays of shape ``(freq_bins, 1, n_fft)``.

    Raises
    ------
    ValueError
        If ``mode`` is neither ``"stft"`` nor ``"fft"``.
    """
    if freq_bins is None:
        freq_bins = n_fft//2+1

    if mode=="fft":
        window_mask = 1
    elif mode=="stft":
        # periodic Hann window, i.e. 0.5 - 0.5*cos(2*pi*s/n_fft)
        window_mask = hann(n_fft, sym=False)
    else:
        raise ValueError("Unknown mode, please choose either \"stft\" or \"fft\"")

    s = np.arange(0, n_fft, 1)
    bins = np.arange(freq_bins)

    # Phase of every (bin, sample) pair at once. The operand order matches the
    # scalar expression 2*pi*k*s/n_fft, and the math runs in float64 before
    # the final cast to float32.
    phase = 2*np.pi*bins[:, np.newaxis]*s[np.newaxis, :]/n_fft

    wsin = (window_mask*np.sin(phase)).astype(np.float32)[:, np.newaxis, :]
    wcos = (window_mask*np.cos(phase)).astype(np.float32)[:, np.newaxis, :]

    return wsin,wcos

def create_filtersv2(n_fft, freq_bins=None, low=50,high=6000, mode="fft"):
    """Build sine and cosine Fourier kernels as torch tensors.

    Torch counterpart of ``create_filters``: kernel ``b`` holds ``sin`` /
    ``cos`` of ``2*pi*b*s/n_fft`` for ``s = 0 .. n_fft-1``, optionally
    multiplied by a periodic Hann window.

    The phases are evaluated in float64 and only the final kernels are cast
    to the default torch dtype. Evaluating ``2*pi*k*s`` directly in float32
    rounds the large products and costs roughly 1e-4 of kernel accuracy for
    high bins.

    Parameters
    ----------
    n_fft : int
        Length of every kernel in samples.
    freq_bins : int, optional
        Number of kernels (bins ``0 .. freq_bins-1``). Defaults to
        ``n_fft//2 + 1``.
    low, high :
        Accepted for interface compatibility; currently unused.
    mode : {"fft", "stft"}
        ``"stft"`` applies a periodic Hann window, ``"fft"`` applies none.

    Returns
    -------
    (wsin, wcos) : tuple of torch.Tensor
        Two tensors of shape ``(freq_bins, 1, n_fft)`` in the default dtype.

    Raises
    ------
    ValueError
        If ``mode`` is neither ``"stft"`` nor ``"fft"``.
    """
    if freq_bins is None:
        freq_bins = n_fft//2+1

    s = torch.arange(0, n_fft, 1, dtype=torch.float64)

    if mode=="fft":
        window_mask = 1
    elif mode=="stft":
        window_mask = 0.5-0.5*torch.cos(2*math.pi*s/(n_fft)) # same as hann(n_fft, sym=False)
    else:
        raise ValueError("Unknown mode, please choose either \"stft\" or \"fft\"")

    bins = torch.arange(freq_bins, dtype=torch.float64)
    # Phase of every (bin, sample) pair, computed in float64.
    phase = 2*math.pi*bins[:, None]*s[None, :]/n_fft

    out_dtype = torch.get_default_dtype()
    wsin = (window_mask*torch.sin(phase)).to(out_dtype).unsqueeze(1)
    wcos = (window_mask*torch.cos(phase)).to(out_dtype).unsqueeze(1)

    return wsin,wcos

In [5]:
# Mean-squared error between the model output and the label vector;
# used for both the training step and the test-loss report.
criterion = torch.nn.MSELoss()

In [9]:
class Model(torch.nn.Module):
    """Log-spectrogram followed by a single linear layer.

    A fixed (non-trainable) bank of windowed sine/cosine kernels is slid over
    the raw audio with ``conv1d`` to obtain a power spectrogram. Its logarithm
    is flattened and mapped to ``m`` outputs by one linear layer.

    Relies on the notebook-level constants ``n_fft``, ``k``, ``stride``,
    ``regions`` and ``m`` and on ``create_filters``.

    The filter banks are registered as non-persistent buffers. They therefore
    follow ``model.cuda()`` / ``model.to(device)`` together with the
    parameters, while staying out of ``state_dict()`` so its keys are
    unchanged.
    """

    def __init__(self):
        super(Model, self).__init__()
        # Create filter windows
        wsin, wcos = create_filters(n_fft,k, mode="stft")
        self.register_buffer('wsin', torch.as_tensor(wsin, dtype=torch.float32), persistent=False)
        self.register_buffer('wcos', torch.as_tensor(wcos, dtype=torch.float32), persistent=False)

        # Creating Layers
        self.linear = torch.nn.Linear(regions*k, m)
        torch.nn.init.constant_(self.linear.weight, 0) # start from all-zero weights

    def forward(self,x):
        """Map a batch of audio windows to ``m`` outputs per example.

        ``x`` is indexed as ``(batch, samples)``. ``samples`` must produce
        exactly ``regions`` kernel positions, otherwise the ``view`` below
        fails.
        """
        audio = x[:,None,:]  # add the single input channel conv1d expects
        # Squared magnitude of every frequency bin at every kernel position.
        zx = conv1d(audio, self.wsin, stride=stride).pow(2) \
           + conv1d(audio, self.wcos, stride=stride).pow(2)
        # The offset keeps log() finite for silent bins. Note that 10e-8 is 1e-7.
        return self.linear(torch.log(zx + 10e-8).view(x.shape[0],regions*k))

In [10]:
# Per-epoch test metrics appended by the training cell; kept in their own
# cell so that re-running the training cell continues the same history.
loss_history = []   # mean test MSE per epoch
avgp_history = []   # test average-precision score per epoch

In [11]:
# Build the network and move it to the GPU before the optimizer is created.
model = Model()
model.cuda()

Model(
  (linear): Linear(in_features=12500, out_features=128, bias=True)
)

# Random weights

In [None]:
avg = .9998  # NOTE(review): not referenced anywhere in this cell
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001, momentum=.95)
# optimizer = SWA(base_opt, swa_start=0, swa_freq=1, swa_lr=0.000001)
try:
    # Both datasets are context managers and stay open for the whole run.
    with train_set, test_set:
        total_train_i = len(train_loader)
        total_test_i = len(test_loader)
        print("squre loss\tavg prec\ttime\t\tutime")
        for epoch in range(50):
            t = time()

            # ---- one pass over the training loader ----
            for i, (x,y) in enumerate(train_loader):
                print(f"{i}/{total_train_i} batches", end = '\r')
                optimizer.zero_grad()

                # move the batch to the GPU
                x = x.cuda()
                y = y.cuda()

                loss = criterion(model(x),y)
                loss.backward()
                optimizer.step()

            t1 = time()
            avgp, loss = 0.,0.

#             optimizer.swap_swa_sgd() # change to average weight

            # ---- evaluation on the test loader ----
            # Batches are collected in lists rather than written into a
            # preallocated, uninitialised buffer, so a short final batch
            # cannot leave garbage rows in the score. Autograd is switched
            # off because no gradients are needed here.
            ground_batches, pred_batches = [], []
            with torch.no_grad():
                for i, (x_test,y_test) in enumerate(test_loader):
                    print(f"{i}/{total_test_i} batches", end = '\r')
                    x_test = x_test.cuda()
                    y_test = y_test.cuda()
                    yhatvar = model(x_test)
                    loss += criterion(yhatvar, y_test).item() #getting the number
                    ground_batches.append(y_test.float().cpu())
                    pred_batches.append(yhatvar.float().cpu())
            yground = torch.cat(ground_batches)
            yhat = torch.cat(pred_batches)

            avgp = average_precision_score(yground.numpy().flatten(),yhat.numpy().flatten())
            loss_history.append(loss/total_test_i)
            avgp_history.append(avgp)
            # columns: mean test loss, average precision, whole-epoch seconds, evaluation seconds
            print('{:2f}\t{:2f}\t{:2f}\t{:2f}'.format(loss_history[-1],avgp_history[-1],time()-t, time()-t1))


except KeyboardInterrupt:
    print('Graceful Exit')
else:
    print("Finsihed")

squre loss	avg prec	time		utime
0.012450	0.562055	12.256832	6.137605
0.012094	0.596716	12.038667	6.253003
0.011704	0.620423	11.966404	6.193282
0.011659	0.624608	12.031651	6.275856
0.011539	0.628857	12.086549	6.250136
0.011474	0.635259	12.032470	6.288475
0.011496	0.634933	12.079718	6.256795
0.011283	0.640843	11.949706	6.242981
0.011248	0.646653	11.929277	6.249344
0.011695	0.635702	11.988232	6.244269
0.011338	0.645229	11.841899	6.230120
0.011432	0.645255	11.995857	6.246700
0.011710	0.633860	12.066398	6.359553
0.011339	0.648602	12.050474	6.354494
0.011373	0.647530	12.020749	6.214159
0.011342	0.648627	12.074949	6.194954
0.011476	0.648462	12.005205	6.232392
0.011206	0.655531	11.918997	6.269171
0.011245	0.652656	11.998214	6.291804
0.011445	0.648227	11.849743	6.202818
0.011096	0.654026	12.017773	6.303868
0.011123	0.655221	11.913939	6.132963
0.011164	0.656392	12.078338	6.254886
0.011541	0.642485	11.980838	6.213839
0.011246	0.653006	12.042090	6.327610
0.011606	0.650934	11.976974	6.242506
0.0111

# MIREX stats

In [7]:
# NOTE(review): the Model class defined in this notebook has no
# `sample_records` method, so this cell appears to come from an earlier
# model implementation -- confirm before re-running.
mse_test, Yhat, Y, mse_breakdown, avp_breakdown = model.sample_records(config.test_ids, 7500, fixed_stride=512)
# Average precision over all flattened (example, output) pairs of the test recordings.
avp_test = average_precision_score(Y.flatten(),Yhat.flatten())
print(avp_test)

0.404754726515


In [8]:
# Average the accuracy and total-error figures over the test recordings.
# NOTE(review): `visuals` is not imported anywhere in this notebook (only
# `diagnosticsP3` is) -- confirm which module provides mirex_statistics.
n_records = 3
Accavg = Etotavg = 0
for i in range(n_records):
    _,_,_,Acc,Etot = visuals.mirex_statistics(model,i,threshold=.4)
    Accavg += Acc
    Etotavg += Etot

print(Accavg/n_records, Etotavg/n_records)

AvgP	P	R	Acc	ETot	ESub	Emiss	Efa
41.71	63.06	22.18	0.20	0.80	0.11	0.67	0.02
AvgP	P	R	Acc	ETot	ESub	Emiss	Efa
26.08	45.29	10.12	0.09	0.95	0.07	0.83	0.05
AvgP	P	R	Acc	ETot	ESub	Emiss	Efa
47.68	64.24	21.16	0.19	0.83	0.07	0.71	0.04
0.158590506342 0.860359774327


# Extended test set stats

In [7]:
# NOTE(review): the Model class defined in this notebook has no
# `sample_records` method, so this cell appears to come from an earlier
# model implementation -- confirm before re-running.
mse_test, Yhat, Y, mse_breakdown, avp_breakdown = model.sample_records(config.test_ids_ext, 7500, fixed_stride=512)
# Average precision over all flattened (example, output) pairs of the extended test set.
avp_test = average_precision_score(Y.flatten(),Yhat.flatten())
print(avp_test)

0.404689218856


In [15]:
# Average the accuracy and total-error figures over the extended test recordings.
# NOTE(review): `diagnostics` is not imported anywhere in this notebook (only
# `diagnosticsP3` is) -- confirm which module provides mirex_statistics.
n_records = 10
Accavg = Etotavg = 0
for i in range(n_records):
    _,_,_,Acc,Etot = diagnostics.mirex_statistics(model,i,threshold=.4)
    Accavg += Acc
    Etotavg += Etot

print(Accavg/n_records, Etotavg/n_records)

AvgP	P	R	Acc	ETot	ESub	Emiss	Efa
41.75	63.06	22.18	0.20	0.80	0.11	0.67	0.02
AvgP	P	R	Acc	ETot	ESub	Emiss	Efa
26.16	45.09	10.12	0.09	0.95	0.07	0.83	0.05
AvgP	P	R	Acc	ETot	ESub	Emiss	Efa
47.70	63.96	21.19	0.19	0.83	0.07	0.71	0.04
AvgP	P	R	Acc	ETot	ESub	Emiss	Efa
31.72	34.74	31.58	0.20	0.90	0.38	0.31	0.22
AvgP	P	R	Acc	ETot	ESub	Emiss	Efa
44.54	54.91	30.48	0.24	0.78	0.16	0.53	0.09
AvgP	P	R	Acc	ETot	ESub	Emiss	Efa
45.55	67.25	18.61	0.17	0.83	0.08	0.74	0.01
AvgP	P	R	Acc	ETot	ESub	Emiss	Efa
40.71	54.03	29.58	0.24	0.84	0.11	0.59	0.14
AvgP	P	R	Acc	ETot	ESub	Emiss	Efa
44.91	65.45	18.29	0.17	0.85	0.06	0.75	0.03
AvgP	P	R	Acc	ETot	ESub	Emiss	Efa
47.26	64.15	16.07	0.15	0.86	0.07	0.77	0.02
AvgP	P	R	Acc	ETot	ESub	Emiss	Efa
35.24	50.46	11.62	0.10	0.90	0.10	0.78	0.01
0.174316824517 0.854800543932
