In [11]:
# Load IPython's autoreload extension in mode 2, so imported modules (e.g. the local
# musicnet_dataset / musicnet_model files) are re-imported before each cell runs.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:
import os
import warnings
import signal
import numpy as np
import torch
import torch.nn as nn
import torch_optimizer
import matplotlib.pyplot as plt
from time import time
from musicnet_dataset import MusicNet
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
from sklearn.metrics import average_precision_score
from torch.cuda.amp import autocast, GradScaler

In [13]:
# Runtime switches: set the CUDA_LAUNCH_BLOCKING flag and silence every Python warning.
os.environ.update(CUDA_LAUNCH_BLOCKING='1')
warnings.filterwarnings(action="ignore")

# Run-level constants used by the training cell.
N_EPOCHS, BATCH_SIZE = 2000, 16
EPOCH_SIZE, EVAL_SIZE = 2000, 1000  # passed as `epoch_size` to the train / test MusicNet datasets
SMOOTH = 1e-2                       # forwarded to every model call
kwargs = dict(pin_memory=True)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [14]:
# Model dimensions: `window_size` is handed to both the MusicNet datasets and the model.
window_size = 1 << 11  # 2048
n_hidden = 4 * 48

# Optimiser settings. The base rate 0.00125 is rescaled by sqrt(96 / n_hidden).
learning_rate = np.sqrt(96 / n_hidden) * 1.25e-3
epsilon = 1e-05       # same value as the TensorFlow code
weight_decay = 1e-2   # same value as the TensorFlow code

# Plateau-scheduler settings.
lr_plateau_factor = 0.7
lr_plateau_patience = 20

In [15]:
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Currently unused: the training setup builds torch's stock ReduceLROnPlateau instead.
class CustomLRScheduler(ReduceLROnPlateau):  # like the scheduler in the tensorflow code
    """Plateau scheduler that scales every param group's learning rate by ``factor``.

    On each ``step`` the reported loss is appended to a history. A reduction fires
    when the loss recorded ``patience`` calls ago is strictly lower than every one of
    the ``patience - 1`` losses reported since; the history is then reset.

    Args:
        optimizer: optimizer whose ``param_groups`` are updated in place.
        patience: size of the loss window inspected on every ``step`` call.
        factor: multiplier applied to each learning rate when a reduction fires.
        min_lr: lower bound applied to every learning rate.
        verbose: if true, print the mean learning rate after each reduction.
    """

    def __init__(self, optimizer, patience=4, factor=0.7, min_lr=1e-10, verbose=False):
        # The base constructor must run first: it assigns its own defaults to
        # `patience` and `factor` (and, depending on the torch version, `verbose`),
        # so values set before this call would be silently overwritten.
        super(CustomLRScheduler, self).__init__(optimizer)
        self.patience = patience
        self.factor = factor
        self.min_lr = min_lr
        self.verbose = verbose
        self.loss_history = []
        # Large sentinel losses pad the history so a reduction cannot fire before
        # enough real losses have been reported.
        self.prev_loss = [1000000] * 7

    def step(self, avg_loss):
        """Record ``avg_loss`` and lower the learning rates if the loss has plateaued.

        Args:
            avg_loss: the latest (averaged) loss value to add to the history.
        """
        self.prev_loss.append(avg_loss)
        recent_best = min(self.prev_loss[-(self.patience - 1):])
        window_best = min(self.prev_loss[-self.patience:])
        # The two minima differ only when the oldest entry of the window is the
        # strict minimum, i.e. nothing reported since has matched or beaten it.
        if recent_best > window_best:
            self.prev_loss = [1000000] * 7
            lr = 0.0
            count = 0
            for param_group in self.optimizer.param_groups:
                param_group['lr'] = max(param_group['lr'] * self.factor, self.min_lr)
                lr += param_group['lr']
                count += 1
            if self.verbose:
                print(f'Reducing learning rate to:{lr/count}')


In [16]:
from musicnet_model import MusicNetModel

# Release cached CUDA memory before building a fresh model.
torch.cuda.empty_cache()
scaler = GradScaler()

# Build the network and move it to the selected device.
model = MusicNetModel(window_size, n_hidden)
# To resume from a checkpoint instead:
#model.load_state_dict(torch.load('./model.pth'),strict=False)
model.to(device)
print(
    "Count of learnable parameters: ",
    sum(param.numel() for param in model.parameters() if param.requires_grad),
)

# RAdam optimiser plus the stock plateau scheduler.
# Alternative scheduler that was tried:
#   CustomLRScheduler(optimizer, factor=lr_plateau_factor,patience=lr_plateau_patience,verbose=True)
optimizer = torch_optimizer.RAdam(
    model.parameters(),
    lr=learning_rate,
    eps=epsilon,
    weight_decay=weight_decay,
)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=lr_plateau_factor,
    patience=lr_plateau_patience,
    min_lr=5.0954e-05,
    verbose=True,
)

# Element-wise BCE-with-logits loss (no reduction); it is handed to the model on every call.
loss_fn = nn.BCEWithLogitsLoss(reduction='none')
loss_fn.to(device)

# Notebook-level counters advanced by the training cell.
step = 0
epoch = 0

Count of learnable parameters:  3204000


In [17]:
# Train / evaluate loop. `epoch` and `step` are notebook-level counters that this cell
# never resets, so re-running the cell continues counting from where it stopped.
with MusicNet("../../data", train=True, download=False, window=window_size, epoch_size=EPOCH_SIZE, pitch_shift=0) as train_dataset,\
    MusicNet("../../data", train=False, download=False, window=window_size, epoch_size=EVAL_SIZE, pitch_shift=0) as test_dataset:
    
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, drop_last=True,num_workers=12)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, drop_last=True,num_workers=4)
    # `epoch` is incremented at the top of the body, so the bound must be strict:
    # `<=` would run N_EPOCHS + 1 epochs.
    while epoch < N_EPOCHS:
        epoch+=1
        losses = []  # per-batch training losses of the current epoch only
        model.train()
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            # NOTE(review): float32 is not a reduced-precision dtype, so this context
            # probably does not enable mixed precision -- confirm the intent.
            with autocast(dtype=torch.float32):
                loss, result = model(inputs.unsqueeze(-2).to(device),targets.to(device),loss_fn,SMOOTH)
            scaler.scale(loss).backward()

            # Gradient clipping was tried here and performed very badly:
            #torch.nn.utils.clip_grad_norm_(model.parameters(), 10.0)

            scaler.step(optimizer)
            scaler.update()
            step+=1
            losses.append(loss.item())
            # Every 100 optimizer steps (counted across epochs) feed the scheduler the
            # mean of up to the last 100 losses of the current epoch.
            if step%100==0:
                avgLoss = np.mean(losses[-100:])
                scheduler.step(avgLoss)
            
        print(f"Train. Epoch {epoch}, loss: {np.mean(losses[-100:]):.3f}")
        # Evaluate on the test loader every third epoch.
        if epoch%3==0:
            all_targets = []
            all_preds = []
            model.eval()
            for inputs, targets in test_loader:
                with torch.no_grad():
                    _,result = model(inputs.unsqueeze(-2).to(device),targets.to(device),loss_fn,SMOOTH)
                    # Keep only the labels at index window_size//2 along dim 1, to
                    # compare against the model's `result`.
                    targets = targets[:, window_size//2, :].squeeze(1)
                    all_targets += list(targets.numpy())
                    all_preds += list(result.detach().cpu().numpy())

            targets_np = np.array(all_targets)
            preds_np = np.array(all_preds)
            # Score only the label columns that have at least one positive target.
            mask = targets_np.sum(axis=0) > 0
            aps = average_precision_score(targets_np[:, mask], preds_np[:, mask])
            print(f"Epoch {epoch}. APS: {aps : .2%}.")

In [13]:
# Write the model's current weights (state dict only) to ./model.pth.
torch.save(obj=model.state_dict(), f='./model.pth')

In [10]:
# Export the model to ONNX using a random dummy batch of shape (BATCH_SIZE, 1, window_size).
# The batch is placed on the notebook's `device` (where the model was moved) instead of a
# hard-coded `.cuda()`, so the export also works on CPU-only machines.
dummy_input = torch.randn(BATCH_SIZE, 1, window_size).to(device)
torch.onnx.export(model, dummy_input, 'rse_pytorch2048.onnx')