In [1]:
import os
import gc
import time
import numpy as np
import IPython.display as ipd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from tqdm.notebook import tqdm

os.chdir(os.path.dirname("../src/"))

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim

# Set seeds for reproducibility
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
from sparl.datasets import librispeech_loader
from sparl.models import LRClassifier
from sparl.utils import ConfigObject

# Hyperparameters

In [4]:
N_BITS = 8

params = {
    'loader_params': {
        'batch_size': 128, 
        'shuffle': False, 
        'num_workers': 0,
        'ls_root': '../data/',
        'url': 'test-clean',
        'cipic_root': '../data/CIPIC_hrtf_database/standard_hrir_database/',
        'n_mels': 128, 
        'seconds': .5,
        'n_bits': N_BITS
    },

    'model_path': '../models/last_model.pt'
}

# Data Loaders

In [5]:
loader = librispeech_loader(**params['loader_params'])

# Model

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [7]:
model = LRClassifier()
model.load_state_dict(torch.load(params['model_path']))
model = model.to(device)

In [8]:
m_loss = nn.BCEWithLogitsLoss()

# Setup Execution

In [9]:
def evaluate(model, loader, loss_fn, threshold=.5):
    model.eval()
    heldout_loss = 0
    preds = []
    targets = []
    sigmoid = nn.Sigmoid()

    with torch.no_grad():
        for idx, (melspec, target, _) in enumerate(loader):
            melspec = melspec.to(device)
            target = target.to(device)
            out = model(melspec)

            heldout_loss += loss_fn(out, target).item()*len(target)
            preds.append(sigmoid(out).detach().cpu().numpy())
            targets.append(target.detach().cpu().numpy())

    heldout_loss /= len(loader)
    
    # Compute the accuracy
    preds = np.concatenate(preds, axis=0)
    preds = np.where(preds < threshold, 0, 1)
    targets = np.concatenate(targets, axis=0)
    heldout_acc = accuracy_score(targets, preds)
    
    return heldout_loss, heldout_acc

# Evaluate on Librispeech

In [10]:
loss_list, acc_list = [], []
for _ in tqdm(range(10)):
    loss, acc = evaluate(model, loader, m_loss)
    loss_list.append(loss)
    acc_list.append(acc)

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




In [11]:
print("Librispeech loss: mean = {:.2f} | std = {:.2f}".format(np.mean(loss_list), np.std(loss_list)))
print("Librispeech acc: mean = {:.2f} | std = {:.2f}".format(np.mean(acc_list), np.std(acc_list)))

Librispeech loss: mean = 42.05 | std = 1.79
Librispeech acc: mean = 0.89 | std = 0.01
