In [1]:
import numpy as np
import pandas as pd
import os
from volume_estimation import modeling, evaluation

Using device: cuda
Using device: cuda



In [51]:
FEATURES_DIR = '/scratch/ci411/sonos_rirs/features/'
#adjust these variables for your own file system

feature_set = '081522_20k/081522_20k_prop1'

targets = ['vol']#, 'rt60']

data_path = os.path.join(FEATURES_DIR, feature_set, 'feature_df.csv')
feat_df = pd.read_csv(data_path)
log = True
normalize_targets = True

In [52]:
if log:
    for target in targets:
        feat_df[target] = np.log(feat_df[target])

if normalize_targets:
    for target in targets:
        feat_df[target] = feat_df[target]/feat_df[target].max()
        
for target in targets:
    print(feat_df[target].max())
    print(feat_df[target].min())

1.0
0.2486723526394655


In [53]:
smol_df = feat_df.sample(100)
dataloader = modeling.create_dataloader(smol_df, targets=targets, batch_size=10)

In [54]:
smol_df['vol'].max()

0.9845110150444752

In [55]:
features, labels = next(iter(dataloader))
print(f"Feature batch shape: {features.size()}")
print(f"Labels batch shape: {labels.size()}")

input_height = features.size()[2]
input_width = features.size()[3]

print(labels)

Feature batch shape: torch.Size([10, 1, 30, 1997])
Labels batch shape: torch.Size([10, 1])
tensor([[0.4552],
        [0.8452],
        [0.9149],
        [0.5460],
        [0.5784],
        [0.4743],
        [0.9149],
        [0.4310],
        [0.4951],
        [0.3008]])


In [56]:
n_out = len(targets)
multi_model = modeling.Baseline_Model((input_height, input_width), n_out=n_out)

In [57]:
pred = multi_model(features)
print(pred.size())


torch.Size([10, 1])


In [58]:
import torch

from torch.nn import Conv2d, AvgPool2d, ReLU, Dropout, Flatten, Linear, Sequential, Module, MSELoss

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

#metrics functions
def MSE(output, target, is_loss=False):
    loss = torch.mean((output - target)**2, 0, keepdim=True)
    if is_loss:
        loss = torch.sum(loss, dim=1, keepdim=False)
    return loss

def Bias(output, target):
    loss = torch.mean(output - target, 0, keepdim=True)
    return loss

def CovStep(output, target, output_mean, target_mean):
    loss = torch.mean(((output - output_mean) * (target - target_mean)), 0, keepdim=True)
    return loss

def MeanAbsLogStep(output, target, log=True):
    #convert out of log
    if log:
        vol_pred = 10**output
        vol_target = 10**target
    else:
        vol_pred = output
        vol_target = target
    loss = torch.mean(torch.abs(torch.log(torch.abs(vol_pred/vol_target))), 0, keepdim=True)
    return loss

def torch_to_numpy(tensor):
    return tensor.detach().cpu().numpy().flatten()

def compute_eval_metrics(dataloader, model, log=True, verbose=False):
    target_sum = 0
    pred_sum = 0
    n_steps = 0
    if verbose:
        print("Computing sums...")
        dataloader_iter = tqdm(dataloader)
    else:
        dataloader_iter = dataloader
    for (x,y) in dataloader_iter:        
        (x, y) = (x.to(device), y.to(device))
        pred = model(x)
        target_sum += y.cpu().numpy()
        pred_sum += pred.detach().cpu().numpy()
        n_steps += 1
        del x, y
    
    torch.cuda.empty_cache()
    
    target_mean = torch.tensor(target_sum/n_steps).to(device)
    pred_mean = torch.tensor(pred_sum/n_steps).to(device)
        
    mse = 0
    mean_error = 0
    cov = 0
    abs_log_ratio = 0
    
    var_pred = 0 #technically var * N but gets cancelled out in Pearson calculation
    var_target = 0 
    
    if verbose:
        print("Computing metrics...")
        dataloader_iter = tqdm(dataloader)
    else:
        dataloader_iter = dataloader
    for (x,y) in dataloader_iter:          
        (x, y) = (x.to(device), y.to(device))
        pred = model(x)
        
        mse += MSE(pred, y)
        mean_error += Bias(pred, y)
        cov += CovStep(pred, y, pred_mean, target_mean)
        abs_log_ratio += MeanAbsLogStep(pred, y, log=log)
        
        var_pred += MSE(pred, pred_mean)
        var_target += MSE(y, target_mean)
                    
                            
        del x, y
        
    out_dict = {}
    out_dict['mse'] = torch_to_numpy(mse / n_steps)
    out_dict['bias'] = torch_to_numpy(mean_error / n_steps)
    out_dict['pearson_cor'] = torch_to_numpy(cov/(torch.sqrt(var_pred) * torch.sqrt(var_target)))
    out_dict['mean_mult'] = torch_to_numpy(torch.exp(abs_log_ratio/n_steps))
    out_dict['var_ratio'] = torch_to_numpy(torch.sqrt(var_pred) / torch.sqrt(var_target))
    
    return out_dict

Using device: cuda


In [59]:
multi_model.to(device)
compute_eval_metrics(dataloader, multi_model, log=log)

{'mse': array([0.39782768]),
 'bias': array([-0.60430089]),
 'pearson_cor': array([0.08741394]),
 'mean_mult': array([4.02069279]),
 'var_ratio': array([0.00731228])}

In [64]:
_, model_spec, model_state = evaluation.get_model_hist_spec_state('prop5_vol_092322', '092322_vol_redo_20k')
feature_df = pd.read_csv(model_spec['data_path'])
log = True
targets = ['vol']
test_dataloader = modeling.create_dataloader(feature_df[feature_df['split']=='test'],  targets=targets)

In [65]:
features, labels = next(iter(test_dataloader))
input_height = features.size()[2]
input_width = features.size()[3]

model = modeling.Baseline_Model((input_height, input_width)).to(device)
model.load_state_dict(torch.load(model_state, map_location=device))
test_metrics = compute_eval_metrics(test_dataloader, model, log=log)

In [66]:
test_metrics

{'mse': array([17462136.68156819]),
 'bias': array([-2543.55373771]),
 'pearson_cor': array([-0.02179641]),
 'mean_mult': array([inf]),
 'var_ratio': array([5.12452341e-05])}

In [67]:
evaluation.compute_eval_metrics(test_dataloader, model, log=log)

{'mse': 17462210.372850593,
 'bias': -2543.5545107922803,
 'pearson_cor': -0.0805756348177095,
 'mean_mult': inf,
 'var_ratio': 5.324040925566078e-05}