In [6]:
# Import relevant modules and setup for calling glmnet
%matplotlib inline

import time
import sys
import os
import re
from itertools import compress
from glob import glob
import pickle

import scipy, importlib, pprint, matplotlib.pyplot as plt, warnings
from scipy.io import loadmat

import numpy as np
import glmnet_python
from glmnet import glmnet; from glmnetPlot import glmnetPlot
from glmnetPrint import glmnetPrint; from glmnetCoef import glmnetCoef; from glmnetPredict import glmnetPredict
from cvglmnet import cvglmnet; from cvglmnetCoef import cvglmnetCoef
from cvglmnetPlot import cvglmnetPlot; from cvglmnetPredict import cvglmnetPredict


In [11]:
# Build the path of the probe's .npz archive under the shared data directory
# and read the two arrays stored under its 'x' and 'y' keys.
data_prefix = '/mnt/nfs/proj/in-vitro/Leonardo/cf_data'
probe = 'CF025'
probe_file = os.path.join(data_prefix, f'{probe}.npz')

# NOTE(review): for an .npz file np.load returns an archive object that stays
# open after this cell; it is kept that way in case later cells read other keys.
probe_data = np.load(probe_file)
x_to_pred, y_to_pred = probe_data['x'], probe_data['y']

In [12]:
# The fit file holds a pickled (models, ranges, analytes) triple.
# NOTE: pickle.load can execute arbitrary code, so only open trusted fit files.
model_file = '/mnt/nfs/proj/in-vitro/Leonardo/glmnet/fits/cf_alpha_1.0_2x2x2.pickle'
with open(model_file, 'rb') as f:
    models, ranges, analytes = pickle.load(f)

# Labels printed next to each RMSE value at the end of the notebook.
names = ['DA', '5HT', 'pH', 'NE']

# Run every fitted model on the probe inputs.
# y_hats is laid out as (model, row of y_to_pred, column of y_to_pred).
nx = len(models)
n_rows, n_cols = y_to_pred.shape[0], y_to_pred.shape[1]
y_hats = np.zeros((nx, n_rows, n_cols))
for ix, x in enumerate(models):
    print(f'{ix} of {nx} ({x})... ', end='')
    model = models[x]
    start_time = time.time()
    # Predict at the 'lambda_1se' setting; only index 0 of the last axis is kept.
    y_hat = cvglmnetPredict(model, newx=x_to_pred, s='lambda_1se')
    y_hats[ix] = y_hat[:, :, 0]
    elapsed = time.time() - start_time
    print(" took %s seconds" % elapsed)

# Squared difference between each model's predictions and that model's
# original interval value, one slab per entry of `analytes`.
# Each key of `models` supplies, per analyte position ia, the index x[ia]
# into ranges[ia]; analytes[ia] selects the matching column of y_hats.
n_models, n_samples = y_hats.shape[0], y_hats.shape[1]
# Size the buffer from the dimensions directly: indexing y_hats with the
# `analytes` sequence would materialize a full copy only to read its shape.
diff_y_hats = np.zeros((n_models, n_samples, len(analytes)))
for ix, x in enumerate(models):
    for ia, analyte in enumerate(analytes):
        diff_y_hats[ix, :, ia] = (y_hats[ix, :, analyte] - ranges[ia][x[ia]])**2

# For every sample, pick the model whose predictions are closest to its own
# interval values. The square root is monotonic, so it does not change the
# argmin; it only makes the error values easier to read while debugging.
model_e = np.sqrt(np.sum(diff_y_hats, axis=2))  # (model, sample)
min_idx = model_e.argmin(axis=0)                # best model index per sample

# Per-target RMSE between the selected predictions and the reference values.
# For sample i the prediction is taken from model min_idx[i]; pairing min_idx
# with the matching sample positions gathers them all in one indexing step.
sample_rows = np.arange(min_idx.shape[0])
best_y_hat = y_hats[min_idx, sample_rows, :]  # (sample, target)

# Averaging over the sample axis sizes the result from the data instead of
# assuming exactly four target columns.
rmse = np.sqrt(np.mean((best_y_hat - y_to_pred) ** 2, axis=0))

# zip stops at the shorter sequence, so only targets that have a label in
# `names` are reported.
for armse, name in zip(rmse, names):
    print('%s: %4.5f'%(name,armse), flush=True)