In [27]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.pylab as pylab

# Useful plotting thingies:
%matplotlib inline
plt.style.use('ggplot')
pylab.rcParams['figure.figsize'] = 16, 16


In [28]:

def make_slice_volume(l_segs, r_segs, l_D=[], r_D=[]):
    mappings = tb.open_file('/projects/francisco/data/caffe/standardized/data_mappings.h5', 'r')
    l_baseline_mask = mappings.get_node('/l_datamask')[:]
    l_volmask = mappings.get_node('/l_volmask')[:]
    r_baseline_mask = mappings.get_node('/r_datamask')[:]
    r_volmask = mappings.get_node('/r_volmask')[:]
    mappings.close()
    
    l_vols = []
    for HC_input in l_segs:
        flatvol = np.zeros(np.prod(l_volmask.shape))
        flatvol[l_baseline_mask] = HC_input
        l_vols.append(flatvol.reshape(-1, l_volmask.shape[2]).T)

    r_vols = []
    for HC_input in r_segs:
        flatvol = np.zeros(np.prod(r_volmask.shape))
        flatvol[r_baseline_mask] = HC_input
        r_vols.append(flatvol.reshape(-1, r_volmask.shape[2]).T)

    r_vols = np.array(r_vols)
    l_vols = np.array(l_vols)
    
    r_M, r_D = slicify(r_vols, r_D)
    l_M, l_D = slicify(l_vols, l_D)
    return l_M, r_M, l_D, r_D

def slicify(vol, disjoints=[]):
    xmax = vol.shape[2] - 1
    
    if disjoints == []:
        x = 0
        while x < xmax and np.sum(vol[:,:,x]) == 0:
            x += 1
            if np.sum(vol[:,:,x] > 0):
                cur_l = x
                while x < xmax and np.sum(vol[:,:,x]) > 0:
                    x += 1
                if not x == xmax:
                    disjoints.append((cur_l, x))

    M = np.zeros(shape=(vol.shape[0], len(disjoints)))
    
    for u in range(vol.shape[0]):
        for j in range(len(disjoints)):
            l, r = disjoints[j]
            M[u,j] = np.sum(vol[u,:,l:r])

    
    return M, disjoints

In [30]:
import tables as tb
from sklearn.preprocessing import Normalizer, scale

# Set up the experimental matrices:
targets = ['ADAS11_bl', 'ADAS13_bl', 'MMSE_bl']


targets_regex = '|'.join(['^{}$'.format(t) for t in targets])
#modalities = ['CT', 'L_HC', 'R_HC']
modalities = ['CT']
l_segment_regex = '^L_HC_'
r_segment_regex = '^R_HC_'
modalities_regex = '|'.join(['^{}_'.format(m) for m in modalities])

matrices = []
l_D = []
r_D = []
for split in ['train', 'valid', 'test']:
    frame = pd.read_pickle('/projects/francisco/data/ADNI/cli_ct_seg_fused_{}.pkl'.format(split))
    ct = frame.filter(regex=modalities_regex).as_matrix()
    y_cur = frame.filter(regex=targets_regex).as_matrix()
    y_cur[np.isnan(y_cur)] = np.nanmean(y_cur)
    l_segs = frame.filter(regex=l_segment_regex).as_matrix()
    r_segs = frame.filter(regex=r_segment_regex).as_matrix()
    l_M, r_M, l_D, r_D = make_slice_volume(l_segs, r_segs, l_D, r_D)
    l_vars = [l_M[:,i] for i in range(l_M.shape[1])]
    r_vars = [r_M[:,i] for i in range(r_M.shape[1])]
    vars = [y_cur] + l_vars + r_vars
    matrices.append(vars)
var_names = []
var_names.extend(['L_HC_V{}'.format(i) for i in range(l_M.shape[1])])
var_names.extend(['R_HC_V{}'.format(i) for i in range(r_M.shape[1])])
    

In [42]:
def adjusted_r2(y_hat, y, p, n):
    """
    And I quote:
    "adjusted R2 is a comparative measure of suitability of alternative nested sets of explanators"
    """
    r2 = metrics.r2_score(y_hat, y)
    adj_r2 = r2 - (1 - r2) * (p / n - p - 1)
    return r2
print l_M.shape
print r_M.shape
print var_names

(110, 33)
(110, 34)
['L_HC_V0', 'L_HC_V1', 'L_HC_V2', 'L_HC_V3', 'L_HC_V4', 'L_HC_V5', 'L_HC_V6', 'L_HC_V7', 'L_HC_V8', 'L_HC_V9', 'L_HC_V10', 'L_HC_V11', 'L_HC_V12', 'L_HC_V13', 'L_HC_V14', 'L_HC_V15', 'L_HC_V16', 'L_HC_V17', 'L_HC_V18', 'L_HC_V19', 'L_HC_V20', 'L_HC_V21', 'L_HC_V22', 'L_HC_V23', 'L_HC_V24', 'L_HC_V25', 'L_HC_V26', 'L_HC_V27', 'L_HC_V28', 'L_HC_V29', 'L_HC_V30', 'L_HC_V31', 'L_HC_V32', 'R_HC_V0', 'R_HC_V1', 'R_HC_V2', 'R_HC_V3', 'R_HC_V4', 'R_HC_V5', 'R_HC_V6', 'R_HC_V7', 'R_HC_V8', 'R_HC_V9', 'R_HC_V10', 'R_HC_V11', 'R_HC_V12', 'R_HC_V13', 'R_HC_V14', 'R_HC_V15', 'R_HC_V16', 'R_HC_V17', 'R_HC_V18', 'R_HC_V19', 'R_HC_V20', 'R_HC_V21', 'R_HC_V22', 'R_HC_V23', 'R_HC_V24', 'R_HC_V25', 'R_HC_V26', 'R_HC_V27', 'R_HC_V28', 'R_HC_V29', 'R_HC_V30', 'R_HC_V31', 'R_HC_V32', 'R_HC_V33']


In [51]:
from sklearn.linear_model import SGDRegressor, BayesianRidge, RidgeCV
from sklearn.decomposition import RandomizedPCA
from sklearn.dummy import DummyRegressor
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, RFE
from sklearn.feature_selection import f_regression
from sklearn import metrics

num_targets = len(targets)
num_models = 2 # Includes dummy model
num_variables = len(var_names)
num_metrics = 2
k_classifiers = []
k_selectors = []
stats = np.zeros(shape=(num_variables, num_metrics, num_models, num_targets))
for i in range(num_variables):
    X, y = matrices[0][i+1].reshape((-1,1)), matrices[0][0]
    X_v, y_v = matrices[1][i+1].reshape((-1,1)), matrices[1][0]
    X_t, y_t = matrices[2][i+1].reshape((-1,1)), matrices[2][0]
    
    
    cur_triple = []
    cur_sel_triple = []
    for t in range(0,len(targets)):
        
        dummy_regressor = DummyRegressor(strategy='mean')
        classifier = RidgeCV(alphas=[0.1, 1.0, 10.0], cv=None, fit_intercept=True, scoring=None, normalize=False)
        classifiers = [classifier, dummy_regressor]
        
        # Train classifiers on transformed training X
        #print X
        #print y[:,t]
    
        classifier.fit(X, y[:,t])
        dummy_regressor.fit(X, y[:,t])
        
        # Save classifiers for visualizing on later
        cur_triple.append((classifier, dummy_regressor))

        # Predict on validation data and log stats
        y_hat_v = classifier.predict(X_v)
        y_hat_vd = dummy_regressor.predict(X_v)
        stats[i,0,0,t] =  metrics.mean_squared_error(y_hat_v, y_v[:,t])
        stats[i,1,0,t] =  adjusted_r2(y_hat_v, y_v[:,t], X_v.shape[1], X_v.shape[0])
        stats[i,0,1,t] =  metrics.mean_squared_error(y_hat_vd, y_v[:,t])
        stats[i,1,1,t] =  adjusted_r2(y_hat_vd, y_v[:,t], X_v.shape[1], X_v.shape[0])
    k_selectors.append(cur_sel_triple)
    k_classifiers.append(cur_triple)



In [54]:
print stats.shape
print len(k_classifiers)


(67, 2, 2, 3)
67


In [56]:
from tabulate import tabulate

stats_table = np.zeros(shape=(len(var_names), 4, 3))
for i in range(3):
    stats_table[:,:,i] = np.vstack([stats[:, 0, 0, i],
                                    stats[:, 1, 0, i],
                                    stats[:, 0, 1, i],
                                    stats[:, 1, 1, i]]).T

print stats_table.shape
print tabulate(stats_table[:,:,0], ['MSE', 'R2', 'DMSE', 'DR2'])
# f = 0
# for v, vname in enumerate(var_names):
#     X, y = matrices[0][v+1].reshape((-1,1)), matrices[0][0]
#     X_v, y_v = matrices[1][v+1].reshape((-1,1)), matrices[1][0]
    
#     for t, tname in enumerate(targets):
        
#         regr = k_classifiers[v][t][0]
#         dummy = k_classifiers[v][t][1]
#         f += 1  
#         print stats[v,1,0,t]
#         ax=plt.subplot(len(var_names),3,f)
#         plt.title('{} vs {}'.format(vname, tname))
        
#         # Plot outputs
#         plt.scatter(X_v, y_v[:,t], marker='+', color='gray')
#         plt.plot(X_v, regr.predict(X_v), color='green', linewidth=1)
#         plt.plot(X_v, dummy.predict(X_v), color='red', linewidth=1)
#         plt.text(0.2, 0.9,'R^2={}'.format(stats[v,1,0,t]), horizontalalignment='center', verticalalignment='center', transform = ax.transAxes)

# plt.grid()    
# plt.show()



(67, 4, 3)
    MSE                 R2     DMSE    DR2
-------  -----------------  -------  -----
37.7367       -1.19592e+31  37.7299      0
37.7363        0            37.7299      0
37.735         0            37.7299      0
37.7321    -6424.75         37.7299      0
37.7312       -1.19575e+31  37.7299      0
37.7123  -109360            37.7299      0
37.5741    -2046.31         37.7299      0
37.6705    -4830.43         37.7299      0
37.7295       -1.32855e+30  37.7299      0
37.6762    -6824.6          37.7299      0
37.753    -32550.7          37.7299      0
37.6902    -4325.41         37.7299      0
37.6919    -1087.65         37.7299      0
37.8722     -896.126        37.7299      0
37.857     -1139.57         37.7299      0
37.7695    -6947.78         37.7299      0
37.7765    -3795.83         37.7299      0
37.7256  -322251            37.7299      0
37.7285       -2.05048e+06  37.7299      0
37.8016    -4833.95         37.7299      0
37.8998    -1838.61         37.7299      0
