In [1]:
import numpy as np
import nibabel as nib
import os

import matplotlib.pyplot as plt
import seaborn as sns

from pylab import rcParams

from sklearn.preprocessing import scale, StandardScaler
from sklearn.linear_model import Lasso, Ridge

import pandas as pd

from tqdm import tqdm_notebook as tqdm
from sklearn.model_selection import KFold

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error as MAE

import gc

from keras.preprocessing.image import ImageDataGenerator
from keras.applications.inception_v3 import InceptionV3
from keras.applications.vgg19 import VGG19
from keras.applications.resnet50 import ResNet50
from keras.applications.xception import Xception
from keras.models import Model
from keras import backend as K

def make_age_list(files, age_df):
    age_list = []
    for file in files:
        ID = file[17:25]
        age_list.append(df.loc[df[0] == ID][1].values[0])
    return np.array(age_list)

Using TensorFlow backend.


In [2]:
def preprocess_data(data, k, axis = 1):
    std = StandardScaler()
    if axis == 0:
        mask = np.var(data[:,k,:,:],0)
    elif axis == 1:
        mask = np.var(data[:,:,k,:],0)
    else:
        mask = np.var(data[:,:,:,k],0)
    #old_mask = 0.00001 464
    mask[mask < 0.000001] = 0
    mask = ~mask.astype(bool)
    if axis == 0:
        new_data = np.zeros((data.shape[0], 145, 121, 3))
        for i in range(3):
            for j in range(121):
                new_data[:,j,:,i] = std.fit_transform(data[:,k-10+10*i,j,:])
            new_data[:,mask,i] = 0
        new_data = new_data[:,12:133,:,:]
    elif axis == 1:
        new_data = np.zeros((data.shape[0], 121, 121, 3))
        for i in range(3):
            for j in range(121):
                new_data[:,j,:,i] = std.fit_transform(data[:,j,k-10+10*i,:])
            new_data[:,mask,i] = 0
    elif axis == 2:
        new_data = np.zeros((data.shape[0], 121, 145, 3))
        for i in range(3):
            for j in range(121):
                new_data[:,j,:,i] = std.fit_transform(data[:,j,:,k-10+10*i])
            new_data[:,mask,i] = 0
        new_data = new_data[:,:,12:133,:]
    return new_data.astype(np.float16)

In [3]:
folder_wm = 'wm_data'
folder_gm = 'gm_data'

files_wm = os.listdir(folder_wm)
files_gm = os.listdir(folder_gm)

df = pd.read_csv('original_participant_data.tsv', header=None, sep='\t')
ages = make_age_list(files_wm,df)

gc.collect()
data = np.zeros((len(files_wm), 121,145,121, 2))
for sub in range(len(files_wm)):
    ID = files_wm[sub]
    img = nib.load(folder_wm + '/'+ID)
    data[sub,...,0] = img.get_data()
for sub in range(len(files_gm)):
    ID = files_wm[sub]
    
    
    #Its a dirty hack, but its a hackathon right? There is no time for smarter parsing
    ID = ID[:4] + '1' + ID[5:]

    img = nib.load(folder_gm + '/'+ID)
    data[sub,...,1] = img.get_data()

In [4]:
new_data = []
for i in range(2):
    new_data.append(preprocess_data(data[...,i],60, 0))
    new_data.append(preprocess_data(data[...,i],72, 1))
    new_data.append(preprocess_data(data[...,i],60, 2))
data = []
gc.collect()

22

In [5]:
xception = Xception(include_top=False, weights='imagenet', 
                        input_tensor=None, input_shape=(121,121,3), pooling=None, classes=1000)

xception_model = Model(inputs=xception.input, outputs=xception.get_layer('block14_sepconv2_act').output)
inp = xception_model.input                                           # input placeholder
outputs = [xception.layers[19].output]     # all layer outputs
functor = K.function([inp]+ [K.learning_phase()], outputs ) # evaluation function

xception_features = []

for i in tqdm(range(6)):
    xception_features.append(functor([new_data[i][:,:121,:121,:], 1.]))




In [6]:
kf = KFold(10)

predicted_ages = np.zeros(645)
model = Ridge()
features = np.concatenate([xception_features[0][0], xception_features[1][0], 
                              xception_features[2][0], xception_features[3][0],
                          xception_features[4][0], xception_features[5][0]], 1)
features = features.reshape(645, np.prod(features.shape[1:]))
#     chosen = np.random.choice(features.shape[1], 128000)
#     features = features[:,chosen]
for i_train, i_test in tqdm(kf.split(features, ages)):
    model.fit(features[i_train,:], ages[i_train])
    predicted_ages[i_test] = model.predict(features[i_test,:])
print(MAE(ages, predicted_ages))

Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 1.5292773714037367e-09
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 1.6127221780237733e-09
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 1.4905051637370548e-09
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 1.4671509562802498e-09
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 1.6007871694867504e-09
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 3.1867127603391054e-08
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 1.3032250834044135e-09
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 1.5022508792483


4.65012173616


In [33]:
small_layers = []
sizes = []
for layer in xception.layers:
    if np.prod(layer.output.get_shape().as_list()[1:]) < 250000:
        small_layers.append(layer)
        sizes.append(np.prod(layer.output.get_shape().as_list()[1:]))

In [3]:
#reading white matter and age files

files = os.listdir('gm_data')
df = pd.read_csv('original_participant_data.tsv', header=None, sep='\t')
ages = make_age_list(files,df)

data = np.zeros((len(files), 121,145,121))
for sub in range(len(files)):
    img = nib.load('gm_data/'+files[sub])
    data[sub,...] = img.get_data()