In [1]:
# Enable IPython's autoreload extension in mode 2 so that edited modules
# (e.g. the local mricode package) are re-imported before each cell executes.
%load_ext autoreload
%autoreload 2

In [2]:
import glob
import nibabel as nib
import os
import time

import pandas as pd
import numpy as np

from mricode.utils import log_textfile
from mricode.utils import copy_colab
from mricode.utils import return_iter
from mricode.utils import return_csv

from mricode.models.SimpleCNN import SimpleCNN
from mricode.models.DenseNet import MyDenseNet

import tensorflow as tf
from tensorflow.keras.layers import Conv3D
from tensorflow import nn
from tensorflow.python.ops import nn_ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.keras.engine.base_layer import InputSpec
from tensorflow.python.keras.utils import conv_utils

tf.__version__

'2.0.0'

In [3]:
# Sanity check: ask TensorFlow whether a GPU is available before training.
tf.test.is_gpu_available()

True

In [4]:
# Where results are written and where the inputs are read from.
path_output = './output/'
path_tfrecords = '/data2/res64/down/'
path_csv = '/data2/csv/'

# Label CSV file name for each data split.
filename_res = {
    'train': 'intell_residual_train.csv',
    'val': 'intell_residual_valid.csv',
    'test': 'intell_residual_test.csv',
}
filename_final = filename_res

# Data-loading and run settings.
sample_size = 'allimages'
batch_size = 8
onlyt1 = False
modelname = 'runAllImages64_DenseNet_T1T2_'

# Architecture to train (SimpleCNN is the other imported option).
Model = MyDenseNet

# Per-modality (mean, std) constants used to standardise the input volumes.
t1_mean, t1_std = 1.3779395849814497, 3.4895845243139503
t2_mean, t2_std = 2.22435586968901, 5.07708743178319
ad_mean, ad_std = 1.3008901218593748e-05, 0.009966655860940228
fa_mean, fa_std = 0.0037552628409334037, 0.012922319568740915
md_mean, md_std = 9.827903909139596e-06, 0.009956973204022659
rd_mean, rd_std = 8.237404999587111e-06, 0.009954672598675338

In [5]:
# Create the train / validation / test batch iterators with the project helper
# return_iter, using the paths and settings defined in the configuration cell.
train_iter, val_iter, test_iter = return_iter(path_tfrecords, sample_size, batch_size, onlyt1=onlyt1)



In [6]:
if False:
  # Disabled by default: one full pass over the training iterator to estimate
  # the per-modality normalisation constants. Flip the guard to True to
  # recompute them.
  #
  # Every batch contributes its mean/std weighted by its own size and the
  # totals are divided by the number of samples seen. (Summing the unweighted
  # per-batch values and dividing by the sample count shrinks every statistic
  # by roughly the batch size.)
  # The resulting std is a sample-weighted average of per-batch stds, not the
  # exact dataset-wide standard deviation.
  # NOTE(review): the constants hard-coded in the configuration cell may have
  # been produced with the old scaling -- confirm before relying on them.
  raw_keys = ('t1', 't2')
  nan_keys = ('ad', 'fa', 'md', 'rd')  # NaNs are replaced by 0 before the statistics
  stat_keys = raw_keys + nan_keys
  mean_acc = {k: 0. for k in stat_keys}
  std_acc = {k: 0. for k in stat_keys}
  n = 0.
  for b in train_iter:
    n_in_batch = np.asarray(b['t1']).shape[0]
    for k in stat_keys:
      a = np.asarray(b[k])
      if k in nan_keys:
        # Work on a copy so the batch itself is never modified.
        a = a.copy()
        a[np.isnan(a)] = 0
      mean_acc[k] += np.mean(a) * n_in_batch
      std_acc[k] += np.std(a) * n_in_batch
    n += n_in_batch

  t1_mean = mean_acc['t1'] / n
  t1_std = std_acc['t1'] / n
  t2_mean = mean_acc['t2'] / n
  t2_std = std_acc['t2'] / n
  ad_mean = mean_acc['ad'] / n
  ad_std = std_acc['ad'] / n
  fa_mean = mean_acc['fa'] / n
  fa_std = std_acc['fa'] / n
  md_mean = mean_acc['md'] / n
  md_std = std_acc['md'] / n
  rd_mean = mean_acc['rd'] / n
  rd_std = std_acc['rd'] / n

KeyboardInterrupt: 

In [None]:
# Display the normalisation constants currently in effect.
t1_mean, t1_std, t2_mean, t2_std, ad_mean, ad_std, fa_mean, fa_std, md_mean, md_std, rd_mean, rd_std

In [None]:
# Load one label table per split plus norm_dict via the project helper
# return_csv. norm_dict is later indexed as norm_dict[<column>]['std'].
# NOTE(review): the meaning of the third positional argument (False) is defined
# in mricode.utils.return_csv and is not visible from this notebook.
train_df, val_df, test_df, norm_dict = return_csv(path_csv, filename_final, False)

In [None]:
# Inspect norm_dict as returned by return_csv.
norm_dict

In [None]:
# Categorical targets, in the order they are iterated everywhere else.
# NOTE(review): the integer values are presumably the number of classes per
# target -- confirm against the model constructors in mricode.models.
cat_cols = {
    'female': 2,
    'race.ethnicity': 5,
    'high.educ_group': 4,
    'income_group': 8,
    'married': 6,
}
# Numeric targets: every label column whose name contains '_norm'.
num_cols = [name for name in val_df.columns if '_norm' in name]

In [None]:
def calc_loss_acc(out_loss, out_acc, y_true, y_pred, cat_cols, num_cols, norm_dict):
  """Add one batch's per-column loss and accuracy totals to the running dicts.

  All quantities are accumulated as sums over samples, so that dividing the
  totals by the number of samples (as format_output does) yields per-sample
  averages for every column.

  Args:
    out_loss: dict column -> running loss total; updated in place.
    out_acc: dict column -> running accuracy total; updated in place.
    y_true: mapping column -> label tensor for the batch.
    y_pred: mapping column -> prediction tensor for the batch.
    cat_cols: mapping whose keys are the categorical column names.
    num_cols: list of numeric column names (each containing '_norm').
    norm_dict: mapping; norm_dict[<column without '_norm'>]['std'] is used to
      rescale numeric errors.

  Returns:
    The same (out_loss, out_acc) dicts.

  Per column:
    * numeric: loss is the sum of squared errors; "acc" is the sum of squared
      errors after multiplying the error by the column's std.
    * categorical: loss is the summed sparse categorical cross-entropy; "acc"
      is the number of samples whose argmax prediction equals the label.
  """
  def _accumulate(totals, key, value):
    # First occurrence initialises the entry, later ones add to it.
    if key in totals:
      totals[key] += value
    else:
      totals[key] = value

  for col in num_cols:
    col_std = norm_dict[col.replace('_norm','')]['std']
    col_true = tf.cast(y_true[col], tf.float32).numpy()
    col_pred = np.squeeze(y_pred[col].numpy())
    _accumulate(out_loss, col, np.sum(np.square(col_true-col_pred)))
    _accumulate(out_acc, col, np.sum(np.square((col_true-col_pred)*col_std)))

  # SUM reduction keeps the categorical loss on the same footing as the numeric
  # sums above. The default reduction returns a per-batch mean, which would be
  # divided by the sample count a second time downstream.
  summed_xent = tf.keras.losses.SparseCategoricalCrossentropy(
      reduction=tf.keras.losses.Reduction.SUM)
  for col in cat_cols:
    batch_loss = summed_xent(tf.squeeze(y_true[col]), tf.squeeze(y_pred[col])).numpy()
    n_correct = tf.reduce_sum(tf.dtypes.cast((y_true[col] == tf.argmax(y_pred[col], axis=-1)), tf.float32)).numpy()
    _accumulate(out_loss, col, batch_loss)
    _accumulate(out_acc, col, n_correct)
  return(out_loss, out_acc)

def format_output(out_loss, out_acc, n, cols, print_bl=False):
  """Turn accumulated totals into per-sample averages.

  Args:
    out_loss: dict column -> accumulated loss.
    out_acc: dict column -> accumulated accuracy.
    n: value every total is divided by (the sample count).
    cols: columns to report, in output order.
    print_bl: when True, print the resulting table.

  Returns:
    (loss, acc, df): the sums of the averaged losses and accuracies over
    `cols`, and a DataFrame with columns 'name', 'loss', 'acc'.
  """
  rows = []
  total_loss = 0
  total_acc = 0
  for name in cols:
    mean_loss = out_loss[name]/n
    mean_acc = out_acc[name]/n
    rows.append([name, mean_loss, mean_acc])
    total_loss += mean_loss
    total_acc += mean_acc
  summary = pd.DataFrame(rows)
  summary.columns = ['name', 'loss', 'acc']
  if print_bl:
    print(summary)
  return (total_loss, total_acc, summary)

@tf.function
def train_step(X, y, model, optimizer, cat_cols, num_cols):
  """Run one optimisation step on a batch.

  The loss is the sum of the MSE of every numeric column and the sparse
  categorical cross-entropy of every categorical column.

  Args:
    X: model input for the batch.
    y: mapping column -> labels for the batch.
    model: callable returning a mapping column -> predictions.
    optimizer: optimizer whose apply_gradients is called.
    cat_cols: mapping whose keys are the categorical column names.
    num_cols: non-empty list of numeric column names.

  Returns:
    (y, predictions, loss)
  """
  xent = tf.keras.losses.SparseCategoricalCrossentropy()
  with tf.GradientTape() as tape:
    predictions = model(X)
    # Seed the total with the first numeric column, then add the others.
    head = num_cols[0]
    loss = tf.keras.losses.MSE(tf.cast(y[head], tf.float32), tf.squeeze(predictions[head]))
    for name in num_cols[1:]:
      loss += tf.keras.losses.MSE(tf.cast(y[name], tf.float32), tf.squeeze(predictions[name]))
    for name in cat_cols:
      loss += xent(tf.squeeze(y[name]), tf.squeeze(predictions[name]))
  gradients = tape.gradient(loss, model.trainable_variables)
  # NOTE(review): this collects the *names* (strings) of the batch-norm moving
  # mean/variance variables and passes them to tf.control_dependencies --
  # confirm that names rather than tensors/ops are what is intended here.
  mean_std = [
      var.name for var in model.non_trainable_variables
      if 'batch_norm' in var.name and ('mean' in var.name or 'variance' in var.name)
  ]
  with tf.control_dependencies(mean_std):
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
  return (y, predictions, loss)

@tf.function
def test_step(X, y, model):
  """Forward pass only: return the labels together with model(X)."""
  return (y, model(X))

def epoch(data_iter, df, model, optimizer, cat_cols, num_cols, norm_dict):
  """Run one pass over `data_iter`, training when an optimizer is given.

  Args:
    data_iter: iterable of batches; each batch is indexed by 't1', 't2', 'ad',
      'fa', 'md', 'rd' and 'subjectid'.
    df: label table passed to get_labels (must contain 'subjectkey').
    model: model handed to train_step / test_step.
    optimizer: optimizer for training, or None for evaluation only.
    cat_cols: mapping whose keys are the categorical column names.
    num_cols: list of numeric column names.
    norm_dict: forwarded to calc_loss_acc.

  Returns:
    (out_loss, out_acc, n, total_time_model, total_time_dataload) where the two
    dicts hold per-column totals, n is the number of samples seen and the two
    times are seconds spent in the model and waiting for data.

  Relies on the module-level normalisation constants (t1_mean, t1_std, ...)
  and on the module-level `decoder`.
  """
  def _clean_and_scale(volume, mean, std):
    # Replace NaNs by zero, then standardise.
    volume = tf.where(tf.math.is_nan(volume), tf.zeros_like(volume), volume)
    return (volume-mean)/std

  label_cols = list(cat_cols.keys())+num_cols
  is_training = optimizer is not None
  out_loss, out_acc = {}, {}
  n = 0.
  n_batch = 0.
  total_time_dataload = 0.
  total_time_model = 0.
  start_time = time.time()
  for batch in data_iter:
    # Time elapsed since the previous batch finished counts as data loading.
    total_time_dataload += time.time() - start_time
    start_time = time.time()
    t1 = (tf.cast(batch['t1'], tf.float32)-t1_mean)/t1_std
    t2 = (batch['t2']-t2_mean)/t2_std
    # The diffusion maps are prepared but are not part of X below.
    ad = _clean_and_scale(batch['ad'], ad_mean, ad_std)
    fa = _clean_and_scale(batch['fa'], fa_mean, fa_std)
    md = _clean_and_scale(batch['md'], md_mean, md_std)
    rd = _clean_and_scale(batch['rd'], rd_mean, rd_std)
    subjectid = decoder(batch['subjectid'])
    y = get_labels(df, subjectid, label_cols)
    X = tf.concat([t1, t2], axis=4)
    if is_training:
      y_true, y_pred, loss = train_step(X, y, model, optimizer, cat_cols, num_cols)
    else:
      y_true, y_pred = test_step(X, y, model)
    out_loss, out_acc = calc_loss_acc(out_loss, out_acc, y_true, y_pred, cat_cols, num_cols, norm_dict)
    n += X.shape[0]
    n_batch += 1
    if (n_batch % 10) == 0:
      print(n_batch)
    total_time_model += time.time() - start_time
    start_time = time.time()
  return (out_loss, out_acc, n, total_time_model, total_time_dataload)

def get_labels(df, subjectid, cols = ['nihtbx_fluidcomp_uncorrected_norm']):
  """Look up label columns for a batch of subject ids.

  Args:
    df: label table with a 'subjectkey' column.
    subjectid: sequence of subject ids for the batch.
    cols: label columns to extract.

  Returns:
    dict column -> numpy array, aligned with `subjectid` through a left join
    on 'subjectkey' (ids missing from `df` yield NaN entries).
  """
  batch_ids = pd.DataFrame(subjectid)
  joined = batch_ids.merge(df, left_on=0, right_on='subjectkey', how='left')
  return {name: np.asarray(joined[name].values) for name in cols}

def best_val(df_best, df_val, df_test, cat_names=None):
  """Update the best-so-far table with the current epoch's results.

  For every row (one per target column) the "best" validation value is
  replaced when the current validation value is at least as good, and the test
  value from the same epoch is stored alongside it.

  Args:
    df_best: table with columns 'name', 'best_loss_test', 'best_acc_test',
      'best_loss_val', 'best_acc_val'.
    df_val: table with columns 'name', 'cur_loss_val', 'cur_acc_val'.
    df_test: table with columns 'name', 'cur_loss_test', 'cur_acc_test'.
    cat_names: names of the categorical targets, for which a *higher* accuracy
      value is better. For all other rows a *lower* value is better. Defaults
      to the five categorical targets used in this notebook.

  Returns:
    A new DataFrame with the same columns as `df_best`; the input is not
    modified.
  """
  if cat_names is None:
    cat_names = ['female', 'race.ethnicity', 'high.educ_group', 'income_group', 'married']
  df_best = pd.merge(df_best, df_val, how='left', left_on='name', right_on='name')
  df_best = pd.merge(df_best, df_test, how='left', left_on='name', right_on='name')

  # Loss: lower is better for every row.
  loss_improved = df_best['best_loss_val']>=df_best['cur_loss_val']
  df_best.loc[loss_improved, 'best_loss_test'] = df_best.loc[loss_improved, 'cur_loss_test']
  df_best.loc[loss_improved, 'best_loss_val'] = df_best.loc[loss_improved, 'cur_loss_val']

  # Accuracy: direction depends on the kind of target. Both comparisons use
  # the values from before any accuracy update, and each row belongs to exactly
  # one of the two groups.
  is_cat = df_best['name'].isin(list(cat_names))
  cat_improved = (df_best['best_acc_val']<=df_best['cur_acc_val'])&is_cat
  num_improved = (df_best['best_acc_val']>=df_best['cur_acc_val'])&(~is_cat)
  acc_improved = cat_improved|num_improved
  df_best.loc[acc_improved, 'best_acc_test'] = df_best.loc[acc_improved, 'cur_acc_test']
  df_best.loc[acc_improved, 'best_acc_val'] = df_best.loc[acc_improved, 'cur_acc_val']

  df_best = df_best.drop(['cur_loss_val', 'cur_acc_val', 'cur_loss_test', 'cur_acc_test'], axis=1)
  return(df_best)

In [None]:
# Training driver: trains for 10 epochs, evaluates on validation and test after
# every epoch, tracks the best validation results and writes a log and a CSV.
decoder = np.vectorize(lambda x: x.decode('UTF-8'))
template = 'Epoch {0}, Loss: {1:.3f}, Accuracy: {2:.3f}, Val Loss: {3:.3f}, Val Accuracy: {4:.3f}, Time Model: {5:.3f}, Time Data: {6:.3f}'
log_path = path_output + modelname + 'multitask_test' + '.log'
csv_path = path_output + modelname + 'multitask_test' + '.csv'
all_cols = list(cat_cols.keys())+num_cols

log_textfile(log_path, cat_cols)
log_textfile(log_path, num_cols)
# Not used in this cell; kept in case other cells reference it.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(lr = 0.001)
model = Model(cat_cols, num_cols)
df_best = None
for e in range(10):
  log_textfile(log_path, 'Epochs: ' + str(e))
  tf.keras.backend.set_learning_phase(True)
  train_out_loss, train_out_acc, n_train, time_model, time_data = epoch(train_iter, train_df, model, optimizer, cat_cols, num_cols, norm_dict)
  tf.keras.backend.set_learning_phase(False)
  val_out_loss, val_out_acc, n_val, _, _ = epoch(val_iter, val_df, model, None, cat_cols, num_cols, norm_dict)
  test_out_loss, test_out_acc, n_test, _, _ = epoch(test_iter, test_df, model, None, cat_cols, num_cols, norm_dict)
  # Each split is normalised by its OWN sample count. Reusing a single `n`
  # would divide the train and validation totals by the test-set size.
  loss, acc, _ = format_output(train_out_loss, train_out_acc, n_train, all_cols)
  val_loss, val_acc, df_val = format_output(val_out_loss, val_out_acc, n_val, all_cols, print_bl=False)
  test_loss, test_acc, df_test = format_output(test_out_loss, test_out_acc, n_test, all_cols, print_bl=False)
  df_val.columns = ['name', 'cur_loss_val', 'cur_acc_val']
  df_test.columns = ['name', 'cur_loss_test', 'cur_acc_test']
  if e == 0:
    # First epoch: the current results are by definition the best so far.
    df_best = pd.merge(df_test, df_val, how='left', left_on='name', right_on='name')
    df_best.columns = ['name', 'best_loss_test', 'best_acc_test', 'best_loss_val', 'best_acc_val']
  df_best = best_val(df_best, df_val, df_test)
  print(df_best[['name', 'best_loss_test', 'best_acc_test']])
  print(df_best[['name', 'best_loss_val', 'best_acc_val']])
  log_textfile(log_path, template.format(e, loss, acc, val_loss, val_acc, time_model, time_data))
  # Step-wise learning-rate decay. With range(10) only epoch 7 is reached;
  # 16 takes effect only if the number of epochs is increased.
  if e in [7, 16]:
    optimizer.lr = optimizer.lr/3
    log_textfile(log_path, 'Learning rate: ' + str(optimizer.lr))
  df_best.to_csv(csv_path)

In [None]:
# NOTE(review): bare undefined name -- evaluating it raises NameError. Apparently
# a deliberate stop so that "Run All" halts before the scratch cells below.
error

In [14]:
# Pull a single batch from the training iterator for the manual checks below.
batch = next(iter(train_iter))

In [17]:
# Scratch cell: rebuild the model input for the single batch fetched above,
# using the same preprocessing as epoch().
t1 = (tf.cast(batch['t1'], tf.float32)-t1_mean)/t1_std
t2 = (batch['t2']-t2_mean)/t2_std

# Diffusion maps: NaNs are zeroed first, then the map is standardised.
ad = batch['ad']
ad = (tf.where(tf.math.is_nan(ad), tf.zeros_like(ad), ad)-ad_mean)/ad_std
fa = batch['fa']
fa = (tf.where(tf.math.is_nan(fa), tf.zeros_like(fa), fa)-fa_mean)/fa_std
md = batch['md']
md = (tf.where(tf.math.is_nan(md), tf.zeros_like(md), md)-md_mean)/md_std
rd = batch['rd']
rd = (tf.where(tf.math.is_nan(rd), tf.zeros_like(rd), rd)-rd_mean)/rd_std

# Labels are not needed for these checks (no decoder / get_labels call here).
# Six-channel alternative: X = tf.concat([t1, t2, ad, fa, md, rd], axis=4)
X = tf.concat([t1, t2], axis=4)

In [19]:
# Forward pass on the scratch batch with the Keras learning phase set to
# training; the 'female' head is displayed for comparison with the next cell.
tf.keras.backend.set_learning_phase(True)
model(X)['female']

<tf.Tensor: id=104053, shape=(8, 2), dtype=float32, numpy=
array([[0.7318841 , 0.26811588],
       [0.3975592 , 0.6024408 ],
       [0.08681537, 0.91318464],
       [0.8014379 , 0.19856213],
       [0.42574984, 0.5742502 ],
       [0.85568553, 0.1443145 ],
       [0.3161926 , 0.68380743],
       [0.8359293 , 0.16407076]], dtype=float32)>

In [20]:
# Same forward pass with the learning phase set to inference. The displayed
# probabilities differ markedly from the training-phase output above.
tf.keras.backend.set_learning_phase(False)
model(X)['female']

<tf.Tensor: id=104651, shape=(8, 2), dtype=float32, numpy=
array([[0.09747502, 0.902525  ],
       [0.08380887, 0.91619116],
       [0.07291744, 0.92708254],
       [0.11647341, 0.8835266 ],
       [0.0863044 , 0.9136956 ],
       [0.11506222, 0.88493776],
       [0.08197798, 0.91802204],
       [0.10716236, 0.89283764]], dtype=float32)>

In [21]:
# Names of the non-trainable variables whose name contains 'batch_norm' and
# either 'mean' or 'variance' -- the same filter train_step applies.
mean_std = [x.name for x in model.non_trainable_variables if ('batch_norm') in x.name and ('mean' in x.name or 'variance' in x.name)]

In [13]:
# Create a fresh model instance (it has not been called on any input yet).
# NOTE(review): this cell's execution count (13) is lower than that of the cells
# above (19-21), so the notebook was run out of order; `model` here replaces
# whatever instance those cells used.
model = Model(cat_cols, num_cols)

In [14]:
# List the model's non-trainable variables (empty for the fresh instance, per the output below).
model.non_trainable_variables 

[]