Set Colab to GPU Mode if you are training the models!

### Mounting Google Drive

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
cd 'drive/MyDrive/IIT J Summer Internship 2022/Code/Neural-Uncertainty-Representation/'

/content/drive/.shortcut-targets-by-id/1h_oF16bG32l75GPobgHe_9HgdUduLFxT/IIT J Summer Internship 2022/Code/Neural-Uncertainty-Representation


### Importing Necessary Libraries


In [4]:
import numpy as np
import pandas as pd

import tensorflow as tf

from tensorflow.keras.layers import Dense, Dropout, LSTM, TimeDistributed
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

from sklearn.preprocessing import MinMaxScaler

import matplotlib.pyplot as plt
import os

### Importing Data
We import the data of a young subject's 3D-Tensor Data of dimensions (TR, Voxel, Number of Subjects) for the 5 different ROIs as follows:
- **Prefrontal areas:** dlPFC, vlPFC, lOFC, dmPFC, mPFC, mOFC
- **Default-mode areas:** mPFC, ACC, PCC, Precuneus, mOFC
- **Sensory areas:** VC
- **Multisensory area:** pSTS
- **Some other areas:** TP, IPL, mCC, Put, PCG, Nac, INS





In [5]:
file_names = [name for name in os.listdir('/content/drive/MyDrive/IIT J Summer Internship 2022/Hitchcock/Data/YOUNG/Voxel_BOLD/Numpy') if name.endswith("npy") and ("(1)" not in name)]
file_names = [name for name in file_names if ("537" not in name)]

In [6]:
ROI_names = [file_name.replace("data_","").replace(".npy","") for file_name in file_names]

In [7]:
data_ori = list()
for file_name in file_names:
  data_ori.append(np.load('/content/drive/MyDrive/IIT J Summer Internship 2022/Hitchcock/Data/YOUNG/Voxel_BOLD/Numpy/'+file_name))

### Data Preparation

Now, here we prepare the data to be fed to the Neural Network Architechtures. We can't predict the time ahead upto which the subjects are predicting correctly so, we will take a bit round about way to predict the BOLD values ahead in time(for several choices of time points e.g. 1 time point ahead, 2 time point ahead, etc) and will compare the results for young and old subjects.

Right from the starting we decide some of the data preparation parameters, so that we can use them as we require.

In [8]:
TRAIN_BATCH_SIZE = 20
TEST_BATCH_SIZE = 10
LOOK_AHEAD = 1
NUM_TEST_SUBS = 10

We will avoid doing an average since, all voxels in a ROI is not equally important and hence we will want to predict the BOLD values of several voxels simultaneously.

In [9]:
data = data_ori

Let's Start the Data Preparation for lOFC later we will iterate the same procedure for the other ROIs.

In [10]:
print(ROI_names[-1], "BOLD Shape:", data[-1].shape)

lOFC BOLD Shape: (189, 537, 111)


Below, we see the BOLD time series plot for all the ROIs.

We have changed the data in the form (Number of Subjects, TR, Voxels) below.

In [11]:
data = [np.transpose(dat, (2,0,1)) for dat in data]

In [12]:
print(ROI_names[-1], "BOLD Shape:", data[-1].shape)

lOFC BOLD Shape: (111, 189, 537)


In [13]:
# for i in range(len(data)):  
#   plt.figure(figsize=(10, 8))
#   plt.plot(data[i][0,:,:])
#   plt.title("BOLD of 1st Subject for "+ROI_names[i]+"'s Voxels")
#   plt.xlabel("TRs")
#   plt.ylabel("BOLD")
#   plt.savefig("./Plots/YOUNG/ALL-BOLD/ALL-BOLD-"+ROI_names[i]+".png")

We split the dataset of 111 young subjects into 101 subjects in the training data and 10 subject in the test data.

In [14]:
train_data_unscaled = [dat[:-NUM_TEST_SUBS] for dat in data]
test_data_unscaled = [dat[-NUM_TEST_SUBS:] for dat in data]  

We scaled the BOLD values of each voxel in the train data between 0 and 1 for each voxel. We use the same MinMaxScaler fit on the train data on the test data.

In [15]:
scalers = dict() # Stores a list of MinMaxScalers for each ROI, where number of MinMaxScalers is equal to the number of voxels in that ROI
for i in range(len(ROI_names)):
  scalers[ROI_names[i]] = [MinMaxScaler(feature_range = (0, 1)) for _ in range(data[i].shape[-1])]

train_data = [[scalers[ROI_names[j]][i].fit_transform(train_data_unscaled[j][:,:,i]) for i in range(len(scalers[ROI_names[j]]))] for j in range(len(ROI_names))]
test_data = [[scalers[ROI_names[j]][i].transform(test_data_unscaled[j][:,:,i]) for i in range(len(scalers[ROI_names[j]]))] for j in range(len(ROI_names))]

In [16]:
train_data = [np.transpose(np.array(dat), (1,2,0)) for dat in train_data]
test_data = [np.transpose(np.array(dat), (1,2,0)) for dat in test_data]

In [17]:
print(ROI_names[-1], "Train Data Shape: ", train_data[-1].shape)
print(ROI_names[-1], "Test Data Shape: ", test_data[-1].shape)

lOFC Train Data Shape:  (101, 189, 537)
lOFC Test Data Shape:  (10, 189, 537)


In [18]:
# for i in range(len(data)):  
#   plt.figure(figsize=(10, 8))
#   plt.plot(train_data[i][0,:,:])
#   plt.title("BOLD of 1st Subject for "+ROI_names[i]+"'s Voxels")
#   plt.xlabel("TRs")
#   plt.ylabel("BOLD[Scaled between 0 and 1]")
#   plt.savefig("./Plots/YOUNG/ALL-BOLD/ALL-BOLD-SCALED-"+ROI_names[i]+".png")

We now build a data generator which would produce mini-batches during the training of the neural network.

In [19]:
class KerasBatchGenerator(object):
    def __init__(self, data, num_TRs, batch_size, look_ahead=1):
        self.data = data
        self.num_TRs = num_TRs
        self.batch_size = batch_size
        self.current_idx = 0
        self.look_ahead = look_ahead

    def generate(self):
        while True:
            if self.current_idx + self.batch_size >= self.data.shape[0]:
              self.current_idx = 0
            x = self.data[self.current_idx:(self.current_idx+self.batch_size),:(-self.look_ahead),:]
            y = self.data[self.current_idx:(self.current_idx+self.batch_size),self.look_ahead:,:]
            self.current_idx = self.current_idx + self.batch_size
            yield x, y 

In [20]:
train_data_generator = [KerasBatchGenerator(dat, dat.shape[1], TRAIN_BATCH_SIZE, LOOK_AHEAD) for dat in train_data]
test_data_generator = [KerasBatchGenerator(dat, dat.shape[1], TEST_BATCH_SIZE, LOOK_AHEAD) for dat in test_data]

In [21]:
batch_x, batch_y = next(train_data_generator[-1].generate())
print(ROI_names[-1], "Train Batch X Shape: ", batch_x.shape)
print(ROI_names[-1], "Train Batch y Shape: ", batch_y.shape)

lOFC Train Batch X Shape:  (20, 188, 537)
lOFC Train Batch y Shape:  (20, 188, 537)


In [22]:
batch_x, batch_y = next(test_data_generator[-1].generate())
print(ROI_names[-1], "Test Batch X Shape: ", batch_x.shape)
print(ROI_names[-1], "Test Batch y Shape: ", batch_y.shape)

lOFC Test Batch X Shape:  (10, 188, 537)
lOFC Test Batch y Shape:  (10, 188, 537)


### Building Model

We will start off by building a LSTM Model:
- `Number of LSTM Layers = 3`
- `Number of Dense Layers = 1`
- `hidden units = 32`
- `dropout = 0.3`

In [23]:
HIDDEN_UNITS = 32
DROPOUT_PROB = 0.3
LSTM_LAYERS = 3
DENSE_LAYERS = 1

In [24]:
def avg_bold_model(num_voxs):
  model = Sequential()
  for _ in range(LSTM_LAYERS):
    model.add(LSTM(HIDDEN_UNITS, return_sequences=True))
  model.add(Dropout(0.3))
  for _ in range(DENSE_LAYERS-1):
    model.add(TimeDistributed(Dense(HIDDEN_UNITS, activation='relu')))
  model.add(TimeDistributed(Dense(num_voxs)))
  return model

In [25]:
sample_model = avg_bold_model(batch_x.shape[-1])
sample_model.build(input_shape=batch_x.shape)
sample_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (10, 188, 32)             72960     
                                                                 
 lstm_1 (LSTM)               (10, 188, 32)             8320      
                                                                 
 lstm_2 (LSTM)               (10, 188, 32)             8320      
                                                                 
 dropout (Dropout)           (10, 188, 32)             0         
                                                                 
 time_distributed (TimeDistr  (10, 188, 537)           17721     
 ibuted)                                                         
                                                                 
Total params: 107,321
Trainable params: 107,321
Non-trainable params: 0
__________________________________________________

In [26]:
models = [avg_bold_model(train_data[i].shape[-1]) for i in range(len(ROI_names))]

#### Training Model

If you haven't already trained the models then uncomment and train your models which will be saved and will be loaded for inference later.

In [27]:
for i in range(len(ROI_names)):
  models[i].compile(optimizer = 'adam', loss = 'mean_squared_error')

In [None]:
EPOCHS = 100
checkpoint_filepaths = [f"./Models/YOUNG/LSTM/ALL-BOLD/{name}/model_weights-best-val-loss-{LSTM_LAYERS}L-{HIDDEN_UNITS}H-{DENSE_LAYERS}D-{int(DROPOUT_PROB*10)}Drop-{LOOK_AHEAD}Ahead-{name}" for name in ROI_names]
model_checkpoint_callbacks = [tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor = "val_loss",
    mode='min',
    save_best_only=True) for checkpoint_filepath in checkpoint_filepaths]

In [None]:
def plot_train_test_loss(history, model_name, i):  
  plt.figure(figsize=(10,10))
  plt.plot(history.history['loss'])
  plt.plot(history.history['val_loss'])
  plt.title(model_name+' loss')
  plt.ylabel('loss')
  plt.xlabel('epochs')
  plt.legend(['train', 'val'], loc='upper left')
  plt.savefig("./Plots/YOUNG/LSTM/ALL-BOLD/"+ROI_names[i]+"/"+model_name+"-loss.png")

In [None]:
histories = list()
with tf.device('/device:GPU:0'):
  for i in range(len(ROI_names)):
    history = models[i].fit(
                    train_data_generator[i].generate(),
                    steps_per_epoch=train_data[i].shape[0]//TRAIN_BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_data=test_data_generator[i].generate(),
                    validation_steps=test_data[i].shape[0]//TEST_BATCH_SIZE,
                    callbacks=[model_checkpoint_callbacks[i]]
                    )
    models[i].save_weights(f"./Models/YOUNG/LSTM/ALL-BOLD/{ROI_names[i]}/model_weights-{LSTM_LAYERS}L-{HIDDEN_UNITS}H-{DENSE_LAYERS}D-{int(DROPOUT_PROB*10)}Drop-{LOOK_AHEAD}Ahead-{ROI_names[i]}")
    plot_train_test_loss(history, f"{LSTM_LAYERS}L-{HIDDEN_UNITS}H-{DENSE_LAYERS}D-{int(DROPOUT_PROB*10)}Drop-{LOOK_AHEAD}Ahead-{ROI_names[i]}", i)
    histories.append(history)

In [28]:
for i in range(len(ROI_names)):
  models[i].load_weights(f"./Models/YOUNG/LSTM/ALL-BOLD/{ROI_names[i]}/model_weights-{LSTM_LAYERS}L-{HIDDEN_UNITS}H-{DENSE_LAYERS}D-{int(DROPOUT_PROB*10)}Drop-{LOOK_AHEAD}Ahead-{ROI_names[i]}")

In [None]:
preds = [models[i].predict(next(test_data_generator[i].generate())[0]) for i in range(len(ROI_names))]

In [None]:
a_TRs = np.array([56, 67, 73, 81, 134, 142, 155, 167, 174, 180])
d_TRs = np.array([67, 72, 85, 89, 145, 154, 168, 174, 180, 188])
and_TRs = np.union1d(a_TRs, d_TRs)

def highlight_and(act, pred):
  min_y_list = list()
  max_y_list = list()
  for i in range(5):
    min_y_list.append(np.min(np.concatenate([act[a_TRs[2*i]:d_TRs[2*i+1]], pred[a_TRs[2*i]:d_TRs[2*i+1]]])))
    max_y_list.append(np.max(np.concatenate([act[a_TRs[2*i]:d_TRs[2*i+1]], pred[a_TRs[2*i]:d_TRs[2*i+1]]])))
    # plt.fill_betweenx(y=[min_y_list[i], max_y_list[i]], x1=a_TRs[2*i], x2=d_TRs[2*i+1], color='gray', alpha=0.1)
    plt.fill_betweenx(y=[min_y_list[i], max_y_list[i]], x1=a_TRs[2*i], x2=a_TRs[2*i+1], color='violet', alpha=0.4)
    plt.fill_betweenx(y=[min_y_list[i], max_y_list[i]], x1=d_TRs[2*i], x2=d_TRs[2*i+1], color='greenyellow', alpha=0.4)

In [None]:
np.random.seed(43)
for i in range(len(ROI_names)):
  
  actual_test_data_output = test_data_unscaled[i][:,LOOK_AHEAD:,:]
  pred_test_data_output = np.transpose(np.array([scalers[ROI_names[i]][j].inverse_transform(np.hstack([preds[i][:,:,j], preds[i][:,:LOOK_AHEAD,j]])) for j in range(len(scalers[ROI_names[i]]))]), (1,2,0))[:,:-LOOK_AHEAD,:]
  plt.figure(figsize=(24, 18))

  for j in range(NUM_TEST_SUBS-1):
  
    ax = plt.subplot(3, 3, j + 1)
    vox_choice = np.random.randint(0, len(scalers[ROI_names[i]]), 5)
    plt.plot(np.transpose(actual_test_data_output[j,:, vox_choice]), c='b', label='actual')
    plt.plot(np.transpose(pred_test_data_output[j,:, vox_choice]), c='r', ls='--', label='pred')
    # highlight_and(actual_test_data_output[j,:], pred_test_data_output[j,:])
    plt.xticks(np.concatenate([np.array([0]), and_TRs - LOOK_AHEAD]), np.concatenate([np.array([LOOK_AHEAD]), and_TRs]))
    plt.ylabel("BOLD")
    plt.xlabel("TRs")
    plt.title(f"All BOLD Preds of {LSTM_LAYERS}L-{HIDDEN_UNITS}H-{DENSE_LAYERS}D-{int(DROPOUT_PROB*10)}Drop-{LOOK_AHEAD}Ahead for {j+1}th Test Sub's {ROI_names[i]}")
    plt.legend()
    plt.savefig(f"./Plots/YOUNG/LSTM/ALL-BOLD/{ROI_names[i]}/{LSTM_LAYERS}L-{HIDDEN_UNITS}H-{DENSE_LAYERS}D-{int(DROPOUT_PROB*10)}Drop-{LOOK_AHEAD}Ahead-Preds.png")

In [None]:
print("***Each Test Subject's Correlation Coefficient between predicted and actual BOLD averaged across each Voxel Prediction values for ROIs***")
print()
avg_r = list()
for i in range(len(ROI_names)):
  actual_test_data_output = test_data_unscaled[i][:,LOOK_AHEAD:,:]
  pred_test_data_output = np.transpose(np.array([scalers[ROI_names[i]][j].inverse_transform(np.hstack([preds[i][:,:,j], preds[i][:,:LOOK_AHEAD,j]])) for j in range(len(scalers[ROI_names[i]]))]), (1,2,0))[:,:-LOOK_AHEAD,:] 
  for j in range(NUM_TEST_SUBS):
    print(f"{ROI_names[i]} for {j+1}th sub: {np.round(np.trace(np.corrcoef(actual_test_data_output[j,:,:], pred_test_data_output[j,:,:], rowvar=False)[:actual_test_data_output.shape[-1], actual_test_data_output.shape[-1]:])/actual_test_data_output.shape[-1], 2)}")
  print()

print("***Average across test subjects Correlation Coefficient between predicted and actual BOLD averaged across each Voxel Prediction values for ROIs***")
print()
for i in range(len(ROI_names)):
  actual_test_data_output = test_data_unscaled[i][:,LOOK_AHEAD:,:]
  pred_test_data_output = np.transpose(np.array([scalers[ROI_names[i]][j].inverse_transform(np.hstack([preds[i][:,:,j], preds[i][:,:LOOK_AHEAD,j]])) for j in range(len(scalers[ROI_names[i]]))]), (1,2,0))[:,:-LOOK_AHEAD,:]
  sum_r = 0
  for j in range(NUM_TEST_SUBS):
    sum_r += np.trace(np.corrcoef(actual_test_data_output[j,:,:], pred_test_data_output[j,:,:], rowvar=False)[:actual_test_data_output.shape[-1], actual_test_data_output.shape[-1]:]/actual_test_data_output.shape[-1])
  print(f"{ROI_names[i]}: {np.round(sum_r / NUM_TEST_SUBS, 2)}")

***Each Test Subject's Correlation Coefficient between predicted and actual BOLD averaged across each Voxel Prediction values for ROIs***

dmPFC for 1th sub: 0.18
dmPFC for 2th sub: 0.36
dmPFC for 3th sub: 0.38
dmPFC for 4th sub: 0.17
dmPFC for 5th sub: 0.16
dmPFC for 6th sub: 0.28
dmPFC for 7th sub: 0.1
dmPFC for 8th sub: 0.29
dmPFC for 9th sub: 0.33
dmPFC for 10th sub: 0.06

ACC for 1th sub: 0.2
ACC for 2th sub: 0.3
ACC for 3th sub: 0.18
ACC for 4th sub: 0.21
ACC for 5th sub: 0.09
ACC for 6th sub: 0.18
ACC for 7th sub: 0.15
ACC for 8th sub: 0.39
ACC for 9th sub: 0.18
ACC for 10th sub: 0.24

AMY for 1th sub: 0.3
AMY for 2th sub: 0.26
AMY for 3th sub: 0.53
AMY for 4th sub: 0.21
AMY for 5th sub: 0.1
AMY for 6th sub: 0.41
AMY for 7th sub: 0.16
AMY for 8th sub: 0.54
AMY for 9th sub: 0.28
AMY for 10th sub: 0.08

Cau for 1th sub: 0.05
Cau for 2th sub: 0.45
Cau for 3th sub: 0.19
Cau for 4th sub: 0.07
Cau for 5th sub: 0.38
Cau for 6th sub: 0.03
Cau for 7th sub: 0.15
Cau for 8th sub: 0.19
Cau 

In [None]:
print("***Model's Final Scaled MSE(Train) Loss between predicted and actual all voxel BOLD values for ROIs for the Train Subjects***")
print()
for i in range(len(ROI_names)):
  print(ROI_names[i]+":", np.round(histories[i].history['loss'][-1], 5))
print()
print("***Model's Final MSE(Validation) Loss between predicted and actual all voxel BOLD values for ROIs for the Test Subjects***")
print()
for i in range(len(ROI_names)):
  print(ROI_names[i]+":", np.round(histories[i].history['val_loss'][-1], 5))

***Model's Final Scaled MSE(Train) Loss between predicted and actual all voxel BOLD values for ROIs for the Train Subjects***

dmPFC: 0.01375
ACC: 0.0107
AMY: 0.00751
Cau: 0.0094
dlPFC: 0.01508
INS: 0.00643
IPL: 0.01959
mCC: 0.00987
Nac: 0.00736
PCC: 0.01185
PCG: 0.02166
Prec: 0.01235
TP: 0.01268
Put: 0.00344
pSTS: 0.00735
VC: 0.03117
vlPFC: 0.01232
mOFC: 0.01682
mPFC: 0.01326
lOFC: 0.01507

***Model's Final MSE(Validation) Loss between predicted and actual all voxel BOLD values for ROIs for the Test Subjects***

dmPFC: 0.00991
ACC: 0.00974
AMY: 0.00504
Cau: 0.00729
dlPFC: 0.0123
INS: 0.00329
IPL: 0.02111
mCC: 0.00644
Nac: 0.00389
PCC: 0.01305
PCG: 0.02291
Prec: 0.01123
TP: 0.00646
Put: 0.00122
pSTS: 0.00313
VC: 0.02845
vlPFC: 0.00892
mOFC: 0.01122
mPFC: 0.0117
lOFC: 0.01095


In [None]:
print("***Model's Final MSE(Validation) Loss between predicted and actual all voxel BOLD values for ROIs for the Test Subjects***")
print()
for i in range(len(ROI_names)):
  actual_test_data_output = test_data_unscaled[i][:,LOOK_AHEAD:,:]
  pred_test_data_output = np.transpose(np.array([scalers[ROI_names[i]][j].inverse_transform(np.hstack([preds[i][:,:,j], preds[i][:,:LOOK_AHEAD,j]])) for j in range(len(scalers[ROI_names[i]]))]), (1,2,0))[:,:-LOOK_AHEAD,:]
  print(ROI_names[i]+" for Each Validation Subject:", np.mean(tf.keras.losses.mean_squared_error(actual_test_data_output, pred_test_data_output), axis=-1))
  print(ROI_names[i]+" :", np.mean(np.mean(tf.keras.losses.mean_squared_error(actual_test_data_output, pred_test_data_output), axis=-1)))

***Model's Final MSE(Validation) Loss between predicted and actual all voxel BOLD values for ROIs for the Test Subjects***

dmPFC for Each Validation Subject: [ 65.6132    81.22682   25.656574  41.125095  39.16693  119.01752
  35.74859   26.22087   61.50221   62.353447]
dmPFC : 55.763123
ACC for Each Validation Subject: [45.197052 20.267185 35.498585 19.065588 16.089527 54.38965  38.230145
 42.41623  35.562008 18.822926]
ACC : 32.55389
AMY for Each Validation Subject: [32.2807   36.721004 30.301046 23.523926 22.862757 49.269863 21.56272
 49.63495  62.678665 25.918602]
AMY : 35.475426
Cau for Each Validation Subject: [27.66919  17.459553 29.099838 45.377396 38.527626 78.99093   8.71293
 43.119606 28.839626 20.886423]
Cau : 33.86831
dlPFC for Each Validation Subject: [30.950165 26.481518 22.256805 21.346762 47.230427 44.78033  36.14129
 30.038403 32.930416 36.54608 ]
dlPFC : 32.87022
INS for Each Validation Subject: [10.546689  10.521589   9.809857   8.964262   5.0747004  4.4808664
  7.3