Set Colab to GPU Mode if you are training the models!

### Mounting Google Drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
cd 'drive/MyDrive/IIT J Summer Internship 2022/Code/Neural-Uncertainty-Representation/'

### Importing Necessary Libraries


In [3]:
import numpy as np
import pandas as pd

import tensorflow as tf

from tensorflow.keras.layers import Dense, Dropout, LSTM, TimeDistributed
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

from sklearn.preprocessing import MinMaxScaler

import matplotlib.pyplot as plt
import os

### Importing Data
We import the data of a young subject's 3D-Tensor Data of dimensions (TR, Voxel, Number of Subjects) for the 5 different ROIs as follows:
- **Prefrontal areas:** dlPFC, vlPFC, lOFC, dmPFC, mPFC, mOFC
- **Default-mode areas:** mPFC, ACC, PCC, Precuneus, mOFC
- **Sensory areas:** VC
- **Multisensory area:** pSTS
- **Some other areas:** TP, IPL, mCC, Put, PCG, Nac, INS





In [4]:
file_names = [name for name in os.listdir('/content/drive/MyDrive/IIT J Summer Internship 2022/Hitchcock/Data/YOUNG/Voxel_BOLD/Numpy') if name.endswith("npy") and ("(1)" not in name)]
file_names = [name for name in file_names if ("537" not in name)]

In [5]:
ROI_names = [file_name.replace("data_","").replace(".npy","") for file_name in file_names]

In [6]:
data_ori = list()
for file_name in file_names:
  data_ori.append(np.load('/content/drive/MyDrive/IIT J Summer Internship 2022/Hitchcock/Data/YOUNG/Voxel_BOLD/Numpy/'+file_name))

### Data Preparation

Now, here we prepare the data to be fed to the Neural Network Architechtures. We can't predict the time ahead upto which the subjects are predicting correctly so, we will take a bit round about way to predict the BOLD values ahead in time(for several choices of time points e.g. 1 time point ahead, 2 time point ahead, etc) and will compare the results for young and old subjects.

We will start a basic data preparation where we will average across all the voxels for a ROI and fit the model which predicts this average BOLD value. Later we will avoid doing an average since, all voxels in a ROI is not equally important and hence we will want to predict the BOLD values of several voxels simultaneously.

In [7]:
data = [np.mean(dat, axis=1, keepdims=True) for dat in data_ori]

Let's Start the Data Preparation for lOFC later we will iterate the same procedure for the other ROIs.

In [8]:
print(ROI_names[-1], "Average BOLD Shape:", data[-1].shape)

lOFC Average BOLD Shape: (189, 1, 111)


Below, we see the average BOLD time series plot for all the ROIs.

We have changed the data in the form (Number of Subjects, TR, Voxels) below.

In [9]:
data = [np.transpose(dat, (2,0,1)) for dat in data]

In [10]:
print(ROI_names[-1], "Average BOLD Shape:", data[-1].shape)

lOFC Average BOLD Shape: (111, 189, 1)


In [None]:
for i in range(len(data)):  
  plt.figure(figsize=(10, 8))
  plt.plot(data[i][0,:,0])
  plt.title("Average BOLD of 1st Subject for "+ROI_names[i])
  plt.xlabel("TRs")
  plt.ylabel("Average BOLD(over voxels)")
  plt.savefig("./Plots/YOUNG/AVG-BOLD/AVG-BOLD-"+ROI_names[i]+".png")

We split the dataset of 111 young subjects into 110 subjects in the training data and 1 subject in the test data.

In [12]:
train_data_unscaled = [dat[:-1] for dat in data]
test_data_unscaled = [np.expand_dims(dat[-1], axis=0) for dat in data]  

We scaled the BOLD values of each voxel in the train data between 0 and 1 for each voxel. We use the same MinMaxScaler fit on the train data on the test data.

In [13]:
scalers = list()
for i in range(len(ROI_names)):
  scalers.append(MinMaxScaler(feature_range = (0, 1)))
train_data = [sc.fit_transform(dat[:,:,0]) for sc, dat in zip(scalers, train_data_unscaled)] 
test_data = [sc.transform(dat[:,:,0]) for sc, dat in zip(scalers, test_data_unscaled)]

In [14]:
train_data = [np.expand_dims(dat, axis = -1) for dat in train_data]
test_data = [np.expand_dims(dat, axis = -1) for dat in test_data]

In [15]:
print(ROI_names[-1], "Train Data Shape: ", train_data[-1].shape)
print(ROI_names[-1], "Test Data Shape: ", test_data[-1].shape)

lOFC Train Data Shape:  (110, 189, 1)
lOFC Test Data Shape:  (1, 189, 1)


In [None]:
for i in range(len(data)):  
  plt.figure(figsize=(10, 8))
  plt.plot(train_data[i][0,:,0])
  plt.title("Average BOLD of 1st Subject for "+ROI_names[i])
  plt.xlabel("TRs")
  plt.ylabel("Average BOLD(over voxels)[Scaled between 0 and 1]")
  plt.savefig("./Plots/YOUNG/AVG-BOLD/AVG-BOLD-SCALED-"+ROI_names[i]+".png")

We now build a data generator which would produce mini-batches during the training of the neural network.

In [17]:
class KerasBatchGenerator(object):
    def __init__(self, data, num_TRs, batch_size, look_ahead=1):
        self.data = data
        self.num_TRs = num_TRs
        self.batch_size = batch_size
        self.current_idx = 0
        self.look_ahead = look_ahead

    def generate(self):
        while True:
            if self.current_idx + self.batch_size >= self.data.shape[0]:
              self.current_idx = 0
            x = self.data[self.current_idx:(self.current_idx+self.batch_size),:(-self.look_ahead),:]
            y = self.data[self.current_idx:(self.current_idx+self.batch_size),self.look_ahead:,:]
            self.current_idx = self.current_idx + self.batch_size
            yield x, y 

In [18]:
TRAIN_BATCH_SIZE = 16
TEST_BATCH_SIZE = 1
train_data_generator = [KerasBatchGenerator(dat, dat.shape[1], TRAIN_BATCH_SIZE, 1) for dat in train_data]
test_data_generator = [KerasBatchGenerator(dat, dat.shape[1], TEST_BATCH_SIZE, 1) for dat in test_data]

In [19]:
batch_x, batch_y = next(train_data_generator[-1].generate())
print(ROI_names[-1], "Train Batch X Shape: ", batch_x.shape)
print(ROI_names[-1], "Train Batch y Shape: ", batch_y.shape)

lOFC Train Batch X Shape:  (16, 188, 1)
lOFC Train Batch y Shape:  (16, 188, 1)


In [20]:
batch_x, batch_y = next(test_data_generator[-1].generate())
print(ROI_names[-1], "Test Batch X Shape: ", batch_x.shape)
print(ROI_names[-1], "Test Batch y Shape: ", batch_y.shape)

lOFC Test Batch X Shape:  (1, 188, 1)
lOFC Test Batch y Shape:  (1, 188, 1)


### Building Model

We will start off by building a LSTM Model:
- `Number of LSTM Layers = 1`
- `Number of Dense Layers = 1`
- `hidden units = 16`
- `dropout = 0.3`

In [21]:
def avg_bold_model():
  model = Sequential()
  model.add(LSTM(16, return_sequences=True))
  model.add(Dropout(0.3))
  model.add(TimeDistributed(Dense(1)))
  return model

In [22]:
sample_model = avg_bold_model()
sample_model.build(input_shape=test_data[-1].shape)
sample_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (1, 189, 16)              1152      
                                                                 
 dropout (Dropout)           (1, 189, 16)              0         
                                                                 
 time_distributed (TimeDistr  (1, 189, 1)              17        
 ibuted)                                                         
                                                                 
Total params: 1,169
Trainable params: 1,169
Non-trainable params: 0
_________________________________________________________________


In [23]:
models = [avg_bold_model() for _ in range(len(ROI_names))]

In [24]:
for i in range(len(ROI_names)):
  models[i].compile(optimizer = 'adam', loss = 'mean_squared_error')

In [25]:
EPOCHS = 100
checkpoint_filepaths = ['./Models/YOUNG/LSTM/AVG-BOLD/'+name+"/" for name in ROI_names]
model_checkpoint_callbacks = [tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor = "val_loss",
    mode='min',
    save_best_only=True) for checkpoint_filepath in checkpoint_filepaths]

In [26]:
def plot_train_test_loss(history, model_name, i):  
  plt.figure(figsize=(10,10))
  plt.plot(history.history['loss'])
  plt.plot(history.history['val_loss'])
  plt.title(model_name+' loss')
  plt.ylabel('loss')
  plt.xlabel('epochs')
  plt.legend(['train', 'val'], loc='upper left')
  plt.savefig("./Plots/YOUNG/LSTM/AVG-BOLD/"+ROI_names[i]+"/"+model_name+"-loss.png")

In [None]:
with tf.device('/device:GPU:0'):
  for i in range(len(ROI_names)):
    history = models[i].fit(
                    train_data_generator[i].generate(),
                    steps_per_epoch=train_data[i].shape[0]//TRAIN_BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_data=test_data_generator[i].generate(),
                    validation_steps=test_data[i].shape[0]//TEST_BATCH_SIZE,
                    callbacks=[model_checkpoint_callbacks[i]]
                    )
    models[i].save_weights("./Models/YOUNG/LSTM/AVG-BOLD/"+ROI_names[i]+"/model_weights-1L-16H-1D-03Drop-1Ahead-"+ROI_names[i])
    plot_train_test_loss(history, "1L-16H-1D-03Drop-1Ahead-"+ROI_names[i], i)

In [None]:
preds = [models[i].predict(test_data[i]) for i in range(len(ROI_names))]

In [None]:
for i in range(len(ROI_names)):  
  plt.figure(figsize=(10, 8))
  plt.plot(np.squeeze(test_data_unscaled[i])[1:])
  plt.plot(np.squeeze(scalers[i].inverse_transform(preds[i][:,:,0]))[1:])
  plt.ylabel("Average BOLD")
  plt.xlabel("TRs")
  plt.title("Average BOLD Prediction of 1L-16H-1D-03Drop-1Ahead for Test Subject's"+ROI_names[i])
  plt.legend(['actual', 'pred'])
  plt.savefig("./Plots/YOUNG/LSTM/AVG-BOLD/"+ROI_names[i]+"/1L-16H-1D-03Drop-1Ahead-Preds.png")

In [50]:
print("***Correlation Coefficient between predicted and actual average BOLD values for ROIs***")
for i in range(len(ROI_names)):
  print(ROI_names[i]+":", np.round(np.corrcoef(np.vstack([np.squeeze(test_data_unscaled[i])[1:],np.squeeze(scalers[i].inverse_transform(preds[i][:,:,0]))[1:]]))[0,1], 2))

***Correlation Coefficient between predicted and actual average BOLD values for ROIs***
dmPFC: 0.17
ACC: 0.38
AMY: 0.28
Cau: -0.03
dlPFC: 0.23
INS: 0.18
IPL: 0.28
mCC: 0.28
Nac: -0.05
PCC: 0.41
PCG: 0.37
Prec: 0.48
TP: 0.53
Put: 0.14
pSTS: 0.01
VC: 0.35
vlPFC: 0.12
mOFC: 0.23
mPFC: -0.04
lOFC: 0.07


In [66]:
print("***MSE Loss between predicted and actual average BOLD values for ROIs***")
for i in range(len(ROI_names)):
  print(ROI_names[i]+":", np.round(tf.keras.losses.mean_squared_error(np.squeeze(scalers[i].inverse_transform(preds[i][:,:,0]))[1:], np.squeeze(test_data_unscaled[i])[1:]).numpy(), 2))


***MSE Loss between predicted and actual average BOLD values for ROIs***
dmPFC: 3.25
ACC: 2.39
AMY: 1.92
Cau: 12.23
dlPFC: 1.72
INS: 4.16
IPL: 5.62
mCC: 2.46
Nac: 6.19
PCC: 1.6
PCG: 2.37
Prec: 2.06
TP: 5.05
Put: 1.1
pSTS: 5.99
VC: 2.99
vlPFC: 4.04
mOFC: 3.45
mPFC: 7.03
lOFC: 5.31
