In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive so that the
# project folder used by the next cell is reachable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os

# get to project's folder
# Every relative path used later in the notebook (e.g. 'data/videoclip_tr_lookup.csv')
# is resolved against this working directory, and dirPath is reused as the
# root of the results/ and models/ output folders.
dirPath = "/content/drive/MyDrive/Colab Notebooks/Final project/Experiments"
os.chdir(dirPath)

In [None]:
# Install TensorFlow Addons into the Colab runtime (version is not pinned).
# NOTE(review): no visible cell imports tensorflow_addons -- confirm it is still
# needed (e.g. by Data_extraction_transformer.py or Results.py).
!pip install tensorflow-addons

In [None]:
from Data_extraction_transformer import Get_Data
from Results import Get_Results

import collections
import logging
#import os already imported in code cell 2
import pathlib
import re
import string
import sys
import time
import math
import pickle

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay

import tensorflow_datasets as tfds
import tensorflow as tf
from tensorflow import keras
from keras import backend as K
from tensorflow.keras.layers import MultiHeadAttention
import matplotlib.pyplot as plt

In [None]:
# Seed constant. NOTE(review): no visible cell passes it to a random generator,
# so runs are presumably not seeded -- confirm.
K_SEED = 330

class args:
  '''
  Experiment configuration.

  The notebook uses the class object itself as a namespace (``args.roi``,
  ``args.K_RUNS``, ... and ``Get_Data(args)``), so the defaults below are
  class attributes. Instantiating the class creates an independent
  configuration that overrides those defaults per instance.

  input_data: path of the ROI time-series data
  roi:        number of ROIs
  net:        number of networks
  roi_name:   tag used when naming result files
  zscore:     preprocessing flag
  train_size: training parameter
  K_RUNS:     number of movie runs; defaults to the class-level value (4).
              The original constructor read an undefined global ``K_RUNS``
              and raised NameError on every instantiation.
  '''

  # data parameters
  input_data = 'data/roi_ts'
  roi = 300
  net = 7
  roi_name = 'roi'
  K_RUNS = 4
  # preprocessing
  zscore = 1
  # training parameters
  train_size = 100

  def __init__(self,input_data,roi,net,roi_name,zscore,train_size,K_RUNS=4):
    self.input_data = input_data
    self.roi = roi
    self.net = net
    self.roi_name = roi_name
    self.K_RUNS = K_RUNS
    # preprocessing
    self.zscore = zscore
    # training parameters
    self.train_size = train_size

In [None]:
#utils functions
def _get_clip_labels():
    '''
    Build the clip-name -> integer-label mapping.

    Clip names are read from 'data/videoclip_tr_lookup.csv', run by run
    (MOVIE1 .. MOVIE<args.K_RUNS>), in file order. Every clip whose name
    contains 'testretest' gets label 0; every other clip gets the next
    free label, counting up from 1 in order of appearance.

    returns: dict {clip_name: label}
    '''
    lookup = pd.read_csv('data/videoclip_tr_lookup.csv')

    # collect the clip names of each run, keeping the order of the lookup file
    ordered_clips = []
    for run_number in range(1, args.K_RUNS + 1):
        in_run = lookup['run'].str.contains('MOVIE%d' % run_number)  # MOVIEx_7T_yz
        ordered_clips.extend(lookup.loc[in_run, 'clip_name'].tolist())

    labels = {}
    next_label = 1
    for name in ordered_clips:
        if 'testretest' in name:
            labels[name] = 0
            continue
        labels[name] = next_label
        next_label += 1

    return labels


def make_batches_train(ds):
  '''
  Prepare a training dataset: cache, shuffle with a BUFFER_SIZE buffer,
  group into batches of BATCH_SIZE and prefetch.
  input: ds - object exposing the tf.data.Dataset cache/shuffle/batch/prefetch methods
  output: the transformed dataset
  '''
  cached = ds.cache()
  shuffled = cached.shuffle(BUFFER_SIZE)
  batched = shuffled.batch(BATCH_SIZE)
  return batched.prefetch(tf.data.AUTOTUNE)
  
def make_batches_test(ds):
  '''
  Prepare an evaluation dataset: cache, group into batches of BATCH_SIZE
  and prefetch. Unlike make_batches_train, the order is not shuffled.
  input: ds - object exposing the tf.data.Dataset cache/batch/prefetch methods
  output: the transformed dataset
  '''
  cached = ds.cache()
  batched = cached.batch(BATCH_SIZE)
  return batched.prefetch(tf.data.AUTOTUNE)

def _majority_label(labels, filler_label):
  '''
  Return the most frequent value in `labels`, ignoring the filler label
  whenever any other label is present.
  '''
  values, counts = np.unique(labels, return_counts=True)
  winner = np.argmax(counts)
  if values[winner] == filler_label:
    # Drop the filler's votes and pick again. If the filler is the only value
    # present, all counts are now 0 and argmax falls back to index 0, so the
    # filler itself is returned.
    counts[winner] = 0
    winner = np.argmax(counts)
  return values[winner]


def GetCmat(model, X, y, filler_label=15):
  '''
  get the confusion matrix and accuracy from the model.predict(X)

  Each sequence is reduced to a single label by majority vote over its
  entries, for both the true labels and the predictions; the confusion
  matrix and accuracy are computed on those per-sequence labels.

  inputs: model - object with a predict(X) method; the class is taken as the
                  argmax over axis 2 of its output
          X - Eager tensor of data
          y - labels, one row of labels per sequence
          filler_label - label excluded from the vote whenever another label
                  is present (default 15, the value that was previously
                  hard-coded)
  outputs: cm- confusion matrix, acc-accuracy
  '''
  y_hat = np.argmax(model.predict(X), axis=2)

  y_overtime = []
  y_hat_overtime = []
  # zip keeps the original pairing (and truncation to the shorter of the two)
  for true_row, pred_row in zip(y, y_hat):
    y_overtime.append(_majority_label(true_row, filler_label))
    y_hat_overtime.append(_majority_label(pred_row, filler_label))

  cm = confusion_matrix(y_overtime,y_hat_overtime)
  acc = accuracy_score(y_overtime,y_hat_overtime)
  return cm, acc

def ShuffleAndOffset(y):
  '''
  Build two altered copies of the labels.
  input: y: the ordered labels (assumed 2-D, one row per sequence -- TODO confirm)
  output: y_shuffle: y shuffled with tf.random.shuffle
          y_offset: labels cyclically offset by 2
  '''
  y_shuffle = tf.random.shuffle(y)

  # Offset every positive label by 2, then wrap the overflow:
  # 1->3, 2->4, ..., 13->15->1, 14->16->2.
  shifted = np.array(y)
  shifted[shifted > 0] += 2
  shifted[shifted == 15] = 1
  shifted[shifted == 16] = 2
  # 17 is the shifted filler label; it goes back to the last label, 15
  shifted[shifted == 17] = 15

  y_offset = tf.convert_to_tensor(shifted)
  return y_shuffle, y_offset

def lr_scheduler(epoch, lr, warmup_epochs=13, decay_epochs=100, initial_lr=1e-6, base_lr=1e-4, min_lr=5e-5):
    '''
    Learning-rate schedule with linear warm-up followed by linear decay.

    epoch:         epoch index (the schedule assumes it starts at 0)
    lr:            current learning rate; accepted to match the
                   (epoch, lr) callback signature but not used
    warmup_epochs: epochs over which the rate rises from initial_lr to base_lr;
                   0 disables the warm-up (epoch 0 then returns base_lr
                   instead of raising ZeroDivisionError)
    decay_epochs:  epochs over which the rate falls from base_lr to min_lr
    returns:       the learning rate for `epoch`; min_lr once the decay is over
    '''
    if epoch <= warmup_epochs:
        # guard the division so that warmup_epochs=0 is a valid setting
        pct = epoch / warmup_epochs if warmup_epochs else 1.0
        return ((base_lr - initial_lr) * pct) + initial_lr

    # here epoch > warmup_epochs, so the branch can only be taken when decay_epochs > 0
    if epoch < warmup_epochs + decay_epochs:
        pct = 1 - ((epoch - warmup_epochs) / decay_epochs)
        return ((base_lr - min_lr) * pct) + min_lr

    return min_lr

def printLoop(net, head, layer):
  '''
  Print a seven-line banner announcing the configuration about to be trained:
  three separator lines, one line naming the network, the number of heads and
  the number of layers, then three more separator lines.
  '''
  rule = '-----------------------------------------------------------------------------------------------------'
  headline = f'---------------------------NET: {net}  number of heads: {head} number of layers: {layer}--------------------------'
  for line in [rule] * 3 + [headline] + [rule] * 3:
    print(line)

In [None]:
# Build the label -> clip-name lookup table.

clip_y = _get_clip_labels()
k_class = np.unique(list(clip_y.values())).size
print('number of classes = %d' %k_class)

# index = class label; label 0 is shared by every 'testretest' clip
clip_names = np.zeros(k_class).astype(str)
clip_names[0] = 'testretest'
for name, label in clip_y.items():
    if label == 0:
        continue
    clip_names[label] = name

In [None]:
# Get the organized data from the Get_Data function in Data_extraction_transformer.py.
# NOTE(review): the shapes of the returned splits are not visible here; later cells
# slice X_* along their third axis, so they are presumably (samples, time, ROIs) -- confirm.
X_train, train_len, y_train, X_val, val_len, y_val, X_test, test_len, y_test, train_list, test_list, clip_time = Get_Data(args)

In [None]:
def GetNetwork(X_train, X_val, X_test,startLH,endLH,startRH,endRH):
  '''
  Restrict each data split to a single brain network.
  For every split, the features startLH:endLH (left hemisphere) and
  startRH:endRH (right hemisphere) of the last of three axes are kept and
  concatenated, left hemisphere first.
  inputs: X_train,X_val,X_test: Eager tensor with all brain networks
          startLH,endLH,startRH,endRH: slice bounds of the relevant brain network
  outputs: X_train_end,X_val_end,X_test_end: Eager tensor with relevant brain network
  '''
  def _both_hemispheres(X):
    left = X[:,:,startLH:endLH]
    right = X[:,:,startRH:endRH]
    return tf.concat([left, right], axis=2)

  return _both_hemispheres(X_train), _both_hemispheres(X_val), _both_hemispheres(X_test)

# Per-network configuration. For the six single networks, startLH/endLH and
# startRH/endRH are the slice bounds (end exclusive) handed to GetNetwork, and
# the 'train'/'val'/'test' placeholders are filled by the loop below.
# The three 'full*' entries carry no bounds and reference the unsliced splits.
# Key order matters: later cells iterate over this dict in insertion order.
# NOTE(review): the left-hemisphere slices end at 150 (exclusive) and the
# right-hemisphere slices start at 151, so feature index 150 belongs to no
# network. This may be a 1-based/0-based mix-up in 'vis' startRH -- confirm
# against the parcellation before changing it, since it alters the inputs.
networksDict = {'vis':{'startLH':0,'endLH':24,'startRH':151,'endRH':174,'train':[], 'val':[], 'test':[]},
                'SomMot':{'startLH':24,'endLH':53,'startRH':174,'endRH':201,'train':[], 'val':[], 'test':[]},
                'Attn':{'startLH':53,'endLH':85,'startRH':201,'endRH':237,'train':[], 'val':[], 'test':[]},
                'limbic':{'startLH':85,'endLH':95,'startRH':237,'endRH':247,'train':[], 'val':[], 'test':[]},
                'Cont':{'startLH':95,'endLH':112,'startRH':247,'endRH':270,'train':[], 'val':[], 'test':[]},
                'DMN':{'startLH':112,'endLH':150,'startRH':270,'endRH':300,'train':[], 'val':[], 'test':[]},
                'full':{'train':X_train, 'val':X_val, 'test':X_test},
                'fullShuffled':{'train':X_train, 'val':X_val, 'test':X_test},
                'fullOffset':{'train':X_train, 'val':X_val, 'test':X_test}
                }

# set to True to print the shape of every sliced split
show_shape = False

for net in networksDict:
  # the 'full*' entries already hold their data and have no slice bounds
  if net == 'full' or net == 'fullShuffled' or net == 'fullOffset': continue
  networksDict[net]['train'], networksDict[net]['val'], networksDict[net]['test'] = GetNetwork(X_train, X_val, X_test, networksDict[net]['startLH'], networksDict[net]['endLH'], networksDict[net]['startRH'], networksDict[net]['endRH'])
  if show_shape:
    print(net + ':')
    print('train: '+ str(networksDict[net]['train'].shape))
    print('val: '+ str(networksDict[net]['val'].shape))
    print('test: '+ str(networksDict[net]['test'].shape))

Encoder build

In [None]:
# set model as sub class of keras.Model

class AttentionBlock(keras.Model):
    '''
    One encoder block: causally masked multi-head self-attention followed by a
    position-wise feed-forward part, each followed by dropout, a residual
    addition and layer normalization.

    num_heads: number of attention heads
    head_size: key dimension of each head (passed as key_dim)
    ff_dim:    filters of the first feed-forward convolution (defaults to head_size)
    dropout:   dropout rate used inside the attention layer and after both sub-layers
    '''
    def __init__(self, name='AttentionBlock', num_heads=2, head_size=128, ff_dim=None, dropout=0, **kwargs):
        super().__init__(name=name, **kwargs)

        if ff_dim is None:
            ff_dim = head_size

        self.attention = MultiHeadAttention(num_heads=num_heads, key_dim=head_size, dropout=dropout)
        self.attention_dropout = keras.layers.Dropout(dropout)
        self.attention_norm = keras.layers.LayerNormalization(epsilon=1e-6)

        # kernel_size=1 convolutions: each position is transformed on its own
        self.ff_conv1 = keras.layers.Conv1D(filters=ff_dim, kernel_size=1, activation='relu')
        # self.ff_conv2 is created in build(), once the input feature size is known
        self.ff_dropout = keras.layers.Dropout(dropout)
        self.ff_norm = keras.layers.LayerNormalization(epsilon=1e-6)
        # NOTE(review): masking support is declared, but call() takes no mask and
        # the padding-mask code in it is commented out.
        self.supports_masking = True

    def build(self, input_shape):
        '''Create the second feed-forward convolution, which projects back to the input feature size.'''
        # filters = input feature size, so the result can be added to `inputs` in call()
        self.ff_conv2 = keras.layers.Conv1D(filters=input_shape[-1], kernel_size=1) 

    def call(self, inputs):
        '''
        Run the block on `inputs`.
        returns: (x, attention_scores) - the block output and the attention
                 scores returned by the MultiHeadAttention layer
        '''

        causal_mask = self.get_causal_attention_mask(inputs)

        # Disabled padding-mask handling, kept for reference (call() has no `mask` argument):
        #if mask is not None:
        #    padding_mask = tf.cast(mask[:, :, tf.newaxis], dtype=tf.int32)
        #    combined_mask = tf.cast(mask[:, tf.newaxis, :], dtype=tf.int32)
        #    combined_mask = tf.minimum(combined_mask, causal_mask)

        # self-attention: `inputs` is both query and value
        x, attention_scores = self.attention(inputs, inputs, attention_mask=causal_mask, return_attention_scores=True)
        x = self.attention_dropout(x)
        x = self.attention_norm(inputs + x)

        x = self.ff_conv1(x)
        x = self.ff_conv2(x)
        x = self.ff_dropout(x)

        # NOTE(review): this residual adds the block's original `inputs`, not the
        # normalized attention output from above. Confirm this is intended;
        # changing it would alter the behavior of already trained models.
        x = self.ff_norm(inputs + x)
        return x, attention_scores

    def get_causal_attention_mask(self, inputs):
      '''
      Build an int32 mask of shape (batch_size, sequence_length, sequence_length)
      whose entry [b, i, j] is 1 when i >= j and 0 otherwise, i.e. each
      position is paired only with itself and earlier positions.
      batch_size and sequence_length are the first two dimensions of `inputs`.
      '''
      input_shape = tf.shape(inputs)
      batch_size, sequence_length = input_shape[0], input_shape[1]
      i = tf.range(sequence_length)[:, tf.newaxis]
      j = tf.range(sequence_length)
      mask = tf.cast(i >= j, dtype="int32")
      mask = tf.reshape(mask, (1, input_shape[1], input_shape[1]))
      # tile multiples [batch_size, 1, 1]: repeat the single mask once per batch element
      mult = tf.concat(
          [tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)],
          axis=0,
      )
      return tf.tile(mask, mult)

class ModelTrunk(keras.Model):
      '''
      Classifier: a stack of `num_layers` AttentionBlocks followed by
      Dense(512, relu), dropout and a softmax Dense layer with `classes` units.

      classes:    number of output classes
      inputs:     example input; only inputs.shape[-1] is read, to configure
                  the (currently unused) masking layer
      num_heads, head_size, ff_dim, dropout: forwarded to every AttentionBlock
      num_layers: number of AttentionBlocks
      '''
      def __init__(self, classes, inputs, name='ModelTrunk', num_heads=2, head_size=128, ff_dim=None, num_layers=1, dropout=0, **kwargs):
        super().__init__(name=name, **kwargs)
        # NOTE(review): this layer is created but never applied -- call() starts from the raw inputs.
        self.MaskingLayer = keras.layers.Masking(mask_value=0.0, input_shape = [None, inputs.shape[-1]])
        if ff_dim is None:
            ff_dim = head_size
        self.dropout = dropout
        self.classes = classes
        # NOTE(review): every block keeps the default name 'AttentionBlock', so the
        # sub-layer names are not unique -- may matter for name-based weight formats; confirm.
        self.attention_layers = [AttentionBlock(num_heads=num_heads, head_size=head_size, ff_dim=ff_dim, dropout=dropout) for _ in range(num_layers)]
        self.dense_layer = keras.layers.Dense(units = 512, activation = 'relu')
        self.dropout_layer = keras.layers.Dropout(dropout)
        self.final_layer = tf.keras.layers.Dense(classes, activation='softmax')

        
      def call(self, inputs):
        '''
        Pass `inputs` through the attention blocks and the classification head.
        returns: the softmax output of the final Dense layer (probabilities, not logits)
        '''
        # masking is disabled: the line below is commented out and the raw inputs are used
        #x = MaskingLayer(inputs)
        x = inputs
        for attention_layer in self.attention_layers:
            # the attention scores of each block are discarded
            x, attention_scores = attention_layer(x)
        x = self.dense_layer(x)
        x = self.dropout_layer(x)
        x = self.final_layer(x)

        return x

In [None]:
# set hyperparameters
EPOCHS = 45               # maximum number of epochs per fit() call
num_layers = [4, 6, 8]    # grid: number of AttentionBlocks per model
d_model = 300             # NOTE(review): not referenced in the visible cells
dff = 260                 # passed to ModelTrunk as ff_dim
key_dim = 260             # passed to ModelTrunk as head_size
num_heads = [1, 4, 6, 8]  # grid: number of attention heads
dropout_rate = 0.1
BUFFER_SIZE = 1800        # shuffle buffer used by make_batches_train
BATCH_SIZE = 64           # batch size used by make_batches_train / make_batches_test

In [None]:
# train model and get results
# Grid search: one model per (brain network, number of heads, number of layers).
for net in networksDict:
  # The label-control variants are trained in their own cell with altered labels.
  # Membership test instead of `is`: string identity is an implementation detail.
  if net in ('fullOffset', 'fullShuffled'): continue
  for heads in num_heads:
    for layers in num_layers:
      printLoop(net, heads, layers)
      train_data = tf.data.Dataset.from_tensor_slices((networksDict[net]['train'],y_train))
      val_data = tf.data.Dataset.from_tensor_slices((networksDict[net]['val'],y_val))
      train_batch = make_batches_train(train_data)
      val_batch = make_batches_test(val_data)

      # the training tensor is passed directly (no `input` variable shadowing the builtin)
      myModel = ModelTrunk(inputs=networksDict[net]['train'], name=f'Model_{net}_numHeads_{heads}', num_heads=heads, head_size=key_dim, ff_dim=dff, num_layers=layers, dropout=dropout_rate, classes=(len(clip_time)+1))

      myModel.compile(
          optimizer=keras.optimizers.Adam(learning_rate=1e-4),
          # the model ends with a softmax layer, so it outputs probabilities
          loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
          metrics=[keras.metrics.SparseCategoricalAccuracy()]
      )

      # LearningRateScheduler
      callbacks = keras.callbacks.LearningRateScheduler(lr_scheduler, verbose=0)

      # EarlyStopping criteria
      early_stopping = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)

      # The datasets are already batched by make_batches_*, so batch_size is not
      # passed to fit() (Keras asks not to specify it for dataset inputs).
      history = myModel.fit(train_batch, validation_data=val_batch, epochs=EPOCHS, callbacks=[callbacks, early_stopping])

      # results directory
      RES_DIR = f'{dirPath}/results/encoder/{net}'
      os.makedirs(RES_DIR, exist_ok=True)
      fig_prefix = RES_DIR + f'/net: {net} numHeads: {heads} layers: {layers}'

      # summarize history for accuracy and for loss
      for history_key, curve_name in (('sparse_categorical_accuracy', 'accuracy'), ('loss', 'loss')):
        fig = plt.figure()
        plt.plot(history.history[history_key])
        plt.plot(history.history['val_' + history_key])
        plt.title(f'{net}: {heads} Heads, {layers} layers\n {curve_name}')
        plt.ylabel(curve_name)
        plt.xlabel('epoch')
        plt.legend(['train', 'val'], loc='upper left')
        # save before show so the file is written whatever the backend does on show()
        fig.savefig(f'{fig_prefix} {curve_name}', dpi=fig.dpi)
        plt.show()
        plt.close(fig)  # avoid accumulating open figures over the grid

      ## val and test Cmat
      for split_name, y_split in (('val', y_val), ('test', y_test)):
        cm, acc = GetCmat(myModel, networksDict[net][split_name], y_split)
        disp = ConfusionMatrixDisplay(confusion_matrix=cm)
        disp.plot()
        # disp.plot() creates its own figure; use it instead of opening an empty one first
        fig = disp.figure_
        disp.ax_.set_title(f'{net}: {heads} Heads, {layers} layers\n {split_name} acc: {acc:.5}')
        fig.savefig(f'{fig_prefix} Cmat {split_name}', dpi=fig.dpi)
        plt.show()
        plt.close(fig)

      # get results
      results, results_prob = Get_Results(args, myModel, networksDict[net]['train'], y_train, train_list, train_len, networksDict[net]['test'], y_test, test_list, test_len, clip_time)

      myModel.save(f'{dirPath}/models/encoder_models/{net}/Model_{net}_numHeads_{heads}_num_layers_{layers}')

      res_path = (RES_DIR + 
                  '/%s_%d_net_%s' %(args.roi_name, args.roi, net) +
                  '_k_layers_%d' %(layers) +
                  '_heads_%d_batch_size_%d' %(heads, BATCH_SIZE) +
                  '_num_epochs_%d.pkl' %(EPOCHS))

      # save results
      with open(res_path, 'wb') as f:
          pickle.dump([results, results_prob], f)

In [None]:
# train model and get results on offset and shuffled data
# Control experiments: the full-brain input is trained against altered labels
# (shuffled / offset by 2) and then evaluated against the true labels.

y_shuffled_t, y_offset_t = ShuffleAndOffset(y_train)

for net, y_train_copy in zip(['fullShuffled', 'fullOffset'], [y_shuffled_t, y_offset_t]):
  for heads in num_heads:
    for layers in num_layers:
      printLoop(net, heads, layers)
      train_data = tf.data.Dataset.from_tensor_slices((networksDict[net]['train'],y_train_copy))
      train_batch = make_batches_train(train_data)

      # the training tensor is passed directly (no `input` variable shadowing the builtin)
      myModel = ModelTrunk(inputs=networksDict[net]['train'], name=f'Model_{net}_numHeads_{heads}', num_heads=heads, head_size=key_dim, ff_dim=dff, num_layers=layers, dropout=dropout_rate, classes=(len(clip_time)+1))

      myModel.compile(
          optimizer=keras.optimizers.Adam(learning_rate=1e-4),
          # the model ends with a softmax layer, so it outputs probabilities
          loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
          metrics=[keras.metrics.SparseCategoricalAccuracy()]
      )

      callbacks = keras.callbacks.LearningRateScheduler(lr_scheduler, verbose=0)

      # Trained without validation data, hence without early stopping: only the
      # training labels are altered here. The validation dataset and the
      # EarlyStopping callback that were built but never used are not created.
      # The dataset is already batched, so batch_size is not passed to fit().
      history = myModel.fit(train_batch, epochs=EPOCHS, callbacks=[callbacks])

      # results directory
      RES_DIR = f'{dirPath}/results/encoder/{net}'
      os.makedirs(RES_DIR, exist_ok=True)
      fig_prefix = RES_DIR + f'/net: {net} numHeads: {heads} layers: {layers}'

      # summarize history for accuracy and for loss (training curve only)
      for history_key, curve_name in (('sparse_categorical_accuracy', 'accuracy'), ('loss', 'loss')):
        fig = plt.figure()
        plt.plot(history.history[history_key])
        plt.title(f'{net}: {heads} Heads, {layers} layers\n {curve_name}')
        plt.ylabel(curve_name)
        plt.xlabel('epoch')
        # save before show so the file is written whatever the backend does on show()
        fig.savefig(f'{fig_prefix} {curve_name}', dpi=fig.dpi)
        plt.show()
        plt.close(fig)  # avoid accumulating open figures over the grid

      ## val and test Cmat (against the true labels)
      for split_name, y_split in (('val', y_val), ('test', y_test)):
        cm, acc = GetCmat(myModel, networksDict[net][split_name], y_split)
        disp = ConfusionMatrixDisplay(confusion_matrix=cm)
        disp.plot()
        # disp.plot() creates its own figure; use it instead of opening an empty one first
        fig = disp.figure_
        disp.ax_.set_title(f'{net}: {heads} Heads, {layers} layers\n {split_name} acc: {acc:.5}')
        fig.savefig(f'{fig_prefix} Cmat {split_name}', dpi=fig.dpi)
        plt.show()
        plt.close(fig)

      # get results (computed with the true training labels, not the altered ones)
      results, results_prob = Get_Results(args, myModel, networksDict[net]['train'], y_train, train_list, train_len, networksDict[net]['test'], y_test, test_list, test_len, clip_time)

      myModel.save(f'{dirPath}/models/encoder_models/{net}/Model_{net}_numHeads_{heads}_num_layers_{layers}')

      res_path = (RES_DIR + 
                  '/%s_%d_net_%s' %(args.roi_name, args.roi, net) +
                  '_k_layers_%d' %(layers) +
                  '_heads_%d_batch_size_%d' %(heads, BATCH_SIZE) +
                  '_num_epochs_%d.pkl' %(EPOCHS))

      # save results
      with open(res_path, 'wb') as f:
          pickle.dump([results, results_prob], f)