#Modules

In [1]:
import os
import torch 
import numpy as np
import pandas as pd
import h5py
import math
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import datasets, transforms
from torch.cuda.amp import GradScaler
from sklearn.metrics import f1_score
from google.colab import drive
from collections import deque
drive.mount('/content/drive')
FMRI_DIR_STND = '/content/drive/Shareddrives/Summer Intership 2022 - Brain Dataset/data/standardized_betas/subj01'
PATH = '/content/drive/Shareddrives/Summer Intership 2022 - Brain Dataset/data/standardized_betas/checkpoint' #path for checkpoint

Mounted at /content/drive


#Debugger

In [None]:
!pip install ipdb
import ipdb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ipdb
  Downloading ipdb-0.13.9.tar.gz (16 kB)
Collecting ipython>=7.17.0
  Downloading ipython-7.34.0-py3-none-any.whl (793 kB)
[K     |████████████████████████████████| 793 kB 5.1 MB/s 
Collecting prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0
  Downloading prompt_toolkit-3.0.30-py3-none-any.whl (381 kB)
[K     |████████████████████████████████| 381 kB 49.8 MB/s 
Building wheels for collected packages: ipdb
  Building wheel for ipdb (setup.py) ... [?25l[?25hdone
  Created wheel for ipdb: filename=ipdb-0.13.9-py3-none-any.whl size=11648 sha256=12bb7a656fc1d65e5ca83b25f2213a92312b63ee95c3f5180bb2d904a8c0ce6c
  Stored in directory: /root/.cache/pip/wheels/65/cd/cc/aaf92acae337a28fdd2aa4d632196a59745c8c39f76eaeed01
Successfully built ipdb
Installing collected packages: prompt-toolkit, ipython, ipdb
  Attempting uninstall: prompt-toolkit
    Found existing installation: prompt-to

#Dataset Class

In [2]:
class DatasetNSD(Dataset):
  def __init__(self):
      #fmri_files will store all the betas files in the given directory
      self.dir = FMRI_DIR_STND
      self.fmri_files = self.getDirFiles()
      # num_of_scans is set by getRanges
      self.num_of_scans = 0
      # index_ranges is a dictionery with index range as key and corresponding file as value
      self.index_ranges =  self.getRanges()
      self.file_handlers = {}
      self.open_files()

  def __del__(self):
    for key, value in self.file_handlers.items():
      value.close()


  def __len__(self):
    return self.num_of_scans 

#Input: hdf5 file name 
#Output: store hdf5 in deque, return file object
  def open_files(self): 
    for file_name in self.fmri_files:
      path = os.path.join(FMRI_DIR_STND, file_name)
      self.file_handlers[file_name] = h5py.File(path, 'r')

   
  def __getitem__(self, index):
    #index will be 0-17908
    # files come in diffirent sizes of fmri scans.
    # a file will be chose based on the range of the index 
    for key in self.index_ranges:
      index_range = list(key)
      if (index in index_range):
        f = self.file_handlers[self.index_ranges[key]]
        indx = index - f['startIndx/i'][0]
        sample = torch.FloatTensor(np.array(f['betas/b'][indx]))
        label = torch.FloatTensor(np.array(f['labels/l'][indx]))
        break
    return (sample, label)

     
  def getDirFiles(self):
     files = [f for f in os.listdir(self.dir) if 
              os.path.isfile(os.path.join(self.dir, f)) and
              f[-5:] == '.hdf5']

     files.sort()                          
     return files

  def getRanges(self):
    ranges = {}
    previous = 0
    count = 0
    for x in self.fmri_files:
      with h5py.File(os.path.join(self.dir, x), "r") as f:    
          size = f['labels/l'].shape[0]
          count = count + size
          if previous == 0:
            ranges[range(size)] = x
            previous = size
          else:
            size = size + previous
            i = range(previous,size)
            ranges[i] = x
            previous = size
    self.num_of_scans = count
    return ranges

#NeuralNet3

In [3]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.kernel = 2
        self.stride = 2 
        self.conv_stack = nn.Sequential(
        nn.Conv3d(1,out_channels=64 , kernel_size = 3, stride = 3),
        nn.LeakyReLU(),
        nn.Conv3d(in_channels=64,out_channels=32 , kernel_size = self.kernel, stride = self.stride),
        nn.LeakyReLU(),
        nn.Conv3d(in_channels=32,out_channels=16 , kernel_size = self.kernel, stride = self.stride),
        nn.LeakyReLU(),
        nn.Flatten()
        )
        self.linear_stack = nn.Sequential(
        nn.Linear(4608,2000),
        nn.ReLU(),
        nn.Dropout(p=0.2),
        nn.Linear(2000,1000),
        nn.ReLU(),
        nn.Dropout(p=0.2),
        nn.Linear(1000,500),
        nn.ReLU(),
        nn.Dropout(p=0.2),
        nn.Linear(500,256),
        nn.ReLU(),
        nn.Linear(256,128),
        nn.ReLU(),
        nn.Linear(128,64),
        nn.ReLU(),
        nn.Linear(64, 1),
          )

    # # [(input_volume−kernel_size)/stride]+1
    # def output_size(self,volume):
    #   size = ((volume -self.kernel)//self.stride) + 1
    #   return size

    def forward(self, x):
        x = self.conv_stack(x.unsqueeze(dim=1))
        x = self.linear_stack(x)
        return x

#DataLoaders

In [None]:
BATCH_SIZE = 199
ds = DatasetNSD()
split = len(ds)//6
split = len(ds)-split
#split =14925
train_dataset = Subset(ds, range(len(ds))[: split] )
test_dataset = Subset(ds, range(len(ds))[split: ])
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

#Initialize NeuralNet

In [None]:
#device config
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

#hyper parameter
EPOCHS = 20
LEARNING_RATE = 0.0001
WEIGHT_DECAY = 0.01

modelNN = CNN().to(device)
# modelNN.load_state_dict(torch.load(os.path.join(PATH,'model_weights.pth')))
# weight = torch.FloatTensor([.322]).to(device)
loss_func = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(modelNN.parameters(), lr = LEARNING_RATE)

print(modelNN)

#Training Loop

In [None]:
from tqdm import tqdm
def trainingLoop(dataloader, model, loss_func, optimizer , test_loader):
  n_total_steps = len(dataloader)
  scaler = GradScaler()
  trues = []
  preds = []
  for epoch in range(EPOCHS):
    model.train()
    for i, (sample, true_label) in enumerate(train_loader):
      sample = sample.to(device)
      trues.extend(true_label.numpy())
      true_label = true_label.to(device).unsqueeze(dim=1)
      pred_label = model(sample)
      loss = loss_func(pred_label, true_label)
      train_loss =  loss.item()
      #backward pass
      optimizer.zero_grad()
      scaler.scale(loss).backward()
      scaler.step(optimizer)
      scaler.update()
      #predictioins
      pred = torch.sigmoid(pred_label).detach().cpu()
      preds.extend(np.round(pred.reshape(-1)).numpy())
      if (i+1) % 2 == 0:
        print(f'epoch {epoch+1}/{EPOCHS},step {i+1}/{n_total_steps}, loss = {(train_loss):.4f} ')   
    score = f1_score(trues, preds)
    print(f'F1 score for training set is: {score}')
    torch.save(model.state_dict(), os.path.join(PATH,'model_weights.pth'))
    # print(f'Train loss for epoch: {train_loss/n_total_steps} ')
    # predTesting(test_loader, model)

def predTesting(dataloader, model): 
  model.eval()
  with torch.no_grad():
    n_samples = len(dataloader)
    # trues = []
    # preds = []
    correct = 0
    for  sample, true_label in tqdm(dataloader):
      sample = sample.to(device)
      # trues.extend(true_label.numpy())
      true_label = true_label.to(device).unsqueeze(dim=1)
      pred_label = model(sample)
      #predictioins
    #   pred = torch.sigmoid(pred_label).cpu()
    #   preds.extend(np.round(pred.reshape(-1)).numpy())
    # score = f1_score(trues, preds)
    print(f'F1 score for training set is: {score}')

In [None]:
trainingLoop(train_loader, modelNN, loss_func, optimizer, test_loader)

epoch 1/20,step 2/75, loss = 0.7141 
epoch 1/20,step 4/75, loss = 0.7079 
epoch 1/20,step 6/75, loss = 0.7069 
epoch 1/20,step 8/75, loss = 0.7069 
epoch 1/20,step 10/75, loss = 0.7031 
epoch 1/20,step 12/75, loss = 0.6957 
epoch 1/20,step 14/75, loss = 0.6861 
epoch 1/20,step 16/75, loss = 0.6732 
epoch 1/20,step 18/75, loss = 0.6410 
epoch 1/20,step 20/75, loss = 0.6073 
epoch 1/20,step 22/75, loss = 0.5876 
epoch 1/20,step 24/75, loss = 0.5575 
epoch 1/20,step 26/75, loss = 0.5728 
epoch 1/20,step 28/75, loss = 0.5526 
epoch 1/20,step 30/75, loss = 0.5817 


In [None]:
predTesting(test_loader, modelNN, loss_func)

100%|██████████| 150/150 [01:01<00:00,  2.45it/s]

F1 score for training set is: 0.8692686623721106



