#Install necessary packages

In [None]:
!pip install nilearn==0.9.2
!pip install optuna



#Import libraries

In [None]:
import os
import numpy as np
from pathlib import Path
from PIL import Image
import matplotlib
from matplotlib import pyplot as plt
from nilearn import datasets, plotting
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
import optuna

#Mount Google Drive

In [None]:
# Mount Google Drive at /content/drive/ (this import only exists in the Colab runtime).
from google.colab import drive
drive.mount('/content/drive/', force_remount=True)
# Root folder of the data; argObj appends the per-subject 'subjNN' directory to it.
data_dir = '/content/drive/MyDrive/algonauts_2023_tutorial_data'
# Root folder for submissions; argObj appends the per-subject 'subjNN' directory to it.
parent_submission_dir = '/content/drive/MyDrive/algonauts_2023_challenge_submission'

Mounted at /content/drive/


#Select device

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda


#Import data

In [None]:
# Subject number to work on; argObj zero-pads it to two digits to build the 'subjNN' folder names.
subj = 1 #@param ["1", "2", "3", "4", "5", "6", "7", "8"] {type:"raw", allow-input: true}

In [None]:
class argObj:
  """Holds the per-subject data and submission paths.

  Attributes set by the constructor:
    subj: the subject number formatted with format spec '02' (e.g. 1 -> '01').
    data_dir: ``data_dir`` joined with 'subj' + subj.
    parent_submission_dir: the submission root, stored unchanged.
    subject_submission_dir: ``parent_submission_dir`` joined with 'subj' + subj.
  """

  def __init__(self, data_dir, parent_submission_dir, subj):
    subj_tag = f'{subj:02}'
    subj_folder = f'subj{subj_tag}'

    self.subj = subj_tag
    self.data_dir = os.path.join(data_dir, subj_folder)
    self.parent_submission_dir = parent_submission_dir
    self.subject_submission_dir = os.path.join(parent_submission_dir, subj_folder)

# Paths for the selected subject; the data-loading cells below read args.data_dir.
args = argObj(data_dir, parent_submission_dir, subj)

In [None]:
# Load the left-hemisphere (LH) training fMRI array for the selected subject.
fmri_dir = os.path.join(args.data_dir, 'training_split', 'training_fmri')
lh_fmri = np.load(os.path.join(fmri_dir, 'lh_training_fmri.npy'))

# Per the printed legend: rows are training stimulus images, columns are LH vertices.
print('\nLH training fMRI data shape:')
print(lh_fmri.shape)
print('(Training stimulus images × LH vertices)')


LH training fMRI data shape:
(9841, 19004)
(Training stimulus images × LH vertices)


In [None]:
train_img_dir = os.path.join(args.data_dir, 'training_split', 'training_images')
test_img_dir = os.path.join(args.data_dir, 'test_split', 'test_images')

# Sorted lists with all training and test image file names
train_img_list = sorted(os.listdir(train_img_dir))
test_img_list = sorted(os.listdir(test_img_dir))
print(f'Training images: {len(train_img_list)}')
print(f'Test images: {len(test_img_list)}')

Training images: 9841
Test images: 159


#Train, Validation and Test Split

In [None]:
# Fix NumPy's global seed so the shuffle below (and hence the split) is the same on every run
rand_seed = 5
np.random.seed(rand_seed)

# Calculate how many stimulus images correspond to 90% of the training data
num_train = int(np.round(len(train_img_list) / 100 * 90))
# Shuffle the indices of all training stimulus images (in place)
idxs = np.arange(len(train_img_list))
np.random.shuffle(idxs)
# Assign the first 90% of the shuffled indices to the training partition,
# and the remaining 10% to the validation partition
idxs_train, idxs_val = idxs[:num_train], idxs[num_train:]
# No need to shuffle or split the test stimulus images
idxs_test = np.arange(len(test_img_list))

# Report the size of each partition
print('Training stimulus images: ' + format(len(idxs_train)))
print('\nValidation stimulus images: ' + format(len(idxs_val)))
print('\nTest stimulus images: ' + format(len(idxs_test)))

Training stimulus images: 8857

Validation stimulus images: 984

Test stimulus images: 159


#Dataloader

In [None]:
# Define the custom dataset
class ImageDataset(Dataset):
    """Dataset over the subset of image paths selected by ``idxs``.

    Args:
        imgs_paths: sequence of image paths; converted to a NumPy array.
        idxs: indices selecting which entries of ``imgs_paths`` belong to
            this dataset (order is preserved).
        transform: callable applied to each RGB PIL image, or a falsy value
            to return the PIL image untouched.
    """

    def __init__(self, imgs_paths, idxs, transform):
        self.transform = transform
        self.imgs_paths = np.array(imgs_paths)[idxs]

    def __len__(self):
        return len(self.imgs_paths)

    def __getitem__(self, idx):
        image = Image.open(self.imgs_paths[idx]).convert('RGB')
        if not self.transform:
            return image
        # The transformed image is moved to the global `device` before being returned.
        return self.transform(image).to(device)

#Transfer Learning and Training Loop

In [None]:

# Image preprocessing: resize to 224x224 pixels, convert to a PyTorch tensor,
# then normalize the colour channels
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Pretrained ResNet-50, moved to the selected device and put in evaluation mode
model = models.resnet50(pretrained=True).to(device).eval()
# Feature extractor = every child module of the model except the last one
feature_extractor = nn.Sequential(*list(model.children())[:-1])

class LinearizingEncodingModel(nn.Module):
    """Three-layer fully connected network with optional extras per hidden layer.

    Layout: fc1 -> [activation1] -> [batchnorm1] -> [dropout1]
            -> fc2 -> [activation2] -> [batchnorm2] -> [dropout2] -> fc3.
    Bracketed layers are only created (and only applied) when requested.

    Args:
        input_dim: number of input features of fc1.
        output_dim: number of output features of fc3.
        hidden_dim1, hidden_dim2: widths of the two hidden layers.
        activation1, activation2: activation *classes* (instantiated here with
            no arguments), or a falsy value for no activation.
        bnorm1, bnorm2: truthy to add ``nn.BatchNorm1d`` after the activation.
        dropout1, dropout2: truthy to add ``nn.Dropout``.
        dropout_ratio1, dropout_ratio2: dropout probabilities; only used when
            the matching dropout flag is truthy.
    """

    # Order in which the (possibly absent) layers are applied in forward().
    _LAYER_ORDER = (
        'fc1', 'activation1', 'batchnorm1', 'dropout1',
        'fc2', 'activation2', 'batchnorm2', 'dropout2',
        'fc3',
    )

    def __init__(self, input_dim, output_dim, hidden_dim1, hidden_dim2, activation1, activation2,
                 bnorm1, bnorm2, dropout1, dropout_ratio1, dropout2, dropout_ratio2):
        super().__init__()
        hidden_stages = (
            ('1', input_dim, hidden_dim1, activation1, bnorm1, dropout1, dropout_ratio1),
            ('2', hidden_dim1, hidden_dim2, activation2, bnorm2, dropout2, dropout_ratio2),
        )
        for tag, fan_in, fan_out, act_cls, use_bnorm, use_dropout, drop_p in hidden_stages:
            setattr(self, 'fc' + tag, nn.Linear(fan_in, fan_out))
            if act_cls:
                setattr(self, 'activation' + tag, act_cls())
            if use_bnorm:
                setattr(self, 'batchnorm' + tag, nn.BatchNorm1d(fan_out))
            if use_dropout:
                setattr(self, 'dropout' + tag, nn.Dropout(drop_p))
        self.fc3 = nn.Linear(hidden_dim2, output_dim)

    def forward(self, x):
        """Apply every layer that exists, in the fixed layer order."""
        for layer_name in self._LAYER_ORDER:
            layer = getattr(self, layer_name, None)
            if layer is not None:
                x = layer(x)
        return x

def _extract_features(images):
    """Return the flattened `feature_extractor` output for a batch of images.

    The extractor is not part of the optimised parameters, so the forward pass
    runs under ``torch.no_grad()`` to avoid building an autograd graph for it.
    """
    with torch.no_grad():
        features = feature_extractor(images.to(device))
    return features.view(features.size(0), -1)


def train_linearizing_encoding_model(network, train_dataloader, train_targets, val_dataloader, val_targets, num_epochs, batch_size, loss_function, optimizer, learning_rate, save_name=None, patience=3):
    """Train `network` on features of the global `feature_extractor`, with early stopping.

    Targets are matched to images purely by batch position (batch ``i`` of a
    dataloader is paired with rows ``i*batch_size : (i+1)*batch_size`` of the
    target array), so both dataloaders must iterate in a fixed, unshuffled
    order with the same ``batch_size`` that is passed here.

    Args:
        network: model to train; only its parameters are optimised.
        train_dataloader: iterable of image batches for training (must support len()).
        train_targets: array of training targets, row-aligned with the training images.
        val_dataloader: iterable of image batches for validation (must support len()).
        val_targets: array of validation targets, row-aligned with the validation images.
        num_epochs: maximum number of epochs.
        batch_size: batch size used by both dataloaders.
        loss_function: callable ``loss(outputs, targets)``.
        optimizer: optimizer *class*; instantiated as ``optimizer(network.parameters(), lr=learning_rate)``.
        learning_rate: learning rate passed to the optimizer.
        save_name: if truthy, the final state dict is saved to ``save_name + '.pt'``.
        patience: stop after this many consecutive epochs without improvement.

    Returns:
        ``(train_losses, val_losses)``: per-epoch mean batch losses.
    """
    criterion = loss_function
    optimizer = optimizer(network.parameters(), lr=learning_rate)

    train_losses = []
    val_losses = []

    best_val_loss = float('inf')
    early_stopping_counter = 0

    for epoch in range(num_epochs):
        # The validation pass below switches the network to eval mode, so training
        # mode has to be restored at the start of every epoch (not just once).
        network.train()
        training_loss = 0.0
        for index, data in enumerate(train_dataloader):
            inputs = _extract_features(data)

            # Slicing clips at the end of the array, so the last (shorter) batch is handled too.
            start = index * batch_size
            targets_batch = torch.tensor(train_targets[start:start + batch_size]).to(device)

            optimizer.zero_grad()
            outputs = network(inputs)
            loss = criterion(outputs, targets_batch)
            loss.backward()
            optimizer.step()

            training_loss += loss.item()

        training_loss /= len(train_dataloader)
        train_losses.append(training_loss)
        print(f'Epoch {epoch + 1}/{num_epochs}, Training Loss: {training_loss:.4f}')

        network.eval()
        val_loss = 0.0
        with torch.no_grad():
            for index, data in enumerate(val_dataloader):
                inputs = _extract_features(data)

                start = index * batch_size
                targets_batch = torch.tensor(val_targets[start:start + batch_size]).to(device)

                outputs = network(inputs)
                loss = criterion(outputs, targets_batch)
                val_loss += loss.item()

        val_loss /= len(val_dataloader)
        val_losses.append(val_loss)
        print(f'Validation - Epoch {epoch + 1}/{num_epochs}, Validation Loss: {val_loss:.4f}')

        # Losses are compared after rounding to 2 decimals, so only an improvement
        # that changes the rounded value resets the early-stopping counter.
        if round(val_loss, 2) < round(best_val_loss, 2):
            best_val_loss = val_loss
            early_stopping_counter = 0
        else:
            early_stopping_counter += 1

        # Check if early stopping criterion is met
        if early_stopping_counter >= patience:
            print(f'Early stopping triggered. No improvement in {patience} epochs.')
            break

    # Note: this saves the weights of the last epoch run, not of the best epoch.
    if save_name:
        torch.save(network.state_dict(), save_name+'.pt')

    return train_losses, val_losses




#Hyperparameter Tuning with Optuna

In [None]:
# Train and validation targets: rows of the LH fMRI array selected by the split indices
lh_fmri_train = lh_fmri[idxs_train]
lh_fmri_val = lh_fmri[idxs_val]

# Push one dummy 224x224 RGB image through the feature extractor to read off its output size
with torch.no_grad():
    sample_input = torch.zeros(1, 3, 224, 224).to(device)
    output = feature_extractor(sample_input)

# Input size of the encoding network = second dimension of the feature extractor output;
# output size = number of columns of the training targets
input_dim = output.shape[1]
output_dim = lh_fmri_train.shape[1]


def objective(trial):
    """Optuna objective: train one encoding network and return its validation loss.

    Samples the architecture and optimisation hyperparameters from `trial`,
    builds unshuffled train/validation dataloaders over the training images,
    trains a `LinearizingEncodingModel`, and returns the validation loss of the
    last epoch that was run. The study minimises this value, so model selection
    is driven by held-out data rather than by the training loss.

    Args:
        trial: the Optuna trial used to sample hyperparameters.

    Returns:
        Validation loss (float) of the final epoch.
    """
    batch_size = trial.suggest_categorical("batch_size", [150])
    hidden_dim1 = trial.suggest_int("hidden_dim1", int(input_dim/16), int(input_dim/2), log=True)
    # The range of the second hidden layer is tied to the sampled width of the first.
    hidden_dim2 = trial.suggest_int("hidden_dim2", int(hidden_dim1/16), int(hidden_dim1/2), log=True)
    num_epochs = trial.suggest_categorical("num_epochs", [3])
    # Classes are kept as the categorical choices so the recorded parameter values keep their type.
    activation1 = trial.suggest_categorical("activation1", [nn.ReLU, nn.Tanh])
    activation2 = trial.suggest_categorical("activation2", [nn.ReLU, nn.Tanh])
    bnorm1 = trial.suggest_categorical("bnorm1", [True, False])
    bnorm2 = trial.suggest_categorical("bnorm2", [True, False])
    dropout1 = trial.suggest_categorical("dropout1", [True, False])
    # suggest_float replaces the deprecated suggest_uniform / suggest_loguniform;
    # parameter names and ranges are unchanged.
    dropout_ratio1 = trial.suggest_float("dropout_ratio1", 0.0, 0.5)
    dropout2 = trial.suggest_categorical("dropout2", [True, False])
    dropout_ratio2 = trial.suggest_float("dropout_ratio2", 0.0, 0.5)
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
    optimizer = trial.suggest_categorical("optimizer", [optim.Adam, optim.SGD])
    loss_function = nn.MSELoss()

    # Sorted so that position i matches row i of the fMRI arrays; the dataloaders
    # are left unshuffled because targets are paired with images by batch position.
    train_imgs_paths = sorted(Path(train_img_dir).iterdir())
    train_imgs_dataloader = DataLoader(
        ImageDataset(train_imgs_paths, idxs_train, transform),
        batch_size=batch_size
    )
    val_imgs_dataloader = DataLoader(
        ImageDataset(train_imgs_paths, idxs_val, transform),
        batch_size=batch_size
    )

    # Build and train the encoding network for this trial
    network = LinearizingEncodingModel(
        input_dim, output_dim, hidden_dim1, hidden_dim2, activation1, activation2,
        bnorm1, bnorm2, dropout1, dropout_ratio1, dropout2, dropout_ratio2
    ).to(device)
    _, val_losses = train_linearizing_encoding_model(
        network, train_imgs_dataloader, lh_fmri_train, val_imgs_dataloader, lh_fmri_val,
        num_epochs, batch_size, loss_function, optimizer, learning_rate, None, 3
    )
    return val_losses[-1]

# TPE is the sampler create_study uses by default; passing it explicitly with the
# notebook's seed makes the sampler's random draws reproducible across runs.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=rand_seed),
)
study.optimize(objective, n_trials=10)




[I 2023-07-05 12:15:31,145] A new study created in memory with name: no-name-dd28318f-89e9-4a56-94bb-6d4e5338e67b
  dropout_ratio1 = trial.suggest_uniform("dropout_ratio1", 0.0, 0.5)
  dropout_ratio2 = trial.suggest_uniform("dropout_ratio2", 0.0, 0.5)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-1)


Epoch 1/3, Training Loss: 0.6870
Validation - Epoch 1/3, Validation Loss: 0.4830
Epoch 2/3, Training Loss: 0.4631
Validation - Epoch 2/3, Validation Loss: 0.4541
Epoch 3/3, Training Loss: 0.4498


[I 2023-07-05 12:26:00,122] Trial 0 finished with value: 0.4497518594066302 and parameters: {'batch_size': 150, 'hidden_dim1': 155, 'hidden_dim2': 53, 'num_epochs': 3, 'activation1': <class 'torch.nn.modules.activation.Tanh'>, 'activation2': <class 'torch.nn.modules.activation.Tanh'>, 'bnorm1': True, 'bnorm2': True, 'dropout1': True, 'dropout_ratio1': 0.3861153052639878, 'dropout2': True, 'dropout_ratio2': 0.18215089186979622, 'learning_rate': 0.0005554173532824683, 'optimizer': <class 'torch.optim.adam.Adam'>}. Best is trial 0 with value: 0.4497518594066302.


Validation - Epoch 3/3, Validation Loss: 0.4497
Epoch 1/3, Training Loss: 0.6986
Validation - Epoch 1/3, Validation Loss: 0.6379
Epoch 2/3, Training Loss: 1.1860
Validation - Epoch 2/3, Validation Loss: 0.8777
Epoch 3/3, Training Loss: 0.7753


[I 2023-07-05 12:36:18,847] Trial 1 finished with value: 0.7753406713406245 and parameters: {'batch_size': 150, 'hidden_dim1': 241, 'hidden_dim2': 118, 'num_epochs': 3, 'activation1': <class 'torch.nn.modules.activation.Tanh'>, 'activation2': <class 'torch.nn.modules.activation.ReLU'>, 'bnorm1': False, 'bnorm2': True, 'dropout1': False, 'dropout_ratio1': 0.29256293199133454, 'dropout2': True, 'dropout_ratio2': 0.10894141018599801, 'learning_rate': 0.09162672224547891, 'optimizer': <class 'torch.optim.sgd.SGD'>}. Best is trial 0 with value: 0.4497518594066302.


Validation - Epoch 3/3, Validation Loss: 0.7057
Epoch 1/3, Training Loss: 0.8341
Validation - Epoch 1/3, Validation Loss: 0.5609
Epoch 2/3, Training Loss: 0.5269
Validation - Epoch 2/3, Validation Loss: 0.5920
Epoch 3/3, Training Loss: 0.5360


[I 2023-07-05 12:46:29,847] Trial 2 finished with value: 0.5359674145778021 and parameters: {'batch_size': 150, 'hidden_dim1': 565, 'hidden_dim2': 45, 'num_epochs': 3, 'activation1': <class 'torch.nn.modules.activation.Tanh'>, 'activation2': <class 'torch.nn.modules.activation.Tanh'>, 'bnorm1': False, 'bnorm2': False, 'dropout1': False, 'dropout_ratio1': 0.22832029701436313, 'dropout2': False, 'dropout_ratio2': 0.1616875511504542, 'learning_rate': 0.08714310657529234, 'optimizer': <class 'torch.optim.adam.Adam'>}. Best is trial 0 with value: 0.4497518594066302.


Validation - Epoch 3/3, Validation Loss: 0.6128
Epoch 1/3, Training Loss: 0.5295
Validation - Epoch 1/3, Validation Loss: 0.5252
Epoch 2/3, Training Loss: 0.5229
Validation - Epoch 2/3, Validation Loss: 0.5251
Epoch 3/3, Training Loss: 0.5228


[I 2023-07-05 12:56:54,066] Trial 3 finished with value: 0.5227948983510335 and parameters: {'batch_size': 150, 'hidden_dim1': 373, 'hidden_dim2': 25, 'num_epochs': 3, 'activation1': <class 'torch.nn.modules.activation.Tanh'>, 'activation2': <class 'torch.nn.modules.activation.ReLU'>, 'bnorm1': False, 'bnorm2': False, 'dropout1': True, 'dropout_ratio1': 0.4579320134437364, 'dropout2': True, 'dropout_ratio2': 0.02170159309245112, 'learning_rate': 4.668069854824824e-05, 'optimizer': <class 'torch.optim.sgd.SGD'>}. Best is trial 0 with value: 0.4497518594066302.


Validation - Epoch 3/3, Validation Loss: 0.5250
Epoch 1/3, Training Loss: 0.5183
Validation - Epoch 1/3, Validation Loss: 0.5113
Epoch 2/3, Training Loss: 0.5087
Validation - Epoch 2/3, Validation Loss: 0.5109
Epoch 3/3, Training Loss: 0.5085


[I 2023-07-05 13:07:17,502] Trial 4 finished with value: 0.5084914450844129 and parameters: {'batch_size': 150, 'hidden_dim1': 491, 'hidden_dim2': 52, 'num_epochs': 3, 'activation1': <class 'torch.nn.modules.activation.ReLU'>, 'activation2': <class 'torch.nn.modules.activation.Tanh'>, 'bnorm1': False, 'bnorm2': False, 'dropout1': True, 'dropout_ratio1': 0.4801581011222472, 'dropout2': True, 'dropout_ratio2': 0.4610215926380868, 'learning_rate': 1.010549930694834e-05, 'optimizer': <class 'torch.optim.adam.Adam'>}. Best is trial 0 with value: 0.4497518594066302.


Validation - Epoch 3/3, Validation Loss: 0.5107
Epoch 1/3, Training Loss: 0.5115
Validation - Epoch 1/3, Validation Loss: 0.5099
Epoch 2/3, Training Loss: 0.5073
Validation - Epoch 2/3, Validation Loss: 0.5091
Epoch 3/3, Training Loss: 0.5067


[I 2023-07-05 13:17:40,174] Trial 5 finished with value: 0.5066520874698957 and parameters: {'batch_size': 150, 'hidden_dim1': 398, 'hidden_dim2': 130, 'num_epochs': 3, 'activation1': <class 'torch.nn.modules.activation.Tanh'>, 'activation2': <class 'torch.nn.modules.activation.ReLU'>, 'bnorm1': False, 'bnorm2': False, 'dropout1': True, 'dropout_ratio1': 0.36525276151080455, 'dropout2': False, 'dropout_ratio2': 0.47071524661801933, 'learning_rate': 0.006044678842679579, 'optimizer': <class 'torch.optim.sgd.SGD'>}. Best is trial 0 with value: 0.4497518594066302.


Validation - Epoch 3/3, Validation Loss: 0.5086
Epoch 1/3, Training Loss: 0.9380
Validation - Epoch 1/3, Validation Loss: 0.7948
Epoch 2/3, Training Loss: 0.6385
Validation - Epoch 2/3, Validation Loss: 0.6140
Epoch 3/3, Training Loss: 0.5968


[I 2023-07-05 13:28:01,096] Trial 6 finished with value: 0.5967582374811172 and parameters: {'batch_size': 150, 'hidden_dim1': 445, 'hidden_dim2': 46, 'num_epochs': 3, 'activation1': <class 'torch.nn.modules.activation.Tanh'>, 'activation2': <class 'torch.nn.modules.activation.ReLU'>, 'bnorm1': True, 'bnorm2': True, 'dropout1': True, 'dropout_ratio1': 0.06512706855733147, 'dropout2': True, 'dropout_ratio2': 0.2419851314813895, 'learning_rate': 3.222012383111736e-05, 'optimizer': <class 'torch.optim.adam.Adam'>}. Best is trial 0 with value: 0.4497518594066302.


Validation - Epoch 3/3, Validation Loss: 0.5853
Epoch 1/3, Training Loss: 0.5572
Validation - Epoch 1/3, Validation Loss: 0.5512
Epoch 2/3, Training Loss: 0.5477
Validation - Epoch 2/3, Validation Loss: 0.5492
Epoch 3/3, Training Loss: 0.5458


[I 2023-07-05 13:38:28,435] Trial 7 finished with value: 0.5457790940999985 and parameters: {'batch_size': 150, 'hidden_dim1': 857, 'hidden_dim2': 326, 'num_epochs': 3, 'activation1': <class 'torch.nn.modules.activation.ReLU'>, 'activation2': <class 'torch.nn.modules.activation.ReLU'>, 'bnorm1': True, 'bnorm2': False, 'dropout1': False, 'dropout_ratio1': 0.45940660757517016, 'dropout2': False, 'dropout_ratio2': 0.13691497090556254, 'learning_rate': 0.0002540393697918865, 'optimizer': <class 'torch.optim.sgd.SGD'>}. Best is trial 0 with value: 0.4497518594066302.


Validation - Epoch 3/3, Validation Loss: 0.5474
Epoch 1/3, Training Loss: 0.5577
Validation - Epoch 1/3, Validation Loss: 0.5357
Epoch 2/3, Training Loss: 0.5079
Validation - Epoch 2/3, Validation Loss: 0.5086
Epoch 3/3, Training Loss: 0.5062


[I 2023-07-05 13:48:53,310] Trial 8 finished with value: 0.5061940461397171 and parameters: {'batch_size': 150, 'hidden_dim1': 441, 'hidden_dim2': 79, 'num_epochs': 3, 'activation1': <class 'torch.nn.modules.activation.ReLU'>, 'activation2': <class 'torch.nn.modules.activation.ReLU'>, 'bnorm1': True, 'bnorm2': False, 'dropout1': False, 'dropout_ratio1': 0.4808964733466174, 'dropout2': True, 'dropout_ratio2': 0.3245507484827267, 'learning_rate': 0.0681762060257759, 'optimizer': <class 'torch.optim.sgd.SGD'>}. Best is trial 0 with value: 0.4497518594066302.


Validation - Epoch 3/3, Validation Loss: 0.5084
Epoch 1/3, Training Loss: 0.8184
Validation - Epoch 1/3, Validation Loss: 0.7821
Epoch 2/3, Training Loss: 1.7987
Validation - Epoch 2/3, Validation Loss: 0.6552
Epoch 3/3, Training Loss: 0.6012


[I 2023-07-05 13:59:18,987] Trial 9 finished with value: 0.6012042750914891 and parameters: {'batch_size': 150, 'hidden_dim1': 128, 'hidden_dim2': 40, 'num_epochs': 3, 'activation1': <class 'torch.nn.modules.activation.Tanh'>, 'activation2': <class 'torch.nn.modules.activation.Tanh'>, 'bnorm1': False, 'bnorm2': True, 'dropout1': False, 'dropout_ratio1': 0.22553558257565942, 'dropout2': False, 'dropout_ratio2': 0.0055179588701198545, 'learning_rate': 0.06331878363042374, 'optimizer': <class 'torch.optim.sgd.SGD'>}. Best is trial 0 with value: 0.4497518594066302.


Validation - Epoch 3/3, Validation Loss: 0.5570
