In [15]:
import os
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import re
import glob
import h5py
from bids import BIDSLayout
import nibabel as nib
from PIL import Image
import json

from sklearn.decomposition import PCA
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torchvision.models import  ResNet50_Weights
import torchvision.transforms as transforms

dir2 = os.path.abspath('..')
dir1 = os.path.dirname(dir2)
if not dir1 in sys.path: 
    sys.path.append(dir1)
from tc2see import load_data

### Global Variables

```
ROIs = {
    "FFC": [18],"V1": [1],"V2": [4],"V3": [5],"V3A": [13],"V3B": [19],"V3CD": [158],"V4": [6],"V6": [3],"V7": [16],
    "V8": [7], "VMV1": [153],"VMV2": [160],"VMV3": [154],"LO1": [20],"LO2": [21],"PIT": [22],"VVC": [163], "140": [140], "11":[11],
    "85": [85], "83":[83], "82": [82], "87": [87], "V1_V2_V3_V4": [1,4,5,6], "V1_V2": [1,4], "PIT_FFC_VVC": [22, 18, 163]
}
```

In [16]:
subject_str = '06'
layers = ["layer1", "layer4"]
ROIs = {"V1_V2": [1,4], "PIT_FFC_VVC": [22, 18, 163]}

In [20]:
dataset_root = Path('E:\\fmri_processing\\results')
dataset_path = dataset_root
dataset_layout = BIDSLayout(dataset_path / 'TC2See')
derivatives_path = dataset_path / 'derivatives_TC2See'
data_path = derivatives_path / 'fmriprep'

tc2see_version = 3
tr = 2
num_runs = 6

stimulus_images = h5py.File(derivatives_path / 'stimulus-images.hdf5', 'r')
stimulus_id_map = {i: name for i, name in enumerate(stimulus_images.attrs['stimulus_names'])}
images_dir = Path("E:/Decoding/bird_data/bird_images/docs/cropped")

load_data_params = dict(
    path = data_path / f'tc2see-v{tc2see_version}-fsaverage-surfs.hdf5', 
    tr_offset = num_runs / tr,
    run_normalize='linear_trend',
    interpolation=False,
)

### Representation Similarity Analysis

#### Create masks for relevant ROIs or ROI combinations

In [21]:
glasser_L = nib.freesurfer.io.read_annot("E:/fmri_processing/results/visualization/atlas/lh.HCPMMP1.annot")
glasser_R = nib.freesurfer.io.read_annot("E:/fmri_processing/results/visualization/atlas/rh.HCPMMP1.annot")

ROI_masks = {}

for key, vals in ROIs.items():

    # mask glasser atlas to mark current loop ROI as 1s
    L_mask = np.isin(glasser_L[0], vals) # vals is a list of ROIs to set as 1
    R_mask = np.isin(glasser_R[0], vals)
    
    # concatenate left and right hemispheres 
    L_R_concat_mask = np.concatenate([L_mask, R_mask], axis=0)
    ROI_masks[key] = L_R_concat_mask

#### Dataloader and transformations

In [22]:
class CustomImageDataset(Dataset):
    def __init__(self, file_paths, transform=None):
        self.file_paths = file_paths
        self.transform = transform

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        img_path = str(self.file_paths[idx])
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, img_path
    
transform = transforms.Compose([
    transforms.Resize((224, 224)),  
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 
])

#### Save ROI and layer representations to files if they haven't been saved already

In [23]:
# Initialize the feature extractor
model = models.resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
model.eval() 

for run_id in range(num_runs):

    bold_run, stimulus_ids = load_data(
        **load_data_params,
        subject = f'sub-{subject_str}',
        run_ids = [run_id]
    )
    
    file_paths = [images_dir / f"{stimulus_id_map[img_num]}.png" for img_num in stimulus_ids]
    dataset = CustomImageDataset(file_paths=file_paths, transform=transform)
    data_loader = DataLoader(dataset, batch_size=1, shuffle=False)
    
    
    #########################################################
    ############## Save ROI Representations #################
    #########################################################

    for ROI, ROI_mask in ROI_masks.items():
        roi_path = Path("E:/Decoding/fmri-preprocessing/img_bold_arrays") / f"sub_{subject_str}" / f"run_{run_id + 1}" / "ROIs" / ROI

        if not roi_path.exists():
            roi_path.mkdir(parents=True, exist_ok=True)
            bold_file_name = roi_path / "roi_bold_for_imgs.npy"
            all_img_roi_data = []

            for i, img in enumerate(stimulus_ids):
                bold_copy = bold_run[i].copy()
                bold_copy = bold_copy[ROI_mask == 1]
                all_img_roi_data.append(bold_copy)

            all_img_roi_data = np.stack(all_img_roi_data)
            np.save(bold_file_name, all_img_roi_data)


    #########################################################
    ############## Save Layer Representations ###############
    #########################################################
    
    # Define a hook to extract layer representations
    def hook_fn(module, input, output):
        global features
        features = output

    for layer_name in layers:
        layer_path = Path("E:/Decoding/fmri-preprocessing/img_bold_arrays") / f"sub_{subject_str}" / f"run_{run_id + 1}" / "layers" / layer_name
        features_file_name = layer_path / "features_for_imgs.npy"
        all_nn_layer_data = []

        if not layer_path.exists():
            layer_path.mkdir(parents=True, exist_ok=True)

            layer = getattr(model, layer_name)[0].relu
            handle = layer.register_forward_hook(hook_fn)

            for images, img_paths in data_loader:
                with torch.no_grad():  
                    _ = model(images)  # Forward pass to trigger the hook

                features = features.detach().numpy().flatten()
                all_nn_layer_data.append(features)

            all_nn_layer_data = np.stack(all_nn_layer_data)
            np.save(features_file_name, all_nn_layer_data)

            # Remove the hook
            handle.remove()

#### Train linear model using combinations of ROI representation labels and layer representation features

In [None]:
layer_ROI_correlations = {}
ROI_names = ROIs.keys()

for ROI in ROI_names:
    print("ROI: ", ROI)
    layer_ROI_correlations[ROI] = {}
    cors = {lay:[] for lay in layers}
    for test_run_id in range(1, num_runs+1):
        scaler = MinMaxScaler()
        training_run_ids = list(range(1, num_runs+1))
        training_run_ids.remove(test_run_id)

        ##### Prepare testing data  #####
        roi_testing_data = np.load(f'E:/Decoding/fmri-preprocessing/img_bold_arrays/sub_{subject_str}/run_{test_run_id}/ROIs/{ROI}/roi_bold_for_imgs.npy')
        roi_testing_data = scaler.fit_transform(roi_testing_data) 
        layer_testing_data = [np.load(f'E:/Decoding/fmri-preprocessing/img_bold_arrays/sub_{subject_str}/run_{test_run_id}/layers/{layers[i]}/features_for_imgs.npy') for i in range(len(layers))]
        for i, layer in enumerate(layer_testing_data.copy()):
            layer_testing_data[i] = scaler.fit_transform(layer_testing_data[i])


        ##### Prepare training data #####
        roi_training_data = []
        layer_training_data = []

        for run_id in training_run_ids:
            roi_data = np.load(f'E:/Decoding/fmri-preprocessing/img_bold_arrays/sub_{subject_str}/run_{run_id}/ROIs/{ROI}/roi_bold_for_imgs.npy')
            layer_data = [np.load(f'E:/Decoding/fmri-preprocessing/img_bold_arrays/sub_{subject_str}/run_{run_id}/layers/{layers[i]}/features_for_imgs.npy') for i in range(len(layers))]

            roi_training_data.append(roi_data)
            layer_training_data.append(layer_data)
        
        # TODO: make this concatenation simpler
        layer_training_data = [np.concatenate([layer_training_data[run_id][i] for run_id in range(len(layer_training_data))], axis=0) for i in range(len(layers))]

        roi_training_data = np.concatenate(roi_training_data, axis=0)
        roi_training_data = scaler.fit_transform(roi_training_data)

        for i, layer in enumerate(layer_training_data.copy()):
            layer_training_data[i] = scaler.fit_transform(layer_training_data[i])
            

        ##### Training Loop #####
        for layer_idx, layer in enumerate(layers):
            # Dimensionality reduction
            pca_layer = PCA(n_components=.99)

            print("Num features before PCA: ", layer_training_data[layer_idx].shape[1])
            layer_training_data_reduced = pca_layer.fit_transform(layer_training_data[layer_idx])
            print("Num features after PCA: ", layer_training_data_reduced.shape[1])

            layer_testing_data_reduced = pca_layer.transform(layer_testing_data[layer_idx])

            model_layer = LinearRegression()
            model_layer.fit(layer_training_data_reduced, roi_training_data)

            predictions_layer = model_layer.predict(layer_testing_data_reduced)

            # Evaluate the models
            mse_layer = float(mean_squared_error(roi_testing_data, predictions_layer))

            r2_layer = r2_score(roi_testing_data, predictions_layer)

            correlations = []

            for i in range(predictions_layer.shape[1]):
                corr = np.corrcoef(predictions_layer[:, i], roi_testing_data[:, i])[0, 1]
                correlations.append(corr)

            correlations = np.array(correlations)

            cor_avg = correlations.mean()
            cors[layer].append(correlations)

            print(f"\nMean Squared Error for model using layer_training_data: {round(mse_layer, 4)}")
            print(f"R² Score for model using layer_training_data: {round(r2_layer, 4)}")
            print(f"Avg Correlation Between {layers[layer_idx]} and ROI data: {round(cor_avg, 4)}\n")
            print("========================================\n")

    for layer in cors.keys():
        cors_np = np.array(cors[layer])
        avg_cors_for_layer = cors_np.mean()
        layer_ROI_correlations[ROI][layer] = round(avg_cors_for_layer, 5)

In [25]:
print(layer_ROI_correlations)

{'V1_V2': {'layer1': 0.087, 'layer4': 0.08422}, 'PIT_FFC_VVC': {'layer1': -0.00207, 'layer4': 0.01682}}


#### Save the output

In [26]:
with open('layer_ROI_correlations.json', 'w') as json_file:
    json.dump(layer_ROI_correlations, json_file, indent=4)