In [1]:
import os
import numpy as np
from pathlib import Path
from PIL import Image
from tqdm import tqdm
import matplotlib
from matplotlib import pyplot as plt
from nilearn import datasets
from nilearn import plotting
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision.models.feature_extraction import create_feature_extractor, get_graph_node_names
from torchvision import transforms
from sklearn.decomposition import IncrementalPCA
from sklearn.linear_model import LinearRegression
from scipy.stats import pearsonr as corr

In [2]:
# data_dir = '../algonauts_2023_challenge_data'
# parent_submission_dir = 'algonauts_2023_challenge_submission'
data_dir = "/fsx/proj-medarc/fmri/natural-scenes-dataset/algonauts_data/dataset"
parent_submission_dir = "/fsx/proj-medarc/fmri/dweisberg/fMRI-Algonauts-Challenge-2023/algonauts_2023_challenge_submission"

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("device:",device)

# def seed_everything(seed=42):
#     random.seed(seed)
#     os.environ['PYTHONHASHSEED'] = str(seed)
#     np.random.seed(seed)
#     torch.manual_seed(seed)
#     torch.cuda.manual_seed(seed)
#     torch.cuda.manual_seed_all(seed)
#     torch.backends.cudnn.deterministic = True
# seed_everything()

subj=1

device: cuda


In [3]:
class argObj:
  def __init__(self, data_dir, parent_submission_dir, subj):
    
    self.subj = format(subj, '02')
    self.data_dir = os.path.join(data_dir, 'subj'+self.subj)
    self.parent_submission_dir = parent_submission_dir
    self.subject_submission_dir = os.path.join(self.parent_submission_dir,
        'subj'+self.subj)

    # Create the submission directory if not existing
    if not os.path.isdir(self.subject_submission_dir):
        os.makedirs(self.subject_submission_dir)

args = argObj(data_dir, parent_submission_dir, subj)

In [4]:
fmri_dir = os.path.join(args.data_dir, 'training_split', 'training_fmri')
lh_fmri = np.load(os.path.join(fmri_dir, 'lh_training_fmri.npy'))
rh_fmri = np.load(os.path.join(fmri_dir, 'rh_training_fmri.npy'))

print('LH training fMRI data shape:')
print(lh_fmri.shape)
print('(Training stimulus images × LH vertices)')

print('\nRH training fMRI data shape:')
print(rh_fmri.shape)
print('(Training stimulus images × RH vertices)')

LH training fMRI data shape:
(9841, 19004)
(Training stimulus images × LH vertices)

RH training fMRI data shape:
(9841, 20544)
(Training stimulus images × RH vertices)


In [5]:
train_img_dir  = os.path.join(args.data_dir, 'training_split', 'training_images')
test_img_dir  = os.path.join(args.data_dir, 'test_split', 'test_images')

# Create lists with all training and test image file names, sorted
train_img_list = os.listdir(train_img_dir)
train_img_list.sort()
test_img_list = os.listdir(test_img_dir)
test_img_list.sort()
print('Training images: ' + str(len(train_img_list)))
print('Test images: ' + str(len(test_img_list)))

Training images: 9841
Test images: 159


In [6]:
rand_seed = 5 
np.random.seed(rand_seed)
batch_size = 300

# Calculate how many stimulus images correspond to 90% of the training data
num_train = int(np.round(len(train_img_list) / 100 * 90))
num_train = num_train - num_train % batch_size
# Shuffle all training stimulus images
idxs = np.arange(len(train_img_list))
# np.random.shuffle(idxs)
# Assign 90% of the shuffled stimulus images to the training partition,
# and 10% to the test partition
idxs_train, idxs_val = idxs[:num_train], idxs[num_train:]
# No need to shuffle or split the test stimulus images
idxs_test = np.arange(len(test_img_list))

print('Training stimulus images: ' + format(len(idxs_train)))
print('\nValidation stimulus images: ' + format(len(idxs_val)))
print('\nTest  stimulus images: ' + format(len(idxs_test)))

Training stimulus images: 8700

Validation stimulus images: 1141

Test  stimulus images: 159


In [7]:
transform = transforms.Compose([
    transforms.Resize((224,224)), # resize the images to 224x24 pixels
    transforms.ToTensor(), # convert the images to a PyTorch tensor
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) # normalize the images color channels
])

In [8]:
class ImageDataset(Dataset):
    def __init__(self, imgs_paths, idxs, transform):
        self.imgs_paths = np.array(imgs_paths)[idxs]
        self.transform = transform

    def __len__(self):
        return len(self.imgs_paths)

    def __getitem__(self, idx):
        # Load the image
        img_path = self.imgs_paths[idx]
        img = Image.open(img_path).convert('RGB')
        # Preprocess the image and send it to the chosen device ('cpu' or 'cuda')
        if self.transform:
            img = self.transform(img).to(device)
        return img

In [9]:
# Get the paths of all image files
train_imgs_paths = sorted(list(Path(train_img_dir).iterdir()))
subm_imgs_paths = sorted(list(Path(test_img_dir).iterdir()))

# The DataLoaders contain the ImageDataset class
train_imgs_dataloader = DataLoader(
    ImageDataset(train_imgs_paths, idxs_train, transform), 
    batch_size=batch_size, drop_last=True
)
val_imgs_dataloader = DataLoader(
    ImageDataset(train_imgs_paths, idxs_val, transform), 
    batch_size=batch_size
)

test_imgs_dataloader = DataLoader(
    ImageDataset(subm_imgs_paths, idxs_test, transform), 
    batch_size=batch_size
)

In [10]:
lh_fmri_train = lh_fmri[idxs_train]
lh_fmri_val = lh_fmri[idxs_val]
lh_fmri_test = lh_fmri[idxs_test]
rh_fmri_train = rh_fmri[idxs_train]
rh_fmri_val = rh_fmri[idxs_val]
rh_fmri_test = rh_fmri[idxs_test]

In [11]:
del lh_fmri, rh_fmri

In [12]:
model = torch.hub.load('pytorch/vision:v0.10.0', 'alexnet')
model.to(device) # send the model to the chosen device ('cpu' or 'cuda')
model.eval() # set the model to evaluation mode, since you are not training it
train_nodes, _ = get_graph_node_names(model)
print(train_nodes)
del model

Using cache found in /admin/home-dweisberg/.cache/torch/hub/pytorch_vision_v0.10.0


['x', 'features.0', 'features.1', 'features.2', 'features.3', 'features.4', 'features.5', 'features.6', 'features.7', 'features.8', 'features.9', 'features.10', 'features.11', 'features.12', 'avgpool', 'flatten', 'classifier.0', 'classifier.1', 'classifier.2', 'classifier.3', 'classifier.4', 'classifier.5', 'classifier.6']


In [13]:
def fit_pca(feature_extractor, dataloader):

    # Define PCA parameters
    pca = IncrementalPCA(n_components=100, batch_size=batch_size)

    # Fit PCA to batch
    for _, d in tqdm(enumerate(dataloader), total=len(dataloader)):
        # Extract features
        ft = feature_extractor(d)
        # Flatten the features
        ft = torch.hstack([torch.flatten(l, start_dim=1) for l in ft.values()])
        # Fit PCA to batch
        pca.partial_fit(ft.detach().cpu().numpy())
    return pca

In [14]:
def extract_features(feature_extractor, dataloader, pca):

    features = []
    for _, d in tqdm(enumerate(dataloader), total=len(dataloader)):
        # Extract features
        ft = feature_extractor(d)
        # Flatten the features
        ft = torch.hstack([torch.flatten(l, start_dim=1) for l in ft.values()])
        # Apply PCA transform
        ft = pca.transform(ft.cpu().detach().numpy())
        features.append(ft)
    return np.vstack(features)

In [15]:
roi_list = ["V1v", "V1d", "V2v", "V2d", "V3v", "V3d", "hV4", "EBA", "FBA-1", "FBA-2", "mTL-bodies", "OFA", "FFA-1", "FFA-2", "mTL-faces", "aTL-faces", "OPA", "PPA", "RSC", "OWFA", "VWFA-1", "VWFA-2", "mfs-words", "mTL-words", "early", "midventral", "midlateral", "midparietal", "ventral", "lateral", "parietal", "nsdgeneral"]

# Load the ROI classes mapping dictionaries
roi_mapping_files = ['mapping_prf-visualrois.npy', 'mapping_floc-bodies.npy',
    'mapping_floc-faces.npy', 'mapping_floc-places.npy',
    'mapping_floc-words.npy', 'mapping_streams.npy']
roi_name_maps = []
for r in roi_mapping_files:
    roi_name_maps.append(np.load(os.path.join(args.data_dir, 'roi_masks', r),
        allow_pickle=True).item())

# Load the ROI brain surface maps
lh_challenge_roi_files = ['lh.prf-visualrois_challenge_space.npy',
    'lh.floc-bodies_challenge_space.npy', 'lh.floc-faces_challenge_space.npy',
    'lh.floc-places_challenge_space.npy', 'lh.floc-words_challenge_space.npy',
    'lh.streams_challenge_space.npy']
rh_challenge_roi_files = ['rh.prf-visualrois_challenge_space.npy',
    'rh.floc-bodies_challenge_space.npy', 'rh.floc-faces_challenge_space.npy',
    'rh.floc-places_challenge_space.npy', 'rh.floc-words_challenge_space.npy',
    'rh.streams_challenge_space.npy']

lh_challenge_rois = []
rh_challenge_rois = []

for r in range(len(lh_challenge_roi_files)):
    lh_challenge_rois.append(np.load(os.path.join(args.data_dir, 'roi_masks',
        lh_challenge_roi_files[r])))
    rh_challenge_rois.append(np.load(os.path.join(args.data_dir, 'roi_masks',
        rh_challenge_roi_files[r])))

In [16]:
# correlations for each roi for each layer, for both hemispheres
layer_correlations_lh = np.zeros((len(train_nodes), len(roi_list)))
layer_correlations_rh = np.zeros((len(train_nodes), len(roi_list)))

roi_model = np.zeros(2, len(roi_list)) # best layer for each roi in each hemisphere
fmri_lh_val_pred_best = 
fmri_rh_val_pred_best
fmri_lh_test_pred
fmri_rh_test_pred


for layer_num, model_layer in enumerate(train_nodes):
    print(model_layer)
    model = torch.hub.load('pytorch/vision:v0.10.0', 'alexnet')
    model.to(device) # send the model to the chosen device ('cpu' or 'cuda')
    model.eval() # set the model to evaluation mode, since you are not training it
    
    feature_extractor = create_feature_extractor(model, return_nodes=[model_layer])

    pca = fit_pca(feature_extractor, train_imgs_dataloader)

    features_train = extract_features(feature_extractor, train_imgs_dataloader, pca)
    features_val = extract_features(feature_extractor, val_imgs_dataloader, pca)
    features_test = extract_features(feature_extractor, test_imgs_dataloader, pca)

    del model, pca

    # Fit linear regressions on the training data
    reg_lh = LinearRegression().fit(features_train, lh_fmri_train)
    reg_rh = LinearRegression().fit(features_train, rh_fmri_train)
    # Use fitted linear regressions to predict the validation and test fMRI data
    lh_fmri_val_pred = reg_lh.predict(features_val)
    rh_fmri_val_pred = reg_rh.predict(features_val)

    # Empty correlation array of shape: (LH vertices)
    lh_correlation = np.zeros(lh_fmri_val_pred.shape[1])
    # Correlate each predicted LH vertex with the corresponding ground truth vertex
    for v in tqdm(range(lh_fmri_val_pred.shape[1])):
        lh_correlation[v] = corr(lh_fmri_val_pred[:,v], lh_fmri_val[:,v])[0]

    # Empty correlation array of shape: (RH vertices)
    rh_correlation = np.zeros(rh_fmri_val_pred.shape[1])
    # Correlate each predicted RH vertex with the corresponding ground truth vertex
    for v in tqdm(range(rh_fmri_val_pred.shape[1])):
        rh_correlation[v] = corr(rh_fmri_val_pred[:,v], rh_fmri_val[:,v])[0]

    # Select the correlation results vertices of each ROI
    roi_names = []
    lh_roi_correlation = []
    rh_roi_correlation = []
    for r1 in range(len(lh_challenge_rois)):
        for r2 in roi_name_maps[r1].items():
            if r2[0] != 0: # zeros indicate to vertices falling outside the ROI of interest
                roi_names.append(r2[1])
                lh_roi_idx = np.where(lh_challenge_rois[r1] == r2[0])[0]
                rh_roi_idx = np.where(rh_challenge_rois[r1] == r2[0])[0]
                lh_roi_correlation.append(lh_correlation[lh_roi_idx])
                rh_roi_correlation.append(rh_correlation[rh_roi_idx])
    roi_names.append('nsdgeneral')
    lh_roi_correlation.append(lh_correlation)
    rh_roi_correlation.append(rh_correlation)
    
    # correlations for layer for all roi's
    layer_correlations_lh[layer_num, :] = [np.median(lh_roi_correlation[r]) for r in range(len(lh_roi_correlation))]
    layer_correlations_rh[layer_num, :] = [np.median(rh_roi_correlation[r]) for r in range(len(rh_roi_correlation))]
    
#     layer_correlations_lh[np.isnan(layer_correlations_lh)]=-1
#     layer_correlations_rh[np.isnan(layer_correlations_rh)]=-1
    
#     # for each new layer, for all roi's, test if new correlations beat the old best one
#     for 
#     roi_model[0] = np.argmax(layer_correlations_lh, axis=0)
#     roi_model[1] = np.argmax(layer_correlations_rh, axis=0)

SyntaxError: invalid syntax (1344472444.py, line 6)

In [17]:
plt.figure(figsize=(18,6))
x = np.arange(len(train_nodes))
first = 1 # min is 1
last = 32 # max 32

width = 0.30
# plt.plot(x - width/2, feature_correlations_lh, width, label=roi_list)
lines = plt.plot(x, layer_correlations_lh[:,first-1:last], width,label=roi_list[first-1:last])
print(len(lines))
plt.xlim(left=min(x), right=max(x)+3.1)
# plt.ylim(bottom=0, top=1)
plt.xlabel('Layers')
plt.xticks(ticks=x, labels=train_nodes, rotation=60)
plt.ylabel('Median Pearson\'s $r$')
plt.legend(handles = lines[first-1:last],frameon=True, loc='lower right', ncol = 2)
plt.title("Correlations per layer by ROI");

NameError: name 'layer_correlations_lh' is not defined

<Figure size 1296x432 with 0 Axes>

In [None]:
# lh_fmri_test_pred = lh_fmri_test_pred.astype(np.float32)
# rh_fmri_test_pred = rh_fmri_test_pred.astype(np.float32)

In [None]:
np.save(os.path.join(args.subject_submission_dir, 'layer_correlations_lh.npy'), layer_correlations_lh)
np.save(os.path.join(args.subject_submission_dir, 'layer_correlations_rh.npy'), layer_correlations_rh)

In [None]:
layer_correlations_lh_load = np.load(os.path.join(args.subject_submission_dir, 'layer_correlations_lh.npy'))
layer_correlations_rh_load = np.load(os.path.join(args.subject_submission_dir, 'layer_correlations_rh.npy'))

In [None]:
# test whether correlations saved
print(np.sum(layer_correlations_lh-layer_correlations_lh_load))
print(np.sum(layer_correlations_rh-layer_correlations_rh_load))
# print(layer_correlations_lh)
print(layer_correlations_rh)

In [None]:
roi_model = np.zeros(2, len(roi_list)) # left and right hemisphere
layer_correlations_lh[np.isnan(layer_correlations_lh)]=-1
layer_correlations_rh[np.isnan(layer_correlations_rh)]=-1

# best layer for each roi in each hemisphere
roi_model[0] = np.argmax(layer_correlations_lh, axis=0)
roi_model[1] = np.argmax(layer_correlations_rh, axis=0)


    