In [4]:
from torchvision.models.feature_extraction import create_feature_extractor
from torchvision.models import resnet50, ResNet50_Weights

from torchvision.ops import FeaturePyramidNetwork

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset

from tqdm import tqdm

import numpy as np

import os

import matplotlib.pyplot as plt

from utils.dataset import Dataset

from scipy.stats import pearsonr as corr

from sklearn.decomposition import IncrementalPCA
from sklearn.linear_model import SGDRegressor


In [5]:
batch_size = 100

In [6]:
data = Dataset('../../data/subj08')
#test_data = Dataset('../../data/subj08', test=True)

train_set, val_set = torch.utils.data.random_split(data, [0.8, 0.2])

train_loader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True)
#test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False)

val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=True)

Loading dataset sample names...
Training images: 8779
Test images: 395

LH training fMRI data shape:
(8779, 18981)
(Training stimulus images × LH vertices)

RH training fMRI data shape:
(8779, 20530)
(Training stimulus images × RH vertices)


In [7]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

alexnet = torch.hub.load('pytorch/vision:v0.10.0', 'alexnet')
alexnet.to(device) # send the alexnet to the chosen device ('cpu' or 'cuda')
alexnet.eval() # set the alexnet to evaluation mode, since you are not training it

model_layer = "features.2" #@param ["features.2", "features.5", "features.7", "features.9", "features.12", "classifier.2", "classifier.5", "classifier.6"] {allow-input: true}
feature_extractor = create_feature_extractor(alexnet, return_nodes=[model_layer])
feature_extractor.to(device)
feature_extractor.eval()

Using cache found in /home/ubuntu/.cache/torch/hub/pytorch_vision_v0.10.0


AlexNet(
  (features): Module(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
)

In [8]:
def fit_pca(feature_extractor, dataloader):
    # Define PCA parameters
    pca = IncrementalPCA(n_components=100, batch_size=batch_size)

    # Fit PCA to batch
    for _, d in tqdm(enumerate(dataloader), total=len(dataloader)):
        if _ == len(dataloader)-1:
            break
        # Extract features
        ft = feature_extractor(d[0].to(device))
        # Flatten the features
        ft = torch.hstack([torch.flatten(l, start_dim=1) for l in ft.values()])
        # Fit PCA to batch
        pca.partial_fit(ft.detach().cpu().numpy())

    return pca

In [9]:
pca = fit_pca(feature_extractor, train_loader)

 65%|██████▍   | 57/88 [02:30<01:21,  2.63s/it]

0


 99%|█████████▉| 87/88 [03:50<00:02,  2.65s/it]


In [10]:
def extract_features(feature_extractor, dataloader, pca, right=False, test=False):
    fmri = []
    features = []
    for _, d in tqdm(enumerate(dataloader), total=len(dataloader)):
        # Extract features
        if test == False:
            ft = feature_extractor(d[0].to(device))
            if right == False:
                fmri += [d[1].cpu().detach().numpy()]
            else:
                fmri += [d[2].cpu().detach().numpy()]
        else:
            ft = feature_extractor(d.to(device))
        # Flatten the features
        ft = torch.hstack([torch.flatten(l, start_dim=1) for l in ft.values()])
        # Apply PCA transform
        ft = pca.transform(ft.cpu().detach().numpy())
        features += [ft]
    if test == False:
        return (np.vstack(features), np.vstack(fmri))
    return np.vstack(features)

In [11]:
right = False

features_train, labels_train = extract_features(feature_extractor, train_loader, pca, right=right)
features_val, labels_val = extract_features(feature_extractor, val_loader, pca, right=right,)

 34%|███▍      | 30/88 [00:30<00:58,  1.01s/it]

0


100%|██████████| 88/88 [01:30<00:00,  1.03s/it]
 61%|██████    | 11/18 [00:13<00:08,  1.21s/it]

0


100%|██████████| 18/18 [00:21<00:00,  1.17s/it]


In [12]:
reg = SGDRegressor().fit(features_train, labels_train)
fmri_test_pred = reg.predict(features_val)

ValueError: y should be a 1d array, got an array of shape (8779, 18981) instead.

In [10]:
print(fmri_test_pred.shape)

(1755, 18981)


In [11]:
from scipy.stats import pearsonr as corr

# Empty correlation array of shape: (LH vertices)
lh_correlation = np.zeros(fmri_test_pred.shape[1])
# Correlate each predicted LH vertex with the corresponding ground truth vertex
for v in tqdm(range(fmri_test_pred.shape[1])):
    lh_correlation[v] = corr(fmri_test_pred[:,v], labels_val[:,v])[0]

score = np.median(lh_correlation) * 100
print(score)

100%|██████████| 18981/18981 [00:03<00:00, 5427.94it/s]

17.809155036188315



