# Hands-on Tutorial for Encoding Analysis (Predicting fMRI responses from DNN features)

This notebook provides a hands-on tutorial for encoding analysis, which predicts fMRI responses from DNN features.

## Environment setup

Here we're installing required packages for the tutorial.

!curl -O https://raw.githubusercontent.com/KamitaniLab/feature-encoding/main/requirements.txt
!pip install -r requirements.txt

In [None]:
import os
from itertools import product

import bdpy
from bdpy.dataform import Features, save_array
from bdpy.ml import ModelTraining
from fastl2lir import FastL2LiR
import numpy as np

## Data preparation

Here we download the data used in this tutorial: the fMRI dataset and the VGG19 features for the ImageNet training and test images.

In [None]:
!mkdir data
!curl -O https://raw.githubusercontent.com/KamitaniLab/feature-encoding/main/data/download.py
!curl -O https://raw.githubusercontent.com/KamitaniLab/feature-encoding/main/data/files.json
!python download.py fmri_deeprecon_fmriprep_hcpvc
!python download.pyfeatures_imagenet_training_vgg19_random5000
!python features_imagenet_test_vgg19_random5000
!mv fmri data/
!mv features data/
!ls -la data/

In [None]:
# NOTE(review): this cell deletes the ./data directory (including anything the
# download cell above moved into it) and replaces it with a symlink to ../data.
# It looks like a local-development shortcut for reusing an existing data
# directory -- confirm before running, and skip it if you rely on the files
# that were just downloaded.
!rm -rf data
!ln -s ../data
# List the training feature directory to check that the link resolves.
!ls -la data/features/ImageNetTraining/caffe/

In [None]:
# Data setting

# Which subject and which brain regions (ROIs) to model
subject = "sub-03"
rois = ['V1', 'V2', 'V3', 'V4', 'VentralVC']

# Which network and layers provide the features
network = "caffe/VGG19"
layers = ['fc6', 'fc7', 'fc8']

# Inputs: fMRI data files
training_fmri_path = f"./data/fmri/Deeprecon/{subject}_ImageNetTraining_fmriprep_volume_native_hcpvc.h5"
test_fmri_path = f"./data/fmri/Deeprecon/{subject}_ImageNetTest_fmriprep_volume_native_hcpvc.h5"

# Inputs: DNN feature stores
training_feature_path = f"./data/features/ImageNetTraining/{network}_random5000"
test_feature_path = f"./data/features/ImageNetTest/{network}_random5000"

# Outputs: trained encoding models and the fMRI responses they predict
encoding_model_path = f"./data/feature_encoders/handson/{network}_random5000"
encoded_fmri_path = f"./data/encoded_fmri/handson/{network}_random5000"

# Create the output directories up front; existing directories are left as they are.
for output_dir in (encoding_model_path, encoded_fmri_path):
    os.makedirs(output_dir, exist_ok=True)

## Training of encoding models

In [None]:
# Parameters of the encoding model
# Both values are handed to the FastL2LiR model through the `model_param`
# dictionary built in the training loop.

# L2 regularization parameter (passed as 'alpha')
alpha = 100

# Number of features to select (passed as 'n_feat')
num_features = 500

In [None]:
# Load training data (fMRI and features)
# `train_fmri` is opened from the training .h5 file and `train_features` from
# the training feature path, both defined in the data-setting cell.
# The training loop reads ROI data and stimulus labels from `train_fmri`,
# and layer features and labels from `train_features`.
train_fmri = bdpy.BData(training_fmri_path)
train_features = Features(training_feature_path)

In [None]:
# Train one encoding model for every (layer, ROI) pair and save it under
# <encoding_model_path>/<layer>/<subject>/<roi>.

# Stimulus labels of the fMRI samples do not depend on the layer or the ROI,
# so they are fetched once instead of on every iteration.
brain_labels = train_fmri.get_labels("stimulus_name")

for layer, roi in product(layers, rois):
    print("----------------------------------------")
    print(f"Encoding: {layer} features -> {roi} fMRI")

    # Output directory
    model_dir = os.path.join(encoding_model_path, layer, subject, roi)

    # Extract fMRI data in the ROI
    brain = train_fmri.select(f"hcp180_{roi}")
    print("The shape of fMRI data array: ", brain.shape)

    # Extract features
    feat = train_features.get(layer)
    feat_labels = train_features.labels
    print("The shape of feature data array: ", feat.shape)

    # Normalization statistics (per column). The feature statistics are taken
    # over the rows of the feature array as loaded, i.e. before alignment.
    brain_mean = np.mean(brain, axis=0)
    brain_norm = np.std(brain, axis=0)
    feat_mean = np.mean(feat, axis=0)
    feat_norm = np.std(feat, axis=0)

    # Align fMRI data and features: for each fMRI sample, find the row of the
    # feature array that carries the same stimulus label.
    # A dictionary lookup replaces a full scan of the labels per sample and
    # makes an unmatched label fail with an explicit message.
    # Assumes feature labels are unique -- TODO confirm.
    feat_row_of = {label: row for row, label in enumerate(feat_labels)}
    missing_labels = sorted({bl for bl in brain_labels if bl not in feat_row_of})
    if missing_labels:
        raise ValueError(
            f"{len(missing_labels)} fMRI stimulus label(s) have no matching "
            f"'{layer}' feature, e.g. {missing_labels[:5]}"
        )
    feat_index = np.array([feat_row_of[bl] for bl in brain_labels], dtype=np.intp)

    feat = feat[feat_index]
    print("The shape of aligned feature data array: ", feat.shape)

    # Setup model
    model = FastL2LiR()
    model_param = {
        'alpha':  alpha,
        'n_feat': num_features,
        'dtype':  np.float32
    }

    # Setup model learner
    # X = aligned features (predictors), Y = fMRI responses (targets).
    train = ModelTraining(model, feat, brain)
    train.model_parameters = model_param

    train.X_normalize = {'mean': feat_mean,  'std': feat_norm}
    train.Y_normalize = {'mean': brain_mean, 'std': brain_norm}
    # The features were already reordered to match the fMRI samples above, so
    # no sort index is passed to the learner: applying `feat_index` a second
    # time to the aligned array would select the wrong rows.

    train.dtype = np.float32
    train.save_format = 'bdmodel'
    train.save_path = model_dir

    train.run()


## Prediction of fMRI responses

In [None]:
# Load test data (fMRI and features)
# `test_fmri` is opened from the test .h5 file and `test_features` from the
# test feature path, both defined in the data-setting cell.
test_fmri = bdpy.BData(test_fmri_path)
test_features = Features(test_feature_path)

## Evaluation

## Visualization