# Searchlight Analysis
Searchlight Analysis is used to sequentially analyze small groups of voxels in the brain in order to identify regions of interest. The technique is designed in the following steps:

1. Define a sphere of voxels around a seed voxel
2. Extract the time series from each voxel in the sphere
3. Concatenate the time series into a feature vector
4. Train a classifier on the feature vector and the labels
5. Use the classifier to predict the labels of the seed voxel
6. Repeat steps 1-5 for all seed voxels
7. Aggregate the results across all seed voxels to form a statistical map


In [8]:
# Load dependencies
from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification
from sklearn.pipeline import Pipeline
import numpy as np
import nibabel as nib
from nilearn import image as nli

### Load and initialize data

In [14]:
# Set up data
clean_data_audio = nib.load('../results/audio/cleaned_data_audio.nii.gz')
clean_data_visual = nib.load('../results/visual/cleaned_data_visual.nii.gz')

# Resample visual data to match audio data
visual_resampled = nli.resample_img(
    clean_data_visual,
    target_affine = clean_data_audio.affine,
    target_shape = clean_data_audio.shape[:3],
    interpolation = 'linear' 
)
print("visual sample data before truncation: ", visual_resampled.shape)
print("audio sample data before truncation: ", clean_data_audio.shape)

# Convert 4D fMRI to a 2D array (samples per timepoint x features)
min_timepoints = min(clean_data_audio.shape[-1], visual_resampled.shape[-1]) # get min z axis
print("min timepoints: ", min_timepoints)

audio_data = clean_data_audio.get_fdata()[...,:min_timepoints] # set length to min z axis
visual_data = visual_resampled.get_fdata()[...,:min_timepoints]

print("audio sample data after truncation: ", audio_data.shape)
print("visual sample data after truncation: ", visual_data.shape)

visual sample data before truncation:  (120, 120, 28, 156)
audio sample data before truncation:  (120, 120, 28, 200)
min timepoints:  156
audio sample data after truncation:  (120, 120, 28, 156)
visual sample data after truncation:  (120, 120, 28, 156)


In [11]:
# Reshape data: (x, y, z, time) -> (time, x*y*z)
X_audio = audio_data.reshape(audio_data.shape[-1], -1)
X_visual = visual_data.reshape(visual_data.shape[-1], -1)

# Create labels (0 for audio, 1 for visual)
y_audio = np.zeros(X_audio.shape[0])
y_visual = np.ones(X_visual.shape[0])

# combine datasets to create a single feature matrix and labels
X = np.vstack((X_audio, X_visual))
y = np.vstack((y_audio, y_visual))

# test print
print(X.shape)
print(y.shape)


ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 200 and the array at index 1 has size 156

In [1]:
# Set up Linear SVM classifier and train the new feature matrix


# Set up model pipeline
pipeline = make_pipeline(StandardScaler(), LinearSVC( random_state = 42, tol = 1e-4, C = 1.0, max_iter = 2000))
pipeline = Pipeline(steps = [('standardscaler', StandardScaler()),
                ('linearsvc', LinearSVC(random_state=0, tol=1e-05))])

# set up features
n_voxels = 1000
X, y = make_classification(n_features=n_voxels, random_state=42)

# Train model
pipeline.fit(X, y)

# Test print the model score
print(pipeline.score(X, y))

1.0
