# Searchlight Analysis
Searchlight analysis sequentially analyzes small groups of neighbouring voxels across the brain in order to identify regions of interest. The technique proceeds in the following steps:

1. Define a sphere of voxels around a seed voxel
2. Extract the time series from each voxel in the sphere
3. Concatenate the time series into a feature vector
4. Train a classifier on the feature vector and the labels
5. Use the classifier to predict the labels (ideally on held-out data) and assign the resulting accuracy to the seed voxel
6. Repeat steps 1-5 for all seed voxels
7. Aggregate the results across all seed voxels to form a statistical map


In [7]:
# Load dependencies
from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification
from sklearn.pipeline import Pipeline
import numpy as np
import nibabel as nib
from nilearn import image as nli

### Load and initialize data

In [9]:
def prep_data_training(clean_data_audio, clean_data_visual):
    """
    Put the visual image on the audio image's voxel grid and trim both to the
    same number of timepoints.

    Both arguments are already-loaded image objects; this function reads
    ``.affine``, ``.shape`` and ``.get_fdata()`` from them and hands the
    visual image to ``nli.resample_img``.

    Returns a tuple ``(audio_data, visual_data)`` of arrays whose last axis
    has been cut to the shorter of the two runs.
    """

    # Use the audio image as the spatial reference (first three dimensions).
    resampled_visual = nli.resample_img(
        clean_data_visual,
        target_affine=clean_data_audio.affine,
        target_shape=clean_data_audio.shape[:3],
        interpolation='linear',
    )
    print("visual sample data before truncation: ", resampled_visual.shape)
    print("audio sample data before truncation: ", clean_data_audio.shape)

    # The last axis is time; keep only as many volumes as the shorter run has
    # so that both conditions contribute the same number of samples.
    n_audio = clean_data_audio.shape[-1]
    n_visual = resampled_visual.shape[-1]
    n_keep = min(n_audio, n_visual)
    print("min timepoints: ", n_keep)

    audio_volume = clean_data_audio.get_fdata()
    visual_volume = resampled_visual.get_fdata()
    audio_trimmed = audio_volume[..., :n_keep]
    visual_trimmed = visual_volume[..., :n_keep]

    print("audio sample data after truncation: ", audio_trimmed.shape)
    print("visual sample data after truncation: ", visual_trimmed.shape)

    return audio_trimmed, visual_trimmed


In [10]:
# Reshape data: (x, y, z, time) -> (time, x*y*z)
def reshape_data(audio_data, visual_data):
    """
    Build the feature matrix and label vector used for training.

    Each input is an array whose last axis is time, e.g. (x, y, z, time).
    Every timepoint becomes one sample (row) and every voxel one feature
    (column), so each input contributes a (time, x*y*z) matrix.

    Returns:
        X: audio rows stacked on top of visual rows.
        y: labels aligned with the rows of X (0 for audio, 1 for visual).
    """
    # A bare reshape(time, -1) would only re-chunk the flat buffer and mix
    # voxels with timepoints inside a row. The time axis has to be moved to
    # the front first so that row t really is the volume at timepoint t.
    X_audio = np.moveaxis(audio_data, -1, 0).reshape(audio_data.shape[-1], -1)
    X_visual = np.moveaxis(visual_data, -1, 0).reshape(visual_data.shape[-1], -1)

    # Create labels (0 for audio, 1 for visual)
    y_audio = np.zeros(X_audio.shape[0])
    y_visual = np.ones(X_visual.shape[0])

    # Combine both conditions into a single feature matrix and label vector
    X = np.vstack((X_audio, X_visual))
    y = np.concatenate((y_audio, y_visual))

    # test print
    print("Feature matrix shape: ", X.shape)
    print("Labels shape: ", y.shape)

    return X, y


### Set up Linear SVM classifier and train the new feature matrix

In [11]:
# Set up model pipeline
def train_model(X, y):
    """
    Fit a standardise-then-linear-SVM pipeline.

    X is the feature matrix and y the matching labels. The score printed
    here is computed on the same X and y that were used for fitting, so it
    is a training score, not a held-out estimate.

    Returns the fitted pipeline.
    """
    scaler = StandardScaler()
    classifier = LinearSVC(random_state=0, tol=1e-05)
    model = Pipeline(steps=[
        ('standardscaler', scaler),
        ('linearsvc', classifier),
    ])

    # Fit the scaler and the classifier on the full feature matrix
    model.fit(X, y)

    # Test print: accuracy on the data the model was just trained on
    training_score = model.score(X, y)
    print("Model score: ", training_score)

    return model

def evaluate_model(pipeline, X, y):
    """
    Print the pipeline's score on (X, y) and whether it exceeds 0.5.

    pipeline must expose ``score(X, y)``; X is a feature matrix and y the
    matching labels. The 0.5 threshold is the chance level for two balanced
    classes.

    Returns the pipeline unchanged.
    """
    # Score once and reuse it: every score() call runs a full prediction
    # pass over X.
    score = pipeline.score(X, y)

    print(score)
    if score > 0.5:
        print("Model is performing better than chance")
    else:
        # NOTE: this branch also covers a score of exactly 0.5.
        print("Model is performing worse than chance")

    return pipeline


### Set up searchlight analysis


In [12]:

def searchlight_analysis(
    audio_path='../results/audio/cleaned_data_audio.nii.gz',
    visual_path='../results/visual/cleaned_data_visual.nii.gz',
):
    """
    Load the cleaned audio and visual images, build the training set, and fit
    and score the classifier.

    audio_path and visual_path are the image files to load; the defaults are
    the locations this notebook has always used.

    NOTE: as written this fits a single classifier on every voxel of the
    volume at once (no per-sphere loop), and the evaluation reuses the
    training data, so the reported score is not a held-out accuracy.

    Returns the fitted pipeline.
    """
    # Set up data
    clean_data_audio = nib.load(audio_path)
    clean_data_visual = nib.load(visual_path)

    # Preprocess data
    audio_data, visual_data = prep_data_training(clean_data_audio, clean_data_visual)
    X, y = reshape_data(audio_data, visual_data)

    # Train model, then report its score on the same X and y
    pipeline = train_model(X, y)
    evaluate_model(pipeline, X, y)

    return pipeline

# Run the load -> preprocess -> train -> evaluate workflow and show the fitted pipeline
pipeline = searchlight_analysis()
print(pipeline)

visual sample data before truncation:  (120, 120, 28, 156)
audio sample data before truncation:  (120, 120, 28, 200)
min timepoints:  156
audio sample data after truncation:  (120, 120, 28, 156)
visual sample data after truncation:  (120, 120, 28, 156)
Feature matrix shape:  (312, 403200)
Labels shape:  (312,)
Model score:  1.0
1.0
Model is performing better than chance
Pipeline(steps=[('standardscaler', StandardScaler()),
                ('linearsvc', LinearSVC(random_state=0, tol=1e-05))])
