In [25]:
import os
import pandas as pd
import numpy as np
from glob import glob
import nibabel as nib
from nilearn.plotting import plot_img, plot_stat_map, view_img, plot_prob_atlas
from nilearn.regions import connected_label_regions
from nilearn.glm.first_level.hemodynamic_models import spm_hrf
from nilearn.image import concat_imgs, mean_img, index_img
from nilearn.glm.first_level import FirstLevelModel
from nilearn.glm import threshold_stats_img
import matplotlib.pyplot as plt
from nilearn.plotting import plot_design_matrix
from nilearn.interfaces.fmriprep import load_confounds_strategy
from nilearn.plotting import plot_roi
from nilearn.maskers import NiftiMapsMasker, NiftiSpheresMasker
from scipy.interpolate import interp1d
import seaborn as sns


### To-do list: 
- Smooth data 4mm kernel
- See if I need to trim the offset of each func file.
- Data appears to have 378 vol points and 756 time points. However, some trailers start appearing untile sec 777.
- Maybe could be a bit more specific when getting onsets idk it would make a difference though.

# Homemade functions

In [26]:
# Define a function to invert scores if needed. 
def invert_score(score):
    if(score == 1):  
        return 4
    elif(score == 2):  
        return 3
    elif(score == 3):  
        return 2
    elif(score == 4):  
        return 1
    else: 
        return "Something went wrong here!"
      

In [27]:
def process_events_data(run_dataframe):

    proccesed_events_df = pd.DataFrame(columns={"Trailer", "Type", "Onset", "Duration", "Offset", "W_score", "A_score", "F_score"}) 

    # Initial fixation 12 sec (TR=6).
    in_fix = 12

    # Time it take subjects to complete questionnaire 12 sec (TR=6). 
    questionnaire_duration = 12

    # All trailers last 30 sec (TR=15). 
    trailer_duration = 30

    # Initialize this variable, though it will change through each iteration of the loop.
    trailer_onset = 0

    # Run a for loop for each row in the df. 
    for id in range(run_dataframe.shape[0]):

        # Get trailer label and separate them accroding to their type. 
        trailer_name = run_dataframe["label"][id]
        trailer_type = "Horror" if "h" in run_dataframe["label"][id] else "Comedy"

        # This onsets don't work, so I need to re-calculate them
        traile_iti = run_dataframe["trial_ITI"][id]
        
        # For the first run add the initial fixation time to the calculation of the first trailer onset. 
        # After the first run, calculate onset by adding previous traile onset, questionnaire duration, and trial iti.
        if (id == 0):
            trailer_onset += in_fix
        else:
            trailer_onset += trailer_duration + questionnaire_duration + traile_iti

        # Calculate trailer onset. 
        trailer_offset = trailer_onset + 30 

        """ 
        For the questionnaire scores, as I understood it. If they were not inverted (["scale_flip"] == 0), then 
        the lower the score the stronger the response. If they were inverted (["scale_flip"] == 1), the higher the 
        score the stronger the response.
        """
        trailer_watch_score = run_dataframe["exp_Watch.keys"][id].astype(int)
        trailer_arousal_score = run_dataframe["exp_Arousal.keys"][id].astype(int)
        trailer_feel_score = run_dataframe["exp_Feel.keys"][id].astype(int)

        # Check if scaled was flipped and put scores on the same scale. 
        # For me, the most intuitive is that the higher the score, the stronger the response. 
        if(run_dataframe["scale_flip"][id] == 0):
            trailer_watch_score = run_dataframe["exp_Watch.keys"].apply(invert_score)[id].astype(int)
            trailer_arousal_score = run_dataframe["exp_Arousal.keys"].apply(invert_score)[id].astype(int)
            trailer_feel_score = run_dataframe["exp_Feel.keys"].apply(invert_score)[id].astype(int)
        
        # Place processed data on list, add list to new dataframe, and concat to main dataframe. 
        current_row_data = [[trailer_name, trailer_type, trailer_onset, trailer_duration, trailer_offset, trailer_watch_score, trailer_arousal_score, trailer_feel_score]]
        current_row = pd.DataFrame(data=current_row_data, columns=["Trailer", "Type", "Onset", "Duration", "Offset", "W_score", "A_score", "F_score"]) 
        proccesed_events_df = pd.concat([proccesed_events_df, current_row], ignore_index=True)
        proccesed_events_df = proccesed_events_df[["Trailer", "Type", "Onset", "Offset", "Duration", "W_score", "A_score", "F_score"]]

    return proccesed_events_df

In [28]:
# Open a datasets directory. 
fd = os.open("/Users/luisalvarez/Documents/Datasets", os.O_RDONLY)

# Use os.fchdir() method to change the current dir/folder.
os.fchdir(fd)

# Safe check- Print current working directory
print("Current working dir : %s" % os.getcwd())

Current working dir : /Users/luisalvarez/Documents/Datasets


## Load func files for Sub-03 run 1. 

In [29]:
# Load files for sub 001  
sub03_run1_func_path = "MovieData_BIDS_preproc/sub-03/func/sub-03_task-movie_run-01_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz"
sub03_run2_func_path = "MovieData_BIDS_preproc/sub-03/func/sub-03_task-movie_run-02_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz"

sub03_run1_mask_path = "MovieData_BIDS_preproc/sub-03/func/sub-03_task-movie_run-01_space-MNI152NLin2009cAsym_desc-brain_mask.nii.gz"
sub03_run2_mask_path = "MovieData_BIDS_preproc/sub-03/func/sub-03_task-movie_run-02_space-MNI152NLin2009cAsym_desc-brain_mask.nii.gz"

sub03_run1_events_path = 'MovieData_BIDS_raw/sub-03/func/sub-03_task-movie_run-01_events.csv'
sub03_run2_events_path = 'MovieData_BIDS_raw/sub-03/func/sub-03_task-movie_run-02_events.csv'

sub03_run1_confounds_path = "MovieData_BIDS_preproc/sub-03/func/sub-03_task-movie_run-01_desc-confounds_timeseries.tsv"
sub03_run2_confounds_path = "MovieData_BIDS_preproc/sub-03/func/sub-03_task-movie_run-01_desc-confounds_timeseries.tsv"


In [30]:
# Calculate mean image to use as background image.
fmri_img = concat_imgs(sub03_run1_func_path)
mean_img = mean_img(fmri_img)

func_file = nib.load(sub03_run1_func_path)
func_data = func_file.get_fdata()

# Calculate relevant parameters for GLM and ROI time-course analysis.
n_vols = func_data.shape[3]
TR = 2
n_timepoints = n_vols*TR

print("Sub-03 run 1 contains %s" % str(n_timepoints) + " time points.")
print("Sub-03 run 1 contains %s" % str(n_vols) + " vols.")

Sub-03 run 1 contains 756 time points.
Sub-03 run 1 contains 378 vols.


# Process behavioral data. 

In [31]:
sub03_run1_events_df = pd.read_csv(sub03_run1_events_path, index_col=0)
sub03_run2_events_df = pd.read_csv(sub03_run2_events_path, index_col=0)

sub03_run1_events_df.head(20)

Unnamed: 0,participant,expName,label,released,trial_ITI,scale_flip,sound_trailer.started,sound_trailer.stopped,exp_WatchQ.started,exp_WatchQ.stopped,exp_Watch.keys,exp_FeelQ.started,exp_FeelQ.stopped,exp_Feel.keys,exp_Arousal.started,exp_Arousal.stopped,exp_Arousal.keys
0,ah231215,fmritrailer_run2,rh6,1.0,6.0,0.0,117.635831,,147.735315,151.746823,2.0,151.665969,155.742751,3.0,155.69463,,2.0
1,ah231215,fmritrailer_run2,uh1,0.0,6.0,1.0,165.61954,195.674011805997,195.674012,199.70716,4.0,199.671155,203.766015,3.0,203.715461,,2.0
2,ah231215,fmritrailer_run2,rc2,1.0,6.0,1.0,213.670337,,243.700427,247.733517,4.0,247.669639,251.747882,3.0,251.700221,,4.0
3,ah231215,fmritrailer_run2,rh2,1.0,2.0,0.0,261.700535,291.7332398279977,291.73324,295.767502,3.0,295.728788,299.813911,3.0,299.749913,,2.0
4,ah231215,fmritrailer_run2,rh1,1.0,2.0,1.0,305.714593,335.7568738730042,335.756874,339.765825,3.0,339.743652,343.783202,3.0,343.783202,,2.0
5,ah231215,fmritrailer_run2,rc9,1.0,6.0,0.0,349.721753,,379.755897,383.787692,3.0,383.764349,387.780805,4.0,387.747459,,3.0
6,ah231215,fmritrailer_run2,uc4,0.0,2.0,0.0,397.749807,,427.767757,431.827877,3.0,431.78032,435.812748,4.0,435.746831,,3.0
7,ah231215,fmritrailer_run2,rh9,1.0,2.0,0.0,441.780005,471.7951010569959,471.795101,475.834535,2.0,475.809778,479.84119,2.0,479.80873,,2.0
8,ah231215,fmritrailer_run2,uh2,0.0,2.0,1.0,485.798951,515.8166230719944,515.796498,519.806642,4.0,519.806642,523.845952,4.0,523.819513,,3.0
9,ah231215,fmritrailer_run2,rc6,1.0,2.0,1.0,529.784136,559.8125010149961,559.795409,563.812359,2.0,563.812359,567.863882,1.0,567.830626,,2.0


In [32]:
sub03_run1_p_events = process_events_data(sub03_run1_events_df)
sub03_run2_p_events = process_events_data(sub03_run2_events_df)

sub03_run1_p_events.head(16)

Unnamed: 0,Trailer,Type,Onset,Offset,Duration,W_score,A_score,F_score
0,rh6,Horror,12.0,42.0,30,3,3,2
1,uh1,Horror,60.0,90.0,30,4,2,3
2,rc2,Comedy,108.0,138.0,30,4,4,3
3,rh2,Horror,152.0,182.0,30,2,3,2
4,rh1,Horror,196.0,226.0,30,3,2,3
5,rc9,Comedy,244.0,274.0,30,2,2,1
6,uc4,Comedy,288.0,318.0,30,2,2,1
7,rh9,Horror,332.0,362.0,30,3,3,3
8,uh2,Horror,376.0,406.0,30,4,3,4
9,rc6,Comedy,420.0,450.0,30,2,2,1


In [33]:
sub03_run2_p_events.head(16)

Unnamed: 0,Trailer,Type,Onset,Offset,Duration,W_score,A_score,F_score
0,rc3,Comedy,12.0,42.0,30,1,2,1
1,rh12,Horror,60.0,90.0,30,4,2,4
2,rc1,Comedy,108.0,138.0,30,2,3,1
3,rh7,Horror,152.0,182.0,30,4,2,4
4,rc8,Comedy,200.0,230.0,30,3,4,2
5,uc3,Comedy,244.0,274.0,30,1,2,1
6,rh10,Horror,288.0,318.0,30,4,2,4
7,rc4,Comedy,332.0,362.0,30,1,1,1
8,rh4,Horror,380.0,410.0,30,4,2,4
9,uc2,Comedy,428.0,458.0,30,1,3,2


# Slice func files to each trailer onset-duration. 

In [34]:
# Down-sample predictors to get vol onsets. 

# Create array from 0 to 'n_timepoints' in steps of 1.
time_scale = np.arange(0, n_timepoints, 1)  
print("Time_scale scale has %i datapoints" % time_scale.size)

# Create array from 0 to 'n_timepoints' in steps of 2.
vol_scale = np.arange(0, n_timepoints, TR)  
print("Vol scale has %i datapoints" % vol_scale.size)

# Get the labels of each trailer for each run. 
run1_trailer_labels = sub03_run1_p_events["Trailer"].tolist()
run2_trailer_labels = sub03_run2_p_events["Trailer"].tolist()

# Create dictionary variable to store arrays with onset values for each trailer. 
run1_onsets = {}
run2_onsets = {}

# Create a dictionary with all the onsets for each trailer in each run. 
for id in range(len(run1_trailer_labels)):

    #print("Running trailer: " + str(id))
    # Create array of zeros.
    run1_trailer_onsets = np.zeros(n_timepoints)
    run2_trailer_onsets = np.zeros(n_timepoints)

    # Get onset time. 
    run1_current_trailer_onset = sub03_run1_p_events["Onset"][id]
    run2_current_trailer_onset = sub03_run2_p_events["Onset"][id]

    # Assign 1 to such onset all the way til the end of the trailer (30 sec) in the array of zeros.
    run1_trailer_onsets[int(run1_current_trailer_onset):int(run1_current_trailer_onset)+30] = 1
    run2_trailer_onsets[int(run2_current_trailer_onset):int(run2_current_trailer_onset)+30] = 1

    # Create resampler objects for each trailer/run of reward.
    run1_resampler = interp1d(time_scale, run1_trailer_onsets)
    run2_resampler = interp1d(time_scale, run2_trailer_onsets)

    # Create downsampled arrays for each trailer. 
    # Note this vol arrays are half the length than the time arrays.
    run1_trailer_vol_onsets = run1_resampler(vol_scale)
    run2_trailer_vol_onsets = run2_resampler(vol_scale)

    # Append/store the downsampled volumes arrays to each dictionary.
    # I'm doing it this way, so the code is more interpretable
    run1_onsets[run1_trailer_labels[id]] = run1_trailer_vol_onsets
    run2_onsets[run2_trailer_labels[id]] = run2_trailer_vol_onsets


Time_scale scale has 756 datapoints
Vol scale has 378 datapoints


# Load confounds

In [62]:
# Load confounds file for sub001 run 1. 
sub03_run1_confounds_df = pd.read_csv(sub03_run1_confounds_path, sep='\t')
sub03_run2_confounds_df = pd.read_csv(sub03_run2_confounds_path, sep='\t')
default_confounds = ['trans_y', 'trans_x', 'trans_z', 'rot_y', 'rot_x', 'rot_z',
                    "tcompcor", "white_matter", "csf", "framewise_displacement", "dvars"]

# Add confound columns if they contain 'motion' in the title. 
sub03_run1_motion_confounds = [i for i in sub03_run1_confounds_df.columns if "motion" in i] 
sub03_run2_motion_confounds = [i for i in sub03_run2_confounds_df.columns if "motion" in i] 
sub03_run1_filtered_confounds_df = sub03_run1_confounds_df[default_confounds + sub03_run1_motion_confounds]
sub03_run2_filtered_confounds_df = sub03_run2_confounds_df[default_confounds + sub03_run2_motion_confounds]

# Change NaNs to 0s. 
sub03_run1_filtered_confounds_df = sub03_run1_filtered_confounds_df.fillna(0) 
sub03_run2_filtered_confounds_df = sub03_run2_filtered_confounds_df.fillna(0) 
sub03_run1_filtered_confounds_df.head()

Unnamed: 0,trans_y,trans_x,trans_z,rot_y,rot_x,rot_z,tcompcor,white_matter,csf,framewise_displacement,dvars,motion_outlier00,motion_outlier01
0,-9e-06,-0.010757,-0.080162,0.000294,-0.000413,-0.000501,5201.536099,3992.553458,6695.666681,0.0,0.0,0.0,0.0
1,-0.031245,4.2e-05,0.228605,-0.0005,-0.000515,0.000698,3637.053525,3510.484566,3149.107057,0.45556,339.184845,1.0,0.0
2,-0.060796,-0.006199,0.328923,-0.000703,-0.000511,0.000579,3567.5625,3497.475596,3185.547266,0.152368,29.457405,0.0,1.0
3,-0.046815,-0.016876,0.319505,-0.000638,-0.000511,0.000822,3548.010671,3494.525344,3126.151065,0.0494,13.949973,0.0,0.0
4,-0.060792,-0.015862,0.35764,-0.000449,-0.000511,0.000661,3485.162203,3491.878433,3095.246374,0.070642,14.441262,0.0,0.0


# Use Sphere Mask on NAcc. 

In [63]:
# Init the sphere masking object. 
# Note, I am using to seeds one for right and one for left NAcc. 
# The sphere has a radius of 8 mm. 
# I'm filtering out both high an low frequencies. 
masker_sNAcc_r1 = NiftiSpheresMasker(
    seeds=[(10, 12, -2), (-10, 12, -2)],  # right, left
    radius=8, 
    mask_img=sub03_run1_mask_path,
    standardize="psc", 
    t_r=2,
    standardize_confounds=True,
    high_pass=0.011,
    low_pass=0.1)

masker_sNAcc_r2 = NiftiSpheresMasker(
    seeds=[(10, 12, -2), (-10, 12, -2)],  # right, left
    radius=8, 
    mask_img=sub03_run2_mask_path,
    standardize="psc", 
    t_r=2,
    standardize_confounds=True,
    high_pass=0.011,
    low_pass=0.1)

# Mask the epi data and get a time series for the ROI. 
# Note this is similar to fitting the GLM, but without the event files.
sub03_r1_bNAcc = masker_sNAcc_r1.fit_transform(sub03_run1_func_path, confounds=sub03_run1_filtered_confounds_df)
sub03_r2_bNAcc = masker_sNAcc_r2.fit_transform(sub03_run2_func_path, confounds=sub03_run2_filtered_confounds_df)
print("Sub03 run 1 bilateral NAcc timecourse has the following shape: %s" % str(sub03_r1_bNAcc.shape))
print("Sub03 run 2 bilateral NAcc timecourse has the following shape: %s" % str(sub03_r2_bNAcc.shape))

Sub03 run 1 bilateral NAcc timecourse has the following shape: (378, 2)
Sub03 run 2 bilateral NAcc timecourse has the following shape: (378, 2)


# Derive ROI time courses for each Movie trailer. 

In [72]:
# Extract segments based stimulus onsets. 

# Create dictionary variable to store arrays with time series arrays for each trailer. 
run1_timeseries = {}
run2_timeseries = {}

# Get the trailers presented in each run. 
r1_keys = list(run1_onsets.keys())
r2_keys = list(run2_onsets.keys())
print("Run 1 presented the following trailers: %s" % str(r1_keys))
print("Run 2 presented the following trailers: %s" % str(r2_keys))

for id in range(15):

    run1_timeseries[r1_keys[id]] = {
        "Bilateral_NAcc": np.mean(sub03_r1_bNAcc[run1_onsets[r1_keys[id]].astype(bool)][:, :], axis=1),
        "Left_NAcc": sub03_r1_bNAcc[run1_onsets[r1_keys[id]].astype(bool)][:, 1],
        "Right_NAcc": sub03_r1_bNAcc[run1_onsets[r1_keys[id]].astype(bool)][:, 0]}
    
    run2_timeseries[r1_keys[id]] = {
        "Bilateral_NAcc": np.mean(sub03_r2_bNAcc[run2_onsets[r2_keys[id]].astype(bool)][:, :], axis=1),
        "Left_NAcc": sub03_r2_bNAcc[run2_onsets[r2_keys[id]].astype(bool)][:, 1],
        "Right_NAcc": sub03_r2_bNAcc[run2_onsets[r2_keys[id]].astype(bool)][:, 0]}





Run 1 presented the following trailers: ['rh6', 'uh1', 'rc2', 'rh2', 'rh1', 'rc9', 'uc4', 'rh9', 'uh2', 'rc6', 'rh3', 'rc11', 'uc1', 'rh11', 'rc12', 'rc5']
Run 2 presented the following trailers: ['rc3', 'rh12', 'rc1', 'rh7', 'rc8', 'uc3', 'rh10', 'rc4', 'rh4', 'uc2', 'rh5', 'rh8', 'rc10', 'uh3', 'rc7', 'uh4']


In [73]:
run1_timeseries

{'rh6': {'Bilateral_NAcc': array([ 43.18390565,  35.71279101,  -0.94753737, -49.64385272,
         -77.4310442 , -67.27291854, -36.03378564, -15.06362804,
         -16.3639234 , -26.34318261, -30.39523517, -30.40589068,
         -33.99911612, -38.63879032, -35.02456007]),
  'Left_NAcc': array([ 39.95840422,  27.63716634, -15.78293861, -64.86199989,
         -82.03283875, -57.79957288, -21.19959586,  -6.5960038 ,
         -17.82154518, -31.96385886, -31.97416109, -23.5634109 ,
         -20.42142464, -25.01961271, -28.72135166]),
  'Right_NAcc': array([ 46.40940709,  43.78841569,  13.88786386, -34.42570555,
         -72.82924966, -76.7462642 , -50.86797541, -23.53125228,
         -14.90630163, -20.72250635, -28.81630925, -37.24837045,
         -47.5768076 , -52.25796794, -41.32776849])},
 'uh1': {'Bilateral_NAcc': array([-110.65331963,  -10.15732053,   44.04594594,   17.91120691,
          -42.775419  ,  -69.62750483,  -41.87131151,    4.04341498,
           28.49197611,   36.85264281,  