In [1]:
import os
import pandas as pd
import numpy as np
from glob import glob
import nibabel as nib
from nilearn.plotting import plot_img, plot_stat_map, view_img, plot_prob_atlas
from nilearn.regions import connected_label_regions
from nilearn.glm.first_level.hemodynamic_models import spm_hrf
from nilearn.image import concat_imgs, mean_img, index_img
from nilearn.glm.first_level import FirstLevelModel
from nilearn.glm import threshold_stats_img
import matplotlib.pyplot as plt
from nilearn.plotting import plot_design_matrix
from nilearn.interfaces.fmriprep import load_confounds_strategy
from nilearn.plotting import plot_roi
from nilearn.maskers import NiftiMapsMasker, NiftiSpheresMasker
from scipy.interpolate import interp1d
import seaborn as sns

# Homemade functions


# Check files of sub-01


In [78]:
# Open a datasets directory. 
fd = os.open("/Users/luisalvarez/Documents/SpanLab/fmrimatch_psypy/data", os.O_RDONLY)


# Use os.fchdir() method to change the current dir/folder.
os.fchdir(fd)

# Safe check- Print current working directory
print("Current working dir : %s" % os.getcwd())

Current working dir : /Users/luisalvarez/Documents/SpanLab/fmrimatch_psypy/data


In [119]:
# Use glob to get all the csv files in the folder
path = os.getcwd()

csv_files = glob(os.path.join(path, "*.csv"))

# Remove elements for list that cotain 'test' and 'cam' sub-strings. 
#Also, we don't have fmri data for this subject "lm231126".
csv_files = [elem for elem in csv_files if not "lm231126" in elem]
csv_files = [elem for elem in csv_files if not "test" in elem]
csv_files = [elem for elem in csv_files if not "cam" in elem]

print(f"We got a total of {len(csv_files)} files.")

csv_files.sort()
csv_files[0:6]


We got a total of 74 files.


['/Users/luisalvarez/Documents/SpanLab/fmrimatch_psypy/data/ab231203_fmritrailer_run2_2023_Dec_03_1751.csv',
 '/Users/luisalvarez/Documents/SpanLab/fmrimatch_psypy/data/ab231203_fmritrailer_run2_2023_Dec_03_1806.csv',
 '/Users/luisalvarez/Documents/SpanLab/fmrimatch_psypy/data/ag231213_fmritrailer_run2_2023_Dec_13_1432.csv',
 '/Users/luisalvarez/Documents/SpanLab/fmrimatch_psypy/data/ag231213_fmritrailer_run2_2023_Dec_13_1446.csv',
 '/Users/luisalvarez/Documents/SpanLab/fmrimatch_psypy/data/ah231215_fmritrailer_run2_2023_Dec_15_1651.csv',
 '/Users/luisalvarez/Documents/SpanLab/fmrimatch_psypy/data/ah231215_fmritrailer_run2_2023_Dec_15_1708.csv']

In [77]:

# Load files for sub 001  
sub01_events_file1 = "/Users/luisalvarez/Documents/SpanLab/fmrimatch_psypy/data/ab231203_fmritrailer_run2_2023_Dec_03_1751.csv"
sub01_events_file2 = "/Users/luisalvarez/Documents/SpanLab/fmrimatch_psypy/data/ab231203_fmritrailer_run2_2023_Dec_03_1806.csv"



Notes on csv files
- Note, we can determine the run of each file by looking into the 'expName' column -> ['fmritrailer_run2', 'fmritrailer_run1']
- Some, "sound_trailer.stopped" have None values. What do we do with this trailers. Should we just take the duration and calculate offsets based on that?
- Participants are missing data for the "exp_Arousal.stopped" col.



In [121]:

def preprocess_events_files(sub_events_file1, sub_events_file2): 

    # Load each csv file. 
    sub_events_file1_df = pd.read_csv(sub_events_file1, sep='\,')
    sub_events_file2_df = pd.read_csv(sub_events_file2, sep='\,')

    # Determine to which run each file corresponds. 
    sub_events_file1_run = sub_events_file1_df["expName"][0][-4:]
    sub_events_file2_run = sub_events_file2_df["expName"][0][-4:]

    # Remove rows with redundant/irrelevant information.
    # Find the starting row [x==1]. 
    sub_events_file1_start = int(np.where(sub_events_file1_df['order'] == 1)[0])
    sub_events_file2_start = int(np.where(sub_events_file2_df['order'] == 1)[0])

    # Find the ending row [x==max]. 
    sub_events_file1_end = int(np.where(sub_events_file1_df['order'] == sub_events_file1_df['order'].max())[0])
    sub_events_file2_end = int(np.where(sub_events_file2_df['order'] == sub_events_file2_df['order'].max())[0])

    # Merge information into list. 
    sub_events_file1_drop_start = list(range(0, sub_events_file1_start)) # from to one row before relevant info.
    sub_events_file2_drop_start = list(range(0, sub_events_file2_start))

    sub_events_file1_drop_end = list(range(sub_events_file1_end+1, sub_events_file1_df.shape[0])) # from last row with irrelevant info til the end of the dataframe.
    sub_events_file2_drop_end = list(range(sub_events_file2_end+1, sub_events_file2_df.shape[0]))

    sub_events_file1_drop_list = sub_events_file1_drop_start + sub_events_file1_drop_end
    sub_events_file2_drop_list = sub_events_file2_drop_start + sub_events_file2_drop_end

    # Drop the rows from the df. 
    sub_events_file1_df.drop(sub_events_file1_drop_list, inplace=True)
    sub_events_file2_df.drop(sub_events_file2_drop_list, inplace=True)

    # Reset index to adjust for dropped rows. 
    sub_events_file1_df.reset_index(drop=True, inplace=True)
    sub_events_file2_df.reset_index(drop=True, inplace=True)
    
    # Drop columns with redundant/irrelevant information.
    # List columns that will be retained in the dataframe. 
    keep_cols = ["participant", "expName", "label", "released", "trial_ITI", "scale_flip", "sound_trailer.started", "sound_trailer.stopped",
             "exp_WatchQ.started", "exp_WatchQ.stopped", "exp_Watch.keys", "exp_FeelQ.started", "exp_FeelQ.stopped",
             "exp_Feel.keys", "exp_Arousal.started", "exp_Arousal.stopped", "exp_Arousal.keys"]

    sub_events_file1_df = sub_events_file1_df[keep_cols]
    sub_events_file2_df = sub_events_file2_df[keep_cols]

    # Determine which dataframe corresponds to which run. 
    if(sub_events_file1_run == "run1"):
        sub_events_run1_df = sub_events_file1_df
    elif (sub_events_file2_run == "run1"):
        sub_events_run1_df = sub_events_file2_df
    else:
        print("For this participant there is no run1 file.")

    if(sub_events_file1_run == "run2"):
        sub_events_run2_df = sub_events_file1_df
    elif (sub_events_file2_run == "run2"):
        sub_events_run2_df = sub_events_file2_df
    else:
        print("For this participant there is no run2 file.")

    return sub_events_run1_df, sub_events_run2_df


SyntaxError: 'return' outside function (90408937.py, line 62)

In [72]:
# Load path for sub01 file.
sub01_events_r1_file = "/Users/luisalvarez/Documents/SpanLab/fmrimatch_psypy/data/ab231203_fmritrailer_run2_2023_Dec_03_1751.csv"
sub01_events_r2_file = "/Users/luisalvarez/Documents/SpanLab/fmrimatch_psypy/data/ab231203_fmritrailer_run2_2023_Dec_03_1806.csv"

# Load participants file.
sub01_events_r1_df = pd.read_csv(sub01_events_r1_file, sep='\,')
sub01_events_r1_keys = sub01_events_r1_df.keys()

# Determine to which run this file corresponds. 
sub01_events_r1_run = sub01_events_r1_df["expName"][0][-4:]
print(f"sub01_events_r1_df shape: {sub01_events_r1_df.shape}")
print(f"sub01_events_r1_df actual run number: {sub01_events_r1_run}")

sub01_events_r1_df.head(40)

sub01_events_r1_df shape: (22, 120)
sub01_events_r1_df actual run number: run2


  sub01_events_r1_df = pd.read_csv(sub01_events_r1_file, sep='\,')


Unnamed: 0,order,vid_path,aud_path,genre,released,trial_ITI,scale_flip,label,Rating_trials.thisRepN,Rating_trials.thisTrialN,...,key_resp_end.keys,key_resp_end.started,key_resp_end.stopped,participant,session,date,expName,psychopyVersion,frameRate,Unnamed: 119
0,,,,,,,,,,,...,,,,ab231203,1,2023_Dec_03_1751,fmritrailer_run2,2021.2.3,60.242847,
1,,,,,,,,,,,...,,,,ab231203,1,2023_Dec_03_1751,fmritrailer_run2,2021.2.3,60.242847,
2,,,,,,,,,,,...,,,,ab231203,1,2023_Dec_03_1751,fmritrailer_run2,2021.2.3,60.242847,
3,,,,,,,,,,,...,,,,ab231203,1,2023_Dec_03_1751,fmritrailer_run2,2021.2.3,60.242847,
4,,,,,,,,,,,...,,,,ab231203,1,2023_Dec_03_1751,fmritrailer_run2,2021.2.3,60.242847,
5,1.0,videos/video_mute/rh6_trailer.mp4,videos/audio/rh6_audio.wav,h,1.0,6.0,0.0,rh6,0.0,0.0,...,,,,ab231203,1,2023_Dec_03_1751,fmritrailer_run2,2021.2.3,60.242847,
6,2.0,videos/video_mute/uh1_trailer.mp4,videos/audio/uh1_audio.wav,h,0.0,6.0,1.0,uh1,0.0,1.0,...,,,,ab231203,1,2023_Dec_03_1751,fmritrailer_run2,2021.2.3,60.242847,
7,3.0,videos/video_mute/rc2_trailer.mp4,videos/audio/rc2_audio.wav,c,1.0,6.0,1.0,rc2,0.0,2.0,...,,,,ab231203,1,2023_Dec_03_1751,fmritrailer_run2,2021.2.3,60.242847,
8,4.0,videos/video_mute/rh2_trailer.mp4,videos/audio/rh2_audio.wav,h,1.0,2.0,0.0,rh2,0.0,3.0,...,,,,ab231203,1,2023_Dec_03_1751,fmritrailer_run2,2021.2.3,60.242847,
9,5.0,videos/video_mute/rh1_trailer.mp4,videos/audio/rh1_audio.wav,h,1.0,2.0,1.0,rh1,0.0,4.0,...,,,,ab231203,1,2023_Dec_03_1751,fmritrailer_run2,2021.2.3,60.242847,


In [73]:
# Drop rows with redundant/irrelevant information.
# Find the starting row [x==1]. 
sub01_events_r1_start = int(np.where(sub01_events_r1_df['order'] == 1)[0])

# Find the ending row [x==max]. 
sub01_events_r1_end = int(np.where(sub01_events_r1_df['order'] == sub01_events_r1_df['order'].max())[0])

# Create lists for rows that will be dropped.
r1_drop_list_start = list(range(0, sub01_events_r1_start))
r1_drop_list_end = list(range(sub01_events_r1_end+1, sub01_events_r1_df.shape[0]))
r1_drop_list = r1_drop_list_start + r1_drop_list_end

print(f"Rows to drop from the start: {r1_drop_list_start}")
print(f"Rows to drop from the end: {r1_drop_list_end}")

# Drop the rows from the df. 
sub01_events_r1_df.drop(r1_drop_list, inplace=True)
sub01_events_r1_df.reset_index(drop=True, inplace=True)
sub01_events_r1_df.head(20)


Rows to drop from the start: [0, 1, 2, 3, 4]
Rows to drop from the end: [21]


Unnamed: 0,order,vid_path,aud_path,genre,released,trial_ITI,scale_flip,label,Rating_trials.thisRepN,Rating_trials.thisTrialN,...,key_resp_end.keys,key_resp_end.started,key_resp_end.stopped,participant,session,date,expName,psychopyVersion,frameRate,Unnamed: 119
0,1.0,videos/video_mute/rh6_trailer.mp4,videos/audio/rh6_audio.wav,h,1.0,6.0,0.0,rh6,0.0,0.0,...,,,,ab231203,1,2023_Dec_03_1751,fmritrailer_run2,2021.2.3,60.242847,
1,2.0,videos/video_mute/uh1_trailer.mp4,videos/audio/uh1_audio.wav,h,0.0,6.0,1.0,uh1,0.0,1.0,...,,,,ab231203,1,2023_Dec_03_1751,fmritrailer_run2,2021.2.3,60.242847,
2,3.0,videos/video_mute/rc2_trailer.mp4,videos/audio/rc2_audio.wav,c,1.0,6.0,1.0,rc2,0.0,2.0,...,,,,ab231203,1,2023_Dec_03_1751,fmritrailer_run2,2021.2.3,60.242847,
3,4.0,videos/video_mute/rh2_trailer.mp4,videos/audio/rh2_audio.wav,h,1.0,2.0,0.0,rh2,0.0,3.0,...,,,,ab231203,1,2023_Dec_03_1751,fmritrailer_run2,2021.2.3,60.242847,
4,5.0,videos/video_mute/rh1_trailer.mp4,videos/audio/rh1_audio.wav,h,1.0,2.0,1.0,rh1,0.0,4.0,...,,,,ab231203,1,2023_Dec_03_1751,fmritrailer_run2,2021.2.3,60.242847,
5,6.0,videos/video_mute/rc9_trailer.mp4,videos/audio/rc9_audio.wav,c,1.0,6.0,0.0,rc9,0.0,5.0,...,,,,ab231203,1,2023_Dec_03_1751,fmritrailer_run2,2021.2.3,60.242847,
6,7.0,videos/video_mute/uc4_trailer.mp4,videos/audio/uc4_audio.wav,c,0.0,2.0,0.0,uc4,0.0,6.0,...,,,,ab231203,1,2023_Dec_03_1751,fmritrailer_run2,2021.2.3,60.242847,
7,8.0,videos/video_mute/rh9_trailer.mp4,videos/audio/rh9_audio.wav,h,1.0,2.0,0.0,rh9,0.0,7.0,...,,,,ab231203,1,2023_Dec_03_1751,fmritrailer_run2,2021.2.3,60.242847,
8,9.0,videos/video_mute/uh2_trailer.mp4,videos/audio/uh2_audio.wav,h,0.0,2.0,1.0,uh2,0.0,8.0,...,,,,ab231203,1,2023_Dec_03_1751,fmritrailer_run2,2021.2.3,60.242847,
9,10.0,videos/video_mute/rc6_trailer.mp4,videos/audio/rc6_audio.wav,c,1.0,2.0,1.0,rc6,0.0,9.0,...,,,,ab231203,1,2023_Dec_03_1751,fmritrailer_run2,2021.2.3,60.242847,


In [76]:
# Drop columns with redundant/irrelevant information.

keep_cols = ["participant", "expName", "label", "released", "trial_ITI", "scale_flip", "sound_trailer.started", "sound_trailer.stopped",
             "exp_WatchQ.started", "exp_WatchQ.stopped", "exp_Watch.keys", "exp_FeelQ.started", "exp_FeelQ.stopped",
             "exp_Feel.keys", "exp_Arousal.started", "exp_Arousal.stopped", "exp_Arousal.keys"]

sub01_events_r1_df = sub01_events_r1_df[keep_cols]
sub01_events_r1_df.head(20)


Unnamed: 0,participant,expName,label,released,trial_ITI,scale_flip,sound_trailer.started,sound_trailer.stopped,exp_WatchQ.started,exp_WatchQ.stopped,exp_Watch.keys,exp_FeelQ.started,exp_FeelQ.stopped,exp_Feel.keys,exp_Arousal.started,exp_Arousal.stopped,exp_Arousal.keys
0,ab231203,fmritrailer_run2,rh6,1.0,6.0,0.0,85.48801,115.50503695500085,115.488142,119.523315,3.0,119.495061,123.522819,2.0,123.492833,,3.0
1,ab231203,fmritrailer_run2,uh1,0.0,6.0,1.0,133.504507,163.51846566300082,163.518466,167.542894,4.0,167.519112,171.535297,2.0,171.535297,,2.0
2,ab231203,fmritrailer_run2,rc2,1.0,6.0,1.0,181.505026,211.53011957299896,211.513098,215.537774,4.0,215.514414,219.531072,2.0,219.531072,,4.0
3,ab231203,fmritrailer_run2,rh2,1.0,2.0,0.0,229.537318,259.5479470290011,259.528627,263.537984,3.0,263.537984,267.57119,2.0,267.543457,,2.0
4,ab231203,fmritrailer_run2,rh1,1.0,2.0,1.0,273.487234,,303.486608,307.520067,1.0,307.486932,311.520357,3.0,311.486848,,2.0
5,ab231203,fmritrailer_run2,rc9,1.0,6.0,0.0,317.485529,,347.48144,351.499566,4.0,351.499566,355.531886,4.0,355.49933,,4.0
6,ab231203,fmritrailer_run2,uc4,0.0,2.0,0.0,365.51343,395.5262585869932,395.509501,399.527298,3.0,399.527298,403.546159,4.0,403.519298,,2.0
7,ab231203,fmritrailer_run2,rh9,1.0,2.0,0.0,409.502226,439.5176617499965,439.517662,443.541864,2.0,443.519262,447.569053,2.0,447.534946,,2.0
8,ab231203,fmritrailer_run2,uh2,0.0,2.0,1.0,453.533994,483.55013782100286,483.532966,487.550555,1.0,487.550555,491.583373,4.0,491.550495,,2.0
9,ab231203,fmritrailer_run2,rc6,1.0,2.0,1.0,497.516638,527.5300167980022,527.511783,531.529497,4.0,531.529497,535.561868,2.0,535.534295,,4.0
