# Setting up Derivative_Project01 - Data prep

In [1]:
# Name of the derivative project; used below to build output paths (./<project>/data/...).
project = 'derivative_project01'
import matplotlib
import mne
import os
# NOTE: this binds the name `io` to the mne.io subpackage, not the stdlib `io` module.
from mne import io
import zipfile
import sys
import pandas as pd
import numpy as np

# Participants

In [2]:
# Number of entries directly under ./sourcedata (ordering does not affect the count).
len(os.listdir('./sourcedata'))

71

### Creating new data location

In [2]:
# Create ./<project>/data, the root folder that receives the unzipped recordings.
# os.makedirs is used (rather than os.mkdir) so the parent project folder is
# created as well when it does not exist yet; the path is derived from
# `project` so it stays in sync with the paths built in the copy step.
data_dir = f'./{project}/data'
if os.path.exists(data_dir):
    print ('data folder already exists')
else:
    os.makedirs(data_dir)

### Choosing subjects and files 

In [112]:
# Final subject list: every entry of ./sourcedata (sorted) whose name does not
# contain 'CCTC06NETICU'.
sub_final = [entry for entry in sorted(os.listdir('./sourcedata'))
             if 'CCTC06NETICU' not in entry]

# Subfolders/Tasks
# A task folder is kept when its name contains 'sub-' and none of the tags
# below; everything else is recorded in `removed` and reported.
EXCLUDED_TAGS = ('(failed)', 'tennis', 'ERP')
fol_names = []
removed = []
for subject in sub_final:
    print(f'{subject}:')
    subfolders = os.listdir(f'./sourcedata/{subject}/eeg')
    for folder in subfolders:
        rejected = 'sub-' not in folder or any(tag in folder for tag in EXCLUDED_TAGS)
        if rejected:
            removed.append(folder)
            print ('-----------ERROR---------------------------------------', folder)
            continue
        print(f'    -{folder}')
        fol_names.append(folder)


sub-CCTC03:
    -sub-CCTC03_task-taken_eeg
    -sub-CCTC03_task-rest02_eeg
    -sub-CCTC03_task-taken02_eeg
    -sub-CCTC03_task-rest_eeg
sub-CCTC04:
    -sub-CCTC04_task-restredo_eeg
    -sub-CCTC04_task-taken_eeg
    -sub-CCTC04_task-rest_eeg
sub-CCTC05:
    -sub-CCTC05_task-RS_ST_Taken_eeg
-----------ERROR--------------------------------------- sub-CCTC05_task-ERP_eeg
sub-CCTC07:
    -sub-CCTC07_task-rest_eeg
    -sub-CCTC07_task-taken_eeg
-----------ERROR--------------------------------------- sub-CCTC07_task-ERP_eeg
sub-CCTC07NETICU:
    -sub-CCTC07NETICU_task-sedon1rest_eeg
    -sub-CCTC07NETICU_task-sedoffrest_eeg
    -sub-CCTC07NETICU_task-sedofftaken_eeg
    -sub-CCTC07NETICU_task-sedon1taken_eeg
sub-HC01:
    -sub-HC01_task-ST_eeg
    -sub-HC01_task-IT_eeg
sub-HC02:
    -sub-HC02_task-Taken_eeg
sub-HC03:
    -sub-HC03_task-Taken_eeg
sub-HC04:
    -sub-HC04_task-Taken_eeg
sub-HC05:
    -sub-HC05_task-Taken_eeg
sub-HC06:
    -sub-HC06_task-Taken_eeg
sub-HC07:
    -sub-HC07_task

# Copy from SourceData to new Derivative directory for further processing

In [123]:
 #######################################
# Copy files to derivative_project/data #
 #######################################
# Local imports: the name `io` in this file is bound to mne.io, so the stdlib
# pieces needed for output suppression are imported under their own names.
import contextlib
from io import StringIO

# Target sampling rate in Hz; recordings sampled faster than this are downsampled.
TARGET_SFREQ = 250


@contextlib.contextmanager
def _quiet():
    """Discard everything written to stdout/stderr inside the ``with`` block.

    Replaces the former ``io.capture_output()`` calls: `io` is bound to mne.io
    in this file, which does not provide that helper (it belongs to
    IPython.utils.io), so plain stdlib redirection is used instead.
    """
    sink = StringIO()
    with contextlib.redirect_stdout(sink), contextlib.redirect_stderr(sink):
        yield


p_list = ['MSICU32']  # Example with one participant file

for ID in p_list:
    print (f'\n________________________________________________________________________________________________\n____________________________________________{ID}______________________________________________\n')

    # The list of conditions for a participant is looked up by name in the
    # module namespace: a variable called "<ID>_tlist" must already exist.
    tlist = vars()[ID+'_tlist']
    for c in tlist:
        print ('\n___________',c,'___________')

        # ---- Locate the zipped source recording ---------------------------------
        source_path = f'./sourcedata/sub-{ID}/eeg/sub-{ID}_task-{c}_eeg'
        # Only consider .zip archives (sorted for a deterministic choice) instead
        # of blindly taking whatever os.listdir returns first.
        zip_names = sorted(name for name in os.listdir(source_path)
                           if name.lower().endswith('.zip'))
        if not zip_names:
            raise FileNotFoundError(f'No .zip archive found in {source_path}')
        archive = zip_names[0]
        fname = os.path.splitext(archive)[0]

        # ---- Create the participant's working directory -------------------------
        new_path = f'./{project}/data/sub-{ID}/sub-{ID}_task-{c}_eeg'
        if os.path.exists(new_path):
            print('    - This directory already exists. \n    - Continue with existing file.')
        else:
            os.makedirs(new_path)

        # ---- Unzip, then rename the extracted folder to <fname>.mff -------------
        extracted_path = f'{new_path}/{fname}'
        data_path = f'{extracted_path}.mff'
        # Skip extraction when either the raw folder or the already renamed .mff
        # exists; checking only the raw folder re-extracted the archive on every
        # re-run once the rename had happened.
        if os.path.exists(extracted_path) or os.path.exists(data_path):
            print(f'    - Cannot unzip files because {fname} already exists.\n    - Continue with existing file.')
        else:
            with zipfile.ZipFile(f'{source_path}/{archive}', 'r') as zip_ref:
                zip_ref.extractall(new_path)

        if os.path.exists(data_path):
            print(f'    - Cant rename file to .mff because {fname}.mff file already exisits. \n    - Continuing with existing file')
        else:
            os.rename(extracted_path, data_path)

        # ---- Read the recording and downsample if needed ------------------------
        with _quiet():
            raw = mne.io.read_raw_egi(data_path)
        data = raw.copy()  # `raw` is left untouched; all processing happens on `data`
        raw_sfreq = raw.info['sfreq']
        if raw_sfreq > TARGET_SFREQ:
            with _quiet():
                data.resample(TARGET_SFREQ)
            print(f'    > Data was downsampled to {TARGET_SFREQ}Hz from {raw_sfreq}')
        elif raw_sfreq < TARGET_SFREQ:
            print(f'    > Error: Data sampling frequency is {raw_sfreq}Hz.')
            answer = input('      - Do you want to continue(yes/no)?:')
            if answer.lower().startswith("y"):
                print("      - ok, carry on then")
            elif answer.lower().startswith("n"):
                print("      - ok, sayonnara")
                sys.exit()

        # Load the data into memory.
        with _quiet():
            data.load_data()

        # ---- Timing -------------------------------------------------------------
        # Use the actual sampling rate of `data`: it equals TARGET_SFREQ after
        # resampling but not when a slower recording was accepted above.
        sfreq = data.info['sfreq']
        total_seconds = (data.last_samp + 1) / sfreq
        meas_data = data.info['meas_date']
        print('(1) Data timing...',
              f'\n    - {fname} ----> {meas_data}',
              f'\n    - {data}',
              f'\n    - time(s): {total_seconds}',
              f'\n    - first_samp-last_samp: {data.first_samp} - {data.last_samp}')
        # Break the duration into h/min/s, wrapping at 24 hours.
        hour, remainder = divmod(total_seconds % (24 * 3600), 3600)
        minutes, seconds = divmod(remainder, 60)
        print("    - Duration = %dhr : %02dmin : %02dsec" % (hour, minutes, seconds))

        # ---- Events -------------------------------------------------------------
        print('(2) Trigger Channels')
        # A channel is reported as a trigger channel when its name contains none
        # of these substrings.
        non_trigger_tags = ('E', 'sync', 'STI', 'V')
        triggers = []
        for ch_name in data.ch_names:
            if not any(tag in ch_name for tag in non_trigger_tags):
                print ("    - Trigger ch.", ch_name)
                triggers.append(ch_name)

        with _quiet():
            events = mne.find_events(data, stim_channel='STI 014', initial_event=True)
        for idx, sample in enumerate(events[:, 0], start=1):
            # Each event sample is also stored in a module-level variable
            # event1, event2, ... (presumably read by other cells - TODO confirm).
            vars()[f'event{idx}'] = sample
            print (f' - {sample} / {sfreq:g} = {sample / sfreq}')


________________________________________________________________________________________________
____________________________________________MSICU32______________________________________________


___________ rest ___________
    - This directory already exists. 
    - Continue with existing file.
    - Cant rename file to .mff because Session 20240220 2041.mff file already exisits. 
    - Continuing with existing file
    > Data was downsampled to 250Hz from 1000.0
(1) Data timing... 
    - Session 20240220 2041 ----> 2024-02-21 01:44:07.407000+00:00 
    - <RawMff | signal1.bin, 131 x 90668 (362.7 s), ~90.8 MB, data loaded> 
    - time(s): 362.672 
    - first_samp-last_samp: 0 - 90667
    - Duration = 0hr : 06min : 02sec
(2) Trigger Channels

___________ taken ___________
(1) Data timing... 
    - Session 20240220 2022 ----> 2024-02-21 01:30:46.400000+00:00 
    - <RawMff | signal1.bin, 135 x 160839 (643.4 s), ~165.8 MB, data loaded> 
    - time(s): 643.356 
    - first_samp-last_s