---
# Intertrial Variability: 1. Preprocessing

In [None]:
import sys; sys.path.insert(1, '../')
from scripts.preproc import load_prepare_data, epoch_clean
import os
import pickle as pkl
from scripts.util import custom_logger, concatenate_epochs

---
# Define Preprocessing Parameters

In [None]:
# ---------------------------------------------------------------------------
# Locations and naming
# ---------------------------------------------------------------------------
data_pth = 'D:/data/Psychiatrie_Autismus_2012/'  # Root folder of the recordings
folders = ['Neurotypicals', 'Asperger']  # One sub-folder per group
group_name = ['control', 'asd']  # Result keys, in the same order as `folders`
file_codes = ['PL', 'PS']  # File-name prefixes of the recordings of interest

# ---------------------------------------------------------------------------
# Trigger handling (all of these are handed to `epoch_clean`)
# ---------------------------------------------------------------------------
# NOTE(review): presumably trigger lengths; entries look like they pair
# one-to-one with the label lists below -- confirm against `epoch_clean`.
triglens = [
    2, 3, 4, 5, 6,
    9, 10, 11, 12, 13, 14, 15,
    21, 22, 23, 24, 25, 26, 27,
    70,
]
# One label list per recording, written as run-lengths: 5 + 14 + 1 = 20 labels,
# i.e. as many labels as there are entries in `triglens`.
triglabels = [
    [10] * 5 + [20] * 14 + [30],
    [40] * 5 + [50] * 14 + [60],
]
# Stimulus names per recording. Codes {S: Small, L: Large, F: Frequent, R: Rare}
stimuli = [
    ['SF', 'LR', 'GPL'],
    ['SR', 'LF', 'GPS'],
]
round_to = 1  # NOTE(review): meaning is defined by `epoch_clean` -- confirm there
onoff = [3, 3]  # NOTE(review): meaning is defined by `epoch_clean` -- confirm there

# ---------------------------------------------------------------------------
# Preprocessing switches and settings
# ---------------------------------------------------------------------------
filtfreq = [1, 30]  # Band-pass edges
rereference = ['TP9', 'TP10']  # Channels used as reference
srate = 500  # Target sampling rate after resampling
trial_time = (-0.2, 0.5)  # Time span of a single trial
baseline = None  # Baseline correction disabled
art_thresh = None  # Artifact threshold (none set)
rm_bad_chans = False  # Custom bad-channel detection disabled
use_ransac = True  # RANSAC bad-channel detection enabled
csd = False  # Current Source Density disabled
perform_ICA = True  # Run ICA and remove blink-related artifacts
n_jobs = -1  # Number of cores to use during parallelization

---
## Load, preprocess and epoch data

In [None]:
# Result containers, filled as {group name: {subject code: ...}}.
all_data_raw = {}
all_data_epoch = {}

# One list of subject entries per group folder; hidden entries and .txt files
# lying next to the subject folders are skipped.
subject_list = [
    [n for n in os.listdir(os.path.join(data_pth, folder))
     if not n.startswith('.') and not n.endswith('.txt')]
    for folder in folders
]

save_path = '../processed/data_preprocessed.pkl'

# Create the output and log folders *before* the expensive preprocessing
# starts. Otherwise a missing folder would only surface when a log file or the
# final pickle is opened -- in the latter case after all subjects were processed.
log_dir = os.path.dirname(save_path) + '/logs/'
os.makedirs(log_dir, exist_ok=True)

for folder, group, subjects in zip(folders, group_name, subject_list):
    all_data_raw[group] = {}
    all_data_epoch[group] = {}
    for subject in subjects:
        # Subjects are keyed by the first two characters of their folder name.
        # NOTE(review): two folders sharing that prefix would overwrite each other.
        subname = subject[0:2]
        subject_dir = os.path.join(data_pth, folder, subject)

        # Header files (.vhdr) of the recordings of interest. They are sorted so
        # that their order lines up with `file_codes`, `triglabels` and `stimuli`
        # (this relies on the 'PL' file sorting before the 'PS' file).
        filepaths = sorted(
            os.path.join(subject_dir, k) for k in os.listdir(subject_dir)
            if k.startswith(tuple(file_codes)) and k.endswith('.vhdr')
        )
        # Fail fast: with fewer recordings than codes the bookkeeping below
        # cannot succeed, so stop before spending time on filtering and ICA.
        if len(filepaths) < len(file_codes):
            raise FileNotFoundError(
                'Expected {} .vhdr files starting with {} in {}, found {}'.format(
                    len(file_codes), file_codes, subject_dir, len(filepaths)))

        # One log file per subject.
        logger = custom_logger(log_dir + subject + '.log')

        # Continuous data: load and prepare each recording with the settings
        # defined in the parameter cell.
        raws = [load_prepare_data(filepath, rereference, filtfreq, perform_ICA=perform_ICA, logger=logger,
                rm_bad_chans=rm_bad_chans, n_jobs=n_jobs) for filepath in filepaths]

        # Epoch each recording with its own trigger labels and stimulus names,
        # then merge the recordings into a single epochs object per subject.
        epocheds = [epoch_clean(raw, baseline, trial_time, onoff, art_thresh, srate,
                    round_to, triglens, triglabel, logger=logger, stim_names=stimulus, csd=csd,
                    use_ransac=use_ransac) for raw, triglabel, stimulus in zip(raws, triglabels, stimuli)]
        epochs = concatenate_epochs(epocheds)

        # Continuous data is stored per recording code, epochs per subject.
        all_data_raw[group][subname] = dict(zip(file_codes, raws))
        all_data_epoch[group][subname] = epochs

# Persist both containers together as a two-element list.
with open(save_path, 'wb') as f:
    pkl.dump([all_data_raw, all_data_epoch], f)
    print('saved')