# This preprocessing workflow is developped at 08/07/2019
* mne event coding in VD {'12.0': 5, '100.0': 2, '21.0': 8, '102.0': 4, '254.0': 11, '131.0': 6, '132.0': 7, '1.0': 1, '23.0': 10, '101.0': 3, '255.0': 12, '22.0': 9})   6-safe period   7-threat period  11-start 12-end
* one needs to notice the event coding can be different in diff files
* event recoding three-number.0(float format) code: session+state+cond
    * section 1,2
    * state: VD:1, FA:2, OP:3
    * condition safe:1, threat:2




In [23]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
===============================================
Preprocessing on Enrico data using MNE and ASR
===============================================
We firstly define subject dictionary as well as state list, reject dict, then we import eeglab format 
Raw data with MNE package. We apply:
1) a notch filter to remove powerline artifact (50 Hz)
2) a 1Hz-100Hz band-pass filter
====> output = subj0*number*_*state*_filt_raw.fif  
Then concatenate the data of the same session
3) ASR and ICA fitting: This is a parallel process of preprocessing, 
    the goal is to store two sets of ica component images and generate an exclude dict from that: 
        3)do epochs in order to autoreject bad epochs and observe rejection report ===> output: cleaned epochs
        3)ICA fit and save the ICA components to reject - this is done by using function ica_component_selection()
        ====> output = fif file that save ica object and a rejecting component dict


Note: version not fullfill custer-run requirement

Suggestions:
1) decide infomation storage format
2) 

Updated on July 2019

@author: Gansheng TAN aegean0045@outlook.com    based on Manu's codes
"""

##############################################################  Set-up ################################################
import mne
import importlib
import numpy as np
from mne.report import Report
from autoreject import AutoReject
from autoreject import compute_thresholds
from autoreject import get_rejection_threshold 
import matplotlib.pyplot as plt  # noqa
import matplotlib.patches as patches  # noqa
from autoreject import set_matplotlib_defaults  # noqa
from Autoreject_report_plot import Autoreject_report_plot #Gansheng
%matplotlib qt
mne.set_log_level('WARNING')

##################### OS path in INSERM computer #####################################################################
raw_data_path = '/home/gansheng.tan/process_mne/INSERM_EEG_Enrico_Proc/data_eeglab/raw_data/'
montage_fname = '/home/gansheng.tan/process_mne/INSERM_EEG_Enrico_Proc/data_eeglab/raw_data/Biosemi64_MAS_EOG.locs'

########################################## Initialization parameter##########################################""
subj_list = ['94']
section_list=['1']
#state list defines the concatenating order
# state_list = ['VD','FA','OP']
state_list = ['VD','FA']
power_freq_array = [50]
reject_raw_data_section1 = {'07':['OP'],'10':['FA','VD'],'21':['VD'],'36':['OP']}
reject_raw_data_section2 = {'07':['OP'], '10':['VD'], '21':['FA','VD'],
                '22':['OP'], '57':['OP','FA'], '82':['FA','OP','VD']}

# bad channel rejection is not apllied in the preproc, bad channels will be defined by eyes later
bad_channel={'94':{'FA1':['Pz']}}
# example: bad_channel = {'94':{'FA1':['FP1','FP2'],{'VD1':['Cz']}} excluded for ICA analysis


################################ step00: cut and filter data and concatenate 3 recording in one section ############

###### set up montage
montage_biosemi=mne.channels.read_montage(montage_fname)

###### preproc for each raw file
for subj in subj_list:
    for section in section_list:
        reject_state=[]
        conctn_list = []
#         conctn_dict = {}
        if subj in eval('reject_raw_data_section'+section).keys():
            reject_state = eval('reject_raw_data_section'+section)[subj]
            print("the rejected states of subject {} in section {} are {}".format(subj,section,reject_state))
        for state in state_list:
            if state in reject_state:
                continue
            else:
                raw_fname = raw_data_path + 'subj0'+subj+'_'+state+section+'_mast.set'
                raw = mne.io.read_raw_eeglab(raw_fname,montage_biosemi,verbose='INFO',preload=True,eog='auto')
                
                events = mne.events_from_annotations(raw)
                events_coding=events[1]
                # take recording from 254 start of recording to 255 end of recording
                events=np.asarray(events[0])
                events_code_start = events_coding['254.0']
                events_code_end = events_coding['255.0']
                start = events[events[:,2]==events_code_start][0][0]
                stop = events[events[:,2]==events_code_end][0][0]
                raw_cut_filt = raw.copy()

                raw_cut_filt.crop(tmin = start/raw.info['sfreq'], tmax = stop/raw.info['sfreq'])
                raw_cut_filt.notch_filter(freqs=power_freq_array)
                raw_cut_filt.filter(l_freq=1,h_freq=100)
                
                ############ annotation engineering ################
                index_dlt=0
                for i in range(raw_cut_filt.annotations.__len__()):
                    if (raw_cut_filt.annotations.__getitem__(i-index_dlt)['description']) not in ['131.0','132.0']:
                        raw_cut_filt.annotations.delete(i-index_dlt)
                        index_dlt+=1
                    elif raw_cut_filt.annotations.__getitem__(i-index_dlt)['description'] in ['131.0', '132.0']:
                        mne_annotation_recode(section = section, state = state, original_annotation=
                                              raw_cut_filt.annotations.__getitem__(i-index_dlt))
                for i in range(raw_cut_filt.annotations.__len__()):
                    print(raw_cut_filt.annotations.__getitem__(i))
#                 raw.plot(title='raw plot',scalings=150e-6)                              
                raw_cut_filt.plot(title='raw plot after cut',scalings=150e-6)
                conctn_list.append(raw_cut_filt)
#                 conctn_dict.update({state+section : raw_cut_filt.to_data_frame()})
#             conctn_list = [v for v in conctn_dict.values()] 
#             for raw_cut_filt2cpnctn in conctn_list:
        raw_full = mne.io.concatenate_raws(conctn_list)
#         raw_full.plot(scalings=150e-6)    
                
                    

                
        

Reading /home/gansheng.tan/process_mne/INSERM_EEG_Enrico_Proc/data_eeglab/raw_data/subj094_VD1_mast.fdt
Reading 0 ... 308223  =      0.000 ...   601.998 secs...
Setting up band-stop filter from 49 - 51 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 49.38
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 49.12 Hz)
- Upper passband edge: 50.62 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 50.88 Hz)
- Filter length: 3379 samples (6.600 sec)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 1e+02 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuatio

In [33]:
for i in range(raw_full.annotations.__len__()):
    print(raw_full.annotations.__getitem__(i))
raw_full.plot()
print(section)
print(state)
mne_annotation_recode(section,state,raw_full.annotations.__getitem__(0))
for i in range(raw_full.annotations.__len__()):
    print(raw_full.annotations.__getitem__(i))

OrderedDict([('onset', 167.14453125), ('duration', 0.0), ('description', '132.0'), ('orig_time', None)])
OrderedDict([('onset', 212.822265625), ('duration', 0.0), ('description', '131.0'), ('orig_time', None)])
OrderedDict([('onset', 259.119140625), ('duration', 0.0), ('description', '132.0'), ('orig_time', None)])
OrderedDict([('onset', 304.826171875), ('duration', 0.0), ('description', '132.0'), ('orig_time', None)])
OrderedDict([('onset', 350.57421875), ('duration', 0.0), ('description', '131.0'), ('orig_time', None)])
OrderedDict([('onset', 396.181640625), ('duration', 0.0), ('description', '132.0'), ('orig_time', None)])
OrderedDict([('onset', 441.7890625), ('duration', 0.0), ('description', '131.0'), ('orig_time', None)])
OrderedDict([('onset', 487.396484375), ('duration', 0.0), ('description', '131.0'), ('orig_time', None)])
OrderedDict([('onset', 531.55859375), ('duration', 0.0), ('description', 'BAD boundary'), ('orig_time', None)])
OrderedDict([('onset', 531.55859375), ('dura

In [42]:
raw_full.annotations.__getitem__(0)['description']=='132.0'
raw_full.annotations.__getitem__(0)['description']='1.0'
raw_full.annotations.__getitem__(0)['description']=5
raw_full.annotations.__getitem__(0).items()

odict_items([('onset', 167.14453125), ('duration', 0.0), ('description', '132.0'), ('orig_time', None)])

In [2]:
def mne_annotation_recode(section,state,original_annotation):
    if section =='1':
        if state == 'VD':
            if original_annotation['description']=='131.0':
                original_annotation['description']='111.0'
            elif original_annotation['description']=='132.0':
                original_annotation['description']='112.0'
            else:
                print('this function only detect safe and threat period, please check original annotations')
        elif state == 'FA':
            if original_annotation['description']=='131.0':
                original_annotation['description']='121.0'
            elif original_annotation['description']=='132.0':
                original_annotation['description']='122.0'
            else:
                print('this function only detect safe and threat period, please check original annotations')
        elif state == 'OP':
            if original_annotation['description']=='131.0':
                original_annotation['description']='131.0'
            elif original_annotation['description']=='132.0':
                original_annotation['description']='132.0'
            else:
                print('this function only detect VD, FA, OP states, please check original annotations')
    elif section =='2':
        if state == 'VD':
            if original_annotation['description']=='131.0':
                original_annotation['description']='211.0'
            elif original_annotation['description']=='132.0':
                original_annotation['description']='212.0'
            else:
                print('this function only detect safe and threat period, please check original annotations')
        elif state == 'FA':
            if original_annotation['description']=='131.0':
                original_annotation['description']='221.0'
            elif original_annotation['description']=='132.0':
                original_annotation['description']='222.0'
            else:
                print('this function only detect safe and threat period, please check original annotations')
        elif state == 'OP':
            if original_annotation['description']=='131.0':
                original_annotation['description']='231.0'
            elif original_annotation['description']=='132.0':
                original_annotation['description']='232.0'
            else:
                print('this function only detect VD, FA, OP states, please check original annotations')
    else:
        print('3rd state dected, please check annotations')
        


In [28]:
import pandas as pd
frames = []
for raw_cut_filt_key in conctn_dict.keys():
    frames.append(conctn_dict[raw_cut_filt_key])
full = pd.concat(frames)

In [31]:
raw_full = mne.io.RawArray(full.T.as_matrix(),info = raw_cut_filt.info)

  """Entry point for launching an IPython kernel.


In [41]:
raw_cut_filt.annotations

<Annotations  |  573 segments : 254.0 (1), 12.0 (1), 255.0 (1)..., orig_time : None>