# Precision Grip (Baseline) Data Processing Outline

# Import modules and set user paths

In [None]:
## set plot output style

%matplotlib qt 

## import modules

import mne
import numpy as np
import re
import os
import os.path
import autoreject
from pathlib import Path
from mne.report import Report
from matplotlib import pyplot as plt
from autoreject import get_rejection_threshold 
from autoreject import AutoReject
from mne.preprocessing import compute_proj_ecg, compute_proj_eog, create_eog_epochs, create_ecg_epochs

## set user_path variable as directory to DataAnalysis folder in Dropbox

delim = os.sep  # OS-specific path separator used when building sub-directory strings

# Home directory of whoever is running the notebook; it identifies the user below
home_Path = str(Path.home())

# (name fragment searched for in the home path, that user's DataAnalysis folder in Dropbox)
_known_users = (
    ('dylan', "/Users/dylandaniels/Dropbox (Brown)/99_shared/DataAnalysis"),
    ('tariq', "/Users/tariqcannonier/Dropbox/DataAnalysis"),
    ('simona', 'C:\\Users\\Simona\\Dropbox (Brown)\\Dropbox_Work_VitalityProject\\DataAnalysis'),
)

# The first case-insensitive match wins; user_path is left undefined when nobody matches
for _fragment, _analysis_dir in _known_users:
    if re.search(_fragment, home_Path, re.IGNORECASE):
        user_path = _analysis_dir
        break


## study group to analyze, plus switches for testing functions cell by cell
year = '2017'
session = 'pre'
test_fxn = True            # run the small test call placed after each function definition
print_diagnostic = False   # forwarded to the functions' print_diagnostic argument
test_report = False        # run the single-participant report test

In [None]:
def set_directories_vitality (DataAnalysis_path, year, session, print_diagnostic = False ):
    """Build the data, output and report directories and list the .set files.

    Parameters
    ----------
    DataAnalysis_path : str
        Path to the DataAnalysis folder; every sub-directory is appended to it.
    year : str
        Study year, used as the prefix of the '<year> Vitality EEG Analysis' folder.
    session : str
        Session label (e.g. 'pre'), used in several folder names.
    print_diagnostic : bool, optional
        When true, print the directories and the filenames that were found.

    Returns
    -------
    data_directory : str
    output_directory : str
    report_directory : str
        Directory strings, each ending with the OS path separator so that a
        filename can be appended directly.
    data_filenames : list of str
        Names of the files in ``data_directory`` that end with '.set', sorted
        so the result does not depend on the arbitrary order of ``os.listdir``.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when ``data_directory`` cannot be listed.
    """
    # Use the OS separator directly so the function does not depend on a
    # module-level delimiter variable being defined first.
    sep = os.sep

    ## shared parent folders
    study_root = DataAnalysis_path + sep + year + ' Vitality EEG Analysis' + sep
    eeg_emg_root = study_root + 'Precision Grip' + sep + session + sep + 'EEG_EMG' + sep

    ## define directories
    data_directory = eeg_emg_root + '1_Grip_' + session + '_raw_set' + sep \
        + 'Grip_' + session + '_All' + sep

    # NOTE(review): 'PRE' is hard-coded in this folder name while the other
    # folders follow `session` -- confirm this is intended for other sessions.
    output_directory = eeg_emg_root + '2_Grip_PRE_MNE_processed' + sep

    report_directory = study_root + 'reports' + sep + session + sep

    ## get filenames (.set files only)
    data_filenames = sorted(f for f in os.listdir(data_directory) if f.endswith('.set'))

    ## optionally print directories
    if print_diagnostic:
        print('\n###\n### Printing data directory ... \n###\n\n', data_directory, "\n\n", \
              '\n###\n### Printing output directory ... \n###\n\n', output_directory, "\n\n", \
              '\n###\n### Printing report directory ... \n###\n\n', report_directory, "\n\n", \
              '\n###\n### Printing data filenames ... \n###\n\n', data_filenames, "\n")

    return data_directory, output_directory, report_directory, data_filenames

#test_fxn=True

# Cell-by-cell check: resolve the directories for the configured year and session.
if test_fxn == True:
    (data_directory,
     output_directory,
     report_directory,
     data_filenames) = set_directories_vitality(user_path, year, session, print_diagnostic)

In [None]:
## define function to get participant_info dictionary from file_list

def get_data_info(file_list, data_dir, output_directory, print_diagnostic=False):
    """Map each participant ID to its input and output file paths.

    Parameters
    ----------
    file_list : list of str
        Names of the .set files to analyze.
    data_dir : str
        Directory that holds the files in ``file_list``.
    output_directory : str
        Directory in which the processed files are to be placed.
    print_diagnostic : bool, optional
        When true, print the resulting dictionary entry by entry.

    Returns
    -------
    dict
        ``{participant_ID: (input_path, output_path)}``. The participant ID is
        the part of the filename before the first underscore. If several files
        share an ID, the last one in ``file_list`` wins.
    """
    participant_info = {}

    for filename in file_list:

        ## participant number: contents of filename before first underscore
        participant = filename.split("_")[0]

        ## output name: text before 'AllChannels.set' plus the processed suffix
        processed_name = filename.split("AllChannels.set")[0] + 'mne_processed.set'

        # os.path.join gives the same result as plain concatenation when the
        # directory already ends with a separator, and adds one when it does not.
        participant_info[participant] = (
            os.path.join(data_dir, filename),
            os.path.join(output_directory, processed_name),
        )

    ## optionally print dictionary with participant info
    if print_diagnostic:
        print('\n###\n### Printing \'participant_info\' dictionary ... \n###\n\n----------\n')
        for key, value in participant_info.items():
            print('Participant:',key,'\n\nInpath:',value[0],'\n\nOutpath:',value[1],'\n\n----------\n')

    return participant_info


#test_fxn=True

# Cell-by-cell check: build the participant lookup from the listed .set files.
if test_fxn == True:
    participant_info = get_data_info(
        data_filenames,
        data_directory,
        output_directory,
        print_diagnostic,
    )

# Import and filter data; view data properties

In [None]:
##### define function as preprocess_mydata
### also separate out emg channels

## Define function to import the raw recording and filter its EEG channels

def filter_mydata( input_path , highpass, lowpass):
    """Load one EEGLAB recording and filter its EEG channels.

    Parameters
    ----------
    input_path : str
        Path to the EEGLAB .set file.
    highpass, lowpass : float
        Passed, in this order, as the first two arguments of ``Raw.filter``.

    Returns
    -------
    raw_data
        The recording as read from disk (preloaded), left unmodified.
    working_data
        Copy of ``raw_data`` with channel 'E' renamed to 'STI 014' and the
        EMG / stim channel types set.
    eeg_only
        Copy of ``working_data`` restricted to EEG channels and filtered.
    """
    # read the recording fully into memory; this object is returned untouched
    raw_data = mne.io.read_raw_eeglab(input_path, preload=True)

    # all relabelling happens on a copy
    working_data = raw_data.copy()

    # channel 'E' is renamed and typed as the stim channel
    stim_name = 'STI 014'
    working_data.rename_channels({'E': stim_name})

    # these four channels are typed as EMG
    channel_types = {name: 'emg' for name in ('T7', 'T8', 'PO7', 'PO8')}
    channel_types[stim_name] = 'stim'
    working_data.set_channel_types(channel_types)

    # keep EEG channels only, then filter them
    eeg_only = working_data.copy().pick_types(eeg=True, emg=False)
    eeg_only.filter(highpass, lowpass, fir_design='firwin', verbose=False)

    ## EMG extraction and filtering are currently disabled:
    # emg_only = working_data.copy().pick_types(eeg=False,emg=True)
    # emg_only.filter( 0.01 , 200. ,fir_design='firwin',verbose=False)

    return raw_data, working_data, eeg_only

# Cell-by-cell check: filter a single participant's recording.
if test_fxn == True:

    high_pass = 0.01       # high-pass setting handed to filter_mydata
    low_pass = 50.         # low-pass setting handed to filter_mydata
    participant_ID = '2010'  # participant to analyze

    (raw_data,
     working_data,
     eeg_filtered) = filter_mydata(participant_info[participant_ID][0], high_pass, low_pass)


# Testing EMG

In [None]:
#emg_only = working_data.copy().pick_types(emg=True)
#view_data_properties( [], emg_only)

In [None]:
#type(working_data)

In [None]:
#eeg_only = working_data.copy().pick_types(eeg=True,emg=False)

In [None]:
#emg_only= working_data.copy().pick_types(eeg=False,emg=True)

In [None]:
#eeg_only.plot()
#emg_only.plot()

In [None]:
#eeg_only.filter(0.1,50.,fir_design='firwin',verbose=False)

In [None]:
#emg_only.filter(0.1,50.,fir_design='firwin',verbose=False)

# View data properties, plot channels

# Epoching

**Our data come from EEGLAB, so we need to use MNE's [events_from_annotations()](https://www.nmr.mgh.harvard.edu/mne/stable/generated/mne.events_from_annotations.html) function to extract events from the annotations that EEGLAB exports.**

In [None]:
## epoch data by block timestamps

def epoch_data ( data_file, print_diagnostic = False ): #define function
    """Find the sample indices at which each block starts and ends.

    Parameters
    ----------
    data_file
        Recording passed to ``mne.events_from_annotations``; its
        ``info['sfreq']`` is used when plotting the events.
    print_diagnostic : bool, optional
        When true, print the event IDs, events and timestamps and plot the events.

    Returns
    -------
    timestamps : numpy.ndarray
        One ``[start_sample, end_sample]`` row for every start-block event
        that is immediately followed by an end-block event.
    events : numpy.ndarray
        The events array returned by ``mne.events_from_annotations``.
    """
    ## identify events
    events, event_id = mne.events_from_annotations(data_file) # get events from data in EEGLAB format

    # Give the block markers meaningful names. The labels are looked up directly
    # rather than by looping over the dictionary, because changing a dict's keys
    # while iterating over it raises RuntimeError on current Python versions.
    for old_label, new_label in (('100.0', 'StartBlock'), ('200.0', 'EndBlock')):
        if old_label in event_id:
            event_id[new_label] = event_id.pop(old_label)

    # Use the integer codes actually assigned to the block markers; fall back to
    # 1 and 2 when the expected labels are absent.
    start_code = event_id.get('StartBlock', 1)
    end_code = event_id.get('EndBlock', 2)

    ## generate array of timestamps
    # Walk over consecutive event pairs, so the final event is never indexed
    # past the end of the array when it happens to be a start marker.
    timestamps = [
        [current[0], following[0]]
        for current, following in zip(events[:-1], events[1:])
        if current[2] == start_code and following[2] == end_code
    ]
    timestamps = np.asarray(timestamps) # convert timestamps list into array

    ## optionally print and plot events
    if print_diagnostic:
        print('\n###\n### Printing event IDs ... \n###\n\n',event_id,'\n\n')
        print('\n###\n### Printing events ... \n###\n\n',events,'\n\n')
        print('\n###\n### Printing timestamps ... \n###\n\n',timestamps,'\n')
        mne.viz.plot_events(events, sfreq=data_file.info['sfreq'])

    return timestamps, events

# Cell-by-cell check: `epochs` receives the block start/end timestamps
# returned by epoch_data, `events` the full events array.
if test_fxn == True:
    epochs, events = epoch_data(eeg_filtered, print_diagnostic)
    

# Crop data; process events and epochs

In [None]:
def crop_data (data_file, timestamps, print_diagnostic = False ):
    """Cut the recording into blocks and epoch every block at fixed intervals.

    Parameters
    ----------
    data_file
        Recording to crop; ``info['sfreq']`` converts sample indices to seconds.
    timestamps
        Sequence of ``[start_sample, end_sample]`` pairs, one per block.
    print_diagnostic : bool, optional
        When true, print the event arrays created for the blocks.

    Returns
    -------
    blocks_data : list
        One cropped copy of ``data_file`` per block.
    blocks_events : list
        For each block, the output of ``mne.make_fixed_length_events``.
    blocks_epochs : list
        For each block, an ``mne.Epochs`` built from that block and its events.
    """
    event_code = 1       # id given to every fixed-length event
    window_seconds = 2   # spacing between consecutive events

    blocks_data, blocks_events, blocks_epochs = [], [], []

    for span in timestamps:
        # sample index -> seconds, as expected by crop()
        t_start = span[0] / data_file.info['sfreq']
        t_stop = span[1] / data_file.info['sfreq']

        # the block itself: a cropped copy, so data_file stays intact
        block = data_file.copy().crop(tmin=t_start, tmax=t_stop)

        # event markers with id `event_code` every `window_seconds` within the block
        markers = mne.make_fixed_length_events(block, id=event_code,
                                               duration=window_seconds)

        # epochs from 0 to 2 relative to each marker, without baseline correction
        segments = mne.Epochs(block, markers, event_id=event_code,
                              tmin=0, tmax=2, baseline=None,
                              preload=True, verbose=False)

        blocks_data.append(block)
        blocks_events.append(markers)
        blocks_epochs.append(segments)

    if print_diagnostic == True:
        print('\n###\n### Printing all epochs for each block ... \n###\n\n',blocks_events,"\n\n-----\n")

    return blocks_data, blocks_events, blocks_epochs

# Cell-by-cell check: `epochs` holds the block timestamps from epoch_data.
if test_fxn == True:
    (blocks_data,
     blocks_events,
     blocks_epochs) = crop_data(eeg_filtered, epochs, print_diagnostic)


In [None]:
def generate_block_figs(blocks_data, blocks_events, blocks_epochs, n_epochs, duration, scalings):
    """Create the four report figures for a single block.

    Parameters
    ----------
    blocks_data
        The block's continuous data; plotted with its events.
    blocks_events
        Events overlaid on the continuous-data plot.
    blocks_epochs
        The block's epochs; their average is used for the other three figures.
    n_epochs
        Accepted but not used by the active code.
    duration
        Passed as ``duration`` to the continuous-data plot.
    scalings
        Passed as ``scalings`` to every plot except the joint plot.

    Returns
    -------
    list
        ``[continuous-data plot, butterfly plot, topomap, joint plot]``.
    """
    plt.ioff() # turns off plots

    # continuous data of the block with its event markers
    data_fig = blocks_data.plot(events=blocks_events, duration=duration,
                                show=False, scalings=scalings)

    # three views of the block's averaged epochs
    butterfly_fig = blocks_epochs.average().plot(show=False, scalings=scalings)
    topomap_fig = blocks_epochs.average().plot_topomap(show=False, scalings=scalings)
    joint_fig = blocks_epochs.average().plot_joint(show=False)

    # interactive epochs figure, currently disabled:
    # fig_epochs = blocks_epochs.plot(show=False,scalings=scalings,n_epochs=n_epochs);

    return [data_fig, butterfly_fig, topomap_fig, joint_fig]

#test_fxn=True

# Cell-by-cell check: build the figures for the first block only.
if test_fxn == True:
    n_epochs = 3             # Use for viewing subset of epochs.
    duration = n_epochs * 2  # Use for viewing subset of epochs. Otherwise set to 40
    scalings = 1/25000       # Setting it to a constant to compare artifact in epoching
    block_figs = generate_block_figs(
        blocks_data[0],
        blocks_events[0],
        blocks_epochs[0],
        n_epochs,
        duration,
        scalings,
    )


# Report

In [None]:
## define report

def reporting(epochs, subject_ID, blocks_data, blocks_events, blocks_epochs,\
              working_data, eeg_filtered, events, report_directory):
    
    plt.ioff() # turns off plots
            
    rep = Report() # call Report object
        
    eeg_chs_raw = working_data.copy().pick_types(eeg=True,emg=False).plot_psd(average=False,xscale='linear');
    eeg_chs_filtered = eeg_filtered.plot_psd(average=False,xscale='linear');
    show_events = mne.viz.plot_events(events, sfreq=eeg_filtered.info['sfreq']);
    
    partic_figs=[eeg_chs_raw, eeg_chs_filtered, show_events]
    
    rep.add_figs_to_section(partic_figs, captions=["raw psd","filtered psd","events"],\
                            section="Subject "+subject_ID)
    
    for i in range(0,len(epochs)): # loop through blocks
        
        # make figures
        block_figs = generate_block_figs(blocks_data[i], blocks_events[i],\
                                         blocks_epochs[i], n_epochs, duration, scalings);
        
        # define figure captions
        captions = ['Block %d Data' % (i+1), \
                    'Block %d Butterfly' % (i+1), 'Block %d Topomap' % (i+1),'Block %d TopoJoint' % (i+1)]

        # add list of figures to report
        rep.add_figs_to_section(block_figs, captions=captions, section='Subject '+subject_ID+' Block %d' % (i+1))

    # set report filename
    filename=report_directory+subject_ID+'_'+session+'_report.html'
    
    # save report
    rep.save(filename, overwrite=True, open_browser=False)
    
    return;

#test_report=True
    
if test_report==True: # test function
    participant_ID='2010'
    reporting(epochs, participant_ID, blocks_data, blocks_events, blocks_epochs, working_data,\
              eeg_filtered, events, report_directory)


# Loop through multiple subjects

In [None]:
## code to loop through subjects and generate reports

def run_reports( subject_list, high_pass, low_pass, n_epochs, duration, scalings ):
    
    plt.ioff() # turns off plots

    data_directory, output_directory, report_directory, data_filenames = \
    set_directories_vitality (user_path, year, session, False)
    
    participant_info = get_data_info( data_filenames, data_directory, output_directory, False )
    
    for e in subject_list: # loop through subjects, set input path
        
        subject_ID=e
        
        if e in participant_info.keys():

            raw_data, working_data, eeg_filtered = filter_mydata( participant_info[e][0] , high_pass, low_pass )

            epochs, events = epoch_data( eeg_filtered, False)

            blocks_data, blocks_events, blocks_epochs = crop_data(eeg_filtered, epochs, False)

            reporting(epochs, subject_ID, blocks_data, blocks_events, blocks_epochs,\
                     working_data, eeg_filtered, events, report_directory);

# Workflow for looping through subjects

In [None]:
## define list of subject(s) to analyze
#subject_list=['2004']
subject_list = [
    '2037', '2004', '2025', '2012', '2016', '2021', '2029', '2017',
    '2020', '2024', '2013', '2028', '2046', '2042', '2032', '2001',
    '2036', '2009', '2019', '2026', '2011', '2015', '2022', '2038',
    '2003', '2034', '2030', '2007', '2039', '2031', '2002', '2045',
    '2041', '2018', '2014', '2023', '2010',
]

## analysis properties: filter settings
high_pass = 0.01
low_pass = 50.

## report properties: plot settings for the block figures
n_epochs = 20
duration = n_epochs * 2
scalings = 1/25000

## group to analyze
year = '2017'
session = 'pre'

## run workflow on subject_list
run_reports(subject_list, high_pass, low_pass, n_epochs, duration, scalings)

