# Precision Grip Data Visualization and Rejection

In [None]:
# set plot output style

%matplotlib qt 

## import modules

import mne
import numpy as np
import re
import os
import os.path
from pathlib import Path
from mne.report import Report
from matplotlib import pyplot as plt
from autoreject import get_rejection_threshold
from autoreject import AutoReject

## Resolve user_path: the DataAnalysis folder inside the current user's Dropbox

delim = os.path.sep

# The home directory tells us whose machine the notebook is running on
home_Path = str(Path.home())

# (name fragment looked for in the home path, that user's DataAnalysis folder).
# Entries are tried in order and the first case-insensitive match wins.
_known_users = (
    ('dylan', "/Users/dylandaniels/Dropbox (Brown)/99_shared/DataAnalysis"),
    ('tariq', "/Users/tariqcannonier/Dropbox/DataAnalysis"),
    ('simona', 'C:\\Users\\Simona\\Dropbox (Brown)\\Dropbox_Work_VitalityProject\\DataAnalysis'),
)
for _fragment, _folder in _known_users:
    if re.search(_fragment, home_Path, re.IGNORECASE):
        user_path = _folder
        break
# NOTE: when no fragment matches, user_path is left undefined

## Run switches: enable the optional test cells while stepping through the notebook
year = '2017'
session = 'pre'
test_fxn = False
test_vis = False
print_diagnostic = False
test_report = False
participant_ID = '2002'  # set participant to analyze

In [None]:
def set_directories_vitality(DataAnalysis_path, year, session, print_diagnostic=False):
    """Locate the Precision Grip input/output folders and list the input files.

    Both folders live under
    ``<DataAnalysis_path>/<year> Vitality EEG Analysis/Precision Grip/<session>/EEG_EMG/``:
    the data folder is ``2_Grip_MNE_processed`` and the output folder is
    ``3_Grip_MNE_visualized``. The output folder is created if it is missing.

    Parameters
    ----------
    DataAnalysis_path : str
        Path to the DataAnalysis folder.
    year : str
        Study year used as the prefix of the analysis folder name.
    session : str
        Session sub-folder name.
    print_diagnostic : bool, optional
        When true, print both directories and the file names that were found.

    Returns
    -------
    tuple
        ``(data_directory, output_directory, data_filenames)``. Both directory
        strings end with the path separator; ``data_filenames`` is the sorted
        list of ``.set`` file names found in the data directory.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when the data directory cannot be read.
    """
    session_root = os.path.join(
        DataAnalysis_path,
        year + ' Vitality EEG Analysis',
        'Precision Grip',
        session,
        'EEG_EMG',
    )

    # The empty last component makes join() emit a trailing separator; callers
    # build file paths by appending names directly to these strings.
    data_directory = os.path.join(session_root, '2_Grip_MNE_processed', '')
    output_directory = os.path.join(session_root, '3_Grip_MNE_visualized', '')

    # exist_ok avoids the gap between checking for the folder and creating it
    os.makedirs(output_directory, exist_ok=True)

    # os.listdir order is arbitrary, so sort for a reproducible file order
    data_filenames = sorted(
        f for f in os.listdir(data_directory) if f.endswith('.set')
    )

    if print_diagnostic:
        print('\n###\n### Printing data directory ... \n###\n\n', data_directory, "\n\n",
              '\n###\n### Printing output directory ... \n###\n\n', output_directory, "\n\n",
              '\n###\n### Printing data filenames ... \n###\n\n', data_filenames, "\n")

    return data_directory, output_directory, data_filenames

# Optional smoke test: resolve the folders for the configured user, year and session
if test_fxn == True:
    data_directory, output_directory, data_filenames = set_directories_vitality(
        DataAnalysis_path=user_path,
        year=year,
        session=session,
        print_diagnostic=print_diagnostic,
    )

In [None]:
## define function to get participant_info dictionary from file_list

def get_data_info(file_list, data_dir, output_directory, print_diagnostic=False):
    """Map each participant ID to the input and output paths of its file.

    Parameters
    ----------
    file_list : list of str
        File names to analyze.
    data_dir : str
        Prefix prepended to each file name to form its input path.
    output_directory : str
        Prefix prepended to each generated output file name.
    print_diagnostic : bool, optional
        When true, print every entry of the resulting dictionary.

    Returns
    -------
    dict
        ``{participant_id: (input_path, output_path)}``. The participant ID is
        the part of the file name before its first underscore. When several
        files share an ID, the last one in ``file_list`` wins.
    """
    participant_info = {}

    for filename in file_list:
        # Participant number is everything before the first underscore
        participant = filename.split("_")[0]

        # Output name: the text before "AllChannels.set" plus the new suffix
        outname = filename.split("AllChannels.set")[0] + 'mne_processed.set'

        participant_info[participant] = (data_dir + filename,
                                         output_directory + outname)

    if print_diagnostic:
        print('\n###\n### Printing \'participant_info\' dictionary ... \n###\n\n----------\n')
        for participant, (inpath, outpath) in participant_info.items():
            print('Participant:', participant, '\n\nInpath:', inpath,
                  '\n\nOutpath:', outpath, '\n\n----------\n')

    return participant_info

# Optional smoke test: build the participant dictionary from the files found above
if test_fxn == True:
    participant_info = get_data_info(
        file_list=data_filenames,
        data_dir=data_directory,
        output_directory=output_directory,
        print_diagnostic=print_diagnostic,
    )

## Import Participant data

In [None]:
def visualize_participant(data_directory, subject_ID, physio, plot):
    """Load one participant's session and per-block recordings, optionally plotting them.

    For every modality in ``physio`` the filtered session recording is read
    from ``<data_directory><modality>/``, then each block's raw and epoch
    files are read from the participant's sub-folder. The number of blocks is
    the number of entries in that sub-folder whose name matches ``*epo*``.

    Parameters
    ----------
    data_directory : str
        Directory prefix (ending in a path separator) holding one folder per modality.
    subject_ID : str
        Participant identifier used in the file and folder names.
    physio : iterable of str
        Modalities to load; each must be 'eeg' or 'emg'.
    plot : bool
        When equal to True, open blocking plot windows for the session
        recording and for every block's raw and epoched data.

    Returns
    -------
    tuple of dict
        ``(raw, epoch)``; each maps 'eeg' and 'emg' to a list with one loaded
        object per block, in block order.
    """
    import glob

    epochs_per_view = 3
    plot_scalings = {'eeg': 1/25000, 'emg': 1/5000}
    raw = {'eeg': [], 'emg': []}
    epoch = {'eeg': [], 'emg': []}

    for phys in physio:
        # Look the modality up first so an unknown name fails before any file is read
        raw_blocks = raw[phys]
        epoch_blocks = epoch[phys]
        scale = plot_scalings[phys]

        phys_dir = data_directory + phys + delim
        subject_dir = phys_dir + subject_ID + delim

        # Whole-session recording
        session_recording = mne.io.Raw(
            phys_dir + subject_ID + '_filtered_session_%s_raw.fif' % phys,
            verbose=False)
        n_blocks = len(glob.glob(subject_dir + '*epo*'))

        if plot == True:
            session_recording.plot(
                title='%s %s Continuous Recording' % (subject_ID, phys),
                block=True, scalings=scale)

        # Block files are numbered from 1
        for block in range(1, n_blocks + 1):
            block_raw = mne.io.Raw(
                subject_dir + 'block_%d_%s_raw.fif' % (block, phys), verbose=False)
            raw_blocks.append(block_raw)
            block_epochs = mne.read_epochs(
                subject_dir + 'block_%d_%s_epo.fif' % (block, phys), verbose=False)
            epoch_blocks.append(block_epochs)

            if plot == True:
                # Fixed-length events (2 s apart) are drawn on the continuous block view
                events = mne.make_fixed_length_events(block_raw, id=1, duration=2)
                block_raw.plot(
                    title='%s %s Block: %d [seconds]' % (subject_ID, phys.upper(), block),
                    block=True, scalings=scale, events=events, duration=40)
                block_epochs.plot(
                    title='%s %s Block: %d [epochs]' % (subject_ID, phys.upper(), block),
                    n_epochs=epochs_per_view, block=True, scalings=scale)
                print('finished with %s %s block %s' % (subject_ID, phys.upper(), block))

    return raw, epoch

# Optional smoke test: load and plot a single participant.
if test_fxn:
    subject_ID = '2014'
    plot = True

    # visualize_participant requires the list of modalities as its third
    # argument; the previous call left it out and raised TypeError.
    raw, epoch = visualize_participant(data_directory, subject_ID, ['eeg'], plot)

In [None]:
def visualize(subject_list, physio, plot):
    """Run ``visualize_participant`` for every participant in ``subject_list``.

    The data directory is resolved with ``set_directories_vitality`` from the
    notebook-level ``user_path``, ``year``, ``session`` and
    ``print_diagnostic`` settings. Nothing is returned; the loaded data of
    each participant is discarded after that participant has been processed.

    Parameters
    ----------
    subject_list : iterable of str
        Participant identifiers to process, in order.
    physio : iterable of str
        Modalities forwarded to ``visualize_participant``.
    plot : bool
        Plot switch forwarded to ``visualize_participant``.
    """
    # Only the data directory is needed here
    data_directory, _output_directory, _data_filenames = set_directories_vitality(
        user_path, year, session, print_diagnostic)

    for participant in subject_list:
        visualize_participant(data_directory, participant, physio, plot)

# Optional smoke test: visualize three randomly chosen participants.
if test_fxn:
    import random

    ## Participants by year (compared with ==; `is` tests identity, not equality)
    if year == '2016':
        subject_list = ['01', '02', '03', '04', '05', '06', '08', '09', '10',
                        '11', '12', '13', '14', '15', '16', '17', '18', '20',
                        '21', '22', '23', '24', '25', '26', '27', '28', '29',
                        '30', '31', '32', '33', '35', '36', '37', '38', '39',
                        '40', '42', '43', '44', '45']
        exclude = ['30', '36']  # Indexing issue with these participants

    elif year == '2017':
        subject_list = ['2037', '2004', '2025', '2012', '2016', '2021', '2029', '2017',
                        '2020', '2024', '2013', '2028', '2046', '2042', '2032', '2001',
                        '2036', '2009', '2019', '2026', '2011', '2015', '2022', '2038',
                        '2003', '2034', '2030', '2007', '2039', '2031', '2002', '2045',
                        '2041', '2018', '2014', '2023', '2010']
        exclude = []

    else:
        raise ValueError('No participant list is defined for year %r' % (year,))

    # Keep the known-problematic participants out of the random draw
    candidates = [subject for subject in subject_list if subject not in exclude]
    rand_participants = sorted(random.sample(candidates, k=3))
    plot = True
    print(rand_participants)

    # visualize() takes (subject_list, physio, plot); the modality list was
    # missing from the previous call, which raised TypeError.
    visualize(rand_participants, ['eeg'], plot)

In [None]:
## Run settings for this cell
year = '2017'
plot = True
subject_list = 'prepost'  # 'all' or 'prepost'; replaced below by the actual ID list
physio = ['eeg']
# physio = ['eeg','emg']


## Participants by year
# Strings are compared with ==; `is` tests object identity and only works by
# accident of interning (and emits a SyntaxWarning on Python 3.8+).
if year == '2016':
    if subject_list == 'all':
        subject_list = ['01', '02', '03', '04', '05', '06', '08', '09', '10',
                        '11', '12', '13', '14', '15', '16', '17', '18', '20',
                        '21', '22', '23', '24', '25', '26', '27', '28', '29',
                        '30', '31', '32', '33', '35', '36', '37', '38', '39',
                        '40', '42', '43', '44', '45']
    elif subject_list == 'prepost':
        subject_list = ['03', '04', '05', '06', '08', '12', '13', '14', '15', '17', '18',
                        '20', '21', '23', '25', '28', '29', '32', '35', '39', '40', '43', '44']
    exclude = ['30', '36']  # Indexing issue with these participants

elif year == '2017':
    if subject_list == 'all':
        subject_list = ['2037', '2004', '2025', '2012', '2016', '2021', '2029', '2017',
                        '2020', '2024', '2013', '2028', '2046', '2042', '2032', '2001',
                        '2036', '2009', '2019', '2026', '2011', '2015', '2022', '2038',
                        '2003', '2034', '2030', '2007', '2039', '2031', '2002', '2045',
                        '2041', '2018', '2014', '2023', '2010']

    elif subject_list == 'prepost':
        subject_list = ['2037', '2004', '2025', '2017', '2020', '2024', '2013', '2028',
                        '2046', '2032', '2036', '2009', '2019', '2011', '2022', '2038', '2003',
                        '2034', '2030', '2039', '2002', '2045', '2041', '2018', '2023', '2010']

#     subject_list=['2037, 2017']
    exclude = []

else:
    raise ValueError('No participant list is defined for year %r' % (year,))

# A selector that matched neither 'all' nor 'prepost' is still a string here
if isinstance(subject_list, str):
    raise ValueError("subject_list must be 'all' or 'prepost', got %r" % (subject_list,))

## Exclude problematic participants
# Filtering (rather than list.remove) tolerates excluded IDs that are not in
# the chosen list, e.g. '30' and '36' are absent from the 2016 'prepost' list.
subject_list = [subject for subject in subject_list if subject not in exclude]
print(subject_list)
subject_list = sorted(subject_list)

## run workflow on subject_list
visualize(subject_list, physio, plot)

In [None]:
def rejection(epoch, block, physio):
    """Clean one block of epochs with ICA and AutoReject, plotting each stage.

    A copy of ``epoch[physio][block]`` is restricted to the channels of the
    requested modality and plotted. ICA is then fitted (using a global
    rejection threshold from autoreject), its components and sources are
    plotted for interactive inspection, and the ICA is applied. Finally
    AutoReject is fitted and its predicted rejections are plotted. The input
    ``epoch`` container is not modified and nothing is returned.

    Parameters
    ----------
    epoch : dict
        Maps a modality name to a sequence of epochs objects, one per block
        (presumably the second value returned by ``visualize_participant``).
    block : int
        Index into ``epoch[physio]`` of the block to process. This is a plain
        sequence index, so 0 selects the first stored block.
    physio : str
        Modality to process: 'eeg' or 'emg'.

    Raises
    ------
    ValueError
        If ``physio`` is not 'eeg' or 'emg'.
    """
    # Channels analysed for each modality
    channel_picks = {
        'eeg': ['Fp1', 'Fp2', 'F3', 'Fz', 'F4', 'C3', 'Cz', 'C4', 'P3', 'Pz', 'P4', 'Oz'],
        'emg': ['PO7', 'P7', 'PO8', 'P8'],
    }
    if physio not in channel_picks:
        raise ValueError("physio must be 'eeg' or 'emg', got %r" % (physio,))
    picks = channel_picks[physio]

    random_state = 42
    # ICA cannot extract more components than there are channels (EMG has 4)
    n_components = min(8, len(picks))

    # Work on a copy so the caller's epochs stay untouched
    working_epoch = epoch[physio][block].copy()

    # Plot epoch with selected channels before rejection
    working_epoch.pick_channels(picks)
    working_epoch.plot()

    # Number of channels to interpolate. Can be a vector of values to try so
    # that autoreject can optimize the result.
    n_interpolates = np.array([0])
    # Fraction of channels that can be bad in an epoch before autoreject marks
    # the epoch as bad. Given as a vector of values to try.
    consensus = np.linspace(0.5, 1.0, 6)

    # Setting up ICA and AR
    ica = mne.preprocessing.ICA(n_components=n_components, method='fastica',
                                random_state=random_state, max_iter=400)
    ar = AutoReject(n_interpolates, consensus, thresh_method='random_search',
                    random_state=random_state)

    # Determine a global rejection threshold using autoreject
    rejection_threshold = get_rejection_threshold(working_epoch)

    # Plot and select ICA components to correct on
    ica.fit(working_epoch, reject=rejection_threshold)
    ica.plot_components()
    ica.plot_sources(working_epoch, block=True)

    # Apply ICA
    ica.apply(working_epoch)

    # Apply autoreject and view predicted rejections
    # NOTE (original author): an error was seen here for C3-Cz and C4-Cz.
    ar.fit(working_epoch)
    reject_log = ar.get_reject_log(working_epoch)
    reject_log.plot_epochs(working_epoch)

