<img src="imgs/GeorgiaTech_RGB.png" alt="GeorgiaTech_RGB" width="200"/>

# <span style='color:#B3A369'> <b>Combine Participant Data</b> </span>

> <b> Notebook Author:</b> Brian Keith (bkeith9@gatech.edu) <br>
> <b> Purpose:</b> Read in and construct the data that will be used for analysis from the source data. <br>
> 
> **Data Citation:**
> - Andreas Miltiadous, Katerina D. Tzimourta, Theodora Afrantou, Panagiotis Ioannidis, Nikolaos Grigoriadis, Dimitrios G. Tsalikakis, Pantelis Angelidis, Markos G. Tsipouras, Evripidis Glavas, Nikolaos Giannakeas, and Alexandros T. Tzallas (2023). A dataset of EEG recordings from: Alzheimer's disease, Frontotemporal dementia and Healthy subjects. OpenNeuro. [Dataset] doi:10.18112/openneuro.ds004504.v1.0.6
>
> **Data Source:**
> - Dataset downloaded as a zip file from: https://nemar.org/dataexplorer/detail?dataset_id=ds004504



# <span style='color:#B3A369'> Initial Tasks </span>

## <span style='color:#003057'> Load Necessary Libraries </span>

In [1]:
import pathlib
import pandas as pd
import numpy as np
import scipy.io as sio
import pickle
# Raise pandas' display limits (columns, column width, rows) for this notebook.
for _option, _value in (
    ('display.max_columns', 500),
    ('display.max_colwidth', None),
    ('display.max_rows', 200),
):
    pd.set_option(_option, _value)

from IPython.display import display, Markdown
def printmd(string):
    '''Render a string as Markdown, coloring headers according to their level.

    A string that starts with one or more '#' characters is treated as a
    Markdown header: the leading run of '#' sets the header level and the
    remaining text is wrapped in a colored <span>. Any other string is
    rendered as-is.

    Args:
        string (str, required): Markdown text to display.
    '''
    header_map = {1:'#B3A369',2:'#003057',3:'#54585A'}
    if string.startswith('#'):
        # Only the leading '#' run defines the level; a '#' later in the text
        # (e.g. "## Issue #3") is content and must be neither counted nor removed.
        text = string.lstrip('#')
        nh = len(string) - len(text)
        # Levels deeper than the mapped ones reuse the deepest mapped color
        # instead of raising KeyError.
        color = header_map.get(nh, header_map[max(header_map)])
        display(Markdown('#'*nh + f' <span style="color:{color}">{text}</span>'))
    else:
        display(Markdown(string))

def find_file_types(path:str, extension:str, recursive:bool=True, excl_str = None) -> list:
    '''Find files with a certain extension in a directory and, optionally, its subdirectories.

    Args:
        path (str, required): Path to the directory to search.
        extension (str, required): Extension of the files to search for, with or without the leading dot.
        recursive (bool, optional): Whether to search subdirectories as well. Defaults to True.
        excl_str (str, optional): If given, any path containing this substring is dropped. Defaults to None.

    Returns:
        list: List of file paths matching the extension, with backslashes replaced by forward slashes.
    '''
    import glob
    import os

    # Strip only the leading dot(s) so multi-part extensions such as 'tar.gz' stay intact.
    extension = extension.lstrip('.')

    # With recursive=False glob treats '**' like a plain '*', which would match
    # exactly one subdirectory level and skip `path` itself; search `path` directly instead.
    pattern = f'**/*.{extension}' if recursive else f'*.{extension}'
    matches = glob.glob(os.path.join(path, pattern), recursive=recursive)

    if excl_str:
        matches = [match for match in matches if excl_str not in match]

    return [match.replace('\\','/') for match in matches]


## <span style='color:#003057'> Find Files and Read Participant Information </span>

In [2]:
DATA_DIR = 'data/ds004504'
DERIVATIVES_DIR = f'{DATA_DIR}/derivatives'

# Participant metadata is a tab-separated file at the dataset root.
participant_file = f'{DATA_DIR}/participants.tsv'
participants = pd.read_csv(participant_file, sep='\t')
printmd('## Participant Information')
display(participants)

# Raw recordings are the .set files outside the derivatives folder;
# the derivatives folder is searched separately.
data_files = find_file_types(DATA_DIR, 'set', excl_str='derivatives')
data_files_derivatives = find_file_types(DERIVATIVES_DIR, 'set')

for _heading, _found in (
    ('## Data Files', data_files),
    ('## Data Files (Derivatives)', data_files_derivatives),
):
    printmd(_heading)
    print(f'Data files found: {len(_found):,}')
    display(pd.DataFrame(_found, columns=['File Path']))

## <span style="color:#003057"> Participant Information</span>

Unnamed: 0,participant_id,Gender,Age,Group,MMSE
0,sub-001,F,57,A,16
1,sub-002,F,78,A,22
2,sub-003,M,70,A,14
3,sub-004,F,67,A,20
4,sub-005,M,70,A,22
5,sub-006,F,61,A,14
6,sub-007,F,79,A,20
7,sub-008,M,62,A,16
8,sub-009,F,77,A,23
9,sub-010,M,69,A,20


## <span style="color:#003057"> Data Files</span>

Data files found: 88


Unnamed: 0,File Path
0,data/ds004504/sub-001/eeg/sub-001_task-eyesclosed_eeg.set
1,data/ds004504/sub-002/eeg/sub-002_task-eyesclosed_eeg.set
2,data/ds004504/sub-003/eeg/sub-003_task-eyesclosed_eeg.set
3,data/ds004504/sub-004/eeg/sub-004_task-eyesclosed_eeg.set
4,data/ds004504/sub-005/eeg/sub-005_task-eyesclosed_eeg.set
5,data/ds004504/sub-006/eeg/sub-006_task-eyesclosed_eeg.set
6,data/ds004504/sub-007/eeg/sub-007_task-eyesclosed_eeg.set
7,data/ds004504/sub-008/eeg/sub-008_task-eyesclosed_eeg.set
8,data/ds004504/sub-009/eeg/sub-009_task-eyesclosed_eeg.set
9,data/ds004504/sub-010/eeg/sub-010_task-eyesclosed_eeg.set


## <span style="color:#003057"> Data Files (Derivatives)</span>

Data files found: 88


Unnamed: 0,File Path
0,data/ds004504/derivatives/sub-001/eeg/sub-001_task-eyesclosed_eeg.set
1,data/ds004504/derivatives/sub-002/eeg/sub-002_task-eyesclosed_eeg.set
2,data/ds004504/derivatives/sub-003/eeg/sub-003_task-eyesclosed_eeg.set
3,data/ds004504/derivatives/sub-004/eeg/sub-004_task-eyesclosed_eeg.set
4,data/ds004504/derivatives/sub-005/eeg/sub-005_task-eyesclosed_eeg.set
5,data/ds004504/derivatives/sub-006/eeg/sub-006_task-eyesclosed_eeg.set
6,data/ds004504/derivatives/sub-007/eeg/sub-007_task-eyesclosed_eeg.set
7,data/ds004504/derivatives/sub-008/eeg/sub-008_task-eyesclosed_eeg.set
8,data/ds004504/derivatives/sub-009/eeg/sub-009_task-eyesclosed_eeg.set
9,data/ds004504/derivatives/sub-010/eeg/sub-010_task-eyesclosed_eeg.set


# <span style='color:#B3A369'> Data Consolidation </span>

## <span style='color:#003057'> Combine Participant Data and Label Information </span>

In [3]:

def explode_labels(df: pd.DataFrame, verbose = False) -> pd.DataFrame:
    '''Recursively explode columns with arrays in them into separate rows.

    Args:
        df (pd.DataFrame): Dataframe to explode
    
    Returns:
        pd.DataFrame: Exploded dataframe
    '''
    if verbose:
        display(df)
    
    for col in df.columns:
        if df[col].apply(type).eq(np.ndarray).any():
            df = df.explode(col)
    
    # check if there are still arrays in the dataframe
    if df.map(type).eq(np.ndarray).any().any():
        df = explode_labels(df)
    
    return df

def get_data_and_labels(files_df: list,  return_labels: bool = True, verbose: bool = False,) -> tuple:
    '''Load every .set file in a collection of file paths and combine them 
    into one data dataframe and one channel-label dataframe. This function 
    is specific to the structure of the dataset for this analysis.
    
    Note that the additional processing time for the labels is minimal so the 
    change for return_labels is only applied to the return statement even 
    though the labels are loaded regardless.
    
    Args:
        files_df (list): File paths to load. Despite the parameter name, the 
            argument is iterated directly and each item is passed to 
            sio.loadmat, so it must yield paths (the notebook passes a list of strings).
        return_labels (bool, optional): Whether to return the labels dataframe. Defaults to True. 
        verbose (bool, optional): Whether to display an example of the processing 
            (shown for the first file only). Defaults to False.
    
    Returns:
        tuple: (data, labels) dataframes when return_labels is True. When 
        return_labels is False only the data dataframe is returned (not a tuple).
    '''
    data_dfs = []
    label_dfs = []
    for c, fn in enumerate(files_df):
        
        tmp = sio.loadmat(fn)

        #! Get participant ID from filename
        #the ID is the part of the file stem before the first underscore (e.g. 'sub-001')
        participant_id = pathlib.Path(fn).stem.split('_')[0]
        
        #! Load Channel Labels and explode into separate rows
        #note that the channel labels are stored in nested 1d arrays so the
        #explode function needs to be called recursively
        tmp_labels = pd.DataFrame(tmp['chanlocs'][0])
        tmp_labels = explode_labels(tmp_labels)
        tmp_labels.insert(0, 'participant_id', participant_id)
        label_dfs.append(tmp_labels)
        
        #! Load Dataset and append time and dataset ID 
        #transpose so that each column lines up with one channel label
        tmp_data = pd.DataFrame(tmp['data']).T
        tmp_data.columns = tmp_labels['labels'].values
        #time is based on a 500hz sample rate and is indicated every 2, so time is in ms
        #each insert goes to position 0, so the final column order is
        #participant_id, time_s, time_ms, followed by the channel columns
        tmp_data.insert(0, 'time_ms', tmp['times'][0])
        tmp_data.insert(0, 'time_s', tmp['times'][0]/1000)
        
        tmp_data.insert(0, 'participant_id', participant_id)
        data_dfs.append(tmp_data)
        
        #only the first file is shown as an example when verbose is requested
        if c == 0 and verbose:
            printmd(f'## Data Example: {participant_id}')
            printmd('### Channel Label Example')
            display(tmp_labels)
            
            printmd('### Data Example')
            display(tmp_data)
            display(Markdown('---'))

    #stack all participants into single dataframes and set explicit dtypes
    #for the columns added above
    concat_data = pd.concat(data_dfs, ignore_index=True)
    concat_data.time_ms = concat_data.time_ms.astype(np.int32)
    concat_data.time_s = concat_data.time_s.astype(np.float64)
    concat_data.participant_id = concat_data.participant_id.astype('category')
    concat_labels = pd.concat(label_dfs, ignore_index=True)
    
    if return_labels:
        #? NOTE: It was noted that the only difference between the label information
        #? in the raw and preprocessed data is that the raw data has values in the
        #? 'ref' column and the preprocessed data is only NaNs so we will only
        #? return the labels data once for the raw data. This check was validated
        #? by dropping the ref column and comparing the two dataframes with isequal.
        return (concat_data, concat_labels)

    return concat_data

# --- Raw recordings: keep both the signal data and the channel labels ---
printmd('# Processing Raw Version of Data from Set Files')
data_raw, labels = get_data_and_labels(data_files, return_labels=True, verbose=True)

for _heading in ('## Combined Dataset and Channel Labels (Raw)', '### Data'):
    printmd(_heading)
display(data_raw)
data_raw.info()
printmd('### Channel Labels')
display(labels)

# --- Preprocessed recordings: only the signal data is kept ---
printmd('# Processing Preprocessed Version of Data from Set Files')
data_preprocessed = get_data_and_labels(data_files_derivatives, return_labels=False, verbose=True)

for _heading in ('## Combined Dataset and Channel Labels (Preprocessed)', '### Data'):
    printmd(_heading)
display(data_preprocessed)
data_preprocessed.info()

# <span style="color:#B3A369"> Processing Raw Version of Data from Set Files</span>

## <span style="color:#003057"> Data Example: sub-001</span>

### <span style="color:#54585A"> Channel Label Example</span>

Unnamed: 0,participant_id,labels,type,theta,radius,X,Y,Z,sph_theta,sph_phi,sph_radius,urchan,ref
0,sub-001,Fp1,EEG,-19.330008,0.524968,83.9171,29.4367,-6.99,19.330008,-4.494258,89.204591,1,A1 A2
1,sub-001,Fp2,EEG,19.385429,0.524989,84.8959,-29.8723,-7.08,-19.385429,-4.498096,90.276213,2,A1 A2
2,sub-001,F3,EEG,-43.410838,0.333395,53.1112,50.2438,42.192,43.410838,29.988986,84.412107,3,A1 A2
3,sub-001,F4,EEG,43.66767,0.341495,54.3048,-51.8362,40.814,-43.66767,28.530945,85.450486,4,A1 A2
4,sub-001,C3,EEG,-100.091205,0.254935,-11.6317,65.3581,64.358,100.091205,44.111744,92.460423,5,A1 A2
5,sub-001,C4,EEG,99.224598,0.260682,-10.9003,-67.1179,63.58,-99.224598,43.077206,93.091597,6,A1 A2
6,sub-001,P3,EEG,-146.067901,0.330544,-78.7878,53.0073,55.94,146.067901,30.502073,110.211501,7,A1 A2
7,sub-001,P4,EEG,144.679127,0.330934,-78.5602,-55.6667,56.561,-144.679127,30.431813,111.667512,8,A1 A2
8,sub-001,O1,EEG,-165.341503,0.47584,-112.449,29.4134,8.839,165.341503,4.348748,116.567807,9,A1 A2
9,sub-001,O2,EEG,165.099907,0.475911,-112.156,-29.8426,8.8,-165.099907,4.336093,116.391534,10,A1 A2


### <span style="color:#54585A"> Data Example</span>

Unnamed: 0,participant_id,time_s,time_ms,Fp1,Fp2,F3,F4,C3,C4,P3,P4,O1,O2,F7,F8,T3,T4,T5,T6,Fz,Cz,Pz
0,sub-001,0.000,0,-189.892563,-141.845688,-107.373039,28.466799,-108.447258,-36.474609,-73.583984,173.486328,149.462891,156.201172,-19.873045,-112.158195,-59.130859,-12.158201,-78.271477,-131.396469,-103.271477,-18.212889,-126.708977
1,sub-001,0.002,2,-180.419907,-137.353500,-100.048820,32.275391,-103.369133,-33.642578,-69.384766,177.099609,150.146484,161.279297,-4.345701,-108.544914,-54.736328,-9.423826,-75.537102,-128.173813,-98.681633,-14.404295,-122.900383
2,sub-001,0.004,4,-166.992172,-135.058578,-105.761711,35.351562,-101.953117,-29.882812,-67.968742,180.078125,144.238281,160.546875,4.199220,-103.710930,-53.320312,-5.273437,-85.546867,-124.609367,-98.535149,-11.523437,-120.410149
3,sub-001,0.006,6,-160.205063,-132.958969,-105.322258,38.037109,-99.755852,-27.099607,-65.869133,183.349609,144.482422,161.865234,5.908205,-99.365227,-50.927734,-2.392576,-84.912102,-121.044914,-95.556633,-9.326170,-118.212883
4,sub-001,0.008,8,-159.326157,-124.462883,-104.150383,41.162109,-98.486320,-24.462891,-64.794914,184.619141,146.044922,163.427734,5.419923,-96.337883,-48.095699,-2.587890,-82.470695,-120.068352,-91.748039,-7.666015,-116.357414
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
299895,sub-001,599.790,599790,114.746086,66.894531,92.773430,157.617188,58.984375,69.824219,114.746086,89.062492,140.722656,40.527344,-0.683592,79.785156,91.406242,178.417969,113.183586,113.671867,67.578125,190.917969,52.050781
299896,sub-001,599.792,599792,112.841789,70.751953,91.455070,157.470703,60.498047,69.384766,115.673820,89.599602,138.037109,43.505859,-3.759764,78.466797,93.701164,176.611328,111.279289,113.818352,67.138672,190.771484,52.685547
299897,sub-001,599.794,599794,113.134758,76.806641,93.798820,153.369141,62.158203,71.533203,120.947258,94.482414,144.384766,50.048828,1.513674,77.880859,92.626945,177.880859,116.650383,120.068352,68.212891,193.408203,58.154297
299898,sub-001,599.796,599796,110.107414,68.115234,91.943352,154.345703,59.814453,68.212891,120.068352,90.380852,142.626953,44.970703,-4.443357,67.529297,88.623039,173.095703,115.673820,115.771477,63.330078,191.357422,55.224609


---

## <span style="color:#003057"> Combined Dataset and Channel Labels (Raw)</span>

### <span style="color:#54585A"> Data</span>

Unnamed: 0,participant_id,time_s,time_ms,Fp1,Fp2,F3,F4,C3,C4,P3,P4,O1,O2,F7,F8,T3,T4,T5,T6,Fz,Cz,Pz
0,sub-001,0.000,0,-189.892563,-141.845688,-107.373039,28.466799,-108.447258,-36.474609,-73.583984,173.486328,149.462891,156.201172,-19.873045,-112.158195,-59.130859,-12.158201,-78.271477,-131.396469,-103.271477,-18.212889,-126.708977
1,sub-001,0.002,2,-180.419907,-137.353500,-100.048820,32.275391,-103.369133,-33.642578,-69.384766,177.099609,150.146484,161.279297,-4.345701,-108.544914,-54.736328,-9.423826,-75.537102,-128.173813,-98.681633,-14.404295,-122.900383
2,sub-001,0.004,4,-166.992172,-135.058578,-105.761711,35.351562,-101.953117,-29.882812,-67.968742,180.078125,144.238281,160.546875,4.199220,-103.710930,-53.320312,-5.273437,-85.546867,-124.609367,-98.535149,-11.523437,-120.410149
3,sub-001,0.006,6,-160.205063,-132.958969,-105.322258,38.037109,-99.755852,-27.099607,-65.869133,183.349609,144.482422,161.865234,5.908205,-99.365227,-50.927734,-2.392576,-84.912102,-121.044914,-95.556633,-9.326170,-118.212883
4,sub-001,0.008,8,-159.326157,-124.462883,-104.150383,41.162109,-98.486320,-24.462891,-64.794914,184.619141,146.044922,163.427734,5.419923,-96.337883,-48.095699,-2.587890,-82.470695,-120.068352,-91.748039,-7.666015,-116.357414
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35295145,sub-088,794.090,794090,-234.960922,-185.839828,-96.582031,-72.167969,-54.687496,-90.332031,-44.531250,-40.039062,-54.199215,-36.132812,-31.738281,-19.726562,9.472656,-84.179688,-88.183594,-47.558594,-102.148438,-123.828110,-89.355469
35295146,sub-088,794.092,794092,-238.623032,-206.884750,-103.076172,-82.470703,-59.423828,-99.658203,-48.388672,-47.314453,-59.228516,-41.650391,-34.033203,-34.130859,-8.251953,-89.501953,-89.892578,-52.880859,-113.037109,-131.298813,-94.970703
35295147,sub-088,794.094,794094,-238.183578,-223.730453,-106.542969,-91.699219,-59.570312,-106.347656,-49.609375,-54.687500,-63.574219,-48.730469,-34.082031,-48.828125,-44.335938,-105.664062,-93.945312,-62.109375,-119.824203,-136.914047,-99.414062
35295148,sub-088,794.096,794096,-236.621078,-236.230453,-102.343750,-93.847656,-55.859375,-106.542969,-47.070312,-56.738281,-65.820312,-52.148438,-33.496094,-53.417969,-34.375000,-112.792969,-94.628906,-68.945312,-120.800766,-136.718735,-98.730469


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35295150 entries, 0 to 35295149
Data columns (total 22 columns):
 #   Column          Dtype   
---  ------          -----   
 0   participant_id  category
 1   time_s          float64 
 2   time_ms         int32   
 3   Fp1             float32 
 4   Fp2             float32 
 5   F3              float32 
 6   F4              float32 
 7   C3              float32 
 8   C4              float32 
 9   P3              float32 
 10  P4              float32 
 11  O1              float32 
 12  O2              float32 
 13  F7              float32 
 14  F8              float32 
 15  T3              float32 
 16  T4              float32 
 17  T5              float32 
 18  T6              float32 
 19  Fz              float32 
 20  Cz              float32 
 21  Pz              float32 
dtypes: category(1), float32(19), float64(1), int32(1)
memory usage: 2.9 GB


### <span style="color:#54585A"> Channel Labels</span>

Unnamed: 0,participant_id,labels,type,theta,radius,X,Y,Z,sph_theta,sph_phi,sph_radius,urchan,ref
0,sub-001,Fp1,EEG,-19.330008,0.524968,83.9171,29.4367,-6.99,19.330008,-4.494258,89.204591,1,A1 A2
1,sub-001,Fp2,EEG,19.385429,0.524989,84.8959,-29.8723,-7.08,-19.385429,-4.498096,90.276213,2,A1 A2
2,sub-001,F3,EEG,-43.410838,0.333395,53.1112,50.2438,42.192,43.410838,29.988986,84.412107,3,A1 A2
3,sub-001,F4,EEG,43.66767,0.341495,54.3048,-51.8362,40.814,-43.66767,28.530945,85.450486,4,A1 A2
4,sub-001,C3,EEG,-100.091205,0.254935,-11.6317,65.3581,64.358,100.091205,44.111744,92.460423,5,A1 A2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1667,sub-088,T5,EEG,-135.399961,0.507672,-73.4527,72.4343,-2.487,135.399961,-1.381027,103.190175,15,A1 A2
1668,sub-088,T6,EEG,135.004941,0.507823,-73.0683,-73.0557,-2.54,-135.004941,-1.408193,103.356487,16,A1 A2
1669,sub-088,Fz,EEG,0.305708,0.229781,58.512,-0.3122,66.462,-0.305708,48.639461,88.549134,17,A1 A2
1670,sub-088,Cz,EEG,177.495882,0.029055,-9.167,-0.4009,100.244,-177.495882,84.770046,100.663072,18,A1 A2


# <span style="color:#B3A369"> Processing Preprocessed Version of Data from Set Files</span>

## <span style="color:#003057"> Data Example: sub-001</span>

### <span style="color:#54585A"> Channel Label Example</span>

Unnamed: 0,participant_id,labels,type,theta,radius,X,Y,Z,sph_theta,sph_phi,sph_radius,urchan,ref
0,sub-001,Fp1,EEG,-19.330008,0.524968,83.9171,29.4367,-6.99,19.330008,-4.494258,89.204591,1,
1,sub-001,Fp2,EEG,19.385429,0.524989,84.8959,-29.8723,-7.08,-19.385429,-4.498096,90.276213,2,
2,sub-001,F3,EEG,-43.410838,0.333395,53.1112,50.2438,42.192,43.410838,29.988986,84.412107,3,
3,sub-001,F4,EEG,43.66767,0.341495,54.3048,-51.8362,40.814,-43.66767,28.530945,85.450486,4,
4,sub-001,C3,EEG,-100.091205,0.254935,-11.6317,65.3581,64.358,100.091205,44.111744,92.460423,5,
5,sub-001,C4,EEG,99.224598,0.260682,-10.9003,-67.1179,63.58,-99.224598,43.077206,93.091597,6,
6,sub-001,P3,EEG,-146.067901,0.330544,-78.7878,53.0073,55.94,146.067901,30.502073,110.211501,7,
7,sub-001,P4,EEG,144.679127,0.330934,-78.5602,-55.6667,56.561,-144.679127,30.431813,111.667512,8,
8,sub-001,O1,EEG,-165.341503,0.47584,-112.449,29.4134,8.839,165.341503,4.348748,116.567807,9,
9,sub-001,O2,EEG,165.099907,0.475911,-112.156,-29.8426,8.8,-165.099907,4.336093,116.391534,10,


### <span style="color:#54585A"> Data Example</span>

Unnamed: 0,participant_id,time_s,time_ms,Fp1,Fp2,F3,F4,C3,C4,P3,P4,O1,O2,F7,F8,T3,T4,T5,T6,Fz,Cz,Pz
0,sub-001,0.000,0,-33.716686,-25.142326,-10.501626,-15.470965,-14.639780,-18.083931,-12.670144,-15.455706,-14.112548,-14.500907,-21.664469,-17.408562,-15.743222,-15.870240,-12.270000,-18.645056,-16.640219,-15.989630,-15.898245
1,sub-001,0.002,2,-28.189100,-20.730621,-10.805439,-13.357321,-13.841861,-15.904325,-11.895370,-13.762712,-14.834889,-13.384614,-18.855465,-14.015569,-15.190090,-14.149742,-13.692374,-16.790701,-14.803726,-14.482277,-14.147045
2,sub-001,0.004,4,-22.796139,-16.693195,-11.653964,-11.569100,-13.329861,-13.841828,-11.468915,-12.353460,-15.685031,-12.559989,-16.338762,-11.195667,-14.826118,-13.128342,-15.350288,-15.361226,-13.299286,-13.112459,-12.649577
3,sub-001,0.006,6,-18.145367,-13.420150,-12.855981,-10.171793,-13.044628,-12.095045,-11.322964,-11.337563,-16.428089,-12.052039,-14.305224,-9.178638,-14.566225,-12.879128,-16.850992,-14.529299,-12.216455,-11.933325,-11.532134
4,sub-001,0.008,8,-14.718303,-11.144460,-14.124736,-9.136185,-12.878322,-10.791430,-11.326547,-10.731927,-16.821066,-11.807705,-12.845049,-8.007304,-14.309454,-13.280541,-17.802584,-14.327282,-11.534676,-10.953032,-10.835276
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
299895,sub-001,599.790,599790,12.347760,16.889381,14.600148,15.726487,8.090471,12.126588,7.098454,10.281487,9.279147,9.496131,16.724451,23.856003,12.883119,17.124908,10.150877,13.269211,13.643437,7.915736,6.077712
299896,sub-001,599.792,599792,10.741529,14.647578,13.454813,13.139357,8.248627,11.563673,7.710814,10.497419,9.358212,10.134442,14.581519,21.509949,11.650985,16.230841,10.247348,13.998811,12.443630,8.004057,6.416523
299897,sub-001,599.794,599794,7.807443,11.263392,11.253476,9.817191,7.324058,9.839645,7.271523,9.549775,8.229297,9.727098,11.118062,17.517929,9.129510,14.342964,9.576555,13.486362,10.192595,7.039798,5.746032
299898,sub-001,599.796,599796,3.916294,7.115328,8.160104,6.081618,5.349512,7.077271,5.776646,7.451472,5.902449,8.201610,6.701581,12.293653,5.606554,11.544965,8.047968,11.716477,7.101904,5.041760,4.065930


---

## <span style="color:#003057"> Combined Dataset and Channel Labels (Preprocessed)</span>

### <span style="color:#54585A"> Data</span>

Unnamed: 0,participant_id,time_s,time_ms,Fp1,Fp2,F3,F4,C3,C4,P3,P4,O1,O2,F7,F8,T3,T4,T5,T6,Fz,Cz,Pz
0,sub-001,0.000000,0,-33.716686,-25.142326,-10.501626,-15.470965,-14.639780,-18.083931,-12.670144,-15.455706,-14.112548,-14.500907,-21.664469,-17.408562,-15.743222,-15.870240,-12.270000,-18.645056,-16.640219,-15.989630,-15.898245
1,sub-001,0.002000,2,-28.189100,-20.730621,-10.805439,-13.357321,-13.841861,-15.904325,-11.895370,-13.762712,-14.834889,-13.384614,-18.855465,-14.015569,-15.190090,-14.149742,-13.692374,-16.790701,-14.803726,-14.482277,-14.147045
2,sub-001,0.004000,4,-22.796139,-16.693195,-11.653964,-11.569100,-13.329861,-13.841828,-11.468915,-12.353460,-15.685031,-12.559989,-16.338762,-11.195667,-14.826118,-13.128342,-15.350288,-15.361226,-13.299286,-13.112459,-12.649577
3,sub-001,0.006000,6,-18.145367,-13.420150,-12.855981,-10.171793,-13.044628,-12.095045,-11.322964,-11.337563,-16.428089,-12.052039,-14.305224,-9.178638,-14.566225,-12.879128,-16.850992,-14.529299,-12.216455,-11.933325,-11.532134
4,sub-001,0.008000,8,-14.718303,-11.144460,-14.124736,-9.136185,-12.878322,-10.791430,-11.326547,-10.731927,-16.821066,-11.807705,-12.845049,-8.007304,-14.309454,-13.280541,-17.802584,-14.327282,-11.534676,-10.953032,-10.835276
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34915555,sub-088,784.690024,784690,-4.135646,-0.074674,-15.040091,-7.601286,-17.016638,-14.248407,-14.030102,-8.703975,-11.357444,-10.035007,-1.590909,0.402359,-17.356049,-7.901550,-14.289450,-2.597813,-12.404937,-19.662407,-14.343632
34915556,sub-088,784.692024,784692,-7.215837,-3.562078,-17.623934,-10.803946,-18.230413,-18.139311,-14.779044,-12.332369,-13.380805,-13.356746,-4.750472,-3.408606,-16.651541,-9.504310,-15.314070,-6.040915,-15.770036,-22.450966,-16.778763
34915557,sub-088,784.694024,784694,-10.053381,-7.011106,-19.690132,-13.762733,-18.910858,-21.165850,-14.957758,-15.149434,-14.852368,-15.954807,-7.644172,-7.084083,-16.226851,-11.081904,-15.606584,-8.441842,-18.919390,-24.639957,-18.478544
34915558,sub-088,784.696024,784696,-12.064882,-9.775346,-20.759008,-15.952149,-18.799904,-22.923990,-14.444428,-16.820267,-15.604250,-17.562740,-9.676021,-9.987882,-15.555845,-12.325222,-15.101740,-9.697705,-21.295584,-25.882179,-19.193089


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34915560 entries, 0 to 34915559
Data columns (total 22 columns):
 #   Column          Dtype   
---  ------          -----   
 0   participant_id  category
 1   time_s          float64 
 2   time_ms         int32   
 3   Fp1             float32 
 4   Fp2             float32 
 5   F3              float32 
 6   F4              float32 
 7   C3              float32 
 8   C4              float32 
 9   P3              float32 
 10  P4              float32 
 11  O1              float32 
 12  O2              float32 
 13  F7              float32 
 14  F8              float32 
 15  T3              float32 
 16  T4              float32 
 17  T5              float32 
 18  T6              float32 
 19  Fz              float32 
 20  Cz              float32 
 21  Pz              float32 
dtypes: category(1), float32(19), float64(1), int32(1)
memory usage: 2.9 GB


# <span style='color:#B3A369'> Export Data to Pickle </span>

In [4]:
# Bundle everything built above into one dictionary and pickle it to disk.
export_name = 'data/ConsolidatedParticipantData.pkl'
data_dict = dict(
    participants=participants,
    data_raw=data_raw,
    data_pre=data_preprocessed,
    labels=labels,
)

with open(export_name, mode='wb') as pickle_file:
    pickle.dump(data_dict, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)

print(f'Data saved to: {export_name}')

Data saved to: data/ConsolidatedParticipantData.pkl


# <span style='color:#B3A369'> Export Source Code </span>

In [5]:
# Gate the export cells below so they only run for the notebook author.
# Path.home().name is the last component of the home directory on every
# platform; splitting the string on '\\' only worked for Windows-style paths.
user = pathlib.Path.home().name
export_flag = False
if user not in ('bkeith', 'Brian'):
    raise Exception('User running code is not the student. No need to run below.')
else:
    export_flag = True
    print('User running code is the student. Continue to file Export.')

def export_code(cur_file: str, output_dir: str = '', output_name: str = '', cell_tags_exist: bool = False, template:str = 'lab'):
    """Export a Jupyter Notebook as an HTML file by running `jupyter nbconvert`.

    On success a confirmation is displayed; on failure (non-zero exit code or
    the executable could not be started) an error header is displayed and the
    error text is printed. Nothing is returned.

    Args:
        cur_file (str, required): FULL PATH of the notebook file to export.
        output_dir (str, optional): Directory to output the file to. Defaults to the current working directory.
        output_name (str, optional): Name of the file that will be exported. Defaults to the name of the ipynb file with an .html extension.
        cell_tags_exist (bool, optional): Flag for if cell tags exist. When True, cells tagged "remove_cell" and inputs tagged "remove_input" are removed and prompts are hidden. Defaults to False.
        template (str, optional): Template to use for export. Defaults to 'lab'. Options are 'lab' or 'classic'. 'classic' should be used if you're planning to convert the HTML to PDF. 'lab' is better for viewing in browser.
    """

    from os import getcwd
    from pathlib import PurePosixPath
    from shutil import which
    from subprocess import run

    if output_dir == '':
        output_dir = getcwd().replace('\\','/')

    if output_name == '':
        cur_file = cur_file.replace('\\', '/')
        # .stem drops only the final suffix, so a dotted name such as
        # 'v1.2 notes.ipynb' is not truncated at its first dot.
        output_name = PurePosixPath(cur_file).stem + '.html'

    # Build the argument list once; the tag-removal options are the only
    # difference between the two export modes.
    command = [
        # Resolve the executable explicitly (honours PATHEXT on Windows) so the
        # command can run without a shell.
        which('jupyter') or 'jupyter',
        'nbconvert',
        "--output-dir={}".format(output_dir),
        '--to','html',
        '--template',f'{template}',
    ]
    if cell_tags_exist:
        command += [
            '--TagRemovePreprocessor.enabled=True',
            '--TagRemovePreprocessor.remove_cell_tags={"remove_cell"}',
            '--TagRemovePreprocessor.remove_input_tags={"remove_input"}',
            '--no-prompt',
        ]
    command += [cur_file, '--output', f'{output_name}']

    try:
        # No shell=True: with an argument list, a POSIX shell would receive only
        # 'jupyter' as the command and silently drop every other argument.
        process = run(command, capture_output=True)
    except OSError as err:
        # e.g. jupyter is not installed / not on PATH; report instead of crashing
        display(Markdown('<h1> REPORT ERROR:</h1>'))
        print(err)
        return

    if process.returncode == 0:
        display(Markdown(f'<h3>Code saved to {output_name}</h3>'))
    else:
        display(Markdown('<h1> REPORT ERROR:</h1>'))
        # stderr is captured as bytes; decode it rather than un-escaping str(bytes)
        print(process.stderr.decode('utf-8', errors='replace'))

# __vsc_ipynb_file__ is referenced as a global holding the notebook's path
# (presumably injected by the VS Code Jupyter extension - confirm if run elsewhere).
cur_file = __vsc_ipynb_file__
output_dir = 'zlogs'
# PureWindowsPath accepts both '\\' and '/' separators, so the file name is
# extracted correctly on any platform; .stem removes only the final suffix.
output_name = pathlib.PureWindowsPath(cur_file).stem + '.html'
cell_tags_exist = False
template = 'classic'

if export_flag:
    export_code(cur_file,output_dir, output_name, cell_tags_exist, template=template)

# Post-process the exported HTML: replace every <img> that points at a file in
# the imgs folder with an inline base64 data URI so the HTML is self-contained.
if export_flag:
    import os
    from bs4 import BeautifulSoup as soup
    import base64

    html_path = os.path.join(output_dir, output_name)

    # Read with an explicit encoding and close the handle; the platform default
    # encoding (e.g. cp1252 on Windows) can fail on or corrupt non-ASCII characters.
    # NOTE(review): assumes nbconvert writes the HTML as UTF-8 - confirm.
    with open(html_path, encoding='utf-8') as f:
        soup_html = soup(f.read())
    img_tags = soup_html.find_all('img')
    img_path = os.path.join(os.path.dirname(output_dir), 'imgs')

    for tag in img_tags:
        src = tag.get('src', '')
        #skip tags without a source and any images that already have base64
        if not src or 'base64' in src:
            continue

        img_src = src.split('/')[-1]
        img_file = os.path.join(img_path, img_src)
        #leave the tag untouched when the image is not in the imgs folder
        #(e.g. a remote URL) instead of aborting the whole export
        if not os.path.isfile(img_file):
            print(f'Skipping {img_src}: not found in {img_path}')
            continue

        print(f'Replacing {img_src}')
        with open(img_file, 'rb') as img:
            base64_str = base64.b64encode(img.read()).decode('utf-8')

        tag['src'] = 'data:image/png;base64,' + base64_str

    with open(html_path, 'w', encoding='utf-8') as f:
        f.write(str(soup_html))

User running code is the student. Continue to file Export.


<h3>Code saved to 01 Consolidate Participant Data.html</h3>

Replacing GeorgiaTech_RGB.png
