# The Pupil Features Extracted in Batch Pipeline.
## Introduction
The features to be extracted include the blinking rate and chunks of wavelet coefficients.

## Reference
1. The previous code: https://github.com/BaiYunpeng1949/MobileEyeComputing/tree/master/ProcessDataNTestAlgorithm
2. TODO: add papers here.
3. My work: https://docs.google.com/document/d/1oLv3oJQLjst1_pYgd_UA3RRL1fRGSbZ6uvlMuxmZR2k/edit#heading=h.r01ccf7ox05g

## Implementation

In [1]:
import numpy as np
import pandas as pd
import streamlit as st
import plotly.express as px
from sklearn.metrics import r2_score
from scipy.spatial.distance import euclidean
from fastdtw import fastdtw
from os import walk
import pandas as pd
import datetime
import csv
import os

In [2]:
def plotter(ax, data1, data2, param_dict):
    """Plot data2 against data1 on ax and return the result of ax.plot.

    The entries of param_dict are forwarded to ax.plot as keyword arguments.
    """
    return ax.plot(data1, data2, **param_dict)

## [Suspended] File-wise/Task-wise Data Pre-processing
### Left and Right Eye Synchronization
Adapted from the previous work located at: https://github.com/BaiYunpeng1949/MobileEyeComputing/blob/master/ProcessDataNTestAlgorithm/LeftRightEyesSyncData.ipynb

In [3]:
def _pre_sync(left_eye_file_path, right_eye_file_path):
    """Load both eyes' CSV files and compute the DTW warping path between their timestamps.

    The sampling rate of each file is parsed from the last '_'-separated token of
    its path, which must look like '<integer>Hz...' (int() raises ValueError
    otherwise). Both CSV files must contain the columns 'Timestamp',
    'Confidence', 'Diameter' and 'Event'.

    Returns a tuple of:
        path                  -- warping path returned by fastdtw (index pairs
                                 into the reference and the alignment data),
        df_reference          -- data of the eye with more rows (left eye on a tie),
        df_alignment          -- data of the other eye, to be aligned to the reference,
        entity_dwt_apply_dia  -- the column name 'Diameter',
        entity_dwt_apply_conf -- the column name 'Confidence',
        df_origin             -- an independent copy of the reference eye's data,
        SR_SYNC               -- sampling rate parsed from the reference eye's file path.
    """
    # Column names used for the alignment and for the values to be merged later.
    entity_dwt_align = 'Timestamp'
    entity_dwt_apply_dia = 'Diameter'
    entity_dwt_apply_conf = 'Confidence'
    columns = ['Timestamp', 'Confidence', 'Diameter', 'Event']

    # The sampling rate is encoded in the file name, e.g. '..._120Hz.csv'.
    sampling_rate_left = int((left_eye_file_path.split('_')[-1]).split('Hz')[0])
    data_left = pd.read_csv(left_eye_file_path)

    sampling_rate_right = int((right_eye_file_path.split('_')[-1]).split('Hz')[0])
    data_right = pd.read_csv(right_eye_file_path)

    df_left = data_left[columns].copy()
    df_right = data_right[columns].copy()

    # The eye with more samples is the reference (first fastdtw argument); the
    # other eye is aligned to it. Using if/else guarantees that every returned
    # name is bound on all paths.
    if len(df_left) >= len(df_right):
        df_reference = df_left.copy()
        df_alignment = df_right.copy()
        df_origin = df_left.copy()
        SR_SYNC = sampling_rate_left
    else:
        df_reference = df_right.copy()
        df_alignment = df_left.copy()
        df_origin = df_right.copy()
        SR_SYNC = sampling_rate_right

    # Only the warping path is needed; the accumulated distance is discarded.
    # NOTE(review): some recent SciPy releases may reject scalar inputs to
    # `euclidean` -- confirm against the installed version.
    _, path = fastdtw(df_reference[entity_dwt_align], df_alignment[entity_dwt_align], dist=euclidean)

    return path, df_reference, df_alignment, entity_dwt_apply_dia, entity_dwt_apply_conf, df_origin, SR_SYNC

In [4]:
# Define a function scnchronize and align/merge 2 eyes' numerical values, including diamter and confidence values.
# The method of getting the average/mean value of 2 eyes' data as computing target is referenced from the mention in LHIPA.
def _synchronize_merge_data(path_dwt, df_reference, df_alignment, entity_dwt_apply):
    # Synchronize
    data_sync = []
    for i in range(0, len(path_dwt)):
        data_sync.append([path_dwt[i][0],  # The index column is for dropping out duplicates.
                         df_reference[entity_dwt_apply].iloc[path_dwt[i][0]],
                         df_alignment[entity_dwt_apply].iloc[path_dwt[i][1]]])
    df_sync = pd.DataFrame(data=data_sync,
                           columns=['Index', 
                                    'Reference '+entity_dwt_apply, 
                                    'Alignment '+entity_dwt_apply]).dropna()
    df_sync = df_sync.drop_duplicates(subset=['Index']) # Drop the duplicates according to the index of the reference.
    df_sync = df_sync.reset_index(drop=True)
    # Merge/Align
    df_sync['Avg'+entity_dwt_apply] = df_sync.loc[:, ['Reference '+entity_dwt_apply, 'Alignment '+entity_dwt_apply]].mean(axis = 1)
    
    return df_sync

In [5]:
# Synchronize the given 2 eyes' data.
def sync(left_eye_file_path, right_eye_file_path):
    """Synchronize both eyes' recordings, average them, save the result and return it.

    Parameters:
        left_eye_file_path  -- path of the left eye's CSV file; its parent
                               folder name (taken by splitting on '/') names
                               the output folder.
        right_eye_file_path -- path of the right eye's CSV file.

    Returns a DataFrame with the columns 'Timestamp', 'Confidence', 'Diameter'
    and 'Event'. 'Timestamp' and 'Event' come from the reference eye;
    'Confidence' and 'Diameter' are the two eyes' averages. The same frame is
    written to '../Data/PreprocessedData/<folder>/synchronized_<rate>Hz.csv'.
    """
    # Prepare for the synchronization.
    (path, df_reference, df_alignment,
     entity_dwt_apply_dia, entity_dwt_apply_conf,
     df_origin, SR_SYNC) = _pre_sync(left_eye_file_path=left_eye_file_path,
                                     right_eye_file_path=right_eye_file_path)

    # Synchronize, merge, and label data.
    df_sync_dia = _synchronize_merge_data(path_dwt=path,
                                          df_reference=df_reference,
                                          df_alignment=df_alignment,
                                          entity_dwt_apply=entity_dwt_apply_dia)
    df_sync_conf = _synchronize_merge_data(path_dwt=path,
                                           df_reference=df_reference,
                                           df_alignment=df_alignment,
                                           entity_dwt_apply=entity_dwt_apply_conf)

    # The merged frames are re-indexed 0..k-1 after rows with NaN were dropped,
    # so their row labels no longer match the reference rows. Key the averages
    # by the retained reference position ('Index') so that each value lands on
    # its own timestamp; reference rows that were dropped become NaN.
    avg_confidence = df_sync_conf.set_index('Index')['Avg' + entity_dwt_apply_conf]
    avg_diameter = df_sync_dia.set_index('Index')['Avg' + entity_dwt_apply_dia]

    # Integrate into one dataframe.
    df_sync = pd.DataFrame()
    df_sync['Timestamp'] = df_origin['Timestamp']
    df_sync['Confidence'] = avg_confidence
    df_sync['Diameter'] = avg_diameter
    df_sync['Event'] = df_origin['Event']

    # Output and save into a csv file, in a folder named after the input's parent folder.
    file_name = left_eye_file_path.split('/')[-2:]
    folder_path = '../Data/PreprocessedData/' + file_name[0] + '/'
    os.makedirs(folder_path, exist_ok=True)

    write_file_name = 'synchronized_' + str(SR_SYNC) + 'Hz.csv'
    write_file_path = folder_path + write_file_name
    df_sync.to_csv(write_file_path)

    return df_sync

### Deblinks and Blinking Rate Extraction

### Smoothing

### Interpolation

### Artefact Rejection

## Wavelet Coefficient Extraction

## Run in Batch

In [6]:
# Argument configuration.
# Substrings that identify the wanted recordings by file name.
CONF_2DMODE = '2D'
CONF_LEFT = 'left'
CONF_RIGHT = 'right'
# Root folder of the raw data. Walking it yields the needed sub-folder names in dirnames and no filenames, since there are no files directly inside it.
raw_data_path = '../Data/RawData4ML/VersionOctober/'
dir_features = '../Data/Results/'

# Dataframe that stores the result details. TODO: to be modified to align to short time windows. Columns - features; Rows - short segmentations.
# One 'IPA <level>' and one 'LHIPA <level>' column for each level from 2 to 9. Needs to be changed according to requirements.
df_features = pd.DataFrame(
    columns=['Filename', 'Max level value']
    + ['IPA ' + str(level) for level in range(2, 10)]
    + ['LHIPA ' + str(level) for level in range(2, 10)]
)

In [7]:
# List the names of the sub-directories located directly under raw_data_path.
dirs_list = []
for (dir_path, dir_names, file_names) in walk(raw_data_path):
    dirs_list.extend(dir_names)
    # Stop after the first tuple yielded by walk, so deeper levels are not visited.
    break

In [8]:
%%time
# Traverse the recording directories and synchronize the left/right 2D pupil files of each.
for dir_name in dirs_list:
    dir_path = raw_data_path + dir_name + '/'

    # Only the files directly inside dir_path are collected, because their
    # paths are rebuilt below as dir_path + file_name.
    file_names_list = []
    for (_, _, file_names) in walk(dir_path):
        file_names_list.extend(file_names)
        break

    # Find the targeted files. The paths are reset for every directory so a
    # missing file can never silently reuse the previous directory's path.
    file_path_left = None
    file_path_right = None
    for file_name in file_names_list:
        if CONF_2DMODE not in file_name:
            continue
        if CONF_LEFT in file_name:
            file_path_left = dir_path + file_name
        elif CONF_RIGHT in file_name:
            file_path_right = dir_path + file_name

    if file_path_left is None or file_path_right is None:
        print('Skipping ' + dir_path + ': left or right 2D pupil file not found.')
        continue

    # We suspended the two eyes' synchronization for extracting more information because the difference between the two pupils was also recognized as an indicator of cognitive workload.
    # TODO: reference is needed here.
    # Synchronize 2 eyes' data.
    df_sync = sync(left_eye_file_path=file_path_left,
                   right_eye_file_path=file_path_right)

    # NOTE(review): this stops after the first successfully processed
    # directory -- remove it to run the whole batch.
    break

2022-11-14 00:39:01.333 INFO    numexpr.utils: Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
2022-11-14 00:39:01.334 INFO    numexpr.utils: NumExpr defaulting to 8 threads.


Wall time: 1.2 s


In [9]:
df_sync

Unnamed: 0,Timestamp,Confidence,Diameter,Event
0,352723.490801,0.582239,42.578041,default
1,352723.498762,0.595818,42.676222,default
2,352723.509503,0.735812,44.720957,default
3,352723.517714,0.731661,44.501411,default
4,352723.529483,0.594411,44.662136,default
...,...,...,...,...
5323,352783.026782,0.568662,46.947025,sitting
5324,352783.036907,0.547047,47.382788,sitting
5325,352783.046935,0.657286,47.375761,sitting
5326,352783.060591,0.539107,46.407917,sitting
