# The Pupil Features Extracted in Batch Pipeline.
## Introduction
The features to be extracted include the blinking rate and chunks of wavelet coefficients.

## Reference
1. The previous code: https://github.com/BaiYunpeng1949/MobileEyeComputing/tree/master/ProcessDataNTestAlgorithm
2. TODO: add papers here.
3. My work: https://docs.google.com/document/d/1oLv3oJQLjst1_pYgd_UA3RRL1fRGSbZ6uvlMuxmZR2k/edit#heading=h.r01ccf7ox05g

## Implementation

In [1]:
import numpy as np
import pandas as pd
import streamlit as st
import plotly.express as px
from sklearn.metrics import r2_score
from scipy.spatial.distance import euclidean
from fastdtw import fastdtw
from os import walk
import pandas as pd
import datetime
import csv
import os

In [2]:
def plotter(ax, data1, data2, param_dict):
    """Plot ``data2`` against ``data1`` on ``ax``.

    Args:
        ax: Object exposing a ``plot`` method (e.g. a matplotlib Axes).
        data1: First positional argument forwarded to ``ax.plot``.
        data2: Second positional argument forwarded to ``ax.plot``.
        param_dict: Mapping unpacked as keyword arguments to ``ax.plot``.

    Returns:
        Whatever ``ax.plot`` returns.
    """
    return ax.plot(data1, data2, **param_dict)

## File-wise/Task-wise Data Pre-processing
### [Suspended] Left and Right Eye Synchronization and Calculate the Difference
Since I now implement ML/AI models to do a more inclusive analysis where more features could be analyzed, I will use the feature of the difference between left and right eye data.


1. Referenced from the previous work located as: https://github.com/BaiYunpeng1949/MobileEyeComputing/blob/master/ProcessDataNTestAlgorithm/LeftRightEyesSyncData.ipynb

2. The difference between the two eyes was also found to be correlated with cognitive workload; see the related paper: Optimizing the usage of pupillary based indicators for cognitive workload. Reading link: https://docs.google.com/document/d/1jBezc9kqaziGlWk6sSgCvyjHTlpD5G6OoNZyh7K2HIo/edit#heading=h.qqfv1ot6zjd8

Besides, there is a considerable difference in pupil size variation for right and left eyes of the participants. See the paper: Exploring pupil size variation as a cognitive load indicator in visualization studies, link: https://drive.google.com/file/d/1z8O1NGNYA87La-CVMSr7cQkJ-VVOOUSe/view

In [3]:
def _pre_dtw_sync(left_eye_file_path, right_eye_file_path):
    """Load both eyes' recordings and compute the DTW warping path between them.

    The sampling rate of each recording is parsed from its file path: the
    text between the last '_' and the following 'Hz' must be an integer.
    The recording with more rows (the left eye on a tie) is used as the
    reference; the other one is aligned to it.

    Args:
        left_eye_file_path: Path of the left-eye CSV file.
        right_eye_file_path: Path of the right-eye CSV file.

    Returns:
        A tuple ``(path, df_reference, df_alignment, 'Diameter',
        'Confidence', df_origin, SR_SYNC)`` where ``path`` is the warping
        path returned by ``fastdtw`` on the 'Timestamp' columns,
        ``df_origin`` is an independent copy of the reference frame, and
        ``SR_SYNC`` is the sampling rate parsed from the reference file.

    Raises:
        ValueError: If a sampling rate cannot be parsed from a file path.
        KeyError: If a CSV lacks one of the required columns.
    """
    # Configure the parameters.
    entity_dwt_align = 'Timestamp'
    entity_dwt_apply_dia = 'Diameter'
    entity_dwt_apply_conf = 'Confidence'
    required_columns = ['Timestamp', 'Confidence', 'Diameter', 'Event']

    # Tag labels: parse the sampling rate from the path, then load the data.
    SAMPLING_RATE_LEFT = int((left_eye_file_path.split('_')[-1]).split('Hz')[0])
    data_left = pd.read_csv(left_eye_file_path)

    SAMPLING_RATE_RIGHT = int((right_eye_file_path.split('_')[-1]).split('Hz')[0])
    data_right = pd.read_csv(right_eye_file_path)

    df_left = data_left[required_columns].copy()
    df_right = data_right[required_columns].copy()

    # The larger recording goes first (reference); the smaller one is aligned
    # to it. Using a plain ``else`` guarantees every name below is bound.
    if len(df_left) >= len(df_right):
        df_reference, df_alignment = df_left, df_right
        SR_SYNC = SAMPLING_RATE_LEFT
    else:
        df_reference, df_alignment = df_right, df_left
        SR_SYNC = SAMPLING_RATE_RIGHT
    # Independent copy so callers can use it without touching the reference.
    df_origin = df_reference.copy()

    # Calculate for warping.
    distance, path = fastdtw(df_reference[entity_dwt_align], df_alignment[entity_dwt_align], dist=euclidean)

    return path, df_reference, df_alignment, entity_dwt_apply_dia, entity_dwt_apply_conf, df_origin, SR_SYNC

In [4]:
# Synchronize and align/merge two eyes' numerical values (diameter or confidence) along a DTW path.
# Taking the average/mean of the two eyes' data as the computing target follows the approach mentioned in LHIPA.
def _dtw_synchronize_merge_data(path_dwt, df_reference, df_alignment, entity_dwt_apply):
    """Pair reference/alignment values along a warping path and merge them.

    Args:
        path_dwt: Sequence of index pairs; element 0 indexes ``df_reference``
            and element 1 indexes ``df_alignment`` (positional indices).
        df_reference: DataFrame containing the column ``entity_dwt_apply``.
        df_alignment: DataFrame containing the column ``entity_dwt_apply``.
        entity_dwt_apply: Name of the column to synchronize.

    Returns:
        DataFrame with columns 'Index', 'Reference <col>', 'Alignment <col>',
        'Avg<col>' and 'Diff<col>' (reference minus alignment), holding one
        row per distinct reference index (the first pairing is kept).
    """
    ref_label = 'Reference ' + entity_dwt_apply
    ali_label = 'Alignment ' + entity_dwt_apply

    # Synchronize: one row per step of the warping path. The reference index
    # is stored so duplicated pairings can be dropped afterwards.
    paired_rows = [
        [step[0],
         df_reference[entity_dwt_apply].iloc[step[0]],
         df_alignment[entity_dwt_apply].iloc[step[1]]]
        for step in path_dwt
    ]
    synced = pd.DataFrame(data=paired_rows, columns=['Index', ref_label, ali_label])
    synced = synced.dropna()
    # Keep only the first pairing for every reference index.
    synced = synced.drop_duplicates(subset=['Index']).reset_index(drop=True)

    # Merge/Align: row-wise mean of the two eyes.
    synced['Avg' + entity_dwt_apply] = synced[[ref_label, ali_label]].mean(axis=1)
    # Difference: reference minus alignment.
    synced['Diff' + entity_dwt_apply] = synced[ref_label] - synced[ali_label]

    return synced

In [5]:
# Synchronize the given 2 eyes' data.
def dtw_sync(left_eye_file_path, right_eye_file_path):
    """Synchronize two eyes' recordings with DTW, merge them and save a CSV.

    The result is written to
    '../Data/PreprocessedData/<dir>/synchronized_<rate>Hz.csv', where
    ``<dir>`` is the second-to-last '/'-separated component of
    ``left_eye_file_path`` and ``<rate>`` is the sampling rate returned by
    ``_pre_dtw_sync``. The output folder is created when missing.

    Args:
        left_eye_file_path: Path of the left-eye CSV file ('/'-separated).
        right_eye_file_path: Path of the right-eye CSV file.

    Returns:
        DataFrame with columns 'Timestamp', 'Confidence', 'Diameter',
        'Event' and 'DiffDiameter'. 'Timestamp' and 'Event' come from the
        reference recording; 'Confidence' and 'Diameter' are the two-eye
        averages; 'DiffDiameter' is reference minus alignment.
    """
    # Prepare for the synchronization.
    (path, df_reference, df_alignment,
     entity_dwt_apply_dia, entity_dwt_apply_conf,
     df_origin, SR_SYNC) = _pre_dtw_sync(left_eye_file_path=left_eye_file_path,
                                         right_eye_file_path=right_eye_file_path)

    # Synchronize, merge, and label data.
    df_sync_dia = _dtw_synchronize_merge_data(path_dwt=path,
                                              df_reference=df_reference,
                                              df_alignment=df_alignment,
                                              entity_dwt_apply=entity_dwt_apply_dia)
    df_sync_conf = _dtw_synchronize_merge_data(path_dwt=path,
                                               df_reference=df_reference,
                                               df_alignment=df_alignment,
                                               entity_dwt_apply=entity_dwt_apply_conf)

    # Integrate into one dataframe. The first assignment fixes the row index;
    # later columns are aligned to it by index label.
    df_sync = pd.DataFrame()
    df_sync['Timestamp'] = df_origin['Timestamp']
    df_sync['Confidence'] = df_sync_conf['AvgConfidence']
    df_sync['Diameter'] = df_sync_dia['AvgDiameter']
    df_sync['Event'] = df_origin['Event']
    df_sync['DiffDiameter'] = df_sync_dia['DiffDiameter']

    # Output and save into a csv file.
    file_name = left_eye_file_path.split('/')[-2:]
    folder_path = '../Data/PreprocessedData/' + file_name[0] + '/'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder_path, exist_ok=True)

    write_file_name = 'synchronized_' + str(SR_SYNC) + 'Hz.csv'
    write_file_path = folder_path + write_file_name
    df_sync.to_csv(write_file_path)

    return df_sync

### Merge Two Eyes' Data using Timestamps, Interpolate, and Calculate the Difference
Reference: a blog introducing 2 sensors' data fusion - https://stackoverflow.com/questions/14079766/synchronize-dataset-multiple-users-multiple-timestamps

#### Configuration Part 

In [77]:
# Column names read from the per-eye CSV files.
TS = 'Timestamp'
CF = 'Confidence'
DM = 'Diameter'
# Column names of the synchronized (merged) dataframe.
LDM = 'Left Diameter'
RDM = 'Right Diameter'
LCF = 'Left Confidence'
RCF = 'Right Confidence'
AVEDM = 'Average Diameter'
AVECF = 'Average Confidence'
DIFFDM = 'Difference Diameter'  # Author's convention: left minus right; unit stated by the author as pixels.

#### Implementation

In [78]:
def upsample_timestamps_sync(left_eye_file_path, right_eye_file_path):
    """Merge two eyes' recordings on the union of their timestamps.

    Both CSV files are read and reduced to the timestamp, confidence and
    diameter columns. Diameters are placed on the sorted union of both eyes'
    timestamps and gaps are filled with an order-3 spline; confidences are
    mapped onto the same timestamps and gaps are filled linearly. Both
    interpolations fill in both directions.

    Args:
        left_eye_file_path: Path of the left-eye CSV file.
        right_eye_file_path: Path of the right-eye CSV file.

    Returns:
        DataFrame indexed by timestamp with the left/right diameter and
        confidence columns, the average diameter, the diameter difference
        (left minus right) and the average confidence.
    """
    # Read data from csv into dataframes and keep only the needed columns.
    raw_left = pd.read_csv(left_eye_file_path)
    raw_right = pd.read_csv(right_eye_file_path)
    wanted = [TS, CF, DM]
    eye_left = raw_left[wanted].copy()
    eye_right = raw_right[wanted].copy()

    # Diameter series indexed by timestamp, one per eye.
    dia_left = pd.Series(eye_left[DM].to_numpy(), index=eye_left[TS])
    dia_right = pd.Series(eye_right[DM].to_numpy(), index=eye_right[TS])

    # Listing every timestamp of either eye is effectively an up-sampling;
    # a timestamp missing for one eye shows up as NaN in that eye's column.
    merged = pd.DataFrame([dia_left, dia_right]).T.sort_index()
    merged = merged.rename(columns={0: LDM, 1: RDM})

    # Fill diameter gaps with a cubic spline, in both directions so that the
    # leading and trailing NaN values are filled too.
    merged = merged.interpolate(method='spline', order=3, limit_direction='both', axis=0)

    # Look up each eye's confidence by timestamp.
    # Reference: https://stackoverflow.com/questions/45636105/adding-values-in-new-column-based-on-indexes-with-pandas-in-python
    stamps = merged.index.to_series()
    merged[LCF] = stamps.map(eye_left.set_index(TS)[CF])
    merged[RCF] = stamps.map(eye_right.set_index(TS)[CF])

    # Fill confidence gaps linearly, again in both directions.
    for conf_column in (LCF, RCF):
        merged[conf_column] = merged[conf_column].interpolate(method='linear', limit_direction='both', axis=0)

    # Average and difference of the two eyes.
    merged[AVEDM] = (merged[LDM] + merged[RDM]) / 2
    merged[DIFFDM] = merged[LDM] - merged[RDM]
    merged[AVECF] = (merged[LCF] + merged[RCF]) / 2

    return merged

### Deblinks and Blinking Rate Extraction

### Smoothing

### Interpolation

### Artefact Rejection

## Wavelet Coefficient Extraction

## Run in Batch

In [80]:
# Argument configuration.
# Substrings looked for in file names to pick the 2D-mode left/right eye files.
CONF_2DMODE = '2D'
CONF_LEFT = 'left'
CONF_RIGHT = 'right'
raw_data_path = '../Data/RawData4ML/VersionOctober/' # Root folder: its immediate sub-folders are the ones to process; per the author it holds no files itself.
# NOTE(review): dir_features is not used in the visible cells; presumably the output folder for extracted features - confirm.
dir_features = '../Data/Results/'

# Create a dataframe to store results details. TODO: to be modified to align to short time windows. Columns - features; Rows - short segmentations.
df_features = pd.DataFrame(columns=['Filename', 'Max level value', 
                                   'IPA 2', 'IPA 3', 'IPA 4', 'IPA 5', 'IPA 6', 'IPA 7', 'IPA 8', 'IPA 9',
                                   'LHIPA 2', 'LHIPA 3', 'LHIPA 4', 'LHIPA 5', 'LHIPA 6', 'LHIPA 7', 'LHIPA 8', 'LHIPA 9'])  # Needs to be changed according to requirements.

In [81]:
# List the names of the immediate sub-directories of raw_data_path.
# NOTE: the loop variables (dir_path, dir_names, file_names) stay defined at
# notebook level after this cell has run.
dirs_list = []
for (dir_path, dir_names, file_names) in walk(raw_data_path):
    dirs_list.extend(dir_names)
    break  # Only the first tuple yielded by walk() (the top directory) is needed.

In [82]:
%%time
# Traverse all the file names in a given directory.
for dir_name in dirs_list:
    dir_path = raw_data_path + dir_name + '/'
    file_names_list = []
    for (_, _, file_names) in walk(dir_path):
        file_names_list.extend(file_names)

    # Find the targeted files. The paths are reset for every directory so a
    # value left over from an earlier directory or cell run is never reused.
    # NOTE(review): paths are built as dir_path + file_name, which assumes the
    # files sit directly inside dir_path - confirm there are no nested folders.
    file_path_left = None
    file_path_right = None
    for file_name in file_names_list:  # Iterate every collected name, not only the last walk() tuple.
        if CONF_2DMODE not in file_name:
            continue
        if CONF_LEFT in file_name:
            file_path_left = dir_path + file_name
        elif CONF_RIGHT in file_name:
            file_path_right = dir_path + file_name

    if file_path_left is None or file_path_right is None:
        raise FileNotFoundError(
            'Missing 2D left/right eye file in ' + dir_path
            + ' (left: ' + str(file_path_left) + ', right: ' + str(file_path_right) + ')')

    # The DTW-based synchronization is suspended in favour of the timestamp-based one below,
    # which keeps the difference between the two pupils; that difference was also recognized
    # as an indicator of cognitive workload.
    # TODO: reference is needed here.
    # Synchronize 2 eyes' data.
    df_sync = upsample_timestamps_sync(left_eye_file_path=file_path_left, right_eye_file_path=file_path_right)

    # Only the first directory is processed for now.
    break

CPU times: total: 78.1 ms
Wall time: 78.6 ms


In [83]:
df_sync

Unnamed: 0_level_0,Left Diameter,Right Diameter,Left Confidence,Right Confidence,Average Diameter,Difference Diameter,Average Confidence
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
352723.490801,43.808819,45.411758,0.562328,0.602150,44.610288,-1.602939,0.582239
352723.492894,43.873134,41.347263,0.575907,0.602150,42.610198,2.525870,0.589028
352723.498762,44.005180,45.536383,0.589486,0.708128,44.770782,-1.531203,0.648807
352723.509503,43.845901,45.697473,0.551540,0.814106,44.771687,-1.851571,0.682823
352723.517701,44.138456,45.596012,0.547389,0.920084,44.867234,-1.457556,0.733737
...,...,...,...,...,...,...,...
352783.046935,44.030834,50.373090,0.671217,0.618062,47.201962,-6.342255,0.644639
352783.049092,44.351574,50.720688,0.592153,0.643356,47.536131,-6.369114,0.617755
352783.060591,43.223335,50.387328,0.513089,0.604240,46.805332,-7.163993,0.558665
352783.064396,44.410248,49.592499,0.467182,0.565125,47.001373,-5.182251,0.516153
