Here is an outline of the full preprocessing pipeline that gets the data ready for the model:
1. Standardize the data (processing/processData.py)
2. Remove NaN-heavy trials (nanHandling/nanProcessing.py)
3. Interpolate the missing values (nanHandling/nanProcessing.py)

In [1]:
import pandas as pd
import numpy as np

In [2]:
from processing.processData import process
from nanHandling.nanProcessing import nanPercentages
from nanHandling.nanProcessing import nan_to_interp

In [3]:
# !pip3 install alive-progress
from alive_progress import alive_bar

In [4]:
# Load the training split: the data frame and its subject-info frame.
# Paths are relative to the notebook's working directory.
trainingData = pd.read_pickle('../../fulldata/processed/trainingData.pkl')
trainingSubjectInfo = pd.read_pickle('../../fulldata/processed/trainingSubjectInfo.pkl')

In [5]:
# Load the testing split: the data frame and its subject-info frame.
# Paths are relative to the notebook's working directory.
testingData = pd.read_pickle('../../fulldata/processed/testingData.pkl')
testingSubjectInfo = pd.read_pickle('../../fulldata/processed/testingSubjectInfo.pkl')

In [6]:
def deepProcess(dataframe, subjectframe, nancut=.4):
    """Run the full preprocessing pipeline and return the cleaned data.

    Steps:
      1. Pass a copy of ``dataframe`` through ``process``.
      2. Drop NaN-heavy trials based on the result of ``nanPercentages``.
      3. Interpolate remaining NaNs trial by trial with ``nan_to_interp``.

    Both inputs are copied first, so the caller's frames are not modified.

    Args:
        dataframe: Data to preprocess. Must contain a ``time`` column and the
            six pupil/gaze columns listed in ``signalColumns``. Rows are
            selected by the index labels of ``subjectframe``, so the two
            frames are assumed to share trial labels as their index
            (TODO confirm against the data-loading code).
        subjectframe: Per-trial metadata frame; the NaN percentages are
            attached to a copy of it as ``percentNanWhole`` and
            ``percentNanMax``.
        nancut: A trial is kept only if its ``percentNanWhole`` is strictly
            below this value. The ``percentNanMax`` threshold stays fixed
            at ``.6``.

    Returns:
        The rows of the processed data belonging to the retained trials,
        sorted by ``time``, with the signal columns passed through
        ``nan_to_interp``.
    """
    signalColumns = ['right_pupil', 'left_pupil', 'right_gaze_x',
                     'right_gaze_y', 'left_gaze_x', 'left_gaze_y']

    print('Copying dataframes...')
    dataframe = dataframe.copy()
    subjectframe = subjectframe.copy()
    # 1. Process data
    print('Processing data...')
    dataframe = process(dataframe)
    # 2. Remove NaN-heavy trials
    print('Removing NaN-heavy trials')
    percentnan = nanPercentages(dataframe)
    subjectframe['percentNanWhole'] = percentnan['whole']
    subjectframe['percentNanMax'] = percentnan['max']
    # `@nancut` makes the query honor the caller's cutoff; it was previously
    # hard-coded to .4, so the parameter had no effect.
    nancutSubjectInfo = subjectframe.query(
        'percentNanWhole < @nancut and percentNanMax < .6')
    # Non-inplace sort yields a fresh frame, so the assignments below never
    # write into a temporary slice of `dataframe`.
    nancutData = dataframe.loc[nancutSubjectInfo.index].sort_values(by=['time'])
    # 3. Interpolate NaNs
    print('Interpolating NaNs...')
    sts = nancutData.index.unique().to_list()
    # NOTE(review): each iteration does a label lookup on a non-unique index,
    # which is slow on large frames. A groupby(level=0) transform may be
    # faster -- verify that nan_to_interp is compatible before switching.
    with alive_bar(len(sts), force_tty=True) as bar:
        for st in sts:
            # `apply` calls nan_to_interp once per signal column of this trial.
            nancutData.loc[[st], signalColumns] = (
                nancutData.loc[[st], signalColumns].apply(nan_to_interp))
            bar()
    return nancutData

In [7]:
# Run the preprocessing pipeline on the training split and save the result
# as a pickle next to the other processed files.
X_y_train = deepProcess(trainingData, trainingSubjectInfo)
X_y_train.to_pickle('../../fulldata/processed/X_y_train.pkl')

Copying dataframes...
Processing data...
Removing NaN-heavy trials
Interpolating NaNs...
|▋⚠︎                                      | (!) 61/3843 [2%] in 1:18.7 (0.77/s)  


KeyboardInterrupt: 

In [None]:
# Run the preprocessing pipeline on the testing split and save the result.
# The testing frames must be passed here: passing the training frames would
# write training data into X_y_test.pkl.
X_y_test = deepProcess(testingData, testingSubjectInfo)
X_y_test.to_pickle('../../fulldata/processed/X_y_test.pkl')