In [31]:
import pandas as pd
import numpy as np
from sklearn.ensemble import ExtraTreesRegressor
import warnings
warnings.filterwarnings('ignore')
from sklearn import preprocessing
from math import ceil, floor

In [32]:
def slide_window(A, win_size, stride, padding = None):
    '''Collect fixed-size windows that slide over a one-dimensional array.

    Parameters
    ----------
    A : numpy.ndarray
        One-dimensional array that owns its data (``A.base is None``).
    win_size : int
        Number of elements per window; must be positive.
    stride : int
        Distance between the starts of consecutive windows; must satisfy
        ``0 < stride <= win_size``.
    padding : scalar, optional
        If None (default), only windows that fit entirely inside ``A`` are
        returned, so incomplete trailing windows are dropped. Otherwise
        every window that starts inside ``A`` is returned and the positions
        past the end of ``A`` are filled with this value.

    Returns
    -------
    numpy.ndarray
        Read-only strided array of shape ``(n_windows, win_size)``. Without
        padding it shares memory with ``A``; with padding it is backed by a
        padded copy of ``A``.

    Raises
    ------
    ValueError
        If ``win_size`` or ``stride`` is out of range, or if ``A`` is a view
        of another array.
    '''
    if win_size <= 0:
        raise ValueError('Window size must be positive.')
    if not (0 < stride <= win_size):
        raise ValueError(f'Stride must satisfy 0 < stride <= {win_size}.')
    if A.base is not None:
        raise ValueError('Views cannot be slided over!')

    n_elems = len(A)
    if padding is not None:
        # One window per start position inside A: ceil(n_elems / stride),
        # computed with integers to avoid float rounding.
        n_windows = -(-n_elems // stride)
        if n_windows:
            # Pad just enough for the last window to be complete.
            required = (n_windows - 1) * stride + win_size
            A = np.pad(A, (0, required - n_elems),
                       constant_values = padding)
    elif n_elems >= win_size:
        # Only windows that end inside A. Counting n_elems // stride instead
        # would let the strided view read past the buffer whenever
        # stride < win_size.
        n_windows = (n_elems - win_size) // stride + 1
    else:
        n_windows = 0
    shape = n_windows, win_size

    elem_size = A.strides[-1]
    return np.lib.stride_tricks.as_strided(
        A, shape = shape,
        strides = (elem_size * stride, elem_size),
        writeable = False)

def window_np_array(X,Y,win_size,win_stride,padding=0):
    '''Cut every row of X into windows and repeat its label once per window.

    Each ``X[i]`` is copied into a fresh array and split with
    ``slide_window(row, win_size, win_stride)``; every resulting window is
    paired with ``Y[i]``.

    NOTE: ``padding`` is accepted but not forwarded to ``slide_window``, so
    incomplete trailing windows are always dropped.

    Returns a tuple ``(windows, labels)`` of NumPy arrays built from the
    collected windows and their labels.
    '''
    windows, labels = [], []
    for idx in range(len(X)):
        row_windows = slide_window(np.array(X[idx]), win_size, win_stride)
        # Iterating the 2-D result yields one window per step.
        windows.extend(row_windows)
        labels.extend(Y[idx] for _ in range(len(row_windows)))
    return np.array(windows), np.array(labels)

In [33]:
# Load the dataset from a pickle file in the working directory.
# NOTE(review): unpickling can execute arbitrary code; only load this file
# if it comes from a trusted source.
df = pd.read_pickle("./gazeldata.pkl")

In [34]:
# Column of `df` whose values are used as the model input features.
feat = "Embeddings Hist"
# Column of `df` used to group the rows; one group is held out per fold.
control = "Subject"

In [35]:
# Group the rows per subject, keeping only the target and feature columns.
# The columns are selected with a list: the tuple form (`["a", b]`) on a
# GroupBy is deprecated and rejected by pandas >= 2.0. Grouping by the scalar
# key rather than a one-element list keeps the group keys plain scalars.
df_group = df.groupby(control)[["Gaze Target XY", feat]]

In [36]:
# Identifiers of all groups (iterating the groups mapping yields its keys).
g_id = list(df_group.groups)

In [None]:
# --- Windowing hyper-parameters -------------------------------------------
feature_len = 26
n_windows = 5
window_size = n_windows * feature_len
stride_size = window_size  # stride == window size, so windows do not overlap

# Factors applied to the per-column mean absolute error before printing.
# NOTE(review): presumably a unit conversion for the two target columns -
# confirm where 6.4 and 12.8 come from.
ERR_SCALE = (6.4, 12.8)

# Fixed seed so that re-running the cell rebuilds the same forests.
RANDOM_STATE = 0

# --- Per-subject preprocessing (done once, not once per fold) -------------
# NaN replacement, flattening and windowing act on each sample independently,
# so they do not depend on which subject is held out.
subject_windows = {}
for subject in g_id:
    df_extract = df_group.get_group(subject)
    x = np.nan_to_num(np.array(df_extract[feat].tolist()))
    y = np.array(df_extract["Gaze Target XY"].tolist())
    # Flatten the two trailing axes of every sample into one feature vector.
    x = x.reshape(x.shape[0], x.shape[1] * x.shape[2])
    subject_windows[subject] = window_np_array(
        x, y, win_size=window_size, win_stride=stride_size)

# --- Leave-one-subject-out evaluation -------------------------------------
Y_OUT, Y_GT = None, None
fold_preds, fold_truth = [], []

for test_subject in g_id:

    print("Participant",test_subject)

    X_test, Y_test = subject_windows[test_subject]

    # Train on every other subject; subjects that produced no window are
    # skipped because an empty array cannot be stacked with the others.
    train_parts = [subject_windows[s] for s in g_id
                   if s != test_subject and len(subject_windows[s][0])]
    X_train = np.vstack([x_part for x_part, _ in train_parts])
    Y_train = np.vstack([y_part for _, y_part in train_parts])

    print("\t Train shape:",X_train.shape,Y_train.shape)
    print("\t Test shape:",X_test.shape,Y_test.shape)

    # The scaler is fitted on the training fold only and reused on the test
    # fold.
    min_max_scaler_x = preprocessing.MinMaxScaler()
    X_train = min_max_scaler_x.fit_transform(X_train)
    X_test = min_max_scaler_x.transform(X_test)

    clf = ExtraTreesRegressor(n_estimators=100, random_state=RANDOM_STATE)
    clf.fit(X_train,Y_train)

    Y_pred = clf.predict(X_test)

    fold_preds.append(Y_pred)
    fold_truth.append(Y_test)

    # Per-fold mean absolute error of each target column.
    Y_err = np.abs(Y_pred - Y_test)
    fold_mae = np.mean(Y_err,axis=0)
    x_err = fold_mae[0] * ERR_SCALE[0]
    y_err = fold_mae[1] * ERR_SCALE[1]

    print("\t X_Error",x_err,"Y_Error",y_err)

# Stack the predictions and ground truth of all folds once, in fold order.
if fold_preds:
    Y_OUT = np.vstack(fold_preds)
    Y_GT = np.vstack(fold_truth)


Participant 0
	 Train shape: (6758, 130) (6758, 2)
	 Test shape: (756, 130) (756, 2)
	 X_Error 0.3920564373897715 Y_Error 1.1130934744268086
Participant 1
	 Train shape: (6772, 130) (6772, 2)
	 Test shape: (742, 130) (742, 2)
	 X_Error 0.48651212938005406 Y_Error 2.713466307277627
Participant 2
	 Train shape: (6760, 130) (6760, 2)
	 Test shape: (754, 130) (754, 2)
	 X_Error 0.5291883289124671 Y_Error 1.6494924845269683
Participant 3
	 Train shape: (6776, 130) (6776, 2)
	 Test shape: (738, 130) (738, 2)
	 X_Error 0.5155844625112919 Y_Error 2.8291093044263764
Participant 4
	 Train shape: (6758, 130) (6758, 2)
	 Test shape: (756, 130) (756, 2)
	 X_Error 0.752183421516754 Y_Error 1.025911816578483
Participant 5
	 Train shape: (6764, 130) (6764, 2)
	 Test shape: (750, 130) (750, 2)
	 X_Error 0.5397760000000001 Y_Error 2.5145813333333358
Participant 6
	 Train shape: (6758, 130) (6758, 2)
	 Test shape: (756, 130) (756, 2)
	 X_Error 0.5982081128747798 Y_Error 1.716500881834216
Participant 7
	 

In [None]:
# Rescale predictions and ground truth column-wise (column 0 by 6.4,
# column 1 by 12.8) and report the mean absolute error of each column.
# NOTE: Y_OUT and Y_GT are overwritten with their rescaled values, so
# running this cell a second time applies the factors again.
Y_OUT, Y_GT = Y_OUT * [6.4, 12.8], Y_GT * [6.4, 12.8]

Y_err = np.abs(Y_OUT - Y_GT)
x_err, y_err = Y_err.mean(axis=0)[:2]

print("X Error",x_err,"Y Error",y_err)

In [None]:
from scipy.spatial import distance

# Euclidean distance between prediction and ground truth for every sample,
# computed for all rows at once instead of one SciPy call per row.
total_err = np.linalg.norm(Y_OUT - Y_GT, axis=1)
print("Total Eucilidean Error",np.mean(total_err),"SD",np.std(total_err))