In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import ExtraTreesRegressor
import warnings
warnings.filterwarnings('ignore')
from sklearn import preprocessing
from math import ceil, floor
from scipy.spatial import distance

In [2]:
def slide_window(A, win_size, stride, padding = None):
    '''Collect windows that slide over a one-dimensional array.

    Window k starts at offset ``k * stride`` and spans ``win_size``
    consecutive elements.

    If padding is None, only windows that fit entirely inside A are
    returned (a trailing incomplete window is dropped). Otherwise every
    start offset below ``len(A)`` yields a window, and elements past the
    end of A are filled with the padding value.

    Parameters:
        A: one-dimensional NumPy array that owns its data (not a view).
        win_size: number of elements per window; must be positive.
        stride: distance between consecutive window starts;
            must satisfy 0 < stride <= win_size.
        padding: fill value for incomplete trailing windows, or None
            to drop them.

    Returns:
        A read-only strided array of shape (n_windows, win_size). It is
        a view of A when no padding is requested, and a view of a padded
        copy otherwise.

    Raises:
        ValueError: if win_size or stride is out of range, or if A is a
            view of another array.
    '''
    if win_size <= 0:
        raise ValueError('Window size must be positive.')
    if not (0 < stride <= win_size):
        raise ValueError(f'Stride must satisfy 0 < stride <= {win_size}.')
    if A.base is not None:
        raise ValueError('Views cannot be slided over!')

    n_elems = len(A)
    if padding is not None:
        # One window per start offset 0, stride, 2*stride, ... < n_elems
        # (integer ceiling division).
        n_windows = -(-n_elems // stride)
        # Pad exactly up to the end of the last window; since
        # stride <= win_size this is never shorter than the input.
        if n_windows:
            padded_len = (n_windows - 1) * stride + win_size
        else:
            padded_len = n_elems
        A = np.pad(A, (0, padded_len - n_elems),
                   constant_values = padding)
    else:
        # Count only windows whose last element is still inside A.
        # Using floor(n_elems / stride) here would overshoot whenever
        # stride < win_size and make as_strided read out-of-bounds memory.
        n_windows = max(0, (n_elems - win_size) // stride + 1)
    shape = n_windows, win_size

    elem_size = A.strides[-1]
    return np.lib.stride_tricks.as_strided(
        A, shape = shape,
        strides = (elem_size * stride, elem_size),
        writeable = False)

def window_np_array(X,Y,win_size,win_stride,padding=None):
    '''Cut every row of X into windows and repeat its target per window.

    Each row X[i] is passed to slide_window; every resulting window
    becomes one output sample labelled with Y[i].

    Parameters:
        X: sequence of one-dimensional rows (e.g. a 2-D array).
        Y: sequence of targets, indexed in step with X.
        win_size: number of elements per window.
        win_stride: distance between consecutive window starts.
        padding: forwarded to slide_window. None (the default) drops an
            incomplete trailing window; any other value pads it. This
            argument used to be accepted but silently ignored, so the
            default is None to keep results unchanged for callers that
            do not pass it.

    Returns:
        Tuple (X_out, Y_out) of NumPy arrays holding the windows and the
        matching targets.
    '''
    X_out = []
    Y_out = []
    for i in range(len(X)):
        # np.array() copies the row: slide_window refuses views, and X[i]
        # is a view when X is a 2-D array.
        out_vec = slide_window(np.array(X[i]),win_size,win_stride,padding)
        X_out.extend(out_vec)
        Y_out.extend([Y[i]] * len(out_vec))
    return np.array(X_out), np.array(Y_out)

In [3]:
# Load the dataset from a pickle file in the working directory. The result is
# grouped by column later in this notebook, so it is expected to be a DataFrame.
# NOTE(review): unpickling can execute arbitrary code; only load this file if
# it comes from a trusted source.
df = pd.read_pickle("./gazeldata.pkl")

In [4]:
# Column names: `feat` holds the model inputs, `control` is the grouping key.
feat, control = "Embeddings Hist", "Subject"

In [5]:
# Group rows by the control column, keeping only the target and feature columns.
# The columns are selected with a list: a bare tuple of keys
# (`["Gaze Target XY", feat]`) was deprecated in pandas 1.0 and removed in 2.0.
df_group = df.groupby([control])[["Gaze Target XY", feat]]

In [6]:
# Group keys, in the order reported by the GroupBy object.
g_id = list(df_group.groups)

In [9]:
# Leave-one-group-out evaluation, repeated for several window sizes.
#
# For every window size, each group in `g_id` is held out once as the test set
# while a model is trained on all remaining groups. Predictions from all folds
# are pooled, and the per-axis absolute error and the Euclidean error are
# printed.

FEATURE_LEN = 26              # window sizes are multiples of this many values
WINDOW_COUNTS = range(1, 11)  # window size = n_windows * FEATURE_LEN
TARGET_SCALE = [6.4, 12.8]    # per-axis factor applied to predictions and targets
                              # before scoring; NOTE(review): units are not stated
                              # in this notebook - confirm
SEED = 0                      # fixed seed so reruns build the same forests

if len(g_id) < 2:
    raise ValueError('Need at least two groups for leave-one-group-out evaluation.')

# The per-group arrays do not depend on the window size or on the fold, so they
# are extracted, NaN-cleaned and flattened once instead of inside the loops.
group_arrays = []
for group_key in g_id:
    df_extract = df_group.get_group(group_key)
    x = np.nan_to_num(np.array(df_extract[feat].tolist()))
    y = np.array(df_extract["Gaze Target XY"].tolist())
    # Merge the two trailing axes so every sample is a single flat vector.
    x = x.reshape(x.shape[0], x.shape[1] * x.shape[2])
    group_arrays.append((x, y))

for n_windows in WINDOW_COUNTS:

    feature_len = FEATURE_LEN
    window_size = n_windows * feature_len
    stride_size = window_size  # stride == window size: windows do not overlap

    # Per-fold results are collected in lists and concatenated once.
    fold_pred, fold_gt = [], []

    for i in range(len(g_id)):

        # Group i is the test set; every other group goes into training.
        X_test, Y_test = group_arrays[i]
        train_parts = [group_arrays[j] for j in range(len(g_id)) if j != i]
        X_train = np.vstack([part_x for part_x, _ in train_parts])
        Y_train = np.vstack([part_y for _, part_y in train_parts])

        X_train, Y_train = window_np_array(X_train,Y_train,win_size=window_size,win_stride=stride_size)
        X_test, Y_test = window_np_array(X_test,Y_test,win_size=window_size,win_stride=stride_size)

        # The scaler is fitted on the training windows only and then reused
        # for the held-out group.
        min_max_scaler_x = preprocessing.MinMaxScaler()
        X_train = min_max_scaler_x.fit_transform(X_train)
        X_test = min_max_scaler_x.transform(X_test)

        clf = ExtraTreesRegressor(n_estimators=100, random_state=SEED)
        clf.fit(X_train,Y_train)

        Y_pred = clf.predict(X_test)

        fold_pred.append(Y_pred)
        fold_gt.append(Y_test)

    Y_OUT = np.vstack(fold_pred) * TARGET_SCALE
    Y_GT = np.vstack(fold_gt) * TARGET_SCALE

    Y_err = abs(Y_OUT - Y_GT)
    mean_err = np.mean(Y_err,axis=0)
    sd_err = np.std(Y_err,axis=0)

    print(n_windows)
    print("\t X Error",mean_err[0],"SD",sd_err[0])
    print("\t Y Error",mean_err[1],"SD",sd_err[1])

    # Euclidean distance between each prediction and its target.
    total_err = [distance.euclidean(pred, truth) for pred, truth in zip(Y_OUT, Y_GT)]
    print("\t Total Eucilidean Error",np.mean(total_err),"SD",np.std(total_err))

1
	 X Error 0.5796744210806367 SD 0.398608698313283
	 Y Error 2.4380584508916896 SD 1.7064247556168681
	 Total Eucilidean Error 2.585481322429523 SD 1.6328539790013612
2
	 X Error 0.5632721142755709 SD 0.38869776628026803
	 Y Error 2.305115499955649 SD 1.6718031994532834
	 Total Eucilidean Error 2.4523370637396407 SD 1.600902170614612
3
	 X Error 0.5490782835004254 SD 0.37215368838845925
	 Y Error 2.09814450063585 SD 1.6023249171663037
	 Total Eucilidean Error 2.2475034466757577 SD 1.5356982360131775
4
	 X Error 0.5293741460385081 SD 0.3631216422275669
	 Y Error 1.9213194925028796 SD 1.5197086522638952
	 Total Eucilidean Error 2.0761013131165966 SD 1.450131246219063
5
	 X Error 0.5149370952000742 SD 0.3586378231543496
	 Y Error 1.8374653535622394 SD 1.4728017267608264
	 Total Eucilidean Error 1.9925852129325712 SD 1.4031426589280667
6
	 X Error 0.5158555585129995 SD 0.3480964766928352
	 Y Error 1.7125181439091521 SD 1.3918286523454253
	 Total Eucilidean Error 1.8711571145899923 SD 1.32