In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.svm import SVR
import warnings
warnings.filterwarnings('ignore')
from sklearn import preprocessing
from math import ceil, floor
from scipy.spatial import distance

In [2]:
def slide_window(A, win_size, stride, padding = None):
    '''Collect windows that slide over a one-dimensional array.

    Parameters
    ----------
    A : numpy.ndarray
        One-dimensional array that owns its data (``A.base is None``).
    win_size : int
        Number of elements per window; must be positive.
    stride : int
        Step between the starts of consecutive windows; must satisfy
        ``0 < stride <= win_size``.
    padding : scalar or None
        If None, only windows that fit completely inside ``A`` are
        returned (an incomplete rightmost window is dropped). Otherwise
        one window is produced for every start position inside ``A`` and
        the missing trailing elements are filled with this value.

    Returns
    -------
    numpy.ndarray
        Read-only strided view of shape ``(n_windows, win_size)``.

    Raises
    ------
    ValueError
        If ``win_size`` or ``stride`` is out of range, or if ``A`` is a
        view of another array.
    '''
    if win_size <= 0:
        raise ValueError('Window size must be positive.')
    if not (0 < stride <= win_size):
        raise ValueError(f'Stride must satisfy 0 < stride <= {win_size}.')
    if A.base is not None:
        raise ValueError('Views cannot be slided over!')

    n_elems = len(A)
    if padding is not None:
        n_windows = ceil(n_elems / stride)
        # n_windows * win_size >= (n_windows - 1) * stride + win_size, so
        # the padded buffer always covers the last window.
        A = np.pad(A, (0, n_windows * win_size - n_elems),
                   constant_values = padding)
    else:
        # Count only the windows that lie entirely inside the buffer.
        # floor(n_elems / stride) over-counts whenever stride < win_size,
        # which would make as_strided read past the end of the array.
        n_windows = max(0, (n_elems - win_size) // stride + 1)
    shape = n_windows, win_size

    elem_size = A.strides[-1]
    return np.lib.stride_tricks.as_strided(
        A, shape = shape,
        strides = (elem_size * stride, elem_size),
        writeable = False)

def window_np_array(X,Y,win_size,win_stride,padding=0):
    '''Split every row of X into windows and repeat its label per window.

    Each ``X[i]`` is copied into a fresh array and cut with
    ``slide_window(..., win_size, win_stride)``; every resulting window is
    paired with ``Y[i]``. Returns ``(windows, labels)`` as two arrays.

    NOTE(review): ``padding`` is accepted but never forwarded to
    ``slide_window``, so incomplete trailing windows are always dropped.
    '''
    windows = []
    labels = []
    for row_idx in range(len(X)):
        row_windows = slide_window(np.array(X[row_idx]), win_size, win_stride)
        n_row_windows = len(row_windows)
        windows.extend(row_windows[k] for k in range(n_row_windows))
        # The label is looked up lazily, once per emitted window.
        labels.extend(Y[row_idx] for _ in range(n_row_windows))
    return np.array(windows), np.array(labels)

In [3]:
# Load the full dataset (a pickled pandas object) from the working directory.
# NOTE(review): unpickling can execute arbitrary code; only load this file
# if it comes from a trusted source.
df = pd.read_pickle("./gazeldata.pkl")

In [4]:
# Name of the column whose values are used as model input features.
feat = "Embeddings Hist"
# Name of the column the rows are grouped by; each group is held out in turn
# as the test set.
control = "Subject"

In [5]:
# Group rows by the control column and keep only the target and feature
# columns. The columns are selected with a list: indexing a GroupBy with a
# bare tuple of keys is rejected by current pandas. Grouping by the scalar
# key (not a one-element list) keeps the group keys plain scalars.
df_group = df.groupby(control)[["Gaze Target XY", feat]]

In [6]:
# Identifiers of all groups, in the order the GroupBy reports them.
g_id = [group_key for group_key in df_group.groups]

In [23]:
# Leave-one-group-out evaluation of two SVR regressors (one per target
# coordinate), repeated for window lengths of 1..10 feature units.
gazecoordsAndGrid = []

# Number of features per window unit.
# NOTE(review): presumably the per-frame embedding length -- TODO confirm.
FEATURE_LEN = 26
# Per-coordinate factors applied to predictions and ground truth before the
# errors are computed.
# NOTE(review): meaning/units are not visible in this notebook -- TODO confirm.
TARGET_SCALE = [6.4, 12.8]

# Extract, NaN-clean and flatten every group's data once; none of this
# depends on the window length or on which group is held out.
subject_data = []
for subject in g_id:
    df_extract = df_group.get_group(subject)
    x = np.nan_to_num(np.array(df_extract[feat].tolist()))
    y = np.array(df_extract["Gaze Target XY"].tolist())
    # Flatten each sample's 2-D feature block into a single row.
    x = x.reshape(x.shape[0], x.shape[1] * x.shape[2])
    subject_data.append((x, y))

for n_windows in range(1,11):

    window_size = n_windows * FEATURE_LEN
    stride_size = window_size  # non-overlapping windows

    # Windowing works row by row, so it is done once per group here and the
    # results are re-combined for every fold below.
    windowed = [
        window_np_array(x, y, win_size=window_size, win_stride=stride_size)
        for x, y in subject_data
    ]

    fold_preds, fold_truth = [], []

    for i in range(len(g_id)):

        # Group i is the test set; all other groups form the training set.
        X_test, Y_test = windowed[i]
        X_train = np.vstack([xw for j, (xw, _) in enumerate(windowed) if j != i])
        Y_train = np.vstack([yw for j, (_, yw) in enumerate(windowed) if j != i])

        # Fit the scaler on training data only, then apply it to the test
        # data. Both calls return new arrays, so `windowed` is left intact.
        min_max_scaler_x = preprocessing.MinMaxScaler()
        X_train = min_max_scaler_x.fit_transform(X_train)
        X_test = min_max_scaler_x.transform(X_test)

        clf_x = SVR(kernel='poly')
        clf_x.fit(X_train,Y_train[:,0])

        clf_y = SVR(kernel='poly')
        clf_y.fit(X_train,Y_train[:,1])

        # Build the prediction matrix from the predictions themselves so its
        # dtype does not depend on (and cannot be truncated to) Y_test's dtype.
        Y_pred = np.column_stack((clf_x.predict(X_test), clf_y.predict(X_test)))

        fold_preds.append(Y_pred)
        fold_truth.append(Y_test)

    Y_OUT = np.vstack(fold_preds) * TARGET_SCALE
    Y_GT = np.vstack(fold_truth) * TARGET_SCALE

    Y_err = abs(Y_OUT - Y_GT)
    mean_err = np.mean(Y_err,axis=0)
    sd_err = np.std(Y_err,axis=0)

    print(n_windows)
    print("\t X Error",mean_err[0],"SD",sd_err[0])
    print("\t Y Error",mean_err[1],"SD",sd_err[1])

    # Row-wise Euclidean distance between prediction and ground truth.
    total_err = np.linalg.norm(Y_OUT - Y_GT, axis=1)
    print("\t Total Eucilidean Error",np.mean(total_err),"SD",np.std(total_err))

1
	 X Error 0.5851512293297926 SD 0.39303941004699966
	 Y Error 2.463375012325794 SD 1.9106847301074092
	 Total Eucilidean Error 2.6111137502385375 SD 1.8433392285654628
2
	 X Error 0.5518889821441925 SD 0.383981711464237
	 Y Error 2.3507555185530875 SD 1.871933928432054
	 Total Eucilidean Error 2.493250376659022 SD 1.8071840257880993
3
	 X Error 0.5187315467093893 SD 0.3762814674137409
	 Y Error 2.185742766331993 SD 1.7541237274650732
	 Total Eucilidean Error 2.323888019639583 SD 1.6925235963716463
4
	 X Error 0.488976190157724 SD 0.36187415725025185
	 Y Error 2.044100865868727 SD 1.6615608076122708
	 Total Eucilidean Error 2.1783973165502166 SD 1.6011771977866678
5
	 X Error 0.4846662010414216 SD 0.3567255578610415
	 Y Error 2.0213771626933124 SD 1.6105270660165807
	 Total Eucilidean Error 2.1521627596085007 SD 1.5524538545308073
6
	 X Error 0.46939469096245884 SD 0.3551139873434418
	 Y Error 1.825201174158733 SD 1.465486406744136
	 Total Eucilidean Error 1.9586721605199329 SD 1.4103