In [3]:
# Make cells wider
# `display` and `HTML` are imported from the public IPython.display module;
# importing them from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:70% !important; }</style>"))

In [4]:
import json
import os
import re

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error

# !pip install pywebarchive
import webarchive

# Short alias for np.array; later cells use it to turn lists into arrays before arithmetic/indexing.
nparr = np.array

# Importing and loading data into Pandas DFs

In [5]:
# Load in eval trial data
def cleanhtml(raw_html):
    """Return `raw_html` with every `<...>` span (shortest match) removed."""
    return re.sub(r'<.*?>', '', raw_html)

# Gesture labels; a trial's integer gesture target is used as an index into this list.
gestureNames = [
    "Forward flick", "Right flick", "Right tilt",
    "Left flick", "Left tilt", "Pull close",
    "Push away", "Turn to right", "Turn to left",
]

# Walk the working directory and collect, from every directory whose path
# contains 'GAZEL dot', the eval-trial webarchives and the calibration checkpoints.
files = []
calib_files = []
for r, d, f in os.walk("."):
    if 'GAZEL dot' not in r:
        continue
    for file in f:
        fullPath = os.path.join(r, file)
        if ".webarchive" in file:
            files.append(fullPath)
        elif "gazel_checkpoint" in file:
            calib_files.append(fullPath)


# Webarchive format -> parsed JSON dictionaries
# The main resource of each archive is stripped of HTML tags and decoded as JSON;
# every top-level value is itself a JSON-encoded string and is decoded in place.
# NOTE(review): a file that fails here is skipped, which shifts the position of
# later subjects in fileData relative to calibData (both are indexed by position
# further down) — confirm no file fails before trusting the pairing.
fileData = []
for file in files:
    tmp = webarchive.open(file)
    jsonStr = cleanhtml(tmp._main_resource.data.decode())
    try:
        jsonData = json.loads(jsonStr)
        for key in jsonData.keys():
            jsonData[key] = json.loads(jsonData[key])
    except (ValueError, TypeError, AttributeError) as err:
        # json.JSONDecodeError is a ValueError; TypeError/AttributeError cover
        # payloads whose structure is not the expected dict of JSON strings.
        print("Failed on " + file + ": " + repr(err))
    else:
        fileData.append(jsonData)
        
# Load in calibration round data
#
# Each checkpoint file holds a JSON list of rounds (5 rounds per the original
# note: a = [[], [], [], [], []]); every entry is itself a JSON-encoded string.
# Each decoded round is made up of [x, y] pairs.
#   The x is embeddings
#   The y is dot ground truth
calibData = []
for calib_file in calib_files:
    with open(calib_file, 'r') as f:
        a = json.loads(f.read())
        a[:] = [json.loads(a[i]) for i in range(len(a))]
        calibData.append(a)

print("Successfully parsed", len(fileData), "trials and", len(calibData), "calibration rounds")


Successfully parsed 10 trials and 10 calibration rounds


In [6]:
# Flatten the parsed files into row lists (one row per eval trial, one row per
# calibration sample), each tagged with the subject number, ready for DataFrames.
dataList = []
calibList = []

for subNum, data in enumerate(fileData):
    print("Parsing subject #", subNum, end=", ")
    for evalName, gestureBlocks in data.items():
        for gestureBlock in gestureBlocks:
            for segment in gestureBlock:
                # A segment is [timestamp, detected, target, histories]
                timestamp, detected, target, histories = segment

                # What the system detected: [gesture, gaze square]
                detectedGest, detectedGaze = detected[0], detected[1]

                # Ground truth: [gesture index, gaze square]
                gestureTarget = target[0]
                gestureName = gestureNames[gestureTarget]
                gazeTarget = target[1]

                # Per-sample histories recorded during the trial
                (headsize_hist, embeddings_hist, gazepreds_hist,
                 IMU_hist, gestdetect_hist, facevisible_hist) = histories

                dataList.append([
                    subNum, evalName, gestureName, gazeTarget, timestamp,
                    gestureTarget, gazeTarget, detectedGest, detectedGaze,
                    headsize_hist, embeddings_hist, gazepreds_hist,
                    gestdetect_hist, IMU_hist, facevisible_hist,
                ])

    # Calibration rounds are matched to the subject by list position.
    subjectCalData = calibData[subNum]
    for calRound in subjectCalData:
        x, y = calRound
        for i, embedding in enumerate(x):
            # The single embedding is wrapped in a list so it has the same
            # "history" shape as the eval rows.
            calibList.append([subNum, [embedding], y[i]])


# Gaze target squares are numbered top to bottom, left column first
# | 1  5 |
# | 2  6 | 
# | 3  7 |
# | 4  8 |

Parsing subject # 0, Parsing subject # 1, Parsing subject # 2, Parsing subject # 3, Parsing subject # 4, Parsing subject # 5, Parsing subject # 6, Parsing subject # 7, Parsing subject # 8, Parsing subject # 9, 

In [7]:
# Adding columns to the eval DF and filtering             RUN CELL ABOVE FIRST
# NOTE(review): firstInd is referenced only by the commented-out trimming line
# further down this cell, and minLength is not referenced by any live code here
# (the length filter hard-codes 10).
firstInd = 5
minLength = 10

# Function to filter the embeddings and gaze prediction histories based on movement
def getFirstActivity(lst):
    """Return the index of the first entry of `lst` whose sum is non-zero, or -1 if there is none."""
    for idx, entry in enumerate(lst):
        if sum(entry) != 0:
            return idx
    return -1

############## Create dataframe out of the list
evalColumns = [
    'Subject', "Set", "Gesture Target Name", "Gaze Target", "Timestamp",
    "Target Gesture", "Target Gaze", "Detected Gesture", "Detected Gaze",
    "Headsize Hist", "Embeddings Hist", "Gazepreds Hist", "Gestpreds Hist",
    "IMU Hist", "Face Detection Hist",
]
GAZELdata = pd.DataFrame(dataList, columns=evalColumns)
print("\nGAZELdata has length after creating DF: ", len(GAZELdata))


####### Trim the per-trial histories to a fixed window (samples 2..11).
# firstIndices (index of first gesture activity per trial) is still computed,
# but the active trim below uses the fixed window rather than these indices.
firstIndices = [getFirstActivity(hist) for hist in GAZELdata['Gestpreds Hist']]
for col in ['Gazepreds Hist', 'Embeddings Hist', 'Face Detection Hist']:
    GAZELdata[col] = [hist[2:12] for hist in GAZELdata[col]]

# Keep only trials whose trimmed gaze history has the full 10 samples
# (1 sample is about 100 ms per the original note).
hasFullWindow = [len(hist) ==10 for hist in GAZELdata['Gazepreds Hist']]
GAZELdata = GAZELdata.loc[hasFullWindow]


######## Add grid? and Calib? column
# A set whose name starts with "g" is a grid set; eval rows are never calibration rows.
GAZELdata["Grid?"] = [setName[0] == "g" for setName in GAZELdata["Set"]]
GAZELdata['Calib?'] = False


######### Add target locations in XY
# Placeholder column holding one [0.0, 0.0] list per row; the .loc assignments
# later in this cell overwrite it for the grid rows and for the list rows.
GAZELdata['Gaze Target XY'] = np.zeros((len(GAZELdata), 2)).tolist()
def seg2GridCoords(segNum):
    """Map a 1-based grid segment number to its [x, y] target coordinates.

    Segments are laid out four per column: x is (column + 1) / 3 and
    y is 1/8 plus a quarter per row.
    """
    col, row = divmod(segNum - 1, 4)
    return [(1 + col) / 3, 1/8 + row / 4]
def seg2ListCoords(segNum):
    """Map a 1-based list segment number to [x, y]: x is fixed at 0.5, y is 1/12 plus a sixth per row."""
    rowOffset = (segNum - 1) / 6
    return [0.5, 1/12 + rowOffset]

# Row masks as plain Python lists of bools: grid sets have names starting with 'g',
# everything else is treated as a list set.
grids = [setName[0]=='g' for setName in GAZELdata['Set']]
lists = [not isGrid for isGrid in grids]

# Convert each row's gaze target number to XY with the converter for its layout.
gridTargets = GAZELdata.loc[grids, 'Gaze Target']
GAZELdata.loc[grids, 'Gaze Target XY'] = gridTargets.map(seg2GridCoords)
listTargets = GAZELdata.loc[lists, 'Gaze Target']
GAZELdata.loc[lists, 'Gaze Target XY'] = listTargets.map(seg2ListCoords)


######## Add raw XY from base model
# Elements 12 and 13 of every embedding sample are kept as the raw XY pair.
GAZELdata['Raw Gazepreds Hist'] = [
    [sample[12:14] for sample in hist] for hist in GAZELdata['Embeddings Hist']
]


print("After filtering, GAZELdata has length: ", len(GAZELdata))
# # Plot one of the eye histories for fun
# plt.plot(GAZELdata['Gazepreds Hist'][0])
# # Plot one of the eye histories for fun
# plt.plot(GAZELdata['Gazepreds Hist'][0])


GAZELdata has length after creating DF:  3780
After filtering, GAZELdata has length:  3757


In [8]:
# Create calib DF and training DF
def xyToSegGrid(lst):
    """Map an [x, y] point to its grid segment number (1..8).

    The grid has two columns split at x = 0.5 and four rows of height 0.25;
    segments 1-4 are the left column top to bottom, 5-8 the right column.

    The row is clamped to 0..3 so that a point above or below the grid maps to
    the nearest row of its own column. Without the clamp, y >= 1 in the left
    column returned 5 (the top of the right column) and y < 0 returned 0 or 4.
    """
    x, y = lst
    row = min(max(y // .25, 0), 3)
    return int((row + 1) + 4 * (x > .5))

def xyToSegList(lst):
    """Map an [x, y] point to its list segment number (1..6).

    The list layout has six rows of height 1/6; x is ignored. The row is
    clamped to 0..5 so that a point above or below the list maps to the
    nearest row instead of a segment number outside 1..6.
    """
    x, y = lst
    row = min(max(y // (1/6), 0), 5)
    return int(row + 1)

def f(clf, dataset):
    """Score `clf` on `dataset` and return its error and predicted segments.

    For every row, `clf.predict` is run on the row's 'Embeddings Hist' and the
    predictions are averaged into one XY point. The mean absolute error of
    those points against 'Gaze Target XY' is printed. Each point is then
    converted to a segment number with the grid converter when the row's
    'Set' starts with "g", otherwise with the list converter.

    Returns:
        (absError, predSegs): the printed mean absolute error and the list of
        predicted segment numbers, one per row. Previously the segments were
        computed and then discarded.
    """
    newOutputs = [np.mean(clf.predict(x), axis=0) for x in dataset['Embeddings Hist']]
    absError = mean_absolute_error(dataset['Gaze Target XY'].tolist(), newOutputs)
    print("Testing abs error on same conditions as base model", absError)

    # Segment prediction for the new model; iterate the columns directly
    # instead of doing a positional row lookup per prediction.
    predSegs = [
        xyToSegGrid(curXY) if setName[0] == "g" else xyToSegList(curXY)
        for curXY, setName in zip(newOutputs, dataset['Set'])
    ]
    return absError, predSegs


# Calibration samples as a DataFrame with the same key columns as the eval data.
CALIBdata = pd.DataFrame(
    calibList, columns=["Subject", "Embeddings Hist", "Gaze Target XY"]
).assign(**{
    "Set": "grid",  # Assume grid for accuracy calcs
    "Gaze Target": lambda df: df['Gaze Target XY'].map(xyToSegGrid),
    "Grid?": True,
    "Calib?": True,
})


# Eval rows followed by calibration rows; reset_index() keeps the old labels
# in an 'index' column.
ALLdata = pd.concat([GAZELdata, CALIBdata], sort=False).reset_index()
print("All data length: ", len(ALLdata))

# Only the columns that both sources share.
postColumns = ['Subject', 'Set', 'Calib?', 'Grid?', 'Embeddings Hist', 'Gaze Target XY', 'Gaze Target']
postDF = ALLdata[postColumns].copy()

All data length:  17757


In [None]:
# Functions for getting base accuracy and cm error from a subject
def getBaseErrorCM(subjectData):
    """Return the mean absolute error of each row's averaged 'Gazepreds Hist' against 'Gaze Target XY'.

    NOTE(review): despite the "CM" in the name, no centimetre scaling is applied here.
    """
    histories = np.array(subjectData['Gazepreds Hist'])
    meanPreds = [np.mean(hist, axis=0) for hist in histories]
    targets = subjectData['Gaze Target XY'].to_list()
    return mean_absolute_error(targets, meanPreds)

def getErrorWithModel(subjectData, model):
    """Return the mean absolute error of `model` on `subjectData`.

    For every row, `model.predict` is run on the row's 'Embeddings Hist' and
    the predictions are averaged into one point, which is compared against
    'Gaze Target XY'.

    Only the 'Embeddings Hist' and 'Gaze Target XY' columns are read. The
    previous unused 'Gazepreds Hist' lookup was removed because it raised
    KeyError for frames without that column (e.g. the calibration frame).
    """
    feats = subjectData['Embeddings Hist']
    b = [np.mean(model.predict(x), axis=0) for x in feats]

    return mean_absolute_error(subjectData['Gaze Target XY'].to_list(), b)

def printAcc(suff, correct, total):
    """Print `suff` followed by "correct/total = pct%", with pct rounded to 2 decimals.

    When `total` is 0 the percentage is undefined, so "n/a" is printed instead
    of raising ZeroDivisionError on an empty selection.
    """
    if total == 0:
        print(suff, str(correct) + "/" + str(total) + " = n/a")
        return
    pct = round(correct / total * 100, 2)
    print(suff, str(correct) + "/" + str(total) + " = " + str(pct) + "%")

def getNatureModelAccuracy(subData):
    """Print the segment accuracy of the averaged 'Raw Gazepreds Hist' predictions.

    Each row's raw XY history is averaged, converted to a segment number with
    the grid or list converter, and compared with 'Target Gaze'.

    The grid/list mask is derived from `subData['Set']` itself (names starting
    with 'g' are grid sets). The previous version indexed with the global
    `grids` mask, which only lines up when `subData` is the full eval frame.
    """
    xy = subData['Raw Gazepreds Hist']
    avgs = np.array([np.mean(x, axis=0) for x in xy])

    isGrid = np.array([setName[0] == 'g' for setName in subData['Set']], dtype=bool)
    classOuts = np.zeros(len(avgs))
    classOuts[~isGrid] = [xyToSegList(point) for point in avgs[~isGrid]]
    classOuts[isGrid] = [xyToSegGrid(point) for point in avgs[isGrid]]

    correct = int((classOuts == subData['Target Gaze'].to_numpy()).sum())
    printAcc("\nNature Model Base Accuracy:", correct, len(subData))
    
def getBaseAccuracy(subData):
    """Print how often the detected gaze and the detected gesture equal their targets."""
    # Gaze: detected values are cast to int before comparing with the targets.
    detectedGaze = subData['Detected Gaze'].to_numpy().astype(int)
    gazeHits = sum(subData['Target Gaze'] == detectedGaze)
    printAcc("\nUnprocessed Gaze Accuracy:", gazeHits, len(detectedGaze))

    # Gestures: compared as stored.
    detectedGest = subData['Detected Gesture']
    gestHits = sum(subData['Target Gesture'] == detectedGest)
    printAcc("Unprocessed Gestures Accuracy:", gestHits, len(detectedGest))

# getBaseAccuracy(GAZELdata)
# getNatureModelAccuracy(GAZELdata)

# Mean absolute error of the averaged 'Gazepreds Hist' predictions against the
# gaze targets over all eval trials; shown as this cell's output.
getBaseErrorCM(GAZELdata)
# TODO: add per-square metrics to this

In [None]:
# Shape of one trial's embedding history. Positional .iloc is used because the
# filtering cell drops rows without resetting the index, so a given label
# (such as 500) is not guaranteed to exist.
np.array(GAZELdata['Embeddings Hist'].iloc[500]).shape

In [2]:
# Display the trimmed embedding histories. The recorded NameError output shows
# this cell was run before GAZELdata had been created in that kernel session.
GAZELdata['Embeddings Hist']

NameError: name 'GAZELdata' is not defined

In [12]:
# Persist the calibration DataFrame to disk as a pickle in the working directory.
CALIBdata.to_pickle("calibdata.pkl")

In [11]:
# Persist the eval-trial DataFrame to disk as a pickle in the working directory.
# NOTE(review): unlike "calibdata.pkl", this file name has no .pkl extension —
# confirm that is what any downstream reader expects before renaming.
GAZELdata.to_pickle("gazeldata")

In [13]:
# Display the calibration DataFrame (one row per calibration sample).
CALIBdata

Unnamed: 0,Subject,Embeddings Hist,Gaze Target XY,Set,Gaze Target,Grid?,Calib?
0,0,"[[-0.7128906846046448, -0.72509765625, -2.0507...","[0.5, 0.5]",grid,3,True,True
1,0,"[[-0.4467773735523224, 0.2177734524011612, -0....","[0.5, 0.5]",grid,3,True,True
2,0,"[[-0.86083984375, 0.3203125, -1.04394519329071...","[0.5, 0.5]",grid,3,True,True
3,0,"[[-0.732421875, 0.297607421875, -0.84570318460...","[0.5, 0.5]",grid,3,True,True
4,0,"[[-0.607421875, 0.1700439602136612, -0.8593750...","[0.5, 0.5]",grid,3,True,True
5,0,"[[-0.63134765625, 0.01597595401108265, -0.6577...","[0.5, 0.5]",grid,3,True,True
6,0,"[[-0.66357421875, 0.2457275390625, -0.81640625...","[0.5, 0.5]",grid,3,True,True
7,0,"[[-0.7099609375, 0.318359375, -0.8486328125, -...","[0.5, 0.5]",grid,3,True,True
8,0,"[[-0.560546875, 0.296630859375, -0.86816412210...","[0.5, 0.5]",grid,3,True,True
9,0,"[[-0.4899902641773224, 0.388427734375, -0.9067...","[0.5, 0.5]",grid,3,True,True


In [None]:
# Plot one trial's raw XY history. Positional .iloc is used because the
# filtering cell drops rows without resetting the index, so a given label
# (such as 80) is not guaranteed to exist.
plt.plot(GAZELdata['Raw Gazepreds Hist'].iloc[80])

# The Machine Begins to Learn Regression

In [14]:
# List of regression models

import copy 
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR, NuSVR, LinearSVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import RANSACRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, \
    ExtraTreesRegressor,AdaBoostRegressor, VotingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor

from sklearn.preprocessing import MinMaxScaler
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import RepeatedKFold,cross_val_score
from sklearn.metrics import mean_absolute_error

# Candidate regressors to compare.
# NOTE(review): ExtraTreesRegressor() with default arguments appears twice in
# this list, and the driver cell below builds its own `model` rather than
# iterating over `models`.
models = [
    ExtraTreesRegressor(),
    ExtraTreesRegressor(n_estimators=100),
    ExtraTreesRegressor(n_estimators=100, max_depth=10),
    MLPRegressor(),
    MLPRegressor(solver="lbfgs"),
    MLPRegressor(hidden_layer_sizes=(32,128), max_iter=300, solver="lbfgs"),
    KNeighborsRegressor(n_neighbors = 16), 
    RandomForestRegressor(), 
    ExtraTreesRegressor(), 
    SVR(),
    NuSVR(),
    LinearSVR(),
    LinearRegression(),
#     RANSACRegressor() # Broken
]

#### Splitting data into pairs
def getRegressPairsFromTable(tbl, together=False):
    """Flatten `tbl` into (features, XY target) pairs plus per-pair labels.

    Args:
        tbl: DataFrame with 'Embeddings Hist', 'Gaze Target XY', 'Gaze Target'
            and 'Set' columns.
        together: when False (default) every sample of a row's embedding
            history becomes its own pair; when True each row yields a single
            pair whose features are the last 6 samples of the history.

    Returns:
        (pairs, gazeTargets, gridOrList): `pairs` is a list of
        [features, XY]; the other two lists hold the row's 'Gaze Target' and
        'Set' value for each pair, in the same order.

    The columns are iterated positionally rather than looked up with
    `.loc[label, column]` per row, so frames with a non-unique index (e.g.
    concatenated without reset_index) are handled correctly.
    """
    c = []
    gridOrList = []
    gazeTargets = []
    rows = zip(tbl['Embeddings Hist'], tbl['Gaze Target XY'], tbl['Gaze Target'], tbl['Set'])
    for embeds, XY, gazeTarget, setName in rows:
        # One feature entry per row when `together`, otherwise one per sample.
        featureList = [embeds[-6:]] if together else embeds
        for features in featureList:
            c.append([features, XY])
            gazeTargets.append(gazeTarget)
            gridOrList.append(setName)
    return c,gazeTargets, gridOrList


Unnamed: 0,Subject,Set,Gesture Target Name,Gaze Target,Timestamp,Target Gesture,Target Gaze,Detected Gesture,Detected Gaze,Headsize Hist,Embeddings Hist,Gazepreds Hist,Gestpreds Hist,IMU Hist,Face Detection Hist,Grid?,Calib?,Gaze Target XY,Raw Gazepreds Hist
0,0,grid1_results,Left flick,2,1616861680190,3,2,3,2.0,"[0.15474085291843467, 0.15339179082189278, 0.1...","[[-1.0722655057907104, 0.58056640625, -1.44726...","[[0.4408655819624567, 0.6998267022815959], [0....","[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0,...","[[[-1.4482886795351362, 0.3025457372458007, -0...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]",True,False,"[0.3333333333333333, 0.375]","[[0.4929199516773224, 0.75927734375], [0.53222..."
1,0,grid1_results,Left flick,3,1616861688421,3,3,8,3.0,"[0.15691431449493426, 0.15682658274679012, 0.1...","[[-0.39697265625, 0.697265625, -1.326171755790...","[[0.46081477168312146, 0.255917388931368], [0....","[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0,...","[[[1.4740792603976929, 2.564706791592404, 3.40...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]",True,False,"[0.3333333333333333, 0.625]","[[0.4870605170726776, 0.58837890625], [0.35034..."
2,0,grid1_results,Left flick,8,1616861696207,3,8,8,8.0,"[0.15572123907585184, 0.15590843944757327, 0.1...","[[-0.2340088039636612, 0.390380859375, -0.9882...","[[0.6430930385547162, 0.23461348346047345], [0...","[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0,...","[[[-0.7633721438833612, -0.3915406208514074, 0...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]",True,False,"[0.6666666666666666, 0.875]","[[0.61474609375, 0.5478515625], [0.56103515625..."
3,0,grid1_results,Left flick,6,1616861702620,3,6,3,6.0,"[0.15649527662761092, 0.15630403107833635, 0.1...","[[-0.2170410454273224, 0.86328125, -1.38867175...","[[0.3356502771019181, 0.18717963203196353], [0...","[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0,...","[[[1.156149215737526, 1.021446039195533, 1.432...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]",True,False,"[0.6666666666666666, 0.375]","[[0.4465332329273224, 0.50341796875], [0.55908..."
4,0,grid1_results,Left flick,5,1616861708054,3,5,8,5.0,"[0.15681730829338922, 0.1567678157949997, 0.15...","[[-0.5068359971046448, 0.8930664658546448, -1....","[[0.3037755333955007, 0.3673229709353406], [0....","[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0,...","[[[0.8394411346085568, 0.8610143938237479, 1.5...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]",True,False,"[0.6666666666666666, 0.125]","[[0.432861328125, 0.6455078721046448], [0.4162..."
5,0,grid1_results,Left flick,4,1616861713979,3,4,8,4.0,"[0.15683194002032294, 0.15739750893226956, 0.1...","[[-0.7070311903953552, 0.60546875, -1.30859363...","[[0.49261997935215845, 0.45672004414251005], [...","[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0,...","[[[-1.1717048648505244, -0.7750242865248442, -...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]",True,False,"[0.3333333333333333, 0.875]","[[0.5102539658546448, 0.7382813096046448], [0...."
6,0,grid1_results,Left flick,7,1616861720415,3,7,3,8.0,"[0.15679371817075435, 0.15674527243999392, 0.1...","[[-0.8242188096046448, 0.791015625, -1.3261717...","[[0.38453372016137155, 0.6072503645783636], [0...","[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0,...","[[[0.023514030120413074, 0.18751153116188723, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]",True,False,"[0.6666666666666666, 0.625]","[[0.453857421875, 0.75634765625], [0.7734375, ..."
7,0,grid1_results,Left flick,1,1616861730176,3,1,8,1.0,"[0.15779309213154807, 0.15782172202412895, 0.1...","[[-0.27197265625, 0.525390625, -1.181640505790...","[[0.5464330200780081, 0.230955765204733], [0.2...","[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0,...","[[[0.7486443350421097, 0.12430590773772805, 0....","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]",True,False,"[0.3333333333333333, 0.125]","[[0.5400390625, 0.56982421875], [0.37890625, 0..."
8,0,grid1_results,Push away,3,1616861738486,6,3,6,4.0,"[0.1587893197686826, 0.15909939764786302, 0.15...","[[-0.380615234375, 0.7080078721046448, -1.4599...","[[0.41660506252301893, 0.28522602557355514], [...","[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0,...","[[[1.3037482163193264, 0.5035805093557842, -0....","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]",True,False,"[0.3333333333333333, 0.625]","[[0.48388671875, 0.625], [0.4362793266773224, ..."
9,0,grid1_results,Push away,6,1616861749136,6,6,6,7.0,"[0.15591088026816394, 0.15591535882660829, 0.1...","[[-0.40478515625, 0.67919921875, -1.3554686307...","[[0.4411056027122824, 0.3427244470813625], [0....","[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0,...","[[[1.0854122548765832, 1.0483486602323322, 2.0...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]",True,False,"[0.6666666666666666, 0.375]","[[0.4931640625, 0.65966796875], [0.49145507812..."


In [15]:
# Regression 
# GAZELdata, CALIBdata, ALLdata, postDF
def regressXYtogether_SingleEmbedThenVote(regressor, trainingDF, testingDF):
    """Fit `regressor` on single embeddings to predict XY jointly, then score it.

    Training pairs are one embedding sample each (flattened) with the row's XY
    target. At test time the regressor predicts every sample of a trial's
    'Embeddings Hist' and the predictions are averaged into one point per trial.

    Returns:
        The mean absolute error between the averaged predictions and
        'Gaze Target XY' (this is the value printed under the
        "Combined euclidean error:" label).

    The `regressor` argument is the model that is fitted and used; the
    previous version fitted and predicted with the global `model` instead.
    """
    train, trainGazetargs, trainSet = getRegressPairsFromTable(trainingDF, together=False)
    train_x = [np.array(x[0]).flatten() for x in train]
    train_y = [x[1] for x in train]

    print("Fitting...")
    regressor.fit(train_x, train_y)

    # Test on the testing data
    print("Predicting...")
    preds = [regressor.predict(x) for x in testingDF['Embeddings Hist']]
    avgedPreds = [x.mean(axis=0) for x in preds]
    p = testingDF['Gaze Target XY'].to_list()
    q = avgedPreds
    score = mean_absolute_error(p, q)

    print("Combined euclidean error:", score)
    print("model", regressor)
    return score


def regressXYseparate_SingleEmbedThenVote(regressor, trainingDF, testingDF):
    """Fit one regressor per axis on single embeddings, then score each axis.

    `regressor` itself is fitted on the x targets and a deep copy of it on the
    y targets. At test time each model predicts every sample of a trial's
    'Embeddings Hist' and the predictions are averaged into one value per trial.

    Returns:
        (score_x, score_y, (p_x, p_y, q_x, q_y)): the per-axis mean absolute
        errors, followed by the ground-truth (p) and averaged predicted (q)
        coordinates per trial.
    """
    pairs, _gazeTargets, _setNames = getRegressPairsFromTable(trainingDF, together=False)
    features = [np.array(pair[0]).flatten() for pair in pairs]
    targetsX = [pair[1][0] for pair in pairs]
    targetsY = [pair[1][1] for pair in pairs]

    # Independent model per axis; the y model starts as an unfitted copy.
    regressorX = regressor
    regressorY = copy.deepcopy(regressorX)
    print("Fitting...")
    regressorX.fit(features, targetsX)
    regressorY.fit(features, targetsY)

    # Test on the testing data: average the per-sample predictions of each trial.
    print("Predicting...")
    histories = testingDF['Embeddings Hist']
    q_x = [regressorX.predict(hist).mean() for hist in histories]
    q_y = [regressorY.predict(hist).mean() for hist in histories]

    truth = testingDF['Gaze Target XY'].to_list()
    p_x = [xy[0] for xy in truth]
    p_y = [xy[1] for xy in truth]

    score_x = mean_absolute_error(p_x, q_x)
    score_y = mean_absolute_error(p_y, q_y)

    # The centimetre figures scale x by 6.4 and y by 12.8.
    cm_x = score_x*6.4
    cm_y = score_y*12.8
    print("Mean euclidean error x:", score_x,
          "\n\t Centerimeter error (6.4cm width)", cm_x)
    print("Mean euclidean error y:", score_y,
          "\n\t Centerimeter error (12.8cm width)", cm_y)

    print("Combined:", (score_x**2+score_y**2)**.5,
          "\n\t Centimeters:", ((cm_x)**2 + (cm_y)**2)**.5)
    print("model", regressor)
    return score_x,score_y, (p_x, p_y, q_x, q_y)


# Hold out subject i for testing and train on every other subject.
regDF = GAZELdata.copy()
i = 1
heldOut = regDF['Subject'] == i
testDF = regDF.loc[heldOut]
trainDF = regDF.loc[~heldOut].reset_index()

# Alternatives kept for reference:
# regressXYtogether_SingleEmbedThenVote(model, trainDF, testDF)
# classifier_SingleEmbedThenVote(model, trainDF, testDF)
model = ExtraTreesRegressor(n_estimators=100)
sx, sy, preds = regressXYseparate_SingleEmbedThenVote(model, trainDF, testDF)






Fitting...
Predicting...
Mean euclidean error x: 0.07836927223719678 
	 Centerimeter error (6.4cm width) 0.5015633423180594
Mean euclidean error y: 0.2181238769092543 
	 Centerimeter error (12.8cm width) 2.791985624438455
Combined: 0.23177525430664808 
	 Centimeters: 2.836679310995209
model ExtraTreesRegressor(bootstrap=False, criterion='mse', max_depth=None,
                    max_features='auto', max_leaf_nodes=None,
                    min_impurity_decrease=0.0, min_impurity_split=None,
                    min_samples_leaf=1, min_samples_split=2,
                    min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,
                    oob_score=False, random_state=None, verbose=0,
                    warm_start=False)


In [None]:
# Unpack the third return value of regressXYseparate_SingleEmbedThenVote:
# ground-truth x/y followed by the averaged predicted x/y, one entry per test trial.
gtx,gty, predx, predy = preds

In [None]:
# Signed per-trial error on each axis (ground truth minus prediction).
xerr = np.subtract(nparr(gtx), nparr(predx))
yerr = np.subtract(nparr(gty), nparr(predy))

In [None]:
# Spread (printed) and mean (cell output) of the signed x-axis error.
print(np.std(xerr))
xerr.mean()

In [None]:
# Spread (printed) and mean (cell output) of the signed y-axis error.
# Only a cell's last expression is displayed, so the std has to be printed
# explicitly, as is done for the x axis.
print(np.std(yerr))
yerr.mean()

# The Machine Begins to Learn Classification

In [None]:
# List of classifier models
from sklearn.neighbors import KNeighborsClassifier,RadiusNeighborsClassifier
from sklearn.linear_model import SGDClassifier,LogisticRegression
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
from sklearn.svm import SVC, LinearSVC


from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import classification_report

from sklearn.metrics import mean_absolute_error,accuracy_score
# Import the submodule explicitly: a bare `import scipy` does not guarantee
# that `scipy.stats` has been loaded on every SciPy version.
import scipy.stats
mode = scipy.stats.mode

import copy
dcopy = copy.deepcopy

# Classification
# Candidate classifiers to compare. This rebinds `models`, replacing the
# regressor list defined in the regression section.
# NOTE(review): the driver cells below build their own `model` rather than
# iterating over `models`.
models = [
    KNeighborsClassifier(n_neighbors=14),
    ExtraTreesClassifier(),
    ExtraTreesClassifier(n_estimators=100),
    RandomForestClassifier(),
    RandomForestClassifier(n_estimators=100),
    SGDClassifier(),
    LogisticRegression(),
    SVC(),
    LinearSVC()
]

#### Splitting data into pairs
def getClassPairsFromTable(tbl, together=False):
    """Flatten `tbl` into (features, XY target) pairs plus per-pair labels.

    Args:
        tbl: DataFrame with 'Embeddings Hist', 'Gaze Target XY', 'Gaze Target'
            and 'Set' columns.
        together: when False (default) every sample of a row's embedding
            history becomes its own pair; when True each row yields a single
            pair whose features are the last 6 samples of the history.

    Returns:
        (pairs, gazeTargets, gridOrList): `pairs` is a list of
        [features, XY]; the other two lists hold the row's 'Gaze Target' and
        'Set' value for each pair, in the same order.

    The columns are iterated positionally rather than looked up with
    `.loc[label, column]` per row, so frames with a non-unique index (e.g.
    concatenated without reset_index) are handled correctly.
    """
    c = []
    gridOrList = []
    gazeTargets = []
    rows = zip(tbl['Embeddings Hist'], tbl['Gaze Target XY'], tbl['Gaze Target'], tbl['Set'])
    for embeds, XY, gazeTarget, setName in rows:
        # One feature entry per row when `together`, otherwise one per sample.
        featureList = [embeds[-6:]] if together else embeds
        for features in featureList:
            c.append([features, XY])
            gazeTargets.append(gazeTarget)
            gridOrList.append(setName)
    return c,gazeTargets, gridOrList


In [None]:
# Classification 
# GAZELdata, CALIBdata, ALLdata, postDF
def classifier_SingleEmbedThenVote(clf, trainingDF, testingDF):
    """Fit `clf` on single embeddings, then classify each test trial by majority vote.

    Training pairs are one embedding sample each (flattened), labelled with
    the row's 'Gaze Target'. At test time every sample of a trial's
    'Embeddings Hist' is classified and the most frequent label wins; ties go
    to the smallest label.

    Returns:
        The fraction of test trials whose voted label equals 'Gaze Target'.

    The vote is computed with np.unique instead of `mode(x).mode[0]`, which
    fails on SciPy versions where `mode` returns a scalar for 1-D input.
    """
    train, trainGazetargs, trainSet = getClassPairsFromTable(trainingDF, together=False)
    train_x = [np.array(x[0]).flatten() for x in train]
    train_y = trainGazetargs

    print("Fitting...")
    clf.fit(train_x, train_y)

    # Test on the testing data
    print("Predicting...")
    voteOutputs = []
    for x in testingDF['Embeddings Hist']:
        # np.unique returns labels sorted ascending, so argmax picks the
        # smallest label among equally frequent ones.
        labels, counts = np.unique(clf.predict(x), return_counts=True)
        voteOutputs.append(labels[np.argmax(counts)])
    preds = np.array(voteOutputs)
    gt = testingDF['Gaze Target'].to_numpy()
    score = sum(preds == gt)/len(preds)
    print("score", score)
    print("model", clf)
    return score



In [None]:
# Leave-one-subject-out evaluation on the grid trials.
# The grid-only frame does not change between iterations, so it is built once.
classDF = GAZELdata.copy()
classDF = classDF.loc[classDF['Grid?']]

# Iterate over the subject ids actually present instead of a hard-coded
# range(9): the recorded parsing output lists subjects 0-9, so range(9)
# never held out the last subject.
for i in sorted(classDF['Subject'].unique()):
    print(i)

    testDF = classDF.loc[classDF['Subject'] == i]
    trainDF = classDF.drop(testDF.index).reset_index()

    model = ExtraTreesClassifier(n_estimators=100)
    classifier_SingleEmbedThenVote(model, trainDF, testDF)



In [None]:
# Open question: do the overlapping segment labels of the list and grid sets
# hurt accuracy? This cell evaluates the grid trials only, holding out subject 1.
#
# Variants tried previously (kept for reference):
#   - list trials only:      classDF.loc[np.invert(classDF['Grid?'])]
#   - offset list labels:    classDF.loc[lists, 'Gaze Target'].map(lambda x: x+8)
#   - random 80/20 split:    trainDF = classDF.sample(frac=0.8); testDF = classDF.drop(trainDF.index).reset_index()
#   - add calibration data:  pd.concat([CALIBdata, trainDF], sort=False).reset_index()
#   - other models:          LinearSVC(), KNeighborsClassifier(n_neighbors=14)
classDF = GAZELdata.copy()
classDF = classDF.loc[classDF['Grid?']]

heldOut = classDF['Subject'] == 1
testDF = classDF.loc[heldOut]
trainDF = classDF.loc[~heldOut].reset_index()

model = ExtraTreesClassifier(n_estimators=100)

classifier_SingleEmbedThenVote(model, trainDF, testDF)






In [None]:
# Display the eval-trial DataFrame (pandas truncates the rendering of long frames).
GAZELdata


In [None]:
# Length of one embedding sample. testDF keeps the original row labels of the
# held-out subject, so label 0 is generally absent; take the first row by position.
len(testDF['Embeddings Hist'].iloc[0][0])

In [None]:
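# Results log from earlier manual runs (numbers are presumably classification accuracy in %).
# Kept as comments so that running this cell does not raise a SyntaxError.
########### Grid
# ETC (ExtraTreesClassifier)
#   65.9  grid separate, with calib data added. ETC 100
#   65.4  grid separate, without calib data. ETC 100
#
# KNN
#   70    grid separate, without calib data. Using KNN
#
# SVC
#   63
#
# LinearSVC
#   47
#
#
########### List
#   54    list without calib, KNN 12
#   49    list without calib, ETC 100
#   49.8  list with calib, labels from grid though. ETC 100           - REDO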



In [None]:
# Boolean mask selecting the non-grid rows of classDF.
~classDF['Grid?']


In [None]:
# subData = GAZELdata
# gazeGT = subData['Target Gaze'].to_numpy()
# gazePred = subData['Detected Gaze'].to_numpy().astype(int)


In [None]:
# Number of eval trials whose detected gaze square equals the target square.
# gazeGT / gazePred are defined here so the cell runs on a fresh kernel; they
# were previously only defined in commented-out code.
gazeGT = GAZELdata['Target Gaze'].to_numpy()
gazePred = GAZELdata['Detected Gaze'].to_numpy().astype(int)
sum(gazeGT == gazePred)