# Moby Eye Tracking

Notebook for developing fast, accurate eye tracking straight from your webcam.

In [1]:
# Imports
import os
import face_recognition
import cv2
import random
import time
import sys

import matplotlib.pyplot as plt 
import numpy as np

from tkinter import *

from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler

import keras 
from keras.models import Model
from keras.layers import Input, concatenate, Conv2D, Dense, MaxPool2D, Flatten 

%matplotlib inline 

Using TensorFlow backend.


In [2]:
# Convenience functions
def small_dot(tkinter_canvas, centre_x, centre_y, radius=5, fill="red"):
    """Given the centre point of a dot, this convenience function will draw a small dot with given radius"""
    
    tkinter_canvas.create_oval(centre_x - radius, centre_y - radius,
                               centre_x + radius, centre_y + radius, fill=fill)
    
    return

def random_dot(tkinter_canvas, tk_width, tk_height):
    
    border = 5 # Should be same, or higher than radius of dots
    
    random_width = random.randint(border, tk_width - border)
    random_height = random.randint(border, tk_height - border)
    
    small_dot(tkinter_canvas, random_width, random_height)
    
    return random_width, random_height

def neural_model(dummy_sample):
    
    print("About to initialise a neural network with input shape: ", dummy_sample.shape)
    
    visible = Input(shape=(dummy_sample.shape))
    
    c11 = Conv2D(4, 3)(visible)
    c12 = Conv2D(4, 3)(c11)
    p1 = Conv2D(8, 1, strides=2)(c12)
    c21 = Conv2D(8, 3)(p1)
    c22 = Conv2D(8, 3)(c21)
    p2 = Conv2D(16, 1, strides=2)(c22)
    #c31 = Conv2D(8, 3)(p2)
    #c32 = Conv2D(8, 3)(c31)
    #p3 = Conv2D(16, 1, strides=2)(c32)
    
    f1 = Flatten()(p2)
    d1 = Dense(200, activation="relu")(f1)
    d2 = Dense(200, activation="relu")(d1)
    output = Dense(2)(d2)
    
    model = Model(inputs=visible, outputs=output)
    
    model.compile(loss=keras.losses.MeanSquaredError(), optimizer="adam")
    
    return model

def extract_facial_features(frame, display=False):
    
    # Basic code for facial landmark extraction from webcam from:
    # https://elbruno.com/2019/05/29/vscode-lets-do-some-facerecognition-with-20-lines-in-python-3-n/    
    rgb_frame = frame[:, :, ::-1].copy()
    frame_copy = frame.copy()
    bw_frame = np.mean(rgb_frame, axis=2)

    face_landmarks_list = face_recognition.face_landmarks(rgb_frame)
    
    # Extract region around eyes, before green lines added. Uses face_recognition
    border_height = 10
    border_width = 15
    
    # Creat linear ingredients to bundle with the eye data
    grad_x = np.zeros(frame_copy.shape[:2], dtype=np.float)
    grad_y = np.zeros(frame_copy.shape[:2], dtype=np.float)
    
    for i in range(border_height * 2):
        grad_x[i, :] = i / (border_height * 2)
        
    for j in range(border_width * 2):
        grad_y[:, j] = j / (border_width * 2)
    
    try:
        left_eye = np.mean(np.array(face_landmarks_list[0]["left_eye"]), axis=0, dtype=int)
        left_eye_region = bw_frame[left_eye[1] - border_height: left_eye[1] + border_height,
                                   left_eye[0] - border_width: left_eye[0] + border_width]
        left_eye_x_grad = grad_x[left_eye[1] - border_height: left_eye[1] + border_height,
                                 left_eye[0] - border_width: left_eye[0] + border_width]
        left_eye_y_grad = grad_y[left_eye[1] - border_height: left_eye[1] + border_height,
                                 left_eye[0] - border_width: left_eye[0] + border_width]
        
        left_eye_flattened = left_eye_region.reshape(1,-1)[0]
    
        right_eye = np.mean(np.array(face_landmarks_list[0]["right_eye"]), axis=0, dtype=int)
        right_eye_region = bw_frame[right_eye[1] - border_height: right_eye[1] + border_height,
                                    right_eye[0] - border_width: right_eye[0] + border_width]
        right_eye_x_grad = grad_x[right_eye[1] - border_height: right_eye[1] + border_height,
                                  right_eye[0] - border_width: right_eye[0] + border_width]
        right_eye_y_grad = grad_y[right_eye[1] - border_height: right_eye[1] + border_height,
                                  right_eye[0] - border_width: right_eye[0] + border_width]
        
        right_eye_flattened = right_eye_region.reshape(1,-1)[0]
            
        # Scale features
        scaler = StandardScaler()
        left_eye_region = scaler.fit_transform(left_eye_region)
        right_eye_region = scaler.fit_transform(right_eye_region)
        
        eyes_and_gradients = np.stack((left_eye_region, left_eye_x_grad, left_eye_y_grad,
                                       right_eye_region, right_eye_x_grad, right_eye_y_grad), axis=2)
    except IndexError:
        print("Could not extract eye regions, probably because face not detected")
        return [], [], [], []
        
    for face_landmarks in face_landmarks_list:

        for facial_feature in face_landmarks.keys():
            pts = np.array([face_landmarks[facial_feature]], np.int32) 
            pts = pts.reshape((-1,1,2))
            cv2.polylines(frame, [pts], False, (0,255,0))

    if display:
        cv2.imshow('Video', frame)
        
    # print(face_landmarks_list)
    
    # I suspect this code will break if multiple faces
    landmark_array = np.array(np.zeros((0, 2)))
    if face_landmarks_list != []:
        for landmark in face_landmarks_list[0].values():
            landmark_array = np.concatenate((landmark_array, np.array(landmark)))
    else:
        print("No face detected") 
    
    # Concatenate the extracted facial features, with the region around the eyes 
    everything_array = np.concatenate(
        (landmark_array[0], left_eye_flattened, right_eye_flattened))
    landmark_array = landmark_array[0]
    
    # if cv2.waitKey(1) & 0xFF == ord('q'):
    #     break
    
    everything_array = everything_array.reshape(1, -1)
    landmark_array = landmark_array.reshape(1, -1)
    
    # print(landmark_array[0].shape)
    
    return rgb_frame, everything_array, landmark_array, eyes_and_gradients

def predict_gaze(video_capture, webcam_resolution,  
                 tk_width, tk_height, model, model_type, canvas):
    
    ret, frame = video_capture.read()
    (rgb_frame, everything_array, 
     landmark_array, eyes_and_gradients) = extract_facial_features(frame)
    
    try:
        if model_type == "neural net":
            X = np.expand_dims(eyes_and_gradients, 0)
            predicted_gaze = model.predict(X)[0]
        else:
            predicted_gaze = model.predict(everything_array)[0]
    
        print("Predicted gaze is: ", predicted_gaze)
    except ValueError:
        print("Could not predict, probably no face in image")
        predicted_gaze = np.array([0., 0.])
    
    # Scale the prediction to webcam resolution
    predicted_pixel = [predicted_gaze[0] * tk_width, predicted_gaze[1] * tk_height]
    # print(predicted_pixel, predicted_gaze, webcam_resolution)
    
    # Display the prediction as a grey circle
    small_dot(canvas, predicted_pixel[0], predicted_pixel[1], radius=5, fill="grey")
    
    return rgb_frame, everything_array, eyes_and_gradients, predicted_gaze

def capture(counter, canvas, model, model_type, training_X, training_y, tk_width, tk_height, 
            video_capture, rgb_frame, webcam_resolution, 
            landmark_array, eyes_and_gradients, current_target, predicted_gaze, move_smoothly=False, randomise_dot=True):
    """Will capture an image, coordinate pair when the user is looking at the dot"""
    
    path = "data/MZeina_1/"
    train_every = 1
        
    # print("About to learn...")
    if len(landmark_array) != 0:
        current_target = np.array(current_target) / np.array([tk_width, tk_height])
        
        if model_type == "neural net":
            # Neural network can train on each sample at a time, unlike random forest
            training_X = np.expand_dims(eyes_and_gradients, 0)
            training_y = np.expand_dims(current_target, 0)
            # training_X.append(eyes_and_gradients)
        else:
            training_X.append(landmark_array[0])
            training_y.append(current_target)
        
        plt.imsave(path + str(current_target) + ".jpg", rgb_frame)
        
        if counter % train_every == 0:
            model.fit(training_X, training_y)
        
    else:
        print("Face not detected, will not train on this sample")
    
    #canvas.delete("all")
    if move_smoothly:
        speed = 20
        scaled_counter = (counter * speed) % (tk_width * tk_height)
        target_x = (scaled_counter // tk_height * speed) % tk_width
        if (scaled_counter // tk_height)%2 == 0:
            target_y = scaled_counter % tk_height
        else:
            # reverse the direction for alternative lines, so it doesn't skip up to the top
            target_y = tk_height - scaled_counter % tk_height
        print("counter, scaled_counter, are :", counter, scaled_counter)
        print("about to move small circle to", target_x, target_y)
        small_dot(canvas, target_x, target_y)
        current_target = [target_x, target_y]
    elif randomise_dot:
        current_target = random_dot(canvas, tk_width, tk_height)
    # print(random_width, random_height)
    
    return model, current_target

def train_retrospectively(path_to_images, model):
    
    # Build data frame of past images, and the extract features
    # For any non-small neural network, I should replace this technique with a generator
    
    training_X = []
    training_y = []
    counter = 0
    path_to_images = "captures_one/"
    
    # Currently only looks in a single directory
    files = os.listdir(path_to_images)
    
    for file in files:
        print("About to process image number ", counter)
        image = cv2.imread(path_to_images + file)
        rgb_frame, everything_array, landmark_array, eyes_and_gradients = extract_facial_features(image)
        coordinates = [float(coordinate) for coordinate in file[1: -5].split(" ") if len(coordinate) != 0]
        
        training_X.append(eyes_and_gradients)
        training_y.append(coordinates)
        
        counter += 1
                       
    return training_X, training_y

In [3]:
# Functions that leverage the above to do something useful
def train_and_preview(pretrained_model=None):
    ########## Universal Initialisation ##########
    counter = 0
    captures_per_point = 5
    
    ########## Initialise Video Stream ##########
    video_capture = cv2.VideoCapture(0)
    
    # Extract webcam resolution
    ret, frame = video_capture.read()
    webcam_resolution = frame.shape[:2]
    # print(webcam_resolution) 
    
    ########## Initialise ML Model ##########
    
    # Dummy sample, to help initialising models
    (rgb_frame, dummy_features, 
     landmark_array, eyes_and_gradients) = extract_facial_features(frame)
    
    model_type = "neural net"
    
    if pretrained_model:
        model = pretrained_model
    elif model_type == "random forest":
        # Random forest 
        RF = RandomForestRegressor(n_estimators=500, n_jobs=-1, warm_start=False)
        model = MultiOutputRegressor(RF)
        model.fit(np.zeros_like(dummy_features), np.array([0.5, 0.5]).reshape(1, -1))
    elif model_type == "neural net":
        model = neural_model(eyes_and_gradients)
        model.summary()
        
    # To do:Train on existing pictures
    
    # Initialise
    training_X = []
    training_y = []
    
    ########## Initialise Tkinter ##########
    window = Tk()
    window.attributes("-fullscreen", True)
    
    window.update_idletasks() 
    tk_width = window.winfo_width() 
    tk_height = window.winfo_height()

    canvas = Canvas(window, width = tk_width, height = tk_height)
    canvas.pack()
    
    window.bind("<F11>", lambda event: window.attributes("-fullscreen",
                                        not window.attributes("-fullscreen")))
    window.bind("<Escape>", lambda event: window.attributes("-fullscreen", False))
    # window.bind("c", lambda event: capture(canvas, RFMO, tk_width, tk_height, video_capture, webcam_resolution, landmark_array, current_target, predicted_gaze))
    
    # Variables to store red dot target
    current_target = random_dot(canvas, tk_width, tk_height)
    
    while True:
        
        rgb_frame, landmark_array, eyes_and_gradients, predicted_gaze = predict_gaze(
            video_capture, webcam_resolution, tk_width, tk_height, model, model_type, canvas)
        
        if counter % 4 == 0 and counter != 0:
            canvas.delete("all")
            
            RFMO, current_target = capture(
                counter, canvas, model, model_type, training_X, training_y, tk_width, tk_height, video_capture, 
                rgb_frame, webcam_resolution, landmark_array, eyes_and_gradients, 
                current_target, predicted_gaze, randomise_dot=True)
                
        counter += 1
        
        # Update GUI
        window.update_idletasks()
        window.update()
    return

In [7]:
train_and_preview()

About to initialise a neural network with input shape:  (20, 30, 6)
Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 20, 30, 6)         0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 18, 28, 4)         220       
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 16, 26, 4)         148       
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 8, 13, 8)          40        
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 6, 11, 8)          584       
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 4, 9, 8)           584       
_________________________________________________________

Predicted gaze is:  [0.69399506 0.8271124 ]
Predicted gaze is:  [0.78751427 0.342043  ]
Predicted gaze is:  [0.7872325  0.10718215]
Predicted gaze is:  [0.35229227 0.27243385]
Epoch 1/1
Predicted gaze is:  [ 0.6971316  -0.01190475]
Predicted gaze is:  [0.7237834 0.2505372]
Predicted gaze is:  [0.64974403 0.14743295]
Predicted gaze is:  [0.58120376 0.22431116]
Epoch 1/1
Predicted gaze is:  [0.8268204  0.10877416]
Predicted gaze is:  [0.536121  0.2846065]
Predicted gaze is:  [1.0454475  0.22861853]
Predicted gaze is:  [0.579589  0.3486818]
Epoch 1/1
Predicted gaze is:  [1.041026   0.14152183]
Predicted gaze is:  [1.0641453  0.19466554]
Predicted gaze is:  [1.2099812  0.61792886]
Predicted gaze is:  [1.1858127 0.5499597]
Epoch 1/1
Predicted gaze is:  [1.2664746 0.6379895]
Predicted gaze is:  [1.1314116  0.39706242]
Predicted gaze is:  [1.015637  0.6972949]
Predicted gaze is:  [1.3041911  0.34093496]
Epoch 1/1
Predicted gaze is:  [1.1894969  0.46078092]
Predicted gaze is:  [0.9403878  0.44

Predicted gaze is:  [0.30435807 0.6781568 ]
Predicted gaze is:  [0.8952108  0.72415346]
Predicted gaze is:  [0.9884488  0.48383254]
Predicted gaze is:  [0.8742567  0.51994723]
Epoch 1/1
Predicted gaze is:  [1.0513551 0.7267562]
Predicted gaze is:  [0.26968402 0.39058465]
Predicted gaze is:  [0.380792   0.24444549]
Predicted gaze is:  [0.35956347 0.3143505 ]
Epoch 1/1
Predicted gaze is:  [0.37509045 0.23279758]
Predicted gaze is:  [0.24428532 0.800131  ]
Predicted gaze is:  [0.3129493 0.8566703]
Predicted gaze is:  [0.291754   0.93510514]
Epoch 1/1
Predicted gaze is:  [0.22512075 0.8690362 ]
Predicted gaze is:  [0.85121655 1.2415218 ]
Predicted gaze is:  [0.8233978 1.1520666]
Predicted gaze is:  [0.9004023 1.1487156]
Epoch 1/1
Predicted gaze is:  [0.82535243 1.0153704 ]
Predicted gaze is:  [0.23829778 0.49687544]
Predicted gaze is:  [0.11484661 0.18634906]
Predicted gaze is:  [0.03060838 0.42644933]
Epoch 1/1
Predicted gaze is:  [-0.00228725  0.22261113]
Predicted gaze is:  [0.19431198 

Predicted gaze is:  [0.8354603  0.06845733]
Predicted gaze is:  [0.37175605 0.20150648]
Predicted gaze is:  [0.17313598 0.11829602]
Predicted gaze is:  [0.19045147 0.12670937]
Epoch 1/1
Predicted gaze is:  [0.17547315 0.12425093]
Predicted gaze is:  [0.58953166 0.22489803]
Predicted gaze is:  [0.47406992 0.2852406 ]
Predicted gaze is:  [0.40885657 0.22708304]
Epoch 1/1
Predicted gaze is:  [0.67889905 0.3247522 ]
Predicted gaze is:  [0.631534   0.09231906]
Predicted gaze is:  [0.822276   0.18196492]
Predicted gaze is:  [0.6403891  0.17164522]
Epoch 1/1
Predicted gaze is:  [0.656794 0.135689]
Predicted gaze is:  [0.5424661  0.31627473]
Predicted gaze is:  [0.47340247 0.45904654]
Predicted gaze is:  [0.5601225  0.44454053]
Epoch 1/1
Predicted gaze is:  [0.68973416 0.42387646]
Predicted gaze is:  [0.07702402 0.32755363]
Predicted gaze is:  [0.3032613  0.27610993]
Predicted gaze is:  [0.11621972 0.21635261]
Epoch 1/1
Predicted gaze is:  [0.11695537 0.40434307]
Predicted gaze is:  [0.5315258

Predicted gaze is:  [1.125382   0.16976155]
Predicted gaze is:  [0.60761625 0.8772748 ]
Predicted gaze is:  [0.07074695 0.30845678]
Predicted gaze is:  [0.07180857 0.308044  ]
Epoch 1/1
Predicted gaze is:  [0.1449106  0.24423651]
Predicted gaze is:  [0.40701535 0.17145129]
Predicted gaze is:  [0.35048598 0.35075653]
Predicted gaze is:  [0.2731603  0.47729886]
Epoch 1/1
Predicted gaze is:  [0.2733941  0.43772742]
Predicted gaze is:  [0.63895786 0.40022764]
Predicted gaze is:  [0.7707744  0.26379168]
Predicted gaze is:  [0.8605849  0.11595824]
Epoch 1/1
Predicted gaze is:  [0.84211934 0.3695874 ]
Predicted gaze is:  [0.40965706 0.58180046]
Predicted gaze is:  [0.5469094  0.32760248]
Predicted gaze is:  [0.6026148 0.1263982]
Epoch 1/1
Predicted gaze is:  [0.51793545 0.13532849]
Predicted gaze is:  [-0.08200487  0.3985481 ]
Predicted gaze is:  [-0.04066393  0.33768606]
Predicted gaze is:  [0.0458844  0.23915312]
Epoch 1/1
Predicted gaze is:  [-0.00538557  0.41951767]
Predicted gaze is:  [0

Predicted gaze is:  [ 0.5603129  -0.07654276]
Predicted gaze is:  [0.5710977 0.1988803]
Predicted gaze is:  [1.0700082 0.640306 ]
Predicted gaze is:  [0.8485418 0.8024216]
Epoch 1/1
Predicted gaze is:  [0.8733451  0.50428253]
Predicted gaze is:  [0.41651592 1.2911384 ]
Predicted gaze is:  [0.25333112 1.0688143 ]
Predicted gaze is:  [0.30048472 1.1487402 ]
Epoch 1/1
Predicted gaze is:  [0.3267159 0.9123354]
Predicted gaze is:  [0.75372857 0.6851219 ]
Predicted gaze is:  [0.84797394 0.29518607]
Predicted gaze is:  [0.84544593 0.32899237]
Epoch 1/1
Predicted gaze is:  [0.80580896 0.41139364]
Predicted gaze is:  [0.47952998 0.8121802 ]
Predicted gaze is:  [0.5676166 0.9445092]
Predicted gaze is:  [0.6300428  0.99896353]
Epoch 1/1
Predicted gaze is:  [0.57618237 0.94487303]
Predicted gaze is:  [0.16944781 0.52023005]
Predicted gaze is:  [0.3522905  0.13574757]
Predicted gaze is:  [0.37020677 0.19413686]
Epoch 1/1
Predicted gaze is:  [0.33517087 0.18776144]
Predicted gaze is:  [0.6722499  0.

Predicted gaze is:  [0.51703095 0.55674237]
Predicted gaze is:  [0.93787795 0.7538744 ]
Predicted gaze is:  [0.89700305 0.8550027 ]
Predicted gaze is:  [0.7832952 0.7163778]
Epoch 1/1
Predicted gaze is:  [0.8116472  0.78500074]
Predicted gaze is:  [0.12834154 0.55586934]
Predicted gaze is:  [0.08609934 0.37144378]
Predicted gaze is:  [0.27630997 0.29540676]
Epoch 1/1
Predicted gaze is:  [0.16691901 0.30433998]
Predicted gaze is:  [0.1465389 0.3870296]
Predicted gaze is:  [0.23763865 0.59089744]
Predicted gaze is:  [0.17599887 0.58358544]
Epoch 1/1
Predicted gaze is:  [0.25790375 0.650638  ]
Predicted gaze is:  [0.9312325 0.6251978]
Predicted gaze is:  [1.0018016 0.2531385]
Predicted gaze is:  [0.8146784  0.20034133]
Epoch 1/1
Predicted gaze is:  [0.90356547 0.21901476]
Predicted gaze is:  [0.14881614 0.1921566 ]
Predicted gaze is:  [0.05468836 0.21619198]
Predicted gaze is:  [0.08217588 0.24576306]
Epoch 1/1
Predicted gaze is:  [0.21010096 0.14703234]
Predicted gaze is:  [0.6780312 0.8

Predicted gaze is:  [0.7383797  0.42895392]
Predicted gaze is:  [0.11812145 0.7833307 ]
Predicted gaze is:  [-0.01243074  0.7677792 ]
Predicted gaze is:  [0.05844104 0.9182324 ]
Epoch 1/1
Predicted gaze is:  [0.06333888 0.7912607 ]
Predicted gaze is:  [0.64952   0.7346037]
Predicted gaze is:  [0.5936166  0.50462884]
Predicted gaze is:  [0.83339953 0.54493845]
Epoch 1/1
Predicted gaze is:  [0.8822703 0.5372952]
Predicted gaze is:  [0.19136721 0.6900943 ]
Predicted gaze is:  [0.06716993 0.5097982 ]
Predicted gaze is:  [0.04632788 0.33721328]
Epoch 1/1
Predicted gaze is:  [-0.01840136  0.5223675 ]
Predicted gaze is:  [0.6277616 0.5006704]
Predicted gaze is:  [0.7115289  0.42917556]
Predicted gaze is:  [0.71815884 0.4431677 ]
Epoch 1/1
Predicted gaze is:  [0.6806491  0.41790825]
Predicted gaze is:  [0.7083171  0.47286332]
Predicted gaze is:  [0.5600294  0.41976112]
Predicted gaze is:  [0.5507795 0.5213867]
Epoch 1/1
Predicted gaze is:  [0.53387135 0.49815434]
Predicted gaze is:  [0.8591072

Predicted gaze is:  [0.539902   0.23973407]
Predicted gaze is:  [0.57525796 0.34745002]
Predicted gaze is:  [0.47204196 0.38899013]
Predicted gaze is:  [0.5037766  0.41159868]
Epoch 1/1
Predicted gaze is:  [0.39464438 0.46231896]
Predicted gaze is:  [0.79333156 0.683715  ]
Predicted gaze is:  [0.87373084 0.776492  ]
Predicted gaze is:  [0.75967836 0.68792975]
Epoch 1/1
Predicted gaze is:  [0.7691001 0.8872677]
Predicted gaze is:  [0.2258546 0.6231768]
Predicted gaze is:  [0.27521634 0.5304794 ]
Predicted gaze is:  [0.30815935 0.5224753 ]
Epoch 1/1
Predicted gaze is:  [0.22627306 0.61471254]
Predicted gaze is:  [0.31662118 0.5321416 ]
Predicted gaze is:  [0.44412947 0.60079503]
Predicted gaze is:  [0.3452211  0.46804157]
Epoch 1/1
Predicted gaze is:  [0.35930896 0.54980737]
Predicted gaze is:  [0.5189573  0.30762863]
Predicted gaze is:  [0.59793794 0.13624002]
Predicted gaze is:  [0.47208297 0.2608291 ]
Epoch 1/1
Predicted gaze is:  [0.47074863 0.3356313 ]
Predicted gaze is:  [0.3245384

TclError: invalid command name ".!canvas"

In [None]:
video_capture = cv2.VideoCapture(0)
RF = RandomForestRegressor(n_estimators=100, n_jobs=-1, warm_start=True)
RFMO = MultiOutputRegressor(RF)
RFMO.fit(np.zeros_like(extract_facial_features(video_capture)), np.array([0, 0]).reshape(1, -1))
# RFMO.predict(np.array([1,1,1]).reshape(1, -1))
# RFMO.fit(np.array([1,1,1]).reshape(1, -1), np.array([1, 0]).reshape(1, -1))

In [69]:
class ScreenshotGenerator(keras.utils.Sequence):
    
    def __init__(self, path_to_images, batch_size=4):
        
        self.path_to_images = path_to_images
        self.batch_size = batch_size
    
        self.files = []# os.listdir(path_to_images)
        self.filenames = []
        
        for root, dirs, files in os.walk(path_to_images):
            for name in files:
                self.files.append(os.path.join(root, name))
                self.filenames.append(name)
    
    def __len__(self):
        
        return len(self.files) // self.batch_size
    
    def __load__(self, index):
        """Returns and processes a single sample, in conjunction with __getitem__"""
        
        # Ensures that if an image is picked without a succesfully detected face, 
        #  it looks for another random one to replace it
        got_good_image = False
        
        while not got_good_image:
        
            file = self.files[index]
            filename = self.filenames[index]
                        
            image = cv2.imread(file)
            
            rgb_frame, everything_array, landmark_array, eyes_and_gradients = extract_facial_features(image)
            coordinates = [float(coordinate) for coordinate in filename[1: -5].split(" ") if len(coordinate) != 0]
            
            X = eyes_and_gradients
            y = coordinates
            
            if len(X) == 0:
                print("This image did not have a recognisable face, will pull a random one in its place")
                index = random.randint(0, self.__len__())
            else:
                got_good_image = True
        
        return X, y
    
    def __getitem__(self, batch):
        
        batch_X = [self.__load__(index)[0] for index in 
                   range((batch * self.batch_size), (batch + 1) * self.batch_size)]
        batch_y = [self.__load__(index)[1] for index in 
                   range((batch * self.batch_size), (batch + 1) * self.batch_size)]
        
        batch_X = np.array(batch_X)
        batch_y = np.array(batch_y)
        
        return batch_X, batch_y

    def on_epoch_end(self):
        
        return

In [39]:
video_capture = cv2.VideoCapture(0)
_, frame = video_capture.read()
video_capture.release()

In [40]:
(rgb_frame, dummy_features, 
     landmark_array, eyes_and_gradients) = extract_facial_features(frame)

In [41]:
model = neural_model(eyes_and_gradients)
#model.summary()

About to initialise a neural network with input shape:  (20, 30, 6)


In [71]:
screenshot_generator = ScreenshotGenerator("eye_tracking_data/", 4)
model.fit_generator(screenshot_generator, epochs=4)

Epoch 1/4
 860/5687 [===>..........................] - ETA: 2:57:40 - loss: 0.0200

KeyboardInterrupt: 