# Model_based gaze estimation

In [1]:
import torch
import numpy as np
import os
import json

from src.datasources.unityeyes import UnityEyesDataset
import src.models.gaze_modelbased as GM
import src.utils.gaze as gaze_util

In [2]:
elg_model = torch.load('./models/v0.2/model-v0.2-(36, 60)-epoch-89-loss-0.7151.pth')
elg_model.eval()

ELG(
  (convPre): Conv(
    (conv): Conv2d(1, 32, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
    (batchNorm2d): BatchNorm2d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (residualBlockPre1): ResidualBlock(
    (convLow1): Conv(
      (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1))
      (batchNorm2d): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (convLow2): Conv(
      (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (batchNorm2d): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (convLow3): Conv(
      (conv): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1))
      (batchNorm2d): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (convUp): Conv(
      (conv): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1))
      (batchNorm2d): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track

In [3]:
val_root = "E:/Datasets/UnityEyes_Windows/800x600/val"
val_dataset = UnityEyesDataset(val_root, eye_image_shape=(36, 60), generate_heatmaps=True, random_difficulty=False)

In [4]:
import torch.utils.data as D
val_dataloader = D.DataLoader(val_dataset, batch_size=1)

In [11]:
file_stem = val_dataset.file_stems[1]
json_path = os.path.join(val_root, f'{file_stem}.json')
with open(json_path, 'r') as f:
    json_data = json.load(f)
look_vec = np.array(eval(json_data['eye_details']['look_vec']))[:3]

print("look_vec:", look_vec)

# Change the direction of x-axis to convenient the calculation of (pitch, yaw)
# In image coordinate system
look_vec[0] = -look_vec[0]
gaze = gaze_util.vector_to_pitchyaw(look_vec.reshape((1, 3))).flatten()
# Rotate coord system respect to x-axis
if gaze[1] > 0.0:
    gaze[1] = np.pi - gaze[1]
elif gaze[1] < 0.0:
    gaze[1] = -(np.pi + gaze[1])
gaze = gaze.astype(np.float32)
print("gaze:", gaze)

look_vec: [-0.6073  0.2457 -0.7555]
gaze: [0.24824658 0.6770771 ]


In [10]:
entry = next(val_dataset.entry_generator())

look_vec: [-0.6073  0.2457 -0.7555]
gaze: [0.24824658 0.67707712]


In [6]:
entry = next(iter(val_dataloader))

[-0.4474  0.3418 -0.8264]
[ 0.34884414 -0.49620443]


In [6]:
eye_input = entry['eye'].cuda()

In [7]:
heatmaps_label = entry['heatmaps'].cuda()
landmarks_label = entry['landmarks'].cuda()
radius_label = entry['radius'].cuda()
gaze_label = entry['gaze'].cuda()

In [8]:
heatmaps_predict, ldmks_predict, radius_predict = elg_model(eye_input)

In [9]:
ldmks_predict - landmarks_label

tensor([[[-0.0385,  0.0567],
         [ 0.1204, -0.4359],
         [-0.8040, -0.0328],
         [-0.1560, -0.3720],
         [ 0.2353, -0.0252],
         [-0.0995, -0.2216],
         [-0.4782,  0.1631],
         [-0.5874,  0.0181],
         [-0.0424, -0.4859],
         [ 0.2518, -0.3518],
         [ 0.2509, -0.1021],
         [ 0.2502,  0.0216],
         [ 0.0250,  0.2360],
         [ 0.0209,  0.2355],
         [-0.1944,  0.1314],
         [-0.1389, -0.2617],
         [-0.2212, -0.1346],
         [-0.4763,  0.1384]]], device='cuda:0', grad_fn=<SubBackward0>)

In [10]:
ldmks = ldmks_predict.cpu().detach().numpy()

In [11]:
iris_ldmks = np.array(ldmks[0][0:8])

In [12]:
iris_ldmks

array([[13.23596  , 20.954514 ],
       [14.5897255, 12.914356 ],
       [26.481472 ,  8.350813 ],
       [40.672108 , 14.19507  ],
       [47.656075 , 20.519577 ],
       [40.492092 , 24.457382 ],
       [26.573088 , 27.282785 ],
       [16.16451  , 24.81226  ]], dtype=float32)

In [13]:
iris_center = np.array(ldmks[0][-2])

In [14]:
iris_center

array([23.062601, 13.701266], dtype=float32)

In [15]:
eyeball_center = np.array(ldmks[0][-1])

In [16]:
eyeball_center

array([32.609627, 17.941515], dtype=float32)

In [17]:
eyeball_radius = radius_predict.cpu().detach().numpy()[0]

In [18]:
radius_predict - torch.unsqueeze(radius_label, dim=0)

tensor([[-0.3562]], device='cuda:0', grad_fn=<SubBackward0>)

In [19]:
gaze_predict = GM.estimate_gaze_from_landmarks(iris_ldmks, iris_center, eyeball_center, eyeball_radius)
predict = gaze_predict.reshape(1, 2)
predict

  result = scipy.optimize.minimize(gaze_fit_loss_func, x0=np.array([theta, phi, delta, phase]),


array([[0.2719683, 0.6788541]], dtype=float32)

In [20]:
label = gaze_label.cpu().detach().numpy()
label

array([[0.24832767, 0.67705154]], dtype=float32)

In [21]:
gaze_util.angular_error(predict, label)

array([1.35818017])