In [None]:
from __future__ import print_function, division

import glob
#import os
#import cv2
import scipy.io as sio

import torchvision.transforms.functional as TF
from torchvision import transforms

class MPI_Preprocess(nn.Module):
    """Turn one eye image (array-like) into a normalized, centre-cropped tensor.

    Pipeline: ``TF.to_tensor`` -> float -> resize to 256/224 of the target
    size -> centre crop to 96x160 -> ``Normalize([0.5], [0.5])``.
    """

    def __init__(self):
        super().__init__()
        # Single-channel normalisation: (value - 0.5) / 0.5.
        self.norm = transforms.Normalize([0.5], [0.5])

    @torch.no_grad()  # preprocessing never needs gradients
    def forward(self, x):
        """Preprocess a single image.

        Args:
            x: image exposing ``.copy()`` and accepted by ``TF.to_tensor``
                (the dataset in this notebook passes NumPy arrays).

        Returns:
            Tensor laid out CxHxW with H=96 and W=160.
        """
        crop_h, crop_w = (96, 160)
        scale = 256 / 224

        # copy() hands to_tensor a fresh array; callers may pass flipped
        # views (np.fliplr), which presumably cannot be wrapped directly.
        tensor = TF.to_tensor(x.copy()).float()  # CxHxW

        # Resize slightly larger than the target so the crop trims a border.
        enlarged = (int(scale * crop_h), int(scale * crop_w))
        tensor = TF.resize(tensor, enlarged)

        # Manual centre crop (offsets truncated towards zero).
        height, width = tensor.shape[-2:]
        row0 = int((height - crop_h) / 2)
        col0 = int((width - crop_w) / 2)
        tensor = tensor[:, row0: row0 + crop_h, col0: col0 + crop_w]

        return self.norm(tensor)

#"./gazeset/imgs"

class MPIIGaze(Dataset):
    """MPIIGaze evaluation subset: preprocessed eye images with gaze angles.

    The sample lists under ``Evaluation Subset/sample list for eye image``
    name one ``<day>/<image> <side>`` pair per line; the list's file name
    (without extension) is the person id. Images and gaze vectors are read
    on demand from ``Data/Normalized/<person>/<day>.mat``.

    Args:
        mpii_dir: root directory of the MPIIGaze dataset.
    """

    def __init__(self, mpii_dir: str = './mpi/MPIIGaze'):

        self.mpii_dir = mpii_dir

        # glob returns files in filesystem order; sort so that a dataset index
        # always refers to the same sample across runs and machines.
        eval_files = sorted(
            glob.glob(f'{mpii_dir}/Evaluation Subset/sample list for eye image/*.txt')
        )

        self.trans = MPI_Preprocess()

        self.eval_entries = []
        for ef in eval_files:
            person = os.path.splitext(os.path.basename(ef))[0]
            with open(ef) as f:
                for line in f:
                    line = line.strip()
                    if line == '':
                        continue
                    img_path, side = line.split()
                    day, img = img_path.split('/')
                    self.eval_entries.append({
                        'day': day,
                        'img_name': img,
                        'person': person,
                        'side': side
                    })

    def __len__(self):
        """Number of evaluation samples listed in the sample files."""
        return len(self.eval_entries)

    def __getitem__(self, idx):
        """Return the sample dict for ``idx`` (an int or a scalar tensor)."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        return self._load_sample(idx)

    def _load_sample(self, i):
        """Load, flip (right eye) and preprocess sample ``i``.

        Returns:
            dict with
            ``img``  - output of ``MPI_Preprocess``,
            ``gaze`` - ``np.array([-arcsin(-y), arctan2(-x, -z)])`` computed
            from the stored 3D gaze vector (presumably pitch, yaw),
            ``side`` - ``'left'`` or ``'right'`` as written in the sample list,
            ``name`` - mat path, image name and side concatenated.

        Raises:
            IndexError: ``i`` is outside the dataset (this is what ends a
                plain ``for sample in dataset`` loop).
            KeyError: the listed image is not present in the day's mat file.
        """
        entry = self.eval_entries[i]
        mat_path = os.path.join(self.mpii_dir, 'Data/Normalized', entry['person'], entry['day'] + '.mat')
        mat = sio.loadmat(mat_path)

        matches = np.argwhere(mat['filenames'] == entry['img_name'])
        if len(matches) == 0:
            # Must not surface as IndexError: inside __getitem__ that would be
            # taken as "end of dataset" and silently truncate iteration.
            raise KeyError(f"{entry['img_name']!r} is not listed in {mat_path}")
        row = matches[0][0]
        side = entry['side']

        eye = mat['data'][side][0, 0]
        img = eye['image'][0, 0][row]

        # Right eyes are mirrored so every image has the same orientation.
        if side == 'right':
            img = np.fliplr(img)

        img = self.trans(img)

        (x, y, z) = eye['gaze'][0, 0][row]

        theta = np.arcsin(-y)
        phi = np.arctan2(-x, -z)
        gaze = np.array([-theta, phi])

        return {
            'img': img,
            'gaze': gaze,
            'side': side,
            'name' : mat_path + entry['img_name'] + entry['side']
        }

In [None]:
def pitchyaw_to_vector(pitchyaws):
    r"""Map (pitch, yaw) angle pairs to 3D unit gaze vectors (NumPy version).

    Args:
        pitchyaws (:obj:`numpy.array`): :math:`(n\times 2)` angles in radians;
            column 0 is used as pitch, column 1 as yaw.
    Returns:
        :obj:`numpy.array` of shape :math:`(n\times 3)`, float64, one vector
        per row: ``(cos(pitch)*sin(yaw), sin(pitch), cos(pitch)*cos(yaw))``.
    """
    sines, cosines = np.sin(pitchyaws), np.cos(pitchyaws)
    cos_pitch = cosines[:, 0]
    columns = (
        cos_pitch * sines[:, 1],
        sines[:, 0],
        cos_pitch * cosines[:, 1],
    )
    # The result is always float64, whatever the input dtype.
    return np.stack(columns, axis=1).astype(np.float64)

In [None]:
# Multiply an angle in radians by this factor to get degrees.
radians_to_degrees = 180.0 / np.pi


def angularError(a, b):
    """Row-wise angle in degrees between two sets of gaze directions (NumPy).

    Args:
        a, b: arrays with one direction per row, either as 3D vectors
            ``(n, 3)`` or as angle pairs ``(n, 2)``; angle pairs are first
            converted with ``pitchyaw_to_vector``.

    Returns:
        Array of shape ``(n,)`` with the angle between ``a[i]`` and ``b[i]``
        in degrees.
    """
    a = pitchyaw_to_vector(a) if a.shape[1] == 2 else a
    b = pitchyaw_to_vector(b) if b.shape[1] == 2 else b

    dot = np.sum(np.multiply(a, b), axis=1)

    # Floor the norms so zero vectors do not cause a division by zero.
    a_norm = np.clip(np.linalg.norm(a, axis=1), a_min=1e-7, a_max=None)
    b_norm = np.clip(np.linalg.norm(b, axis=1), a_min=1e-7, a_max=None)

    similarity = np.divide(dot, np.multiply(a_norm, b_norm))

    # Rounding can push the cosine marginally outside [-1, 1] for (anti)parallel
    # vectors; arccos would then return NaN and poison any mean taken later.
    similarity = np.clip(similarity, -1.0, 1.0)

    return np.arccos(similarity) * radians_to_degrees

In [None]:
# Smoke test: build the evaluation dataset, report its size and show the first
# preprocessed eye image.
datasetm = MPIIGaze()

print('N', len(datasetm))

# Only the first sample is needed, so index it directly instead of starting a
# loop and breaking out of it.
sample = datasetm[0]
print(sample['name'])
img = sample['img'][None].to(device)  # add a batch dimension: 1xCxHxW

# matplotlib cannot read a tensor that lives on an accelerator, so bring the
# image back to host memory before plotting (a no-op when device is the CPU).
plt.imshow(img[0][0].cpu())

In [None]:
def pitchyaw_to_vector(pitchyaws):
    r"""Map (pitch, yaw) angle pairs to 3D unit gaze vectors (torch version).

    This redefines the NumPy function of the same name from an earlier cell;
    whichever cell ran last is the one in effect.

    Args:
        pitchyaws (:obj:`torch.Tensor`): :math:`(n\times 2)` angles in radians;
            column 0 is used as pitch, column 1 as yaw.
    Returns:
        :obj:`torch.Tensor` of shape :math:`(n\times 3)`, one vector per row:
        ``(cos(pitch)*sin(yaw), sin(pitch), cos(pitch)*cos(yaw))``. The result
        is allocated with ``torch.empty`` defaults, i.e. it does not follow the
        input's dtype or device.
    """
    sines, cosines = torch.sin(pitchyaws), torch.cos(pitchyaws)
    cos_pitch = cosines[:, 0]

    vectors = torch.empty((pitchyaws.shape[0], 3))
    vectors[:, 0] = cos_pitch * sines[:, 1]
    vectors[:, 1] = sines[:, 0]
    vectors[:, 2] = cos_pitch * cosines[:, 1]
    return vectors

# Multiply an angle in radians by this factor to get degrees.
radians_to_degrees = 180.0 / np.pi


def angularError(a, b):
    """Row-wise angle in degrees between two sets of gaze directions (torch).

    This redefines the NumPy function of the same name from an earlier cell;
    whichever cell ran last is the one in effect.

    Args:
        a, b: one direction per row, either as 3D vectors ``(n, 3)`` or as
            angle pairs ``(n, 2)``; angle pairs are first converted with
            ``pitchyaw_to_vector``. Tensors are used as-is; NumPy arrays and
            nested lists are converted with ``torch.as_tensor``.

    Returns:
        Tensor of shape ``(n,)`` with the angle between ``a[i]`` and ``b[i]``
        in degrees.
    """
    # Other cells of this notebook call this with NumPy arrays as well.
    a = torch.as_tensor(a)
    b = torch.as_tensor(b)

    a = pitchyaw_to_vector(a) if a.shape[1] == 2 else a
    b = pitchyaw_to_vector(b) if b.shape[1] == 2 else b

    ab = torch.sum(torch.mul(a, b), dim=1)

    # Floor the norms so zero vectors do not cause a division by zero.
    a_norm = torch.clamp(torch.linalg.norm(a, dim=1), min=1e-7)
    b_norm = torch.clamp(torch.linalg.norm(b, dim=1), min=1e-7)

    similarity = torch.div(ab, torch.mul(a_norm, b_norm))

    # Rounding can push the cosine marginally outside [-1, 1] for (anti)parallel
    # vectors; acos would then return NaN and poison the running mean error.
    similarity = torch.clamp(similarity, min=-1.0, max=1.0)

    return torch.acos(similarity) * radians_to_degrees

In [None]:
# Checkpoint to evaluate. This is an absolute path (it looks like a Colab
# Google Drive mount), so the cell only runs where that path exists.
load_name = "/content/drive/MyDrive/eye_w/weights/trans_2_3e4_att_256_1learnparam_noNorm_land_alt_MdataN-Step-Checkpoint_30_62160.ckpt"

# Restore the model; Gaze_Track_pl is defined outside this part of the notebook.
proj_a = Gaze_Track_pl.load_from_checkpoint(load_name)

# Inference only. Presumably freeze() disables gradients for all parameters
# and eval() switches layers to inference mode - TODO confirm for Gaze_Track_pl.
proj_a.freeze()
proj_a.eval()
#proj_a.to('cuda')

In [None]:
# Evaluate the loaded model on the whole MPIIGaze evaluation subset.
#
# Produces, for use by later cells:
#   errors             - per-sample angular error (degrees), in dataset order
#   biggest_errors     - the TOP_K largest errors, ascending
#   index_of_big_error - dataset index of each entry of biggest_errors
#   random_indexes     - TOP_K reproducible random dataset indices, ascending
TOP_K = 20
RANDOM_SEED = 0

datasetm = MPIIGaze()

eyenet = proj_a.to(device)

errors = []

# Parallel lists, always sorted by ascending error: position k of
# index_of_big_error is the dataset index of biggest_errors[k].
biggest_errors = [0] * TOP_K
index_of_big_error = [0] * TOP_K

# A locally seeded generator keeps the picked samples identical between runs
# without touching NumPy's global random state.
random_indexes = sorted(
    np.random.default_rng(RANDOM_SEED)
    .choice(len(datasetm), size=min(TOP_K, len(datasetm)), replace=False)
    .tolist()
)

print('N', len(datasetm))
for i, sample in enumerate(datasetm):
  img = sample['img'][None].to(device)  # add a batch dimension

  gaze_pred2, heatmaps_pred2, landmarks_pred2 = eyenet(img)

  gaze2 = torch.from_numpy(sample['gaze'].reshape((1, 2)))

  # Right-eye images were mirrored by the dataset, so mirror the second
  # predicted angle back before comparing with the ground truth.
  if sample['side'] == 'right':
      gaze_pred2[:, 1] = -gaze_pred2[:, 1]

  angular_error = angularError(gaze2, gaze_pred2)

  # Keep the TOP_K worst samples. The new error is inserted at its sorted
  # position in BOTH lists and the smallest entry is dropped from both, so the
  # error/index pairing can never drift apart.
  if angular_error > biggest_errors[0]:
    position = sum(1 for kept in biggest_errors if kept < angular_error)
    biggest_errors.insert(position, angular_error)
    index_of_big_error.insert(position, i)
    del biggest_errors[0]
    del index_of_big_error[0]

  errors.append(angular_error)

  if i%1000 == 0:
    print('---', i)
    print('error', angular_error)
    print('mean error', torch.mean(torch.stack(errors)))
    print('side', sample['side'])
    print('gaze', gaze2)
    print('gaze pred', gaze_pred2)


In [None]:
# Final result: mean of the per-sample angular errors collected in `errors`
# by the evaluation loop (angularError returns degrees).
print('mean error', torch.mean(torch.stack(errors)))

In [None]:
# Plot the randomly chosen evaluation samples (random_indexes) with the
# predicted landmarks and the predicted / ground-truth gaze drawn on top.
fig = plt.figure(figsize=(24, 24))
columns = 5
rows = 4

for i, index in enumerate(random_indexes):

  # random_indexes was drawn from datasetm, so index that dataset; load the
  # sample once and take image, gaze and side from the same record.
  sample = datasetm[index]

  img = sample['img'][None].to(device)  # the model lives on `device`
  gaze_pred, heatmaps_pred, landmarks_pred = eyenet(img)
  gaze = sample['gaze'].reshape((1, 2))
  gaze_pred = gaze_pred.detach().cpu().numpy()

  # Right-eye images were mirrored by the dataset; mirror the second
  # predicted angle back before comparing and drawing.
  if sample['side'] == 'right':
    print("right")
    gaze_pred[0, 1] = -gaze_pred[0, 1]
  else:
    print("left")

  # The angularError defined last in this notebook is the torch version.
  angular_error = angularError(torch.from_numpy(gaze), torch.from_numpy(gaze_pred))

  gaze_pred = gaze_pred[0]
  gaze = gaze[0]

  landmarks_pred = landmarks_pred.detach().cpu().numpy()

  landmarks_pred = proj_a.tranform_into_actual_coor(landmarks_pred)
  landmarks_pred = landmarks_pred[0]

  # CxHxW tensor -> HxWx3 uint8 image for OpenCV drawing.
  eye_img = img[0].cpu().numpy().copy()
  eye_img = eye_img.transpose(1, 2, 0)
  eye_img = cv2.merge((eye_img,eye_img,eye_img))

  eye_img = cv2.normalize(eye_img, None, alpha = 0, beta = 255, norm_type = cv2.NORM_MINMAX, dtype = cv2.CV_32F)
  eye_img = eye_img.astype(np.uint8)

  # Swap the two landmark columns; cv2.circle below takes them as (x, y).
  landmarks_pred[:, [1, 0]] = landmarks_pred[:, [0, 1]]

  for (x, y) in landmarks_pred[0:]:
    eye_img = cv2.circle(eye_img, (int(x), int(y)), 1, color=(0, 255, 0), thickness=3)

  # Both gaze rays start at the last landmark: prediction and ground truth.
  eye_img = draw_gaze(eye_img, landmarks_pred[-1], gaze_pred, color=(255, 0, 0))

  eye_img = draw_gaze(eye_img, landmarks_pred[-1], gaze, color=(0, 255, 0))

  print(index, angular_error)

  fig.add_subplot(rows, columns, i+1)
  plt.imshow(eye_img)


plt.show()

In [None]:
# Preview the first columns*rows images of `xx` after passing them through
# `dd`, with landmarks and gaze drawn on top.
# NOTE(review): `xx` and `dd` are not defined in this part of the notebook.
# From the usage below `xx` is indexable by 'img', 'gaze' and 'landmarks';
# `dd` presumably normalises an image batch - TODO confirm.
# NOTE(review): the loop writes the result back into xx['img'][i], so running
# this cell twice applies `dd` twice. It also requires a CUDA device.
w = 25
h = 25
fig = plt.figure(figsize=(20, 20))
columns = 5
rows = 2
for i in range(0, columns*rows):

    print("before norm")
    print(i, xx['img'][i].size(), torch.min(xx['img'][i]), torch.max(xx['img'][i]))
    
    # Run the image through dd as a batch of one on the GPU, then store the
    # un-batched result back on the CPU.
    xx['img'][i] = dd(xx['img'][i][None].to('cuda'), 1)
    xx['img'][i] = xx['img'][i].to('cpu')[0]

    print("after norm")
    print(i, xx['img'][i].size(), torch.min(xx['img'][i]), torch.max(xx['img'][i]))

    gaze = xx['gaze'][i].detach().numpy()
    image = xx['img'][i].detach().numpy()
    # Copy so the column swap below does not modify xx['landmarks'].
    landmarks = np.copy(xx['landmarks'][i].detach().numpy())

    # Move axis 0 last and stack three copies, then rescale to 0..255 uint8
    # for drawing.
    eye_img = np.copy(image)
    eye_img = eye_img.transpose(1, 2, 0)
    eye_img = cv2.merge((eye_img,eye_img,eye_img))

    eye_img = cv2.normalize(eye_img, None, alpha = 0, beta = 255, norm_type = cv2.NORM_MINMAX, dtype = cv2.CV_32F)
    eye_img = eye_img.astype(np.uint8)

    # Swap the two landmark columns; cv2.circle below takes them as (x, y).
    landmarks[:, [1, 0]] = landmarks[:, [0, 1]]

    for (x, y) in landmarks[0:]:
        eye_img = cv2.circle(eye_img, (int(x), int(y)), 1, color=(0, 255, 0), thickness=3)
        
    # The gaze ray starts at the last landmark.
    eye_img = draw_gaze(eye_img, landmarks[-1], gaze, color=(255, 0, 0))

    fig.add_subplot(rows, columns, i+1)
    plt.imshow(eye_img)

plt.show()

In [None]:
# Reloads the same checkpoint as the earlier loading cell, replacing `proj_a`
# with a fresh copy. The path is absolute (it looks like a Colab Google Drive
# mount), so the cell only runs where that path exists.
load_name = "/content/drive/MyDrive/eye_w/weights/trans_2_3e4_att_256_1learnparam_noNorm_land_alt_MdataN-Step-Checkpoint_30_62160.ckpt"

# Gaze_Track_pl is defined outside this part of the notebook.
proj_a = Gaze_Track_pl.load_from_checkpoint(load_name)

# Inference only. Presumably freeze() disables gradients for all parameters
# and eval() switches layers to inference mode - TODO confirm for Gaze_Track_pl.
proj_a.freeze()
proj_a.eval()
#proj_a.to('cuda')

In [None]:
# Root of the MPIIGaze dataset, relative to the notebook's working directory.
main_path = "./mpi/MPIIGaze"

# Directory tree that is walked below.
path = main_path + '/Data/Normalized'

# Collect the path of every file below `path`, recursing into subdirectories.
# os.walk gives no ordering guarantee, so the list order may differ per machine.
listOfFiles = []
for (dirpath, dirnames, filenames) in os.walk(path):
    listOfFiles += [os.path.join(dirpath, file) for file in filenames]

#full_name = os.path.join(person, day, side, img)

In [None]:
# Sample-list text files of the evaluation subset (order is filesystem dependent).
eval_files = glob.glob(main_path + '/Evaluation Subset/sample list for eye image/*.txt')
# read_files_mpi_val is defined outside this part of the notebook; presumably it
# turns the listed samples into names comparable with the keys of
# images_index_name - TODO confirm.
full_names = read_files_mpi_val(eval_files, path)

In [None]:
# Load everything found under Data/Normalized. process_mpi_files is defined
# outside this part of the notebook; judging by the result names it returns the
# images, their gaze labels and a name -> index lookup - TODO confirm.
images, gazes, images_index_name = process_mpi_files(listOfFiles)

In [None]:
# Inspect the first key of images_index_name to see the naming format it uses.
list(images_index_name.keys())[0]

In [None]:
# Inspect the first evaluation sample name, for comparison with the keys of
# images_index_name.
full_names[0]

In [None]:
# Split images and gazes into a training and a validation part. devide_val is
# defined outside this part of the notebook; presumably the samples named in
# full_names form the validation part - TODO confirm.
images_train, gazes_train, images_val, gazes_val = devide_val(images, gazes, images_index_name, full_names)

In [None]:
# Sanity check: shapes of the training split, the validation split and the
# full image array.
images_train.shape, images_val.shape, images.shape

In [None]:
# Visual check of the first validation image.
plt.imshow(images_val[0])

In [None]:
# Load a pretrained model and freeze every entry of `zero_class_token` except
# index 0 along dimension 1.
# The checkpoint path is absolute (it looks like a Colab Google Drive mount).
pre_trained_name_file = "/content/drive/MyDrive/eye_w/weights/trans_2_3e4_att_256_1learnparam_noNorm_land_alt_MdataN-Step-Checkpoint_29_60088.ckpt"
pretrained_model = Gaze_Track_pl.load_from_checkpoint(pre_trained_name_file)


def _mask_frozen_token_grad(grad):
    """Return a copy of `grad` with everything from index 1 on (dim 1) zeroed."""
    masked = grad.clone()
    masked[:, 1:] = 0
    return masked


for name, param in pretrained_model.network.named_parameters():
  if 'zero_class_token' in name:
    print(name)
    print(param)
    print(param.shape)
    # requires_grad is a per-tensor flag and can only be changed on leaf
    # tensors; `param[:, 1:]` is a non-leaf view, so assigning to its
    # requires_grad raises a RuntimeError. Zeroing that part of the gradient
    # with a hook freezes the same entries instead.
    # NOTE: optimizer-side updates that do not depend on the gradient (e.g.
    # weight decay) can still change the frozen entries.
    param.register_hook(_mask_frozen_token_grad)
    print(param)

In [None]:
feature_extcractor.zero_class_token
feature_extcractor.patch_embedding.resize.weight
feature_extcractor.patch_embedding.resize.bias
feature_extcractor.positional_embedding.positional_embedding
feature_extcractor.encoder.layers.0.att.block.att.in_proj_weight
feature_extcractor.encoder.layers.0.att.block.att.in_proj_bias
feature_extcractor.encoder.layers.0.att.block.att.out_proj.weight
feature_extcractor.encoder.layers.0.att.block.att.out_proj.bias
feature_extcractor.encoder.layers.0.layer_norm.weight
feature_extcractor.encoder.layers.0.layer_norm.bias
feature_extcractor.encoder.layers.0.ff.w_1.weight
feature_extcractor.encoder.layers.0.ff.w_1.bias
feature_extcractor.encoder.layers.0.ff.w_2.weight
feature_extcractor.encoder.layers.0.ff.w_2.bias
feature_extcractor.encoder.layers.0.ff.layer_norm.weight
feature_extcractor.encoder.layers.0.ff.layer_norm.bias
feature_extcractor.encoder.layers.1.att.block.att.in_proj_weight
feature_extcractor.encoder.layers.1.att.block.att.in_proj_bias
feature_extcractor.encoder.layers.1.att.block.att.out_proj.weight
feature_extcractor.encoder.layers.1.att.block.att.out_proj.bias
feature_extcractor.encoder.layers.1.layer_norm.weight
feature_extcractor.encoder.layers.1.layer_norm.bias
feature_extcractor.encoder.layers.1.ff.w_1.weight
feature_extcractor.encoder.layers.1.ff.w_1.bias
feature_extcractor.encoder.layers.1.ff.w_2.weight
feature_extcractor.encoder.layers.1.ff.w_2.bias
feature_extcractor.encoder.layers.1.ff.layer_norm.weight
feature_extcractor.encoder.layers.1.ff.layer_norm.bias
feature_extcractor.add_train_land.w_1.weight
feature_extcractor.add_train_land.w_1.bias
feature_extcractor.add_train_land.w_2.weight
feature_extcractor.add_train_land.w_2.bias
feature_extcractor.add_train_land.layer_norm.weight
feature_extcractor.add_train_land.layer_norm.bias
gaze_mlp.0.weight
gaze_mlp.0.bias
gaze_mlp.3.weight
gaze_mlp.3.bias
landmarks_extract.0.weight
landmarks_extract.0.bias
landmarks_extract.3.weight
landmarks_extract.3.bias