# Import Libraries

In [None]:
# Mount Google Drive so that its contents are reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# IPython magic: change the notebook's working directory to the
# gaze_estimation folder. The relative '../...' paths used by the later
# cells are resolved from this directory.
%cd './drive/MyDrive/gaze_estimation'

In [None]:
import os
import pandas as pd
import numpy as np
import cv2
import random

from tqdm.auto import tqdm
from glob import glob
from scipy.io import loadmat


In [None]:
def convert_pose(vector: np.ndarray) -> np.ndarray:
    """Turn a head-pose rotation into a 2-D (pitch, yaw) angle pair.

    The input is cast to float32 and passed through ``cv2.Rodrigues``; the
    third column of the resulting matrix is then expressed as two angles.

    Args:
        vector: Head-pose rotation. Presumably a 3-element rotation
            (axis-angle) vector -- confirm against the dataset format.

    Returns:
        A float32 array ``[pitch, yaw]`` in radians.
    """
    rotation_vector = np.array(vector).astype(np.float32)
    rotation_matrix, _jacobian = cv2.Rodrigues(rotation_vector)

    # Third column of the rotation matrix.
    z_axis = rotation_matrix[:, 2]

    angles = np.array([
        np.arcsin(z_axis[1]),              # pitch
        np.arctan2(z_axis[0], z_axis[2]),  # yaw
    ])
    return angles.astype(np.float32)


def convert_gaze(vector: np.ndarray) -> np.ndarray:
    """Turn a 3-D gaze direction into a 2-D (pitch, yaw) angle pair.

    The vector is scaled to unit length first, so only its direction
    matters.

    Args:
        vector: Array holding the three gaze components ``(x, y, z)``.

    Returns:
        A float32 array ``[pitch, yaw]`` in radians, where
        ``pitch = arcsin(-y)`` and ``yaw = arctan2(-x, -z)`` of the
        normalised vector.
    """
    unit = vector / np.linalg.norm(vector)
    gaze_x, gaze_y, gaze_z = unit

    angles = np.array([
        np.arcsin(-gaze_y),           # pitch
        np.arctan2(-gaze_x, -gaze_z)  # yaw
    ])
    return angles.astype(np.float32)

# Data Path

In [None]:
# Participant folders p00 .. p14.
participant_list = [f'p{index:02d}' for index in range(15)]

# Maximum number of sample-list rows consumed per participant.
SAMPLES_PER_PARTICIPANT = 3000

# Multiplier that negates the second (yaw) component of a [pitch, yaw] pair.
# It is used to mirror right-eye samples.
YAW_FLIP = np.array([1, -1], dtype=np.float32)

# One entry per selected sample:
# [participant_id, day, eye_location, image, head_pose, gaze]
selected_data = []
for participant in participant_list:
    # Each row of the sample list is "<day>/<image file>" followed by the
    # eye side, separated by a single space.
    subsampled_indices = np.asarray(pd.read_csv(
        f'../MPIIGaze/Evaluation Subset/sample list for eye image/{participant}.txt',
        delimiter=' ', header=None))

    # Many samples refer to the same day, so each day's .mat file is parsed
    # only once per participant instead of once per sample.
    day_cache = {}

    # Slicing (rather than indexing up to a fixed count) also copes with a
    # sample list that has fewer rows than the limit.
    for entry in tqdm(subsampled_indices[:SAMPLES_PER_PARTICIPANT]):
        path_parts = entry[0].split('/')
        day = path_parts[0]
        image_num = int(path_parts[1].split('.')[0])
        eye_loc = entry[1]

        if day not in day_cache:
            day_cache[day] = loadmat(
                f'../MPIIGaze/Data/Normalized/{participant}/{day}.mat',
                squeeze_me=True, struct_as_record=True)['data']
        # .tolist() on the zero-dimensional field unwraps the nested struct.
        mat_data = day_cache[day][eye_loc].tolist()

        # Image numbers in the sample list start at 1.
        sample_idx = image_num - 1
        image = mat_data['image'].tolist()[sample_idx]
        pose = mat_data['pose'].tolist()[sample_idx]
        gaze = mat_data['gaze'].tolist()[sample_idx]

        # Convert head pose and gaze from their 3-D form to [pitch, yaw].
        pose = convert_pose(pose)
        gaze = convert_gaze(gaze)

        # Mirror right-eye samples: flip the image horizontally and negate
        # the yaw of both head pose and gaze.
        if eye_loc == 'right':
            image = image[:, ::-1]
            pose *= YAW_FLIP
            gaze *= YAW_FLIP

        # Copy the image so the stored sample does not keep the whole day's
        # image array alive through a NumPy view.
        selected_data.append([participant, day, eye_loc, image.copy(), pose, gaze])



In [None]:
# Assemble the collected samples into a table with one row per sample.
column_names = [
    "participant_id",
    "day",
    "eye_location",
    "image",
    "head_pose",
    "gaze",
]
data_df = pd.DataFrame(selected_data, columns=column_names)
# Keep the frame as the cell's last expression so the notebook displays it.
data_df

In [None]:
# Output directory for the preprocessed dataset.
save_path = "../mpii_dataset"
# exist_ok=True makes this a no-op when the directory already exists, which
# removes the need for a separate existence check before creating it.
os.makedirs(save_path, exist_ok=True)
# Optional Parquet export (currently disabled):
# data_df.to_parquet(os.path.join(save_path, "mpii_preprocessed_dataset.parquet"), engine='pyarrow', index=False)

In [None]:
# Pull each DataFrame column out as a single NumPy array.
participant_ids = data_df["participant_id"].to_list()
id_vector = np.asarray(participant_ids)

# The remaining columns hold one array per row; stack them along a new
# leading axis.
images, head_poses, gazes = (
    np.stack(data_df[column].to_list())
    for column in ("image", "head_pose", "gaze")
)

In [None]:
# Write every array into the output directory defined by save_path, so the
# directory that was created and the directory written to cannot diverge.
# np.save appends the ".npy" extension to each file name.
arrays_to_save = {
    'full_ids': id_vector,
    'full_images': images,
    'full_2d_hps': head_poses,
    'full_2d_gazes': gazes,
}
for file_stem, array in arrays_to_save.items():
    np.save(os.path.join(save_path, file_stem), array)