# Import Libraries

In [15]:
import os
import pandas as pd
import numpy as np
import cv2
import random
random.seed(43)
import warnings
warnings.filterwarnings("ignore")

from tqdm.auto import tqdm
from glob import glob
from scipy.io import loadmat

# Data Path

In [2]:
data_path = "/Volumes/T7/DKU/GazeEstimation/dataset/MPIIGaze/Data/Normalized"
mat_path = sorted(glob(os.path.join(data_path,"*","*.mat"))) # p1*으로 수집한 데이터 경로 정의
print(f"Find {len(mat_path)} Data") # 전체 mat 파일 수 출력

Find 521 Data


In [19]:
# https://www.mpi-inf.mpg.de/fileadmin/_processed_/1/4/csm_MPIIGaze_filestruture_3caf5d24b3.png

## Mat to Dataframe

In [5]:
def convert_pose(vector: np.ndarray) -> np.ndarray:
    rot = cv2.Rodrigues(np.array(vector).astype(np.float32))[0]
    vec = rot[:, 2]
    pitch = np.arcsin(vec[1])
    yaw = np.arctan2(vec[0], vec[2])
    return np.array([pitch, yaw]).astype(np.float32)


def convert_gaze(vector: np.ndarray) -> np.ndarray:
    x, y, z = vector
    pitch = np.arcsin(-y)
    yaw = np.arctan2(-x, -z)
    return np.array([pitch, yaw]).astype(np.float32)

In [13]:
eye_loc_list = ["right", "left"] # 눈 위치 정의

# mat 파일 1개씩 작업 수행
datas = []
for idx in tqdm(range(len(mat_path))):
    cur_mat_path = mat_path[idx].replace("\\","/")
    par_id = cur_mat_path.split("/")[-2] # 참가자 id
    day_id = cur_mat_path.split("/")[-1].split(".")[0] # 수집 일자
    
    # load mat
    mat_data = loadmat(cur_mat_path, squeeze_me=True, struct_as_record=True)['data']
    # 눈 위치별 데이터 추출
    for loc in eye_loc_list:
        eye_loc_data = mat_data[loc].tolist()
        
        # 이미지 데이터
        image_data = eye_loc_data["image"].tolist()
        if len(image_data.shape) < 3:
            image_data = image_data[np.newaxis,:]
        
        # Head Pose 데이터
        pose_data = eye_loc_data["pose"].tolist()
        if len(pose_data.shape) < 2:
            pose_data = pose_data[np.newaxis,:]
        
        # Gaze 데이터
        gaze_data = eye_loc_data["gaze"].tolist()
        if len(gaze_data.shape) < 2:
            gaze_data = gaze_data[np.newaxis,:]
        
        
        for i in range(len(image_data)):
            if eye_loc_list == "left":
                image = image_data[i]
                pose = convert_pose(pose_data[i])
                gaze = convert_gaze(gaze_data[i])
            else:
                image = image_data[i][:, ::-1]
                pose = convert_pose(pose_data[i]) * np.array([1, -1])
                gaze = convert_gaze(gaze_data[i]) * np.array([1, -1])
            
            data_list = [par_id, day_id, loc, image.ravel(), pose[0], pose[1], gaze[0], gaze[1]]
            datas.append(data_list)

# 리스트에 담아두었던 정보들을 DataFrame으로 생성
data_df = pd.DataFrame(columns=["participant_id","day","eye_location","image","head_pitch","head_yaw","gaze_pitch","gaze_yaw"], data=datas)
data_df.head(3)
data_df = data_df.sort_values(by=['participant_id', 'day']).reset_index(drop=True)

  0%|          | 0/521 [00:00<?, ?it/s]

# Save Total Data

In [14]:
save_path = "./mpii_dataset"
if os.path.isdir(save_path) == False:
    os.makedirs(save_path)
save_file = os.path.join(save_path, "mpii_dataset.parquet")
data_df.to_parquet(save_file, engine='pyarrow', index=False)

# Extract Sampling Data

In [16]:
par_list = sorted(list(set(list(data_df['participant_id'].values))))
right_df = pd.DataFrame()
left_df = pd.DataFrame()

for i in range(len(par_list)):
    par_df = data_df[data_df["participant_id"] == par_list[i]]
    par_right_df = par_df[par_df["eye_location"] == "right"]
    par_left_df = par_df[par_df["eye_location"] == "left"]
    
    par_right_idx_list = list(par_right_df.index)
    par_left_idx_list = list(par_left_df.index)
    
    while True:
        if len(par_right_idx_list) < 1500:
            par_right_idx_list.extend(par_right_idx_list)
        else:
            break
    while True:
        if len(par_left_idx_list) < 1500:
            par_left_idx_list.extend(par_left_idx_list)
        else:
            break
    
    sample_right_idx_list = random.sample(par_right_idx_list, 1500)
    sample_left_idx_list = random.sample(par_left_idx_list, 1500)
    
    sample_right_df = par_right_df.loc[sample_right_idx_list]  # 수정: iloc 대신 loc를 사용해야 함
    sample_left_df = par_left_df.loc[sample_left_idx_list]  # 수정: iloc 대신 loc를 사용해야 함
    
    right_df = pd.concat([right_df, sample_right_df])
    left_df = pd.concat([left_df, sample_left_df])

sampling_dataset = pd.concat([right_df, left_df])
sampling_dataset = sampling_dataset.sort_values(by=['participant_id', 'day']).reset_index(drop=True)

In [18]:
sampling_dataset.tail(3)

Unnamed: 0,participant_id,day,eye_location,image,head_pitch,head_yaw,gaze_pitch,gaze_yaw
44997,p14,day07,left,"[233, 227, 222, 219, 215, 215, 203, 194, 182, ...",0.036259,-0.260125,-0.034857,-0.155305
44998,p14,day07,left,"[151, 144, 109, 104, 115, 109, 104, 99, 109, 1...",0.095564,-0.121666,-0.255056,-0.073477
44999,p14,day07,left,"[147, 155, 143, 128, 120, 114, 106, 102, 84, 8...",0.078855,-0.185464,-0.098753,-0.105999


In [17]:
save_path = "./mpii_dataset"
if os.path.isdir(save_path) == False:
    os.makedirs(save_path)
save_file = os.path.join(save_path, "sampled_mpii_dataset.parquet")
sampling_dataset.to_parquet(save_file, engine='pyarrow', index=False)