In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# NOTE: `google.colab` is only importable when running on Colab.
from google.colab import drive
drive.mount('/content/drive')

# Make the project folder the working directory; the relative '../ut_dataset/...'
# paths used in the rest of the notebook are resolved against it.
%cd '/content/drive/MyDrive/gaze_estimation'

In [None]:
import copy
import os
import pandas as pd
import numpy as np
import random
from tqdm.auto import tqdm

In [None]:
# Load the sampled dataset from parquet using the pyarrow engine.
# The path is relative, so it depends on the current working directory.
ut_sub = pd.read_parquet('../ut_dataset/sampled_utm_synth_dataset.parquet', engine='pyarrow')

In [None]:
# Convert the whole table into plain NumPy arrays.
# Participant id of every sample.
full_ids = np.asarray(ut_sub['participant_id'].to_list())
# Each 'image' cell is stacked and then reshaped to (n_samples, 36, 60).
full_images = np.reshape(np.stack(ut_sub['image'].to_list()), (-1, 36, 60))
# Two-column (pitch, yaw) arrays for head pose and gaze direction.
full_hps, full_gazes = (
    np.asarray(ut_sub[angle_columns])
    for angle_columns in (['head_pitch', 'head_yaw'], ['gaze_pitch', 'gaze_yaw'])
)

In [None]:
# Persist the full-dataset arrays; np.save appends the '.npy' extension itself.
os.makedirs('../ut_dataset', exist_ok=True)
for target, array in (
    ('../ut_dataset/full_ids', full_ids),
    ('../ut_dataset/full_images', full_images),
    ('../ut_dataset/full_hps', full_hps),
    ('../ut_dataset/full_gazes', full_gazes),
):
    np.save(target, array)

In [None]:
# Seed the stdlib RNG so the fold assignment below is the same on every run of this cell.
random.seed(43)
def ut_fold_maker(par_list, test_size=17):
    """Split participant ids into three subject-disjoint train/test folds.

    Two test sets of ``test_size`` ids each are drawn without replacement from
    ``par_list`` using the global ``random`` state; the third test set is
    whatever remains. Each train list is ``par_list`` minus that fold's test
    ids. One extra, randomly chosen id is then dropped from the third train
    list, which always leaves it one id shorter than ``2 * test_size``.
    NOTE(review): with the default size and 50 ids this makes all three train
    lists 33 long, which looks intended so they fit as equal-length DataFrame
    columns; confirm.

    Args:
        par_list: Sequence of participant ids. It is not modified.
        test_size: Number of ids held out in each of the first two folds.
            The default (17) reproduces the previously hard-coded split.

    Returns:
        ``(train_ids, test_ids)``: two lists holding one id list per fold.

    Raises:
        ValueError: If ``test_size`` is below 1 or ``par_list`` holds fewer
            than ``2 * test_size`` ids.
    """
    par_list = list(par_list)
    if test_size < 1 or len(par_list) < 2 * test_size:
        raise ValueError(
            f"need test_size >= 1 and at least {2 * test_size} participants, "
            f"got test_size={test_size} and {len(par_list)} participants"
        )

    def _without(ids, excluded):
        # Copy of `ids` with each excluded item removed once, order preserved.
        kept = list(ids)
        for item in excluded:
            kept.remove(item)
        return kept

    remaining = list(par_list)
    train_ids, test_ids = [], []

    # Folds 1 and 2: sample the hold-out set from the ids not yet used for testing.
    for _ in range(2):
        held_out = random.sample(remaining, test_size)
        remaining = _without(remaining, held_out)
        train_ids.append(_without(par_list, held_out))
        test_ids.append(held_out)

    # Fold 3: the leftover ids form the test set.
    last_train = _without(par_list, remaining)
    # Drop one extra random training participant (see docstring).
    additional_rm_id = random.sample(last_train, 1)
    last_train.remove(additional_rm_id[0])
    train_ids.append(last_train)
    test_ids.append(remaining)

    return train_ids, test_ids

# Build the three cross-subject folds from the distinct participant ids
# (np.unique returns them sorted, so the input order is deterministic).
train_ids, test_ids = ut_fold_maker(np.unique(full_ids).tolist())

In [None]:
# One column per fold holding that fold's training participant ids.
train_ids_df = pd.DataFrame({
    "fold_1": train_ids[0],
    "fold_2": train_ids[1],
    "fold_3": train_ids[2],
})

# Same layout for the test ids. The third list gets a literal 'empty'
# placeholder appended so its column can match the length of the other two.
test_ids_df = pd.DataFrame({
    "fold_1": test_ids[0],
    "fold_2": test_ids[1],
    "fold_3": test_ids[2] + ['empty'],
})

In [None]:
# Write both fold-membership tables to CSV (default options, so the row index is included).
for frame, csv_path in (
    (train_ids_df, '../ut_dataset/3_fold_train_ids.csv'),
    (test_ids_df, '../ut_dataset/3_fold_test_ids.csv'),
):
    frame.to_csv(csv_path)

In [None]:
def get_numpy(data_df):
    """Convert a dataset frame into four NumPy arrays.

    Args:
        data_df: DataFrame with the columns ``participant_id``, ``image``,
            ``head_pitch``, ``head_yaw``, ``gaze_pitch`` and ``gaze_yaw``.

    Returns:
        Tuple ``(id_vector, images, hps, gazes)``: the participant ids, the
        images reshaped to ``(-1, 36, 60)``, and the two-column
        ``(pitch, yaw)`` arrays for head pose and gaze.
    """
    participant_col = data_df["participant_id"].to_list()
    image_col = data_df["image"].to_list()

    id_vector = np.asarray(participant_col)
    # Stack the per-row image arrays, then restore the 36 x 60 layout.
    images = np.reshape(np.stack(image_col), (-1, 36, 60))

    angle_columns = (["head_pitch", "head_yaw"], ["gaze_pitch", "gaze_yaw"])
    hps, gazes = (np.asarray(data_df[columns]) for columns in angle_columns)

    return id_vector, images, hps, gazes

In [None]:
# Export every cross-subject fold as a set of .npy files.
# The output directory does not depend on the fold, so create it once up front;
# exist_ok=True makes the cell safe to re-run.
fold_path = '../ut_dataset/3-fold'
os.makedirs(fold_path, exist_ok=True)

for i in range(3):
    # Select the rows whose participant belongs to this fold's train / test id list.
    train_df = ut_sub[ut_sub["participant_id"].isin(train_ids[i])]
    test_df = ut_sub[ut_sub["participant_id"].isin(test_ids[i])]

    tr_idv, tr_imgs, tr_hps, tr_gazes = get_numpy(train_df)
    te_idv, te_imgs, te_hps, te_gazes = get_numpy(test_df)

    # NOTE: file names use the 0-based loop index (fold_0..fold_2), whereas the
    # CSV columns written earlier are named fold_1..fold_3.
    np.save(os.path.join(fold_path, f"fold_{i}_train_ids"), tr_idv)
    np.save(os.path.join(fold_path, f"fold_{i}_test_ids"), te_idv)
    np.save(os.path.join(fold_path, f"fold_{i}_train_images"), tr_imgs)
    np.save(os.path.join(fold_path, f"fold_{i}_test_images"), te_imgs)
    np.save(os.path.join(fold_path, f"fold_{i}_train_2d_hps"), tr_hps)
    np.save(os.path.join(fold_path, f"fold_{i}_test_2d_hps"), te_hps)
    np.save(os.path.join(fold_path, f"fold_{i}_train_2d_gazes"), tr_gazes)
    np.save(os.path.join(fold_path, f"fold_{i}_test_2d_gazes"), te_gazes)

In [None]:
# Row positions of every participant's samples, in np.unique (sorted) id order.
subj_indices = [np.where(full_ids == pid)[0] for pid in np.unique(full_ids)]

# Group each array by participant along a new leading axis.
# This only works if every participant contributes the same number of samples.
subj_ids = np.stack([full_ids[rows] for rows in subj_indices])
subj_images = np.stack([full_images[rows] for rows in subj_indices])
subj_hps = np.stack([full_hps[rows] for rows in subj_indices])
subj_gazes = np.stack([full_gazes[rows] for rows in subj_indices])

In [None]:
def ten_fold_index(N = 3000, K = 10, SEED = None):
    """Split the indices ``0..N-1`` into ``K`` equally sized index arrays.

    Args:
        N: Number of indices to split. ``np.split`` requires it to be
            divisible by ``K``.
        K: Number of folds.
        SEED: When given, NumPy's global RNG is seeded with it and the indices
            are shuffled before splitting. When ``None`` the indices stay in
            ascending order, so the folds are contiguous ranges.
            NOTE(review): confirm that skipping the shuffle without a seed is intended.

    Returns:
        List of ``K`` index arrays.
    """
    order = np.arange(N)
    if SEED is None:
        return np.split(order, K)

    # Seeds the *global* NumPy RNG, so this also affects later np.random calls.
    np.random.seed(SEED)
    np.random.shuffle(order)
    return np.split(order, K)

# One independent split per subject, seeded 20, 21, ..., 69 (50 subjects).
indice_list = [ten_fold_index(SEED = seed) for seed in range(20, 70)]


In [None]:
def _stack_within_folds(per_subject):
    """Assemble the within-subject folds for one per-subject array.

    For each of the 10 folds, take every one of the 50 subjects' samples that
    ``indice_list`` assigns to that fold, concatenate them across subjects,
    and stack the 10 results along a new leading axis.
    """
    return np.stack([
        np.concatenate([per_subject[subj][indice_list[subj][fold]] for subj in range(50)])
        for fold in range(10)
    ])

within_ids = _stack_within_folds(subj_ids)
within_images = _stack_within_folds(subj_images)
within_hps = _stack_within_folds(subj_hps)
within_gazes = _stack_within_folds(subj_gazes)

In [None]:
# Persist the within-subject 10-fold arrays; np.save appends the '.npy' extension itself.
os.makedirs('../ut_dataset/10-fold', exist_ok=True)
for target, array in (
    ('../ut_dataset/10-fold/within_ids', within_ids),
    ('../ut_dataset/10-fold/within_images', within_images),
    ('../ut_dataset/10-fold/within_hps', within_hps),
    ('../ut_dataset/10-fold/within_gazes', within_gazes),
):
    np.save(target, array)