In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from utils_analysis import get_viewing_prop

In [2]:
# Object positions: only the "data" array of the archive is kept.
obj_pos_upsample = np.load(
    "../dataset/Nakano_etal_2010/preprocessed_data/obj_pos_upsample.npz"
)["data"]

# Gaze positions for the ViT models. Both archives are kept as lazy NpzFile
# handles and indexed by key in later cells.
# NOTE(review): allow_pickle=True unpickles object arrays, which can execute
# arbitrary code -- only load archives produced by this project's own pipeline.
vit_gaze_pos = np.load(
    "../dataset/Nakano_etal_2010/preprocessed_data/vit_gaze_pos_upsample.npz",
    allow_pickle=True,
)
vit_official_gaze_pos = np.load(
    "../dataset/Nakano_etal_2010/preprocessed_data/vit_official_gaze_pos_upsample.npz",
    allow_pickle=True,
)

In [3]:
# Keys used to index vit_gaze_pos: training method first, then str(depth).
training_methods = ["dino", "supervised"]
depth_list = [4, 8, 12]

# Sizes shared by every result buffer allocated below.
# num_obj is taken from axis 1 of the object-position array.
num_obj = obj_pos_upsample.shape[1]
# The remaining sizes come from the 5-D "dino" / "4" entry; its axes 1 and 4
# are ignored here.
# NOTE(review): axis meanings are inferred from the variable names -- confirm
# against the preprocessing code.
num_models, _, num_heads, num_sampling, _ = vit_gaze_pos["dino"].item()["4"].shape

In [4]:
# Build gaze_weight_dict[training_method][str(depth)] -> array of shape
# (num_models, depth, num_heads, num_sampling, num_obj), filled one
# (model, layer, head) slice at a time by get_viewing_prop.
# NOTE(review): get_viewing_prop lives in utils_analysis; presumably it returns
# a (num_sampling, num_obj) array per head -- confirm there.
gaze_weight_dict = {}
for tm in training_methods:
    gaze_weight_dict[tm] = {}
    # Each training method is stored as a 0-d object array wrapping a dict
    # keyed by str(depth); .item() unwraps it.
    vit_gaze_pos_tm = vit_gaze_pos[tm].item()
    for depth in depth_list:
        print(tm, depth)
        depth_key = str(depth)
        vit_gaze_pos_depth = vit_gaze_pos_tm[depth_key]
        gaze_weight_model = np.zeros(
            (num_models, depth, num_heads, num_sampling, num_obj)
        )
        for m in tqdm(range(num_models)):
            for d in tqdm(range(depth)):
                for h in range(num_heads):
                    gaze_weight_model[m, d, h] = get_viewing_prop(
                        vit_gaze_pos_depth[m, d, h],
                        obj_pos_upsample[0],
                        obj_pos_upsample[1],
                    )
        gaze_weight_dict[tm][depth_key] = gaze_weight_model

dino 4


  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

dino 8


  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

dino 12


  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

supervised 4


  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

supervised 8


  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

supervised 12


  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

In [5]:
# Repeat the per-head get_viewing_prop computation for the "<method>_deit_small16"
# entries of vit_official_gaze_pos and store each result in gaze_weight_dict
# under the same key.
for tm in training_methods:
    model_key = f"{tm}_deit_small16"
    vit_gaze_pos_key = vit_official_gaze_pos[model_key]
    # Read the layer and head counts from this array BEFORE allocating the
    # result buffer. Previously the buffer was allocated first, so its size
    # came from whatever `depth` / `num_heads` were left over from earlier
    # cells, which only matched by coincidence.
    # Note: this rebinds the notebook-level `depth` and `num_heads`.
    depth, num_heads, _, _ = vit_gaze_pos_key.shape
    # num_sampling and num_obj are the notebook-level sizes defined earlier.
    gaze_weight_model = np.zeros((depth, num_heads, num_sampling, num_obj))
    for d in tqdm(range(depth)):
        for h in range(num_heads):
            gaze_weight = get_viewing_prop(vit_gaze_pos_key[d, h],
                                           obj_pos_upsample[0], obj_pos_upsample[1])
            gaze_weight_model[d, h] = gaze_weight
    gaze_weight_dict[model_key] = gaze_weight_model

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

In [6]:
# Write every top-level entry of gaze_weight_dict into one compressed archive,
# one archive member per key.
# NOTE(review): the "dino" / "supervised" entries are nested dicts, so numpy
# stores them as pickled object arrays; reading them back needs
# allow_pickle=True followed by .item().
np.savez_compressed(
    "../dataset/Nakano_etal_2010/preprocessed_data/gaze_weight_vit.npz",
    **gaze_weight_dict,
)

In [7]:
# Sanity check: display the top-level keys collected in gaze_weight_dict.
gaze_weight_dict.keys()

dict_keys(['dino', 'supervised', 'dino_deit_small16', 'supervised_deit_small16'])