In [1]:
# Notebook setup: third-party libraries, the pretrained CLIPSeg model, and the
# project-local helpers used by the evaluation cells further down.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os

from scipy.stats import pearsonr
from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation

# Load the CLIPSeg processor/model pair once at start-up; both objects are
# later handed to get_clipseg_heatmap() for every sample.
clip_processor = CLIPSegProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
clip_model = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined")

# Put the parent of the current working directory on sys.path before the
# project imports below run.
# NOTE(review): presumably config.py, VLM_model_dot_relative.py and
# file_managing.py live in that parent directory - confirm. This depends on
# the directory the kernel was started from.
# NOTE(review): `os` was already imported above; this second import is redundant.
import os
import sys
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
sys.path.append(parent_dir)
from config import AGD20K_PATH, model_name

from VLM_model_dot_relative import MetricsTracker
# NOTE(review): `prompt_dict_obj` appears twice in this import list; the
# duplicate is harmless but can be dropped.
from file_managing import (
    load_selected_samples,
    get_actual_path,
    get_gt_path,
    load_ground_truth,
    prompt_dict_obj,
    get_clipseg_heatmap,
    calculate_metrics,
    prompt_dict_obj
)

def min_max_normalize(arr):
    """Linearly rescale ``arr`` so that its minimum maps to 0.

    A constant array (max equals min) yields ``np.zeros_like(arr)``.
    Otherwise the result is ``(arr - min) / (max - min + 1e-8)``, so the
    largest value lands just below 1 because of the small epsilon.
    """
    lowest = arr.min()
    value_range = arr.max() - lowest

    # Guard clause: a flat input has no range to stretch.
    if value_range == 0:
        return np.zeros_like(arr)

    shifted = arr - lowest
    return shifted / (value_range + 1e-8)



# Load the Qwen3-VL processor and keep a handle on its tokenizer.
# Only the processor is instantiated in this cell; the imported
# Qwen3VLForConditionalGeneration class is not used here.
from transformers import Qwen3VLForConditionalGeneration, AutoProcessor
# NOTE(review): this rebinds `model_name`, overriding the value imported from
# `config` earlier in this cell. The f-prefix is unnecessary (no placeholders).
model_name= f"Qwen/Qwen3-VL-32B-Instruct"
processor = AutoProcessor.from_pretrained(model_name)
tok = processor.tokenizer

# Bare expression: shown as the cell's output so the dataset root can be checked.
AGD20K_PATH

  from .autonotebook import tqdm as notebook_tqdm
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


'/home/DATA/AGD20K'

In [2]:
import numpy as np

def check_heatmap_containment(heatmap_top, heatmap_obj, threshold=0.15, containment_ratio=0.8):
    """
    Decide whether the active region of ``heatmap_top`` is a smaller region
    lying (mostly) inside the active region of ``heatmap_obj``.

    Both heatmaps are binarised with ``value > threshold``.

    Args:
        heatmap_top: Array-like heatmap, or any object exposing ``.cpu()``
            (e.g. a tensor), which is detached and converted to NumPy first.
        heatmap_obj: Heatmap of the reference region, same conventions as
            ``heatmap_top``. Must have a shape compatible with ``heatmap_top``.
        threshold (float): Values strictly greater than this count as active.
        containment_ratio (float): Minimum fraction of the top region's active
            area that must overlap the object region (default 0.8 = 80%).

    Returns:
        bool: True only if all of the following hold:
            * the top mask has at least one active element,
            * the top mask has strictly fewer active elements than the
              object mask,
            * intersection area >= top area * ``containment_ratio``.
    """
    # 1. Convert tensor-like inputs to NumPy arrays.
    if hasattr(heatmap_top, 'cpu'):
        heatmap_top = heatmap_top.detach().cpu().numpy()
    if hasattr(heatmap_obj, 'cpu'):
        heatmap_obj = heatmap_obj.detach().cpu().numpy()

    # Accept plain nested lists as well; a no-op for existing ndarrays.
    heatmap_top = np.asarray(heatmap_top)
    heatmap_obj = np.asarray(heatmap_obj)

    # 2. Binary masks of the active regions.
    mask_top = heatmap_top > threshold
    mask_obj = heatmap_obj > threshold

    # 3. Areas, as plain Python ints.
    area_top = int(np.count_nonzero(mask_top))
    area_obj = int(np.count_nonzero(mask_obj))

    # An empty top region cannot be "contained" in anything.
    if area_top == 0:
        return False

    # Condition 1: the top region must be strictly smaller than the object.
    is_smaller = area_top < area_obj

    # Condition 2: enough of the top region overlaps the object region,
    # i.e. (intersection_area / area_top) >= containment_ratio.
    intersection_area = int(np.count_nonzero(np.logical_and(mask_top, mask_obj)))
    is_inside = intersection_area >= (area_top * containment_ratio)

    # Always return a built-in bool (previously np.bool_ on this path, while
    # the early exit above returned a Python bool).
    return bool(is_smaller and is_inside)

In [6]:
# Evaluation cell: for every stored generation result, combine the averaged
# attention heatmap with a heatmap from get_clipseg_heatmap(), score the result
# against the ground truth and save a six-panel diagnostic figure.
#
# Per sample:
#   1. Get a heatmap for the object name and binarise it on the 31x31 grid.
#   2. For every generated token, sum its attention heatmaps and score the
#      token by the attention mass that falls inside the object mask.
#   3. Take the best-scoring token plus the token that follows it as an
#      alternative text prompt.
#   4. Use the alternative heatmap only if check_heatmap_containment() says it
#      is a smaller region inside the object heatmap; otherwise fall back to
#      the object heatmap.
#   5. Multiply attention x selected heatmap, apply gamma, upsample, blur,
#      min-max normalise, then compute KLD / SIM / NSS via calculate_metrics().
import os
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm

metrics_tracker_alloutput = MetricsTracker(name="all_output")

# Output directory for the diagnostic figures.
Layername = "clipseg_top1"
output_dir = f"./output_{Layername}"  # directory name encodes the experiment variant
os.makedirs(output_dir, exist_ok=True)

# Tunables.
GRID_SHAPE = (31, 31)      # resolution of the stored attention heatmaps
CLIP_THRESHOLD = 0.15      # binarisation threshold for the object mask
GAMMA = 0.75               # exponent applied to the attention x CLIPSeg product
NUM_RESULT_SHARDS = 24     # result pickles are numbered 1..24

# NOTE(review): POS_ALPHA and this initial pos_map are not used by the loop
# below (pos_map is reassigned per sample); kept so the names stay defined.
POS_ALPHA = 0
results_list = []
pos_map = np.zeros(GRID_SHAPE, dtype=np.float32)

for i in range(1, NUM_RESULT_SHARDS + 1):
    pkl_path = f"output_results/attention_result_full_output_32B_{i}.pkl"
    if not os.path.exists(pkl_path):
        continue

    # SECURITY: unpickling can execute arbitrary code. Only load result files
    # produced by this project.
    df_output = pd.read_pickle(pkl_path)

    for idx, row in df_output.iterrows():
        object_name = row['object']
        action = row['action']
        filename = row['filename']
        output_description = row['output_sentence']
        output_attentions = row['output_attentions']
        # Alternative prompt; only used by the commented-out argument below.
        PLSP_name = prompt_dict_obj[action][object_name]

        # splitext keeps dots inside the stem intact (unlike split('.')[0]).
        file_stem = os.path.splitext(filename)[0]
        file_name_real = f"{AGD20K_PATH}/Seen/testset/egocentric/{action}/{object_name}/{filename}"
        gt_path = f"{AGD20K_PATH}/Seen/testset/GT/{action}/{object_name}/{file_stem}.png"

        # --- Load image ---
        if not os.path.exists(file_name_real):
            print(f"Image not found: {file_name_real}")
            continue

        orig_img = cv2.imread(file_name_real)
        if orig_img is None:
            # cv2.imread signals an unreadable/corrupt file by returning None.
            print(f"Image could not be decoded: {file_name_real}")
            continue
        orig_img = cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB)
        h, w, _ = orig_img.shape

        print(f"[{idx}] Processing: {action} - {object_name}")

        # --- 1. Object heatmap and binary mask on the attention grid ---
        clip_object_heatmap = get_clipseg_heatmap(
            file_name_real,
            clip_model,
            clip_processor,
            object_name,
            # PLSP_name,
        )
        clip_heatmap_resized = cv2.resize(clip_object_heatmap, GRID_SHAPE, interpolation=cv2.INTER_LINEAR)
        clip_binary_mask = (clip_heatmap_resized > CLIP_THRESHOLD).astype(np.float32)

        # --- 2. Per-token attention heatmaps and scores ---
        sum_heatmap = np.zeros(GRID_SHAPE, dtype=np.float32)
        total_head_count = 0
        token_scores = []

        for token_idx, token in enumerate(output_attentions):
            token_heatmap = np.zeros(GRID_SHAPE, dtype=np.float32)
            token_head_count = 0

            for each_attention in token['attentions']:
                sum_heatmap += each_attention['heatmap']
                token_heatmap += each_attention['heatmap']
                token_head_count += 1

            # A token without any attention entry is treated as a hard error.
            if token_head_count == 0:
                raise ValueError("No valid tokens found.")
            total_head_count += token_head_count

            token_scores.append({
                "token": token['token_str'],
                "token_idx": token_idx,
                # attention mass inside the object mask
                "score": (token_heatmap * clip_binary_mask).sum(),
                # total attention mass
                "score_ori": token_heatmap.sum(),
                "heatmap": token_heatmap,
                "count": token_head_count
            })

        if len(token_scores) == 0:
            raise ValueError("No valid tokens found.")

        # --- 3. Best-scoring token and the token that follows it ---
        # Ascending stable sort; only the single best token is used ("top1").
        sorted_tokens = sorted(token_scores, key=lambda x: x['score'])
        num_select = 1
        top_tokens = sorted_tokens[-num_select:][::-1]
        top_token_idx = top_tokens[0]['token_idx']
        top_token_text = top_tokens[0]['token']

        # token_idx equals the position in token_scores, so the follower can
        # be indexed directly. The best token may be the last one generated,
        # in which case there is no follower (this used to crash).
        following_token_idx = top_token_idx + 1
        has_following = following_token_idx < len(token_scores)
        following_text = token_scores[following_token_idx]['token'] if has_following else ''
        if has_following:
            candidate_text = top_token_text + ' ' + following_text
        else:
            candidate_text = top_token_text

        clip_top_heatmap = get_clipseg_heatmap(
            file_name_real,
            clip_model,
            clip_processor,
            candidate_text,
            # PLSP_name
        )

        # --- 4. Normalised average attention map ---
        # Divide by the number of heatmaps actually summed (previously the
        # head count of the last token only). The max-normalisation below
        # makes the scale irrelevant for the final result.
        average_heatmap = sum_heatmap / total_head_count
        pos_map = average_heatmap.copy()
        if pos_map.max() > 0:
            pos_map /= pos_map.max()
        avg_norm = pos_map.copy()

        # --- 5. Choose which heatmap to combine with the attention map ---
        if check_heatmap_containment(clip_top_heatmap, clip_object_heatmap):
            clip_heatmap = clip_top_heatmap
            clipseg_input_text = candidate_text
        else:
            clip_heatmap = clip_object_heatmap
            clipseg_input_text = object_name

        print(f"Selected CLIP input : {clipseg_input_text}")
        clip_heatmap_resized = cv2.resize(clip_heatmap, GRID_SHAPE, interpolation=cv2.INTER_LINEAR)

        # --- 6. Hadamard product, gamma, upsample, blur, normalise ---
        avg_norm_cliped = avg_norm * clip_heatmap_resized
        # Clamp at 0 first: a fractional power of a negative value is NaN.
        # NOTE(review): assumes both maps are meant to be non-negative - confirm.
        avg_norm_cliped = np.power(np.clip(avg_norm_cliped, 0, None), GAMMA)
        avg_norm_cliped_rescaled = cv2.resize(avg_norm_cliped, (w, h), interpolation=cv2.INTER_LINEAR)

        # Blur width scales with the image; kernel size must be odd.
        sig = min(w, h) * 0.05
        k_val = int(sig * 3) * 2 + 1
        kernel_size = (k_val, k_val)
        blur_map = cv2.GaussianBlur(avg_norm_cliped_rescaled, kernel_size, sig)

        # Re-normalise after blurring.
        blur_map = min_max_normalize(blur_map)
        avg_norm_cliped_blur = blur_map

        # Full-resolution versions of the 31x31 maps for plotting.
        avg_norm_resized_vis = cv2.resize(avg_norm, (w, h), interpolation=cv2.INTER_LINEAR)
        clip_vis = cv2.resize(clip_heatmap_resized, (w, h), interpolation=cv2.INTER_NEAREST)

        # --- 7. Ground-truth evaluation ---
        gt_map = load_ground_truth(gt_path)
        if gt_map is None:
            print("NO GT!!!")
            continue

        metrics_dino = calculate_metrics(avg_norm_cliped_blur, gt_map)
        metrics_tracker_alloutput.update(metrics_dino)
        metrics_text = f"[{object_name} {action}] KLD: {metrics_dino['KLD']:.4f} | SIM: {metrics_dino['SIM']:.4f} | NSS: {metrics_dino['NSS']:.4f}"
        metrics_tracker_alloutput.print_metrics(metrics_dino, filename)

        results_list.append({
            'object': object_name,
            'action': action,
            'filename': filename,
            'output_sentence': output_description,
            'top_token_text': top_token_text,
            'following_text': following_text,
            'clip_input': clipseg_input_text,
            'KLD': metrics_dino['KLD'],
            'SIM': metrics_dino['SIM'],
            'NSS': metrics_dino['NSS']
        })

        # --- 8. Visualisation ---
        fig, axes = plt.subplots(1, 6, figsize=(24, 5))

        top_words = ",".join([f"'{t['token'].strip()}'" for t in top_tokens[:5]])

        main_title = f"Obj: {object_name} | Act: {action} |{metrics_text}\nTop Tokens: [{top_words}({top_token_idx } ),  clipseg input : {top_token_text} {following_text}] \n Whole answer : {output_description}"
        fig.suptitle(main_title, fontsize=14, fontweight='bold', y=0.98)

        # (1) Original image
        axes[0].imshow(orig_img)
        axes[0].set_title(f"Original\n({object_name})")
        axes[0].axis('off')

        # (2) Normalised average attention map
        im1 = axes[1].imshow(avg_norm_resized_vis, cmap='jet', interpolation='bilinear')
        axes[1].set_title(f"Attention Map {Layername}")
        axes[1].axis('off')
        plt.colorbar(im1, ax=axes[1], fraction=0.046, pad=0.04)

        # (3) Selected CLIPSeg heatmap
        axes[2].imshow(clip_vis, cmap='gray')
        axes[2].set_title(f"CLIPSeg {clipseg_input_text}")
        axes[2].axis('off')

        # (4) Hadamard product (attention x CLIPSeg) at full resolution;
        # identical to the map that was fed to the blur above.
        hadamard_vis = avg_norm_cliped_rescaled
        im3 = axes[3].imshow(hadamard_vis, cmap='jet', interpolation='bilinear')
        axes[3].set_title("Hadamard\n(Contrastive x CLIP)")
        axes[3].axis('off')
        plt.colorbar(im3, ax=axes[3], fraction=0.046, pad=0.04)

        # (5) Final blurred prediction
        im4 = axes[4].imshow(avg_norm_cliped_blur, cmap='jet', interpolation='bilinear')
        axes[4].set_title("Final Blurred")
        axes[4].axis('off')
        plt.colorbar(im4, ax=axes[4], fraction=0.046, pad=0.04)

        # (6) Ground truth
        axes[5].imshow(gt_map, cmap='gray')
        axes[5].set_title("Ground Truth")
        axes[5].axis('off')

        # Save and close the figure so memory does not grow across samples.
        save_path = os.path.join(output_dir, f"{object_name}_{action}_{file_stem}.png")
        plt.tight_layout()
        plt.savefig(save_path, bbox_inches='tight', dpi=150)
        plt.close(fig)

df_analy = pd.DataFrame(results_list)

[0] Processing: cut - apple


  return self.preprocess(images, **kwargs)


Selected CLIP input : apple

Metrics for all_output apple_000054.jpg:
 all_output Current - KLD: 0.6967 | SIM: 0.6304 | NSS: 0.7173

Cumulative all_output  Averages over 1 samples:
Average - KLD: 0.6967 | SIM: 0.6304 | NSS: 0.7173

[1] Processing: eat - apple
Selected CLIP input :  flesh  of

Metrics for all_output apple_001541.jpg:
 all_output Current - KLD: 0.2812 | SIM: 0.7230 | NSS: 1.1925

Cumulative all_output  Averages over 2 samples:
Average - KLD: 0.4889 | SIM: 0.6767 | NSS: 0.9549

[2] Processing: peel - apple
Selected CLIP input :  colored  layer

Metrics for all_output apple_001541.jpg:
 all_output Current - KLD: 0.1693 | SIM: 0.7994 | NSS: 1.2102

Cumulative all_output  Averages over 3 samples:
Average - KLD: 0.3824 | SIM: 0.7176 | NSS: 1.0400

[3] Processing: hit - axe
Selected CLIP input :  blade  (

Metrics for all_output axe_000961.jpg:
 all_output Current - KLD: 0.8073 | SIM: 0.4638 | NSS: 1.3466

Cumulative all_output  Averages over 4 samples:
Average - KLD: 0.4886 |

  return self.preprocess(images, **kwargs)


Selected CLIP input :  handle  (

Metrics for all_output badminton_racket_002255.jpg:
 all_output Current - KLD: 1.1181 | SIM: 0.3491 | NSS: 1.8696

Cumulative all_output  Averages over 6 samples:
Average - KLD: 0.5826 | SIM: 0.5996 | NSS: 1.4789

[6] Processing: swing - badminton_racket
Selected CLIP input : inton  racket

Metrics for all_output badminton_racket_003649.jpg:
 all_output Current - KLD: 4.5053 | SIM: 0.0174 | NSS: -0.4329

Cumulative all_output  Averages over 7 samples:
Average - KLD: 1.1430 | SIM: 0.5164 | NSS: 1.2058

[7] Processing: cut - banana
Selected CLIP input : banana

Metrics for all_output banana_002623.jpg:
 all_output Current - KLD: 0.2136 | SIM: 0.7491 | NSS: 1.4466

Cumulative all_output  Averages over 8 samples:
Average - KLD: 1.0268 | SIM: 0.5455 | NSS: 1.2359

[8] Processing: eat - banana
Selected CLIP input : banana

Metrics for all_output banana_002458.jpg:
 all_output Current - KLD: 0.1017 | SIM: 0.8471 | NSS: 1.7448

Cumulative all_output  Averages 

  return self.preprocess(images, **kwargs)


Selected CLIP input : baseball

Metrics for all_output baseball_002670.jpg:
 all_output Current - KLD: 0.3203 | SIM: 0.7174 | NSS: 3.4653

Cumulative all_output  Averages over 11 samples:
Average - KLD: 0.8922 | SIM: 0.5722 | NSS: 1.4843

[11] Processing: hit - baseball_bat
Selected CLIP input :  is  the

Metrics for all_output baseball_bat_001882.jpg:
 all_output Current - KLD: 1.8169 | SIM: 0.1925 | NSS: 0.9138

Cumulative all_output  Averages over 12 samples:
Average - KLD: 0.9692 | SIM: 0.5406 | NSS: 1.4367

[12] Processing: hold - baseball_bat
Selected CLIP input : baseball_bat

Metrics for all_output baseball_bat_002547.jpg:
 all_output Current - KLD: 2.6102 | SIM: 0.1018 | NSS: 0.6019

Cumulative all_output  Averages over 13 samples:
Average - KLD: 1.0955 | SIM: 0.5068 | NSS: 1.3725

[13] Processing: swing - baseball_bat
Selected CLIP input : baseball_bat

Metrics for all_output baseball_bat_001882.jpg:
 all_output Current - KLD: 2.0663 | SIM: 0.1597 | NSS: 0.3872

Cumulative al

  return self.preprocess(images, **kwargs)


Selected CLIP input :  mattress  is

Metrics for all_output bed_002880.jpg:
 all_output Current - KLD: 0.4590 | SIM: 0.6157 | NSS: 2.8330

Cumulative all_output  Averages over 16 samples:
Average - KLD: 1.0924 | SIM: 0.4914 | NSS: 1.5263

[16] Processing: sit_on - bed
Selected CLIP input :  the  mattress

Metrics for all_output bed_003622.jpg:
 all_output Current - KLD: 0.3522 | SIM: 0.6591 | NSS: 1.9072

Cumulative all_output  Averages over 17 samples:
Average - KLD: 1.0489 | SIM: 0.5013 | NSS: 1.5488

[17] Processing: lie_on - bench
Selected CLIP input : bench

Metrics for all_output bench_003727.jpg:
 all_output Current - KLD: 0.7572 | SIM: 0.4859 | NSS: 1.2667

Cumulative all_output  Averages over 18 samples:
Average - KLD: 1.0327 | SIM: 0.5004 | NSS: 1.5331

[18] Processing: sit_on - bench
Selected CLIP input : bench

Metrics for all_output bench_001877.jpg:
 all_output Current - KLD: 0.0838 | SIM: 0.8624 | NSS: 1.8674

Cumulative all_output  Averages over 19 samples:
Average - KL

  return self.preprocess(images, **kwargs)


Selected CLIP input :  seat  of

Metrics for all_output bicycle_003046.jpg:
 all_output Current - KLD: 1.2116 | SIM: 0.3349 | NSS: 1.5870

Cumulative all_output  Averages over 21 samples:
Average - KLD: 1.0739 | SIM: 0.4910 | NSS: 1.4781

[21] Processing: sit_on - bicycle
Selected CLIP input :  seat  (

Metrics for all_output bicycle_002100.jpg:
 all_output Current - KLD: 2.2608 | SIM: 0.1400 | NSS: 0.9231

Cumulative all_output  Averages over 22 samples:
Average - KLD: 1.1278 | SIM: 0.4750 | NSS: 1.4529

[22] Processing: look_out - binoculars
Selected CLIP input :  smaller  lenses

Metrics for all_output binoculars_003630.jpg:
 all_output Current - KLD: 1.5445 | SIM: 0.2736 | NSS: 0.0268

Cumulative all_output  Averages over 23 samples:
Average - KLD: 1.1459 | SIM: 0.4663 | NSS: 1.3909

[23] Processing: hold - book
Selected CLIP input : book

Metrics for all_output book_001195.jpg:
 all_output Current - KLD: 0.8704 | SIM: 0.4501 | NSS: 1.0906

Cumulative all_output  Averages over 24 s

  return self.preprocess(images, **kwargs)


Selected CLIP input :  the  bottle

Metrics for all_output bottle_003259.jpg:
 all_output Current - KLD: 1.6543 | SIM: 0.2149 | NSS: 1.8104

Cumulative all_output  Averages over 26 samples:
Average - KLD: 1.1394 | SIM: 0.4565 | NSS: 1.4346

[26] Processing: hold - bottle
Selected CLIP input : bottle

Metrics for all_output bottle_001227.jpg:
 all_output Current - KLD: 0.3322 | SIM: 0.6857 | NSS: 2.0287

Cumulative all_output  Averages over 27 samples:
Average - KLD: 1.1095 | SIM: 0.4650 | NSS: 1.4566

[27] Processing: open - bottle
Selected CLIP input :  cap  or

Metrics for all_output bottle_001033.jpg:
 all_output Current - KLD: 2.2266 | SIM: 0.1315 | NSS: 0.8413

Cumulative all_output  Averages over 28 samples:
Average - KLD: 1.1494 | SIM: 0.4530 | NSS: 1.4346

[28] Processing: pour - bottle
Selected CLIP input :  narrow  sp

Metrics for all_output bottle_002780.jpg:
 all_output Current - KLD: 1.5409 | SIM: 0.2718 | NSS: 0.4286

Cumulative all_output  Averages over 29 samples:
Avera

  return self.preprocess(images, **kwargs)


Selected CLIP input : bowl

Metrics for all_output bowl_000134.jpg:
 all_output Current - KLD: 1.0394 | SIM: 0.4339 | NSS: 0.6546

Cumulative all_output  Averages over 31 samples:
Average - KLD: 1.1253 | SIM: 0.4580 | NSS: 1.3598

[31] Processing: wash - bowl
Selected CLIP input : bowl

Metrics for all_output bowl_002825.jpg:
 all_output Current - KLD: 0.7930 | SIM: 0.4906 | NSS: 0.4589

Cumulative all_output  Averages over 32 samples:
Average - KLD: 1.1149 | SIM: 0.4591 | NSS: 1.3317

[32] Processing: eat - broccoli
Selected CLIP input : broccoli

Metrics for all_output broccoli_002796.jpg:
 all_output Current - KLD: 0.1800 | SIM: 0.7861 | NSS: 1.6921

Cumulative all_output  Averages over 33 samples:
Average - KLD: 1.0866 | SIM: 0.4690 | NSS: 1.3426

[33] Processing: take_photo - camera
Selected CLIP input :  the  camera

Metrics for all_output camera_002534.jpg:
 all_output Current - KLD: 0.6046 | SIM: 0.5613 | NSS: 0.4494

Cumulative all_output  Averages over 34 samples:
Average - K

  return self.preprocess(images, **kwargs)


Selected CLIP input : carrot

Metrics for all_output carrot_001443.jpg:
 all_output Current - KLD: 0.8755 | SIM: 0.4565 | NSS: 2.8036

Cumulative all_output  Averages over 36 samples:
Average - KLD: 1.0607 | SIM: 0.4712 | NSS: 1.4002

[36] Processing: peel - carrot
Selected CLIP input : carrot

Metrics for all_output carrot_003707.jpg:
 all_output Current - KLD: 0.2832 | SIM: 0.7110 | NSS: 2.1095

Cumulative all_output  Averages over 37 samples:
Average - KLD: 1.0397 | SIM: 0.4777 | NSS: 1.4193

[37] Processing: take_photo - cell_phone
Selected CLIP input :  camera  lens

Metrics for all_output cell_phone_000601.jpg:
 all_output Current - KLD: 0.4590 | SIM: 0.6240 | NSS: 1.5544

Cumulative all_output  Averages over 38 samples:
Average - KLD: 1.0244 | SIM: 0.4816 | NSS: 1.4229

[38] Processing: talk_on - cell_phone
Selected CLIP input : cell_phone

Metrics for all_output cell_phone_000601.jpg:
 all_output Current - KLD: 0.6915 | SIM: 0.5113 | NSS: 1.4691

Cumulative all_output  Averages

  return self.preprocess(images, **kwargs)


Selected CLIP input :  cushion  of

Metrics for all_output chair_002839.jpg:
 all_output Current - KLD: 1.0506 | SIM: 0.4040 | NSS: 0.7983

Cumulative all_output  Averages over 41 samples:
Average - KLD: 1.0118 | SIM: 0.4799 | NSS: 1.4136

[41] Processing: lie_on - couch
Selected CLIP input :  seat  and

Metrics for all_output couch_003293.jpg:
 all_output Current - KLD: 0.7804 | SIM: 0.4734 | NSS: 1.5439

Cumulative all_output  Averages over 42 samples:
Average - KLD: 1.0063 | SIM: 0.4798 | NSS: 1.4167

[42] Processing: sit_on - couch
Selected CLIP input :  seat  cushions

Metrics for all_output couch_000779.jpg:
 all_output Current - KLD: 0.8279 | SIM: 0.4538 | NSS: 1.8609

Cumulative all_output  Averages over 43 samples:
Average - KLD: 1.0022 | SIM: 0.4791 | NSS: 1.4271

[43] Processing: drink_with - cup
Selected CLIP input : cup

Metrics for all_output cup_000508.jpg:
 all_output Current - KLD: 0.6255 | SIM: 0.5319 | NSS: 2.5961

Cumulative all_output  Averages over 44 samples:
Ave

  return self.preprocess(images, **kwargs)


Selected CLIP input : cup

Metrics for all_output cup_001535.jpg:
 all_output Current - KLD: 1.3242 | SIM: 0.3379 | NSS: 1.8638

Cumulative all_output  Averages over 46 samples:
Average - KLD: 1.0018 | SIM: 0.4753 | NSS: 1.4842

[46] Processing: sip - cup
Selected CLIP input : cup

Metrics for all_output cup_001864.jpg:
 all_output Current - KLD: 0.4860 | SIM: 0.6013 | NSS: 0.9339

Cumulative all_output  Averages over 47 samples:
Average - KLD: 0.9908 | SIM: 0.4780 | NSS: 1.4725

[47] Processing: wash - cup
Selected CLIP input : cup

Metrics for all_output cup_003621.jpg:
 all_output Current - KLD: 1.4351 | SIM: 0.2649 | NSS: 1.2279

Cumulative all_output  Averages over 48 samples:
Average - KLD: 1.0000 | SIM: 0.4736 | NSS: 1.4674

[48] Processing: throw - discus
Selected CLIP input :  metal  hub

Metrics for all_output discus_003558.jpg:
 all_output Current - KLD: 0.5057 | SIM: 0.5934 | NSS: 0.7177

Cumulative all_output  Averages over 49 samples:
Average - KLD: 0.9900 | SIM: 0.4760 |

  return self.preprocess(images, **kwargs)


Selected CLIP input :  handle  of

Metrics for all_output fork_000804.jpg:
 all_output Current - KLD: 0.7748 | SIM: 0.4818 | NSS: 1.8842

Cumulative all_output  Averages over 51 samples:
Average - KLD: 0.9846 | SIM: 0.4751 | NSS: 1.4349

[51] Processing: lift - fork
Selected CLIP input : fork

Metrics for all_output fork_001691.jpg:
 all_output Current - KLD: 1.4286 | SIM: 0.2824 | NSS: 0.4240

Cumulative all_output  Averages over 52 samples:
Average - KLD: 0.9932 | SIM: 0.4714 | NSS: 1.4155

[52] Processing: stick - fork
Selected CLIP input : fork

Metrics for all_output fork_000095.jpg:
 all_output Current - KLD: 0.8893 | SIM: 0.4309 | NSS: 1.7980

Cumulative all_output  Averages over 53 samples:
Average - KLD: 0.9912 | SIM: 0.4706 | NSS: 1.4227

[53] Processing: wash - fork
Selected CLIP input : fork

Metrics for all_output fork_001691.jpg:
 all_output Current - KLD: 0.9706 | SIM: 0.4077 | NSS: 1.8583

Cumulative all_output  Averages over 54 samples:
Average - KLD: 0.9908 | SIM: 0.4

  return self.preprocess(images, **kwargs)


Selected CLIP input :  a  fr

Metrics for all_output frisbee_001130.jpg:
 all_output Current - KLD: 1.0995 | SIM: 0.3641 | NSS: -0.4626

Cumulative all_output  Averages over 56 samples:
Average - KLD: 0.9885 | SIM: 0.4680 | NSS: 1.3701

[56] Processing: throw - frisbee
Selected CLIP input : frisbee

Metrics for all_output frisbee_003249.jpg:
 all_output Current - KLD: 0.5578 | SIM: 0.5666 | NSS: 0.5565

Cumulative all_output  Averages over 57 samples:
Average - KLD: 0.9809 | SIM: 0.4697 | NSS: 1.3558

[57] Processing: hold - golf_clubs
Selected CLIP input :  black ,

Metrics for all_output golf_clubs_000045.jpg:
 all_output Current - KLD: 0.8392 | SIM: 0.4829 | NSS: 1.7267

Cumulative all_output  Averages over 58 samples:
Average - KLD: 0.9785 | SIM: 0.4699 | NSS: 1.3622

[58] Processing: swing - golf_clubs
Selected CLIP input : golf_clubs

Metrics for all_output golf_clubs_001992.jpg:
 all_output Current - KLD: 2.3520 | SIM: 0.1297 | NSS: 0.3447

Cumulative all_output  Averages over 5

  return self.preprocess(images, **kwargs)


Selected CLIP input :  handle  (

Metrics for all_output hammer_000215.jpg:
 all_output Current - KLD: 1.2322 | SIM: 0.3111 | NSS: 2.0557

Cumulative all_output  Averages over 61 samples:
Average - KLD: 1.0154 | SIM: 0.4579 | NSS: 1.3428

[61] Processing: eat - hot_dog
Selected CLIP input : hot_dog

Metrics for all_output hot_dog_002166.jpg:
 all_output Current - KLD: 0.2284 | SIM: 0.7418 | NSS: 1.2049

Cumulative all_output  Averages over 62 samples:
Average - KLD: 1.0027 | SIM: 0.4625 | NSS: 1.3406

[62] Processing: throw - javelin
Selected CLIP input :  yellow  or

Metrics for all_output javelin_001474.jpg:
 all_output Current - KLD: 0.5379 | SIM: 0.5820 | NSS: 3.6785

Cumulative all_output  Averages over 63 samples:
Average - KLD: 0.9953 | SIM: 0.4644 | NSS: 1.3777

[63] Processing: type_on - keyboard
Selected CLIP input : keyboard

Metrics for all_output keyboard_000439.jpg:
 all_output Current - KLD: 0.2170 | SIM: 0.7603 | NSS: 1.4113

Cumulative all_output  Averages over 64 samp

  return self.preprocess(images, **kwargs)


Selected CLIP input : knife

Metrics for all_output knife_002682.jpg:
 all_output Current - KLD: 0.8300 | SIM: 0.4651 | NSS: 2.4461

Cumulative all_output  Averages over 66 samples:
Average - KLD: 0.9759 | SIM: 0.4698 | NSS: 1.4056

[66] Processing: stick - knife
Selected CLIP input : knife

Metrics for all_output knife_001072.jpg:
 all_output Current - KLD: 1.9212 | SIM: 0.1881 | NSS: 1.3944

Cumulative all_output  Averages over 67 samples:
Average - KLD: 0.9900 | SIM: 0.4656 | NSS: 1.4055

[67] Processing: wash - knife
Selected CLIP input : knife

Metrics for all_output knife_002720.jpg:
 all_output Current - KLD: 0.6519 | SIM: 0.5386 | NSS: 2.9270

Cumulative all_output  Averages over 68 samples:
Average - KLD: 0.9850 | SIM: 0.4667 | NSS: 1.4278

[68] Processing: type_on - laptop
Selected CLIP input :  keyboard  is

Metrics for all_output laptop_000585.jpg:
 all_output Current - KLD: 0.5816 | SIM: 0.5597 | NSS: 1.9317

Cumulative all_output  Averages over 69 samples:
Average - KLD: 

  return self.preprocess(images, **kwargs)


Selected CLIP input :  rear  wheel

Metrics for all_output motorcycle_003541.jpg:
 all_output Current - KLD: 2.1787 | SIM: 0.1702 | NSS: 0.1733

Cumulative all_output  Averages over 71 samples:
Average - KLD: 0.9980 | SIM: 0.4624 | NSS: 1.4122

[71] Processing: ride - motorcycle
Selected CLIP input :  seat  of

Metrics for all_output motorcycle_002198.jpg:
 all_output Current - KLD: 1.4293 | SIM: 0.2911 | NSS: 0.9216

Cumulative all_output  Averages over 72 samples:
Average - KLD: 1.0040 | SIM: 0.4601 | NSS: 1.4054

[72] Processing: sit_on - motorcycle
Selected CLIP input :  the  seat

Metrics for all_output motorcycle_000837.jpg:
 all_output Current - KLD: 1.5009 | SIM: 0.2552 | NSS: 2.5445

Cumulative all_output  Averages over 73 samples:
Average - KLD: 1.0108 | SIM: 0.4573 | NSS: 1.4210

[73] Processing: cut - orange
Selected CLIP input : orange

Metrics for all_output orange_001193.jpg:
 all_output Current - KLD: 0.7729 | SIM: 0.6031 | NSS: 1.2841

Cumulative all_output  Averages o

  return self.preprocess(images, **kwargs)


Selected CLIP input : orange

Metrics for all_output orange_001193.jpg:
 all_output Current - KLD: 0.7569 | SIM: 0.6044 | NSS: 1.2860

Cumulative all_output  Averages over 76 samples:
Average - KLD: 1.0013 | SIM: 0.4630 | NSS: 1.4156

[76] Processing: wash - orange
Selected CLIP input : orange

Metrics for all_output orange_001193.jpg:
 all_output Current - KLD: 0.5921 | SIM: 0.6603 | NSS: 1.2925

Cumulative all_output  Averages over 77 samples:
Average - KLD: 0.9960 | SIM: 0.4656 | NSS: 1.4140

[77] Processing: open - oven
Selected CLIP input : oven

Metrics for all_output oven_001370.jpg:
 all_output Current - KLD: 1.2079 | SIM: 0.3328 | NSS: 1.2464

Cumulative all_output  Averages over 78 samples:
Average - KLD: 0.9987 | SIM: 0.4639 | NSS: 1.4118

[78] Processing: write - pen
Selected CLIP input : pen

Metrics for all_output pen_003590.jpg:
 all_output Current - KLD: 1.4619 | SIM: 0.2664 | NSS: 1.2547

Cumulative all_output  Averages over 79 samples:
Average - KLD: 1.0046 | SIM: 0.4

  return self.preprocess(images, **kwargs)


Selected CLIP input :  the  punching

Metrics for all_output punching_bag_001639.jpg:
 all_output Current - KLD: 0.8360 | SIM: 0.4483 | NSS: 0.5989

Cumulative all_output  Averages over 81 samples:
Average - KLD: 0.9951 | SIM: 0.4635 | NSS: 1.3910

[81] Processing: open - refrigerator
Selected CLIP input :  the  **

Metrics for all_output refrigerator_002162.jpg:
 all_output Current - KLD: 1.2274 | SIM: 0.3333 | NSS: 0.8550

Cumulative all_output  Averages over 82 samples:
Average - KLD: 0.9979 | SIM: 0.4619 | NSS: 1.3845

[82] Processing: catch - rugby_ball
Selected CLIP input :  brown  panels

Metrics for all_output rugby_ball_003522.jpg:
 all_output Current - KLD: 0.3355 | SIM: 0.7037 | NSS: 0.6750

Cumulative all_output  Averages over 83 samples:
Average - KLD: 0.9899 | SIM: 0.4649 | NSS: 1.3759

[83] Processing: kick - rugby_ball
Selected CLIP input :  a  rugby

Metrics for all_output rugby_ball_002080.jpg:
 all_output Current - KLD: 0.1660 | SIM: 0.7890 | NSS: 0.9455

Cumulative 

  return self.preprocess(images, **kwargs)


Selected CLIP input :  blades  of

Metrics for all_output scissors_002479.jpg:
 all_output Current - KLD: 1.6779 | SIM: 0.2217 | NSS: 1.0630

Cumulative all_output  Averages over 86 samples:
Average - KLD: 0.9799 | SIM: 0.4690 | NSS: 1.3623

[86] Processing: hold - scissors
Selected CLIP input : scissors

Metrics for all_output scissors_002479.jpg:
 all_output Current - KLD: 0.4971 | SIM: 0.6156 | NSS: 1.7976

Cumulative all_output  Averages over 87 samples:
Average - KLD: 0.9744 | SIM: 0.4706 | NSS: 1.3673

[87] Processing: carry - skateboard
Selected CLIP input : skateboard

Metrics for all_output skateboard_002668.jpg:
 all_output Current - KLD: 0.1396 | SIM: 0.8175 | NSS: 0.8409

Cumulative all_output  Averages over 88 samples:
Average - KLD: 0.9649 | SIM: 0.4746 | NSS: 1.3613

[88] Processing: hold - skateboard
Selected CLIP input :  the  grip

Metrics for all_output skateboard_002387.jpg:
 all_output Current - KLD: 0.5332 | SIM: 0.5999 | NSS: 1.1111

Cumulative all_output  Averag

  return self.preprocess(images, **kwargs)


Selected CLIP input :  the  grip

Metrics for all_output skateboard_001460.jpg:
 all_output Current - KLD: 0.3067 | SIM: 0.7057 | NSS: 1.1123

Cumulative all_output  Averages over 91 samples:
Average - KLD: 0.9544 | SIM: 0.4775 | NSS: 1.3537

[91] Processing: carry - skis
Selected CLIP input :  the  **

Metrics for all_output skis_002829.jpg:
 all_output Current - KLD: 2.3025 | SIM: 0.1329 | NSS: -0.2105

Cumulative all_output  Averages over 92 samples:
Average - KLD: 0.9690 | SIM: 0.4737 | NSS: 1.3367

[92] Processing: hold - skis
Selected CLIP input :  tips  of

Metrics for all_output skis_001357.jpg:
 all_output Current - KLD: 2.1502 | SIM: 0.1543 | NSS: 0.9156

Cumulative all_output  Averages over 93 samples:
Average - KLD: 0.9817 | SIM: 0.4703 | NSS: 1.3321

[93] Processing: jump - skis
Selected CLIP input :  the  entire

Metrics for all_output skis_002829.jpg:
 all_output Current - KLD: 2.3725 | SIM: 0.1244 | NSS: -0.2072

Cumulative all_output  Averages over 94 samples:
Average 

  return self.preprocess(images, **kwargs)


Selected CLIP input : rounded  tips

Metrics for all_output snowboard_001325.jpg:
 all_output Current - KLD: 1.7136 | SIM: 0.2321 | NSS: 1.8059

Cumulative all_output  Averages over 96 samples:
Average - KLD: 1.0196 | SIM: 0.4606 | NSS: 1.3098

[96] Processing: hold - snowboard
Selected CLIP input :  a  snow

Metrics for all_output snowboard_001704.jpg:
 all_output Current - KLD: 1.8831 | SIM: 0.1921 | NSS: -0.2218

Cumulative all_output  Averages over 97 samples:
Average - KLD: 1.0285 | SIM: 0.4578 | NSS: 1.2940

[97] Processing: jump - snowboard
Selected CLIP input :  a  snow

Metrics for all_output snowboard_001704.jpg:
 all_output Current - KLD: 1.0241 | SIM: 0.3929 | NSS: 0.1912

Cumulative all_output  Averages over 98 samples:
Average - KLD: 1.0285 | SIM: 0.4571 | NSS: 1.2827

[98] Processing: catch - soccer_ball
Selected CLIP input : soccer_ball

Metrics for all_output soccer_ball_003333.jpg:
 all_output Current - KLD: 0.1069 | SIM: 0.8627 | NSS: 1.3880

Cumulative all_output  A

  return self.preprocess(images, **kwargs)


Selected CLIP input :  the  bottom

Metrics for all_output suitcase_002998.jpg:
 all_output Current - KLD: 3.9097 | SIM: 0.0357 | NSS: -0.2848

Cumulative all_output  Averages over 101 samples:
Average - KLD: 1.0387 | SIM: 0.4611 | NSS: 1.2788

[101] Processing: hold - suitcase
Selected CLIP input : opic  handle

Metrics for all_output suitcase_003687.jpg:
 all_output Current - KLD: 1.1628 | SIM: 0.3533 | NSS: 1.9276

Cumulative all_output  Averages over 102 samples:
Average - KLD: 1.0400 | SIM: 0.4600 | NSS: 1.2851

[102] Processing: open - suitcase
Selected CLIP input :  zipper  is

Metrics for all_output suitcase_000520.jpg:
 all_output Current - KLD: 2.3463 | SIM: 0.1470 | NSS: -0.2184

Cumulative all_output  Averages over 103 samples:
Average - KLD: 1.0526 | SIM: 0.4570 | NSS: 1.2705

[103] Processing: pack - suitcase
Selected CLIP input :  leopard  print

Metrics for all_output suitcase_002212.jpg:
 all_output Current - KLD: 0.8597 | SIM: 0.4385 | NSS: 0.8117

Cumulative all_outp

  return self.preprocess(images, **kwargs)


Selected CLIP input :  black  circular

Metrics for all_output surfboard_002422.jpg:
 all_output Current - KLD: 2.4812 | SIM: 0.1191 | NSS: -0.3527

Cumulative all_output  Averages over 106 samples:
Average - KLD: 1.0629 | SIM: 0.4534 | NSS: 1.2660

[106] Processing: hold - surfboard
Selected CLIP input : surfboard

Metrics for all_output surfboard_002631.jpg:
 all_output Current - KLD: 0.7549 | SIM: 0.4836 | NSS: 2.6717

Cumulative all_output  Averages over 107 samples:
Average - KLD: 1.0600 | SIM: 0.4537 | NSS: 1.2791

[107] Processing: jump - surfboard
Selected CLIP input : surfboard

Metrics for all_output surfboard_000658.jpg:
 all_output Current - KLD: 0.9082 | SIM: 0.4388 | NSS: 0.3362

Cumulative all_output  Averages over 108 samples:
Average - KLD: 1.0586 | SIM: 0.4535 | NSS: 1.2704

[108] Processing: lie_on - surfboard
Selected CLIP input : surfboard

Metrics for all_output surfboard_000221.jpg:
 all_output Current - KLD: 0.1024 | SIM: 0.8554 | NSS: 2.3661

Cumulative all_out

  return self.preprocess(images, **kwargs)


Selected CLIP input : tennis_racket

Metrics for all_output tennis_racket_002268.jpg:
 all_output Current - KLD: 3.4743 | SIM: 0.0601 | NSS: -0.4287

Cumulative all_output  Averages over 111 samples:
Average - KLD: 1.0639 | SIM: 0.4564 | NSS: 1.2559

[111] Processing: hold - tennis_racket
Selected CLIP input :  handle  of

Metrics for all_output tennis_racket_001785.jpg:
 all_output Current - KLD: 0.6092 | SIM: 0.5482 | NSS: 3.2809

Cumulative all_output  Averages over 112 samples:
Average - KLD: 1.0599 | SIM: 0.4572 | NSS: 1.2739

[112] Processing: swing - tennis_racket
Selected CLIP input :  handle  of

Metrics for all_output tennis_racket_003066.jpg:
 all_output Current - KLD: 1.1358 | SIM: 0.3532 | NSS: 1.8164

Cumulative all_output  Averages over 113 samples:
Average - KLD: 1.0605 | SIM: 0.4563 | NSS: 1.2787

[113] Processing: brush_with - toothbrush
Selected CLIP input :  at  the

Metrics for all_output toothbrush_001764.jpg:
 all_output Current - KLD: 2.2966 | SIM: 0.1348 | NSS:

  return self.preprocess(images, **kwargs)


Selected CLIP input : toothbrush

Metrics for all_output toothbrush_001991.jpg:
 all_output Current - KLD: 1.2360 | SIM: 0.3178 | NSS: 1.8808

Cumulative all_output  Averages over 116 samples:
Average - KLD: 1.0782 | SIM: 0.4503 | NSS: 1.2805

[116] Processing: drink_with - wine_glass
Selected CLIP input :  bowl  of

Metrics for all_output wine_glass_003343.jpg:
 all_output Current - KLD: 1.5623 | SIM: 0.2401 | NSS: 0.9313

Cumulative all_output  Averages over 117 samples:
Average - KLD: 1.0823 | SIM: 0.4485 | NSS: 1.2775

[117] Processing: hold - wine_glass
Selected CLIP input :  its  stem

Metrics for all_output wine_glass_002374.jpg:
 all_output Current - KLD: 1.1163 | SIM: 0.3738 | NSS: 1.2268

Cumulative all_output  Averages over 118 samples:
Average - KLD: 1.0826 | SIM: 0.4479 | NSS: 1.2770

[118] Processing: pour - wine_glass
Selected CLIP input : wine_glass

Metrics for all_output wine_glass_000186.jpg:
 all_output Current - KLD: 1.3131 | SIM: 0.3126 | NSS: 0.9142

Cumulative a

In [None]:
# Results without min-max normalization (values are only divided by the max)
LAST-cvpr25 - KLD: 3.2672 | SIM: 0.1017 | NSS: -0.1982
LAST    - KLD: 1.2582 | SIM: 0.4002 | NSS: 1.0195
ALL     - KLD: 1.0893 | SIM: 0.4449 | NSS: 1.2702 // KLD: 1.1319 | SIM: 0.4425 | NSS: 1.1987
TOP1 O only - KLD: 1.0659 | SIM: 0.4494 | NSS: 1.3036
TOP1    - KLD: 1.0486 | SIM: 0.4630 | NSS: 1.3187  
TOP1_GT - KLD: 0.9934 | SIM: 0.4644 | NSS: 1.3921


In [None]:
TOP 90 : Average - KLD: 1.1244 | SIM: 0.4453 | NSS: 1.2122
TOP 95 : Average - KLD: 1.1112 | SIM: 0.4485 | NSS: 1.2430
TOP 99 : KLD: 1.0911 | SIM: 0.4547 | NSS: 1.2855
TOP 99 Bottom 50 : KLD: 1.0911 | SIM: 0.4547 | NSS: 1.2855
TOP 80 Bottom 50 :KLD: 1.1480 | SIM: 0.4391 | NSS: 1.1538
ALL_token_exp075 :KLD: 1.2934 | SIM: 0.3905 | NSS: 0.9825
TOP1 : KLD: 1.0901 | SIM: 0.4564 | NSS: 1.2929
TOP1_exp0.75 : KLD: 1.0885 | SIM: 0.4532 | NSS: 1.2754
CLIPSEG_TOP1_exp0.75 : KLD: 1.0672 | SIM: 0.4499 | NSS: 1.3048
CLIPSEG_TOP12_exp0.75 : KLD: 1.0653 | SIM: 0.4484 | NSS: 1.3006
CLIPSEG_TOP123_exp0.75 : KLD: 1.0725 | SIM: 0.4469 | NSS: 1.2891

CLIPSEG_TOP1_GT_exp0.75 :  KLD: 0.9955 | SIM: 0.4649 | NSS: 1.3919



TOP1_real_exp0.5 : KLD: 1.1299 | SIM: 0.4168 | NSS: 1.3019
CLIP_filter top1 :  KLD: 1.0788 | SIM: 0.4702 | NSS: 1.2851
CLIP_filter top1+following :  KLD: 1.0782 | SIM: 0.4732 | NSS: 1.2873
CLIP_filter top1+following_exp0.3 : KLD: 1.0730 | SIM: 0.4661 | NSS: 1.2646
CLIP_filter top1+following_exp0.75 :  KLD: 1.0625 | SIM: 0.4734 | NSS: 1.2900
CLIP_filter top1+following_exp0.75_real :  KLD: 1.0474 | SIM: 0.4607 | NSS: 1.3293
CLIP_filter top1+following_exp0.75_real_namable :  KLD: 1.1120 | SIM: 0.4399 | NSS: 1.2498
CLIP_filter top1+following_ININ_exp0.75_real :  KLD: 1.0567 | SIM: 0.4571 | NSS: 1.3125
CLIP_filter top1+following_exp0.5_real :  KLD: 1.0970 | SIM: 0.4276 | NSS: 1.3465
CLIP_filter top1+following_exp2 :  Average - KLD: 1.5235 | SIM: 0.4005 | NSS: 0.9573

CLIP_filter PLSP_exp0.75_real :  KLD: 1.0015 | SIM: 0.4608 | NSS: 1.4040