In [1]:
import pandas as pd
import os
import sys
import torch
import re
import json
import math
import os
import glob
import os
import numpy as np
from torchvision.transforms import GaussianBlur
from torchvision import transforms
import torch.nn.functional as F
import torchvision.transforms as T
import cv2

from PIL import Image, ImageDraw, ImageFont, ImageFilter
from pathlib import Path

# Working directory of the notebook and its parent; the parent holds the
# project-local modules imported right after this setup.
current_dir = Path.cwd()
parent_dir = current_dir.parent

# sys.path stores strings, so the Path is converted. The membership check
# keeps repeated runs of this cell from appending the same entry again.
if str(parent_dir) not in sys.path:
    sys.path.append(str(parent_dir))
print(f"Added to sys.path: {parent_dir}")
# 경로 설정이 완료된 후 import 해야 합니다.
from VLM_model_dot_relative import QwenVLModel, MetricsTracker
from file_managing import (
    load_selected_samples,
    get_actual_path,
    get_gt_path,
)
from config import AGD20K_PATH

Added to sys.path: /home/bongo/porter_notebook/research/qwen3


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from transformers import CLIPSegForImageSegmentation, CLIPSegProcessor

# Processor and model are loaded from the same pretrained checkpoint so the
# preprocessing matches the weights.
CLIPSEG_CHECKPOINT = "CIDAS/clipseg-rd64-refined"
clip_processor = CLIPSegProcessor.from_pretrained(CLIPSEG_CHECKPOINT)
clip_model = CLIPSegForImageSegmentation.from_pretrained(CLIPSEG_CHECKPOINT)

print("Imports and Model Loading Completed.")

def get_clipseg_heatmap(
        image_path: str,
        clip_model,
        clip_processor,
        object_name: str,
    ):
    """
    Compute a text-conditioned segmentation heatmap for one image.

    Args:
        image_path (str): Path of the image to process.
        clip_model: Callable as ``clip_model(**inputs)``; the result must
            expose ``.logits``.
        clip_processor: Callable that converts the prompt and the image into
            the keyword inputs of ``clip_model``.
        object_name (str): Text prompt, used as-is.

    Returns:
        np.ndarray: 2D array with the image's (height, width), min-max
        normalized to [0, 1]. A map without contrast (all values equal)
        is returned as all zeros instead of NaN.
    """
    # Open inside a context manager so the file handle is released; convert()
    # returns a loaded copy that stays valid after the file is closed.
    with Image.open(image_path) as image_file:
        original_image = image_file.convert('RGB')
    original_size = original_image.size  # (width, height), the order PIL's resize expects

    inputs = clip_processor(
        text=[object_name],
        images=[original_image],
        padding="max_length",
        return_tensors="pt"
    )

    with torch.no_grad():
        outputs = clip_model(**inputs)

    # squeeze() drops singleton dimensions so a single-image result becomes
    # a plain 2D map regardless of whether the logits keep a batch axis.
    heatmap_small = torch.sigmoid(outputs.logits).detach().cpu().squeeze()

    # Upsample the low-resolution map back to the original image size.
    final_heatmap = np.array(
        Image.fromarray(heatmap_small.numpy())
        .resize(original_size, resample=Image.Resampling.BILINEAR)
    )

    # Min-max normalization. The range check (rather than max > 0, which is
    # always true after a sigmoid) avoids a 0/0 division on a constant map.
    lowest = final_heatmap.min()
    highest = final_heatmap.max()
    if highest > lowest:
        final_heatmap = (final_heatmap - lowest) / (highest - lowest)
    else:
        final_heatmap = np.zeros_like(final_heatmap)

    return final_heatmap


Imports and Model Loading Completed.


In [3]:
def load_ground_truth(gt_path):
    """
    Load a ground truth image as a single-channel tensor.

    Args:
        gt_path (str): Path to the ground truth image
    Returns:
        torch.Tensor or None: 2D tensor min-max normalized to [0, 1]. An image
        without contrast (all pixels equal) is returned without rescaling
        instead of producing NaN. ``None`` is returned, after printing a
        warning, when the image cannot be loaded or processed.
    """
    # Modes that ToTensor already turns into exactly one channel.
    single_channel_modes = ('L', 'I', 'I;16', 'F', '1')

    try:
        # The context manager releases the file handle; the tensor is built
        # inside it because PIL loads pixel data lazily.
        with Image.open(gt_path) as gt_img:
            # Collapse every multi-channel or palette mode (RGB, RGBA, LA, P, ...)
            # to grayscale, not just RGB, so squeeze(0) always yields a 2D map.
            if gt_img.mode not in single_channel_modes:
                gt_img = gt_img.convert('L')

            gt_tensor = transforms.ToTensor()(gt_img).squeeze(0)

        # Normalize to [0, 1]; skipped when max == min to avoid 0/0.
        lowest = gt_tensor.min()
        highest = gt_tensor.max()
        if highest > lowest:
            gt_tensor = (gt_tensor - lowest) / (highest - lowest)

        return gt_tensor

    except Exception as e:
        # Deliberately best-effort: callers check for None.
        print(f"⚠️ Failed to load ground truth image: {str(e)}")
        return None

def create_heatmap_from_dots_v2(image_size, dots):
    """
    Create a heatmap from dot coordinates using Gaussian kernels with dynamic sigma.

    Args:
        image_size (tuple): Size of the image (height, width)
        dots (list): List of dot coordinates [x, y]; values are cast to int
            and clamped to the image bounds.
    Returns:
        torch.Tensor: (height, width) heatmap, min-max normalized to [0, 1].
        All zeros when ``dots`` is empty.
    """
    height, width = image_size

    # Sigma scales linearly with the mean image side: 60 px at a 640 px reference.
    base_size = 640
    base_sigma = 60
    scale_factor = ((height + width) / 2) / base_size
    # int() truncates; clamp to 1 so small images do not end up with sigma == 0,
    # which would divide by zero and fill the heatmap with NaN.
    sigma = max(1, int(base_sigma * scale_factor))
    two_sigma_sq = 2 * sigma**2

    heatmap = torch.zeros((height, width))

    # The coordinate grids do not depend on the dot, so build them once.
    y_grid, x_grid = torch.meshgrid(
        torch.arange(height, dtype=torch.float32),
        torch.arange(width, dtype=torch.float32),
        indexing='ij'
    )

    for dot in dots:
        x, y = map(int, dot)
        # Keep the Gaussian centre inside the image.
        x = max(0, min(x, width-1))
        y = max(0, min(y, height-1))
        heatmap += torch.exp(
            -((x_grid - x)**2 + (y_grid - y)**2) / two_sigma_sq
        )

    # Normalize heatmap (the small constant guards a zero range).
    if heatmap.max() > 0:
        heatmap = (heatmap - heatmap.min()) / (heatmap.max() - heatmap.min() + 1e-10)
    return heatmap

def _load_header_font(font_size):
    """Return the first loadable TrueType font (NanumGothic, then DejaVuSans), else PIL's default font."""
    candidate_paths = (
        "/usr/share/fonts/truetype/nanum/NanumGothic.ttf",  # supports Korean
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    )
    for font_path in candidate_paths:
        try:
            return ImageFont.truetype(font_path, font_size)
        except Exception:
            # Best-effort fallback, but unlike a bare except this does not
            # swallow KeyboardInterrupt / SystemExit.
            continue
    return ImageFont.load_default()


def _truncate_to_width(draw, text, font, max_width):
    """Drop trailing characters until ``text`` followed by "..." fits in ``max_width``; append "..." if anything was dropped."""
    truncated = text
    while draw.textlength(truncated + "...", font=font) > max_width and len(truncated) > 0:
        truncated = truncated[:-1]
    if truncated != text:
        truncated += "..."
    return truncated


def _draw_header_row(draw, headers, section_width, font, box_top, box_bottom, title_y, filename_y):
    """Draw one row of (title, filename) header cells, one per column; cells with an empty title are skipped."""
    for idx, (title, filename) in enumerate(headers):
        if not title:
            continue
        section_x = idx * section_width

        # White background box for the text area
        draw.rectangle([section_x, box_top, section_x + section_width, box_bottom], fill='white', outline='lightgray')

        # Horizontally centred title
        title_width = draw.textlength(title, font=font)
        title_x = section_x + (section_width - title_width) // 2
        draw.text((title_x, title_y), title, fill='black', font=font)

        # Horizontally centred filename, truncated to leave a 20 px margin
        shown_name = _truncate_to_width(draw, filename, font, section_width - 20)
        filename_width = draw.textlength(shown_name, font=font)
        filename_x = section_x + (section_width - filename_width) // 2
        draw.text((filename_x, filename_y), shown_name, fill='black', font=font)


def draw_dots_on_image( image_path, dots, gt_path, action, exo_path=None, exo_type=None, output_path=None):
    """
    Render a 3x2 comparison sheet (ego / exo on top; dots, heatmap, GT below) and save it.

    Args:
        image_path (str): Path to the ego image
        dots (list): List of dot coordinates [x, y]
        gt_path (str): Path to the ground truth image
        action (str): Action name, used in headers and in the output filename
        exo_path (str, optional): Path to the exo image; when given it is shown in the top row
        exo_type (str, optional): Type of exo image ('random' or 'selected'); selects the result directory
        output_path (str, optional): Path to save the result image; generated when omitted
    Returns:
        tuple: (path of the saved image, torch.Tensor heatmap generated from the dots)

    Note:
        The result directories are created under the parent of the
        module-level ``current_dir``, which must be defined before calling.
    """
    # Load the ego image
    ego_img = Image.open(image_path)
    width, height = ego_img.size

    # Load the exo image and remember its file name, if one was provided
    exo_img = Image.open(exo_path) if exo_path else None
    exo_filename = os.path.basename(exo_path) if exo_path else None

    # Heatmap from the dots, replicated to 3 channels for display
    heatmap_tensor = create_heatmap_from_dots_v2((height, width), dots)
    heatmap_img = transforms.ToPILImage()(heatmap_tensor.unsqueeze(0).repeat(3, 1, 1))

    # Copy of the ego image with the dots drawn on it
    dot_img = draw_dots_on_single_image(ego_img, dots, color='red', radius=15)

    # Font size, header height and spacing depend on the aspect ratio
    aspect_ratio = width / height
    if aspect_ratio > 2:  # very wide images
        font_size = min(50, width // 12)
        header_height = 110
        spacing = 30
    elif aspect_ratio > 1.5:  # moderately wide images
        font_size = min(55, width // 10)
        header_height = 120
        spacing = 35
    else:
        font_size = max(60, width // 8)
        header_height = 130
        spacing = 40

    # Blank canvas for the 3x2 layout
    combined_width = width * 3
    combined_height = height * 2 + header_height * 2 + spacing * 3 + 40
    combined_img = Image.new('RGB', (combined_width, combined_height), 'white')

    font = _load_header_font(font_size)

    ego_filename = os.path.basename(image_path)
    gt_filename = os.path.basename(gt_path) if gt_path else "No GT"

    draw = ImageDraw.Draw(combined_img)

    # Top row headers: ego + exo, or ego only
    if exo_img is not None:
        top_headers = [
            ("Ego", ego_filename),
            ("Exo", exo_filename),
            ("", "")  # Empty space
        ]
    else:
        top_headers = [
            ("Original", ego_filename),
            ("", ""),  # Empty space
            ("", "")   # Empty space
        ]

    # Bottom row headers (same for both versions)
    bottom_headers = [
        ("Dots", action+"_"+ego_filename),
        ("Heatmap", action+"_"+ ego_filename),
        ("GT", action+"_"+gt_filename)
    ]

    _draw_header_row(
        draw, top_headers, width, font,
        box_top=0,
        box_bottom=header_height,
        title_y=5,
        filename_y=header_height // 2 + 5,
    )

    section_y = height + header_height + spacing  # Position below first row
    _draw_header_row(
        draw, bottom_headers, width, font,
        box_top=section_y - 10,
        box_bottom=section_y + header_height,
        title_y=section_y,
        filename_y=section_y + header_height // 2,
    )

    # Top row images: ego and optionally exo
    top_image_y = header_height + spacing
    combined_img.paste(ego_img, (0, top_image_y))
    if exo_img is not None:
        combined_img.paste(exo_img, (width, top_image_y))

    # Bottom row images: dots, heatmap, GT
    bottom_image_y = height + header_height * 2 + spacing * 2
    combined_img.paste(dot_img, (0, bottom_image_y))
    combined_img.paste(heatmap_img, (width, bottom_image_y))

    # GT panel and metrics
    gt_map = load_ground_truth(gt_path)
    if gt_map is not None:
        gt_tensor = gt_map if isinstance(gt_map, torch.Tensor) else torch.tensor(gt_map)
        gt_img = transforms.ToPILImage()(gt_tensor.unsqueeze(0).repeat(3, 1, 1))
        combined_img.paste(gt_img, (width * 2, bottom_image_y))

        metrics = calculate_metrics(heatmap_tensor, gt_map)
        metrics_text = f"KLD: {metrics['KLD']:.4f} | SIM: {metrics['SIM']:.4f} | NSS: {metrics['NSS']:.4f}"
    else:
        # GT could not be loaded: blank panel and an error marker as the caption
        blank_img = Image.new('RGB', (width, height), 'white')
        combined_img.paste(blank_img, (width * 2, bottom_image_y))
        metrics_text = "ERRRR"

    # Metrics caption, centred under the bottom row, on a white box
    text_width = draw.textlength(metrics_text, font=font)
    text_x = (combined_width - text_width) // 2
    text_y = bottom_image_y + height + spacing
    padding = 10
    draw.rectangle([text_x - padding, text_y - padding,
                    text_x + text_width + padding, text_y + font_size + padding],
                    fill='white', outline='gray')
    draw.text((text_x, text_y), metrics_text, fill='black', font=font)

    # Result directories live next to (not inside) current_dir
    script_dir = os.path.dirname(os.path.abspath(current_dir ))
    if exo_type is None:
        res_dir = os.path.join(script_dir, 'dot_images')
        sub_dirs = ("with_exo", "only_ego")
    else:
        res_dir = os.path.join(script_dir, f'dot_images_{exo_type}')
        sub_dirs = ("with_exo", f"{exo_type}", "only_ego")
    os.makedirs(res_dir, exist_ok=True)
    for sub_dir in sub_dirs:
        os.makedirs(os.path.join(res_dir, sub_dir), exist_ok=True)

    # Generate output path if not provided
    if output_path is None:
        base_name, ext = os.path.splitext(ego_filename)
        if exo_img is not None and exo_type:
            # Format: skis_002829_jump_exo_random.jpg or skis_002829_jump_exo_selected.jpg
            output_filename = f"{base_name}_{action}_exo_{exo_type}{ext}"
            output_path = os.path.join(res_dir, f"with_exo/{output_filename}")
        elif exo_img is not None:
            # Fallback if exo_type not specified: the exo file name supplies the suffix and extension
            output_filename = f"{base_name}_{action}_exo_{exo_filename}"
            output_path = os.path.join(res_dir, f"with_exo/{output_filename}")
        elif exo_type is not None:
            output_filename = f"{base_name}_{action}_exo_{exo_type}{ext}"
            output_path = os.path.join(res_dir, f"{exo_type}/{output_filename}")
        else:
            # Format: skis_002829_jump.jpg
            output_filename = f"{base_name}_{action}{ext}"
            output_path = os.path.join(res_dir, f"only_ego/{output_filename}")

    # Save the combined image
    combined_img.save(output_path)

    return output_path, heatmap_tensor

def draw_dots_on_single_image(image, dots, color='red', radius=15):
    """
    Return a copy of ``image`` with a filled circle drawn at every dot.

    Args:
        image (PIL.Image): Source image; it is copied, not modified
        dots (list): Dot coordinates as [x, y] pairs (cast to int)
        color (str): Fill and outline color of the circles
        radius (int): Circle radius in pixels
    Returns:
        PIL.Image: The annotated copy
    """
    annotated = image.copy()
    canvas = ImageDraw.Draw(annotated)

    for center_x, center_y in (map(int, dot) for dot in dots):
        # Bounding box of the circle: [left, top, right, bottom]
        bounding_box = [
            center_x - radius,
            center_y - radius,
            center_x + radius,
            center_y + radius,
        ]
        canvas.ellipse(bounding_box, fill=color, outline=color)

    return annotated


def calculate_metrics(pred_heatmap, gt_map):
    """
    Calculate comparison metrics between predicted heatmap and GT (following original metric.py)

    Both inputs are flattened, so they must contain the same number of elements.

    Args:
        pred_heatmap (torch.Tensor or array-like): Predicted heatmap
        gt_map (torch.Tensor or array-like): Ground truth map
    Returns:
        dict: Python floats under the keys 'KLD', 'SIM' and 'NSS'. The values
        stay finite when either map is all zeros.
    """
    # Ensure inputs are proper tensors
    if not isinstance(pred_heatmap, torch.Tensor):
        pred_heatmap = torch.tensor(pred_heatmap)
    if not isinstance(gt_map, torch.Tensor):
        gt_map = torch.tensor(gt_map)

    # Flatten and add a batch dimension: [1, H*W]
    pred = pred_heatmap.flatten().float().unsqueeze(0)
    gt = gt_map.flatten().float().unsqueeze(0)

    eps = 1e-10

    # Probability distributions shared by KLD and SIM. clamp_min leaves any
    # denominator >= eps untouched and only prevents 0/0 on all-zero maps.
    pred_prob = pred / pred.sum(dim=1, keepdim=True).clamp_min(eps)
    gt_prob = gt / gt.sum(dim=1, keepdim=True).clamp_min(eps)

    # KLD: eps keeps log() finite where the prediction is exactly zero
    kld = F.kl_div((pred_prob + eps).log(), gt_prob, reduction="batchmean").item()

    # SIM: histogram intersection, averaged over the batch (of size 1)
    sim = torch.minimum(pred_prob, gt_prob).sum().item() / len(pred_prob)

    # NSS: first normalize both maps by their max value
    pred_nss = pred / pred.max(dim=1, keepdim=True).values.clamp_min(eps)
    gt_nss = gt / gt.max(dim=1, keepdim=True).values.clamp_min(eps)

    # z-score of the prediction
    std = pred_nss.std(dim=1, keepdim=True)
    u = pred_nss.mean(dim=1, keepdim=True)
    smap = (pred_nss - u) / (std + eps)

    # Binary fixation map: min-max normalized GT thresholded at 0.1
    gt_min = torch.min(gt_nss, dim=1, keepdim=True).values
    gt_max = torch.max(gt_nss, dim=1, keepdim=True).values
    fixation_map = (gt_nss - gt_min) / (gt_max - gt_min + eps)
    fixation_map = (fixation_map >= 0.1).float()

    # Mean z-score over the fixated locations
    nss_values = smap * fixation_map
    nss = nss_values.sum(dim=1) / (fixation_map.sum(dim=1) + eps)
    nss = nss.mean().item()

    return {
        'KLD': kld,
        'SIM': sim,
        'NSS': nss
    }

In [4]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

def recover_heatmap_data(image_path, target_size=(31, 31)):
    """
    Recover a score grid from a rendered grayscale heatmap image.

    Reddish pixels are masked and inpainted from their surroundings, the image
    is downsampled to ``target_size``, converted to grayscale and inverted so
    darker pixels get higher scores. The outermost rows and columns are
    then set to zero.

    Args:
        image_path (str): Path of the heatmap image.
        target_size (tuple): Size passed to ``cv2.resize``.
    Returns:
        np.ndarray or None: Float scores in [0, 1]; ``None`` (after printing a
        message) when the image cannot be read.

    NOTE(review): the inversion assumes the image was rendered with a
    'gray_r'-style colormap (white = low, black = high) -- confirm.
    """
    source_bgr = cv2.imread(image_path)
    if source_bgr is None:
        print(f"❌ 파일을 찾을 수 없습니다: {image_path}")
        return None

    # Mask of red pixels in BGR order: low blue/green, high red. One dilation
    # pass with a 3x3 kernel widens the mask so the marker's edges are covered.
    marker_mask = cv2.dilate(
        cv2.inRange(source_bgr, np.array([0, 0, 150]), np.array([100, 100, 255])),
        np.ones((3,3), np.uint8),
        iterations=1,
    )

    # Fill the masked region from neighbouring pixels.
    restored = cv2.inpaint(source_bgr, marker_mask, inpaintRadius=3, flags=cv2.INPAINT_TELEA)

    # Area interpolation for the downsample, then collapse to one channel.
    small_gray = cv2.cvtColor(
        cv2.resize(restored, target_size, interpolation=cv2.INTER_AREA),
        cv2.COLOR_BGR2GRAY,
    )

    # Invert: black (0) -> 1.0, white (255) -> 0.0
    scores = 1.0 - (small_gray.astype(float) / 255.0)

    # Zero the one-cell border: first/last rows, then first/last columns.
    scores[[0, -1], :] = 0.0
    scores[:, [0, -1]] = 0.0
    return scores


In [6]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import center_of_mass, gaussian_filter

# --- 0. Placeholder input data (assumption) ---
# In the real workflow final_heatmap_image has already been computed, so this
# section can be skipped and the existing variable used directly.
# For testing only: a synthetic heatmap with two separate blobs.
temp_grid = np.zeros((500, 200))
for blob_rows in (slice(50, 150), slice(350, 450)):  # upper blob, lower blob (same size)
    temp_grid[blob_rows, 50:150] = 1
temp_grid = gaussian_filter(temp_grid, sigma=20)  # soften the blob edges
final_heatmap_image = temp_grid / temp_grid.max() * 255
# ----------------------------------


def concentrate_heatmap_to_com(heatmap_img, strength=0.5, smooth_sigma=5):
    """
    Pull a heatmap's mass toward its intensity-weighted center of mass.

    Every pixel's value is moved a fraction ``strength`` of the way to the
    center of mass, values landing on the same pixel are summed, and the
    result is Gaussian-smoothed and rescaled so its maximum is 255.

    :param heatmap_img: Source heatmap (2D numpy array or array-like)
    :param strength: Pull strength (0.0 ~ 1.0); closer to 1.0 collapses toward a single point
    :param smooth_sigma: Sigma of the Gaussian smoothing that fills gaps left by the move
    :return: 2D float array with the input's shape. An all-zero input returns
        all zeros (it has no center of mass).
    """
    heatmap_img = np.asarray(heatmap_img)
    height, width = heatmap_img.shape

    # Accumulate in floating point. Float inputs keep their dtype; integer
    # inputs are promoted so summing and smoothing cannot overflow or truncate.
    if np.issubdtype(heatmap_img.dtype, np.floating):
        work_dtype = heatmap_img.dtype
    else:
        work_dtype = np.float64
    weights = heatmap_img.astype(work_dtype, copy=False)

    # Without any mass the center is undefined (NaN), which would turn into
    # invalid indices below.
    if not weights.any():
        return np.zeros((height, width), dtype=work_dtype)

    # 1. Center of mass: pixel values act as the weights.
    cy, cx = center_of_mass(weights)
    print(f"가중 중심 좌표: (x={cx:.1f}, y={cy:.1f})")

    # 2. Coordinates of every pixel.
    y_indices, x_indices = np.indices((height, width))

    # 3-4. Move each pixel `strength` of the way along its vector to the center.
    new_y = y_indices + (cy - y_indices) * strength
    new_x = x_indices + (cx - x_indices) * strength

    # 5. Clamp to the image and convert to integer indices (truncating).
    new_y = np.clip(new_y, 0, height - 1).astype(int)
    new_x = np.clip(new_x, 0, width - 1).astype(int)

    # 6. Accumulate: np.add.at sums values that land on the same index
    #    instead of overwriting them.
    concentrated_map = np.zeros((height, width), dtype=work_dtype)
    np.add.at(concentrated_map, (new_y.ravel(), new_x.ravel()), weights.ravel())

    # 7. Smooth away the grid artifacts produced by the move.
    final_result = gaussian_filter(concentrated_map, sigma=smooth_sigma)

    # 8. Rescale so the maximum is 255.
    if final_result.max() > 0:
        final_result = final_result / final_result.max() * 255

    return final_result



In [7]:
# Load the verification results, ordered by object and then action.
df_fin = (
    pd.read_pickle('/home/bongo/porter_notebook/research/qwen3/APM_dot_verifying/test_verify_qwen3_32b.pkl')
    .sort_values(['object','action'])
    .reset_index(drop=True)
)
# Rows whose final_dot list is empty fall back to their original dots.
has_no_final_dot = df_fin['final_dot'].map(len) == 0
df_fin.loc[has_no_final_dot, 'final_dot'] = df_fin.loc[has_no_final_dot, 'dots']
df_fin

Unnamed: 0,action,object,filename,dots,veri_result,veri_reason,final_dot
0,cut,apple,apple_000054.jpg,"[[276, 480], [598, 647], [300, 318]]","[Fail, Fail, Fail]","[The query point (276,480) is located on the s...","[[276, 480], [598, 647], [300, 318]]"
1,eat,apple,apple_001541.jpg,"[[500, 460], [350, 400], [700, 450]]","[Pass, Pass, Pass]","[The query point (500,460) lies on the surface...","[[500, 460], [350, 400], [700, 450]]"
2,peel,apple,apple_001541.jpg,"[[500, 400], [350, 350], [700, 350]]","[Pass, Pass, Pass]","[The query point (500,400) lies on the surface...","[[500, 400], [350, 350], [700, 350]]"
3,hit,axe,axe_000961.jpg,"[[150, 250], [380, 150], [620, 580]]","[Pass, Pass, Fail]","[The query point (150,250) lies on the metalli...","[[150, 250], [380, 150]]"
4,hold,axe,axe_001552.jpg,"[[642, 355], [500, 490], [330, 590]]","[Pass, Pass, Fail]","[The query point (642,355) lies on the handle ...","[[642, 355], [500, 490]]"
...,...,...,...,...,...,...,...
116,drink_with,wine_glass,wine_glass_003343.jpg,"[[645, 584], [630, 323], [635, 820]]","[Pass, Pass, Fail]","[The query point (645,584) lies within the bow...","[[645, 584], [630, 323]]"
117,hold,wine_glass,wine_glass_002374.jpg,"[[500, 700], [500, 850], [500, 550]]","[Fail, Fail, Fail]","[The query point (500,700) is located in the b...","[[500, 700], [500, 850], [500, 550]]"
118,pour,wine_glass,wine_glass_000186.jpg,"[[500, 175], [500, 475], [500, 725]]","[Pass, Pass, Fail]","[The query point (500,175) is located within t...","[[500, 175], [500, 475]]"
119,sip,wine_glass,wine_glass_003343.jpg,"[[645, 108], [645, 500], [645, 810]]","[Fail, Pass, Fail]","[The query point (645,108) is located near the...","[[645, 500]]"


In [19]:
import os

# Directory holding the rendered per-dot attention-head images (machine-specific path).
ATTENTION_HEADS_DIR = "/home/bongo/porter_notebook/research/qwen3/APM_dot_verifying/QWEN3_32B_top_attention_heads"
# Number of top-ranked attention heads (rank01 .. rank{use_k}) summed per dot.
use_k = 5

metrics_tracker = MetricsTracker(name="only_ego")
OUTPUT_DIR = "./clipseg_att"
OUTPUT_DIR_PAIRS = os.path.join(OUTPUT_DIR, "pairs")
ORGANIZED_DIR = os.path.join(OUTPUT_DIR, "results")

# Create the parent directory first, then the two sub-directories.
for output_dir in (OUTPUT_DIR, OUTPUT_DIR_PAIRS, ORGANIZED_DIR):
    print(f"디렉토리 생성 시도: {output_dir}")
    os.makedirs(output_dir, exist_ok=True)

print("\n✅ 모든 출력 디렉토리 확인 및 생성 완료.")

total_samples = len(df_fin)
# Second tracker kept from the original cell; nothing below updates it.
metrics_tracker_ego = MetricsTracker(name="only_ego")
print(f"Processing {total_samples} samples...")
print("=" * 50)
for idx, row in df_fin.iterrows():
    object_name = row['object']
    action = row['action']
    filename = row['filename']
    image_path = f"{AGD20K_PATH}/Seen/testset/egocentric/{action}/{object_name}/{filename}"

    # cv2.imread returns None instead of raising, so fail with a clear message.
    original_image = cv2.imread(image_path)
    if original_image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    original_h, original_w, _ = original_image.shape
    # OpenCV loads BGR; matplotlib expects RGB, so keep a converted copy for plotting.
    original_image_rgb = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

    gt_path = get_gt_path(image_path)
    image_name = image_path.split('/')[-1]

    print(f"Action : {action}, Object : {object_name} image_name : {image_name}")

    # --- 1. Sum the per-dot attention maps ---
    final_dots = row['final_dot']
    total_heatmap_sum = np.zeros((original_h, original_w), dtype=np.float32)
    for x, y in final_dots:
        base_pattern = f"{ATTENTION_HEADS_DIR}/{action}/{object_name}/{filename}_{x}_{y}_rank"

        # Collect exactly one file per rank, in rank order (rank01, rank02, ...).
        matching_files = []
        for rank in range(1, use_k + 1):
            matching_files.extend(glob.glob(f"{base_pattern}{rank:02d}*.png"))
        if len(matching_files) != use_k:
            raise Exception(f"에러!! 발견된 파일 목록 (총 {len(matching_files)}개) - {use_k}개가 왜 안되지??:")

        # Reset per dot so a value from the previous dot can never leak in.
        final_array = None
        for file_path in matching_files:
            patch_data = recover_heatmap_data(file_path)
            if patch_data is None:
                raise FileNotFoundError(f"Could not read attention map: {file_path}")
            # Upsample the patch grid to the image size, then sum the maps.
            patch_data_upsampled = cv2.resize(
                patch_data,
                (original_w, original_h),
                interpolation=cv2.INTER_LINEAR
            )
            if final_array is None:
                final_array = patch_data_upsampled
            else:
                final_array = final_array + patch_data_upsampled

        if final_array is None:
            raise Exception(f"에러!! final_array 가 없음!!")

        # Scale each dot's map to a maximum of 1 before adding it, so every
        # dot contributes equally; an all-zero map contributes nothing.
        dot_peak = final_array.max()
        if dot_peak > 0:
            heatmap_normalized = final_array / dot_peak
            total_heatmap_sum += heatmap_normalized

    total_peak = total_heatmap_sum.max()
    if not total_peak > 0:
        raise ValueError(f"Empty attention heatmap for {action}/{object_name}/{filename}")
    final_sum_normalized = total_heatmap_sum / total_peak
    final_sum_normalized = concentrate_heatmap_to_com(final_sum_normalized, strength=0.5, smooth_sigma=3)

    # --- 2. Colour map ---
    # concentrate_heatmap_to_com already returns values scaled to 0-255, so
    # the map is clipped and cast directly; multiplying by 255 again would
    # overflow uint8 and scramble the colours.
    final_sum_255 = np.clip(final_sum_normalized, 0, 255).astype(np.uint8)
    final_sum_color = cv2.applyColorMap(final_sum_255, cv2.COLORMAP_JET)

    # --- 3. Weight the coloured attention map by the CLIPSeg object mask ---
    clip_heatmap = get_clipseg_heatmap(image_path,clip_model,clip_processor,object_name)
    expanded_clip_heatmap = np.expand_dims(clip_heatmap, axis=-1)

    final_heatmap_image = final_sum_color * expanded_clip_heatmap
    fused_peak = final_heatmap_image.max()
    if fused_peak > 0:
        final_heatmap_image = final_heatmap_image / fused_peak * 255

    # --- 4. Save the fused image ---
    pairs_output_filename = f"{object_name}_{action}_{filename.replace('.jpg', '')}_att_overlay.png"
    pair_output_path = os.path.join(OUTPUT_DIR_PAIRS, pairs_output_filename)
    cv2.imwrite(pair_output_path, final_heatmap_image)

    # --- 5. Metrics ---
    gt_map = load_ground_truth(gt_path)
    if gt_map is None:
        raise FileNotFoundError(f"Could not load ground truth: {gt_path}")

    # NOTE(review): the metrics are computed on the concentrated attention map,
    # not on the CLIPSeg-fused image saved above -- confirm this is intended.
    metrics_clipseg  = calculate_metrics(final_sum_normalized, gt_map)
    metrics_tracker.update(metrics_clipseg)
    metrics_tracker.print_metrics(metrics_clipseg, filename)

    metrics_text = f"[{object_name} {action} {filename}]  KLD: {metrics_clipseg['KLD']:.4f} | SIM: {metrics_clipseg['SIM']:.4f} | NSS: {metrics_clipseg['NSS']:.4f}"

    # --- 6. Summary figure: 1x5 panels ---
    fig, ax = plt.subplots(1, 5, figsize=(25, 5))
    fig.suptitle(metrics_text, fontsize=20, fontweight='bold', y=0.98)

    # Panel 0: original image
    ax[0].imshow(original_image_rgb)
    ax[0].set_title('Original Image')
    ax[0].axis('off')

    # Panel 1: concentrated attention map
    ax[1].imshow(final_sum_normalized)
    ax[1].set_title('att map')
    ax[1].axis('off')

    # Panel 2: CLIPSeg heatmap over the image
    ax[2].imshow(original_image_rgb)
    ax[2].imshow(clip_heatmap, cmap='jet', alpha=0.5)
    ax[2].set_title('clip_heatmap')
    ax[2].axis('off')

    # Panel 3: fused result over the image
    final_heatmap_image_rgb = cv2.cvtColor(final_heatmap_image.astype(np.uint8), cv2.COLOR_BGR2RGB)
    ax[3].imshow(original_image_rgb)
    ax[3].imshow(final_heatmap_image_rgb, cmap='gray', alpha=0.5)
    ax[3].set_title('result')
    ax[3].axis('off')

    # Panel 4: ground truth over the image
    ax[4].imshow(original_image_rgb)
    ax[4].imshow(gt_map, cmap='jet', alpha=0.5)
    ax[4].set_title('GT')
    ax[4].axis('off')

    organized_output_filename = f"{object_name}_{action}_{filename.replace('.jpg', '')}.png"
    organized_output_path = os.path.join(ORGANIZED_DIR, organized_output_filename)
    fig.savefig(organized_output_path)
    # Close the figure so figures do not accumulate in memory across the loop.
    plt.close(fig)


디렉토리 생성 시도: ./clipseg_att
디렉토리 생성 시도: ./clipseg_att/pairs
디렉토리 생성 시도: ./clipseg_att/results

✅ 모든 출력 디렉토리 확인 및 생성 완료.
Processing 121 samples...
Action : cut, Object : apple image_name : apple_000054.jpg
가중 중심 좌표: (x=317.5, y=240.7)


  return self.preprocess(images, **kwargs)



Metrics for only_ego apple_000054.jpg:
 only_ego Current - KLD: 4.6495 | SIM: 0.4610 | NSS: 0.4866

Cumulative only_ego  Averages over 1 samples:
Average - KLD: 4.6495 | SIM: 0.4610 | NSS: 0.4866

Action : eat, Object : apple image_name : apple_001541.jpg
가중 중심 좌표: (x=263.3, y=317.6)

Metrics for only_ego apple_001541.jpg:
 only_ego Current - KLD: 2.4263 | SIM: 0.6223 | NSS: 0.9137

Cumulative only_ego  Averages over 2 samples:
Average - KLD: 3.5379 | SIM: 0.5416 | NSS: 0.7001

Action : peel, Object : apple image_name : apple_001541.jpg
가중 중심 좌표: (x=252.9, y=339.6)

Metrics for only_ego apple_001541.jpg:
 only_ego Current - KLD: 2.8983 | SIM: 0.5807 | NSS: 0.8079

Cumulative only_ego  Averages over 3 samples:
Average - KLD: 3.3247 | SIM: 0.5546 | NSS: 0.7360

Action : hit, Object : axe image_name : axe_000961.jpg
가중 중심 좌표: (x=462.0, y=447.3)

Metrics for only_ego axe_000961.jpg:
 only_ego Current - KLD: 3.8864 | SIM: 0.3689 | NSS: 0.8089

Cumulative only_ego  Averages over 4 samples:


In [None]:
# With epsilon    ::: Average - KLD: 1.7037 | SIM: 0.2611 | NSS: 0.7943
# Without epsilon ::: Average - KLD: 1.7630 | SIM: 0.2836 | NSS: 0.7943
# Centered        ::: Average - KLD: 1.7630 | SIM: 0.2836 | NSS: 0.7943