In [None]:
import os
import torch
import random
from PIL import Image
import my_prompt4 as my_prompt
from file_managing import (
    load_selected_samples,
    get_actual_path,
    get_gt_path,
)
from config import AGD20K_PATH, model_name
from VLM_model_dot import QwenVLModel, MetricsTracker
import os
# Process-wide runtime flags, written into the environment before any model is built.
# NOTE(review): torch is already imported above; presumably these variables are
# still honoured because they are read lazily — TODO confirm.
os.environ.update(
    {
        "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True",
        "PYTORCH_ENABLE_SDPA": "1",
    }
)

# Running count of samples whose ground-truth file does not exist on disk.
missing_gt = 0

  from .autonotebook import tqdm as notebook_tqdm


In [4]:


def affordance_grounding(
    model,
    action,
    object_name,
    image_path,
    gt_path,
    exo_path=None,
    exo_type=None,
    failed_heatmap_path=None,
    validation_reason=None,
):
    """
    Run one affordance-grounding query, choosing prompt and model method by mode.

    The mode is selected from the optional arguments:

    * ``exo_path`` is None: ego-only prompt, dispatched to
      ``model.process_image_ego``. ``failed_heatmap_path`` and
      ``validation_reason`` are not used in this mode.
    * ``exo_path`` given and ``failed_heatmap_path`` is None: exo prompt,
      dispatched to ``model.process_image_exo``.
    * ``exo_path`` and ``failed_heatmap_path`` both given: heatmap-aware exo
      prompt (built with ``validation_reason``), dispatched to
      ``model.process_image_exo_with_heatmap``.

    Args:
        model: Object exposing the three ``process_image_*`` methods above.
        action: Action label forwarded to the prompt builder and the model.
        object_name: Object label forwarded to the prompt builder.
        image_path: Path of the image to process.
        gt_path: Path of the ground-truth file, forwarded to the model.
        exo_path: Optional path of an exocentric reference image.
        exo_type: Optional tag forwarded to the model as its last argument.
        failed_heatmap_path: Optional path of a previously failed heatmap.
        validation_reason: Optional text passed to the heatmap-aware prompt.

    Returns:
        Whatever the selected model method returns, unchanged.
    """
    # Mode 1: no exocentric reference available -> ego-only query.
    if exo_path is None:
        ego_prompt = my_prompt.process_image_ego_prompt(action, object_name)
        return model.process_image_ego(image_path, ego_prompt, gt_path, action, exo_type)

    # Mode 2: exocentric reference, first attempt (no failed heatmap to show).
    if failed_heatmap_path is None:
        exo_prompt = my_prompt.process_image_exo_prompt(action, object_name)
        return model.process_image_exo(image_path, exo_prompt, gt_path, exo_path, action, exo_type)

    # Mode 3: exocentric reference plus the failed heatmap as extra context.
    retry_prompt = my_prompt.process_image_exo_with_heatmap_prompt(action, object_name, validation_reason)
    return model.process_image_exo_with_heatmap(
        image_path, retry_prompt, gt_path, exo_path, failed_heatmap_path, action, exo_type
    )

In [5]:
    # Build the vision-language model named by `model_name` from config.
    model = QwenVLModel(model_name=model_name)

    # One metrics tracker per evaluation condition (ego only / ego + exo).
    metrics_tracker_ego = MetricsTracker(name="only_ego")
    metrics_tracker_exo_best = MetricsTracker(name="with_exo_best")

    # The sample list is read from a relative path, i.e. resolved against the
    # current working directory.
    json_path = "selected_samples.json"
    data = load_selected_samples(json_path)

    # Number of entries under the 'selected_samples' key.
    total_samples = len(data['selected_samples'])

    # Announce the run size, followed by a separator rule.
    print(f"Processing {total_samples} samples...")
    print("=" * 50)

💻 사용 디바이스: cuda
🤖 Qwen/Qwen2.5-VL-3B-Instruct 모델 로딩중...


Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.22it/s]


✅ 모델 로딩 완료!
Processing 123 samples...


In [7]:
# Print every selected sample (its key and its info dict) as a quick sanity check.
# NOTE(review): `pair_key` and `sample_info` stay bound to the LAST entry once the
# loop finishes. The per-sample evaluation cell further down reads `sample_info`
# without defining it, so it implicitly depends on this cell having been run.
for pair_key, sample_info in data["selected_samples"].items():
    print(pair_key, sample_info )

jump_skis {'action': 'jump', 'object': 'skis', 'image_path': '${AGD20K_PATH}/Seen/testset/egocentric/jump/skis/skis_002829.jpg'}
jump_skateboard {'action': 'jump', 'object': 'skateboard', 'image_path': '${AGD20K_PATH}/Seen/testset/egocentric/jump/skateboard/skateboard_002387.jpg'}
jump_surfboard {'action': 'jump', 'object': 'surfboard', 'image_path': '${AGD20K_PATH}/Seen/testset/egocentric/jump/surfboard/surfboard_000658.jpg'}
jump_snowboard {'action': 'jump', 'object': 'snowboard', 'image_path': '${AGD20K_PATH}/Seen/testset/egocentric/jump/snowboard/snowboard_001704.jpg'}
peel_carrot {'action': 'peel', 'object': 'carrot', 'image_path': '${AGD20K_PATH}/Seen/testset/egocentric/peel/carrot/carrot_003707.jpg'}
peel_orange {'action': 'peel', 'object': 'orange', 'image_path': '${AGD20K_PATH}/Seen/testset/egocentric/peel/orange/orange_001193.jpg'}
peel_banana {'action': 'peel', 'object': 'banana', 'image_path': '${AGD20K_PATH}/Seen/testset/egocentric/peel/banana/banana_000480.jpg'}
peel_appl

In [11]:

# Evaluate ONE sample under two conditions: ego-only, and ego + an exocentric reference.
# NOTE(review): `sample_info` is not defined in this cell; it is whatever an earlier
# cell left bound. Each run of this cell also calls `update` on both trackers again,
# so re-running it appears to count the same sample several times in the cumulative
# averages — TODO confirm and make the cell idempotent (or loop over all samples).
action = sample_info["action"]
object_name = sample_info["object"]

image_path = get_actual_path(sample_info["image_path"])
gt_path = get_gt_path(image_path)
# File name only; computed once and reused by every log line below.
image_name = os.path.basename(image_path)
print(f"Action : {action}, Object : {object_name} image_name : {image_name}")

# NOTE(review): hard-coded exocentric image — looks like a debugging placeholder.
# TODO replace with the real per-(action, object) exo selection.
exo_best_path = "dogs.jpg"
if exo_best_path is None:
    print(f"NO SEEN DATA SET : {action}/{object_name}")

# --- Condition 1: ego image only ---
results_ego = affordance_grounding(model, action, object_name, image_path, gt_path)
metrics_ego = results_ego['metrics']
if metrics_ego:
    # Fold this sample into the running averages and print them.
    metrics_tracker_ego.update(metrics_ego)
    metrics_tracker_ego.print_metrics(metrics_ego, image_name)

# --- Condition 2: ego image + selected exocentric reference ---
# Only run when an exo image exists. With exo_path=None, affordance_grounding takes
# its ego-only branch, and that result would be recorded under the "with_exo_best"
# tracker, contaminating the exo metrics.
if exo_best_path is not None:
    results_exo_best = affordance_grounding(
        model, action, object_name, image_path, gt_path, exo_best_path, "selected_exo"
    )
    metrics_exo_best = results_exo_best['metrics']

    if metrics_exo_best:
        metrics_tracker_exo_best.update(metrics_exo_best)
        metrics_tracker_exo_best.print_metrics(metrics_exo_best, image_name)

# Count missing GT files
if not os.path.exists(gt_path):
    missing_gt += 1

print("*** End  ", "*"*150)
print("\n\n")

Action : swing, Object : badminton_racket image_name : badminton_racket_003649.jpg

Metrics for only_ego badminton_racket_003649.jpg:
 only_ego Current - KLD: 1.9615 | SIM: 0.3357 | NSS: 1.5040

Cumulative only_ego  Averages over 3 samples:
Average - KLD: 1.9615 | SIM: 0.3357 | NSS: 1.5040


Metrics for with_exo_best badminton_racket_003649.jpg:
 with_exo_best Current - KLD: 9.7612 | SIM: 0.0045 | NSS: -0.3639

Cumulative with_exo_best  Averages over 3 samples:
Average - KLD: 9.7612 | SIM: 0.0045 | NSS: -0.3639

*** End   ******************************************************************************************************************************************************



