In [1]:
import os
import torch
import random
from PIL import Image
import my_prompt5 as my_prompt
from file_managing import (
    load_selected_samples,
    get_actual_path,
    get_gt_path,
)
from config import AGD20K_PATH, model_name
from VLM_model_dot import QwenVLModel, MetricsTracker
import os
# PyTorch-related settings exported through environment variables.
# NOTE(review): these are assigned after `import torch` above; confirm they are
# still picked up, or move them ahead of the torch import to be safe.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
# NOTE(review): verify that the installed PyTorch actually reads PYTORCH_ENABLE_SDPA.
os.environ["PYTORCH_ENABLE_SDPA"] = "1"

# Notebook-wide counter; the evaluation loops increment it when a GT path does not exist.
missing_gt = 0

  from .autonotebook import tqdm as notebook_tqdm


In [2]:


def affordance_grounding(model, action, object_name, image_path, gt_path, exo_path=None,  failed_heatmap_path=None, validation_reason=None):
    """
    Run a single affordance-grounding query through the given model.

    Logs the inputs (including whether the image and GT files exist on disk),
    builds the prompt for the requested mode and forwards everything to the model.

    Args:
        model: object exposing ``process_image_ego`` and ``process_image_exo``.
        action: action label passed to the prompt builder and to the model.
        object_name: object label passed to the prompt builder.
        image_path: path of the image to process.
        gt_path: path of the ground-truth file handed to the model.
        exo_path: optional reference image path; when given, the exo prompt and
            ``process_image_exo`` are used, otherwise the ego-only variants.
        failed_heatmap_path: accepted but not used by this function.
        validation_reason: accepted but not used by this function.

    Returns:
        Whatever the selected ``model.process_image_*`` call returns.
    """
    print(f"Processing image: Action: {action}, Object: {object_name}, Image path: {image_path}, GT path: {gt_path}, Image exists: {os.path.exists(image_path)}, GT exists: {os.path.exists(gt_path)}")

    # Reference image supplied: use the exo prompt/model path and return right away.
    if exo_path is not None:
        exo_prompt = my_prompt.process_image_exo_prompt(action, object_name)
        return model.process_image_exo(image_path, exo_prompt, gt_path, exo_path, action)

    # No reference image: plain ego-only query.
    ego_prompt = my_prompt.process_image_ego_prompt(action, object_name)
    return model.process_image_ego(image_path, ego_prompt, gt_path, action)

In [3]:
    # Build the Qwen VL model wrapper and the two MetricsTracker instances
    # (named "only_ego" and "with_exo_best").
    model = QwenVLModel(model_name=model_name)
    metrics_tracker_ego = MetricsTracker(name="only_ego")
    metrics_tracker_exo_best = MetricsTracker(name="with_exo_best")

    # Sample manifest; a bare filename, so it is looked up relative to the working directory.
    json_path = "selected_samples.json"
    data = load_selected_samples(json_path)

    # Number of entries stored under the 'selected_samples' key.
    total_samples = len(data['selected_samples'])

    # Announce the run size, followed by a separator rule on the next line.
    print(f"Processing {total_samples} samples...", "=" * 50, sep="\n")

💻 사용 디바이스: cuda
🤖 Qwen/Qwen2.5-VL-32B-Instruct 모델 로딩중...


Loading checkpoint shards: 100%|██████████| 18/18 [00:08<00:00,  2.12it/s]

✅ 모델 로딩 완료!
Processing 123 samples...





In [8]:
# Locate the selected sample for the jump/skis pair and expose it as `sample_info`.
for pair_key, sample_info in data["selected_samples"].items():
    if sample_info["object"] == "skis" and sample_info["action"] == "jump":
        print(pair_key, sample_info)
        break
else:
    # Without this guard a failed search leaves `sample_info` bound to the last
    # (unrelated) entry, and every later cell would silently evaluate the wrong sample.
    raise LookupError("no selected sample with action='jump' and object='skis'")

# Labels of the matched sample, reused by the following cells.
action = sample_info["action"]
object_name = sample_info["object"]


jump_skis {'action': 'jump', 'object': 'skis', 'image_path': '${AGD20K_PATH}/Seen/testset/egocentric/jump/skis/skis_002829.jpg'}


In [5]:

# Resolve the currently selected sample: labels, image path and ground-truth path.
action, object_name = sample_info["action"], sample_info["object"]
image_path = get_actual_path(sample_info["image_path"])
gt_path = get_gt_path(image_path)
print(f"Action : {action}, Object : {object_name} image_name : {image_path.split('/')[-1]}")

# Ego-only inference with the "w_pred" prompt variant.
prompt = my_prompt.process_image_ego_prompt_w_pred(action, object_name)
results = model.process_image_ego(image_path, prompt, gt_path, action)

# Bare last expression so the notebook renders the returned result.
results



Action : jump, Object : skis image_name : skis_002829.jpg


qwen ego Results!! : [
    [150, 700, 0.9],  // Midpoint of the ski for balance and control
    [180, 1200, 0.8], // Tip of the ski for directional control during takeoff
    [120, 1200, 0.8]  // Tail of the ski for stability and landing
]
No dot coordinates found, trying to parse as bounding boxes...
text : [
    [150, 700, 0.9],  // Midpoint of the ski for balance and control
    [180, 1200, 0.8], // Tip of the ski for directional control during takeoff
    [120, 1200, 0.8]  // Tail of the ski for stability and landing
]
final points :[]
parsed dots!!! : []


{'text_result': '[\n    [150, 700, 0.9],  // Midpoint of the ski for balance and control\n    [180, 1200, 0.8], // Tip of the ski for directional control during takeoff\n    [120, 1200, 0.8]  // Tail of the ski for stability and landing\n]',
 'dots': [],
 'dot_image_path': '/root/qwen_AG_new/dot_images/only_ego/skis_002829_jump.jpg',
 'dot_only_image_path': '/root/qwen_AG_new/dot_images/dots_only/skis_002829_jump_dots.jpg',
 'heatmap_image_path': '/root/qwen_AG_new/dot_images/heatmaps/skis_002829_jump_heatmap.jpg',
 'heatmap_tensor': tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]),
 'metrics': None}

In [5]:
# Whitelist of exocentric jump/skis image filenames; the evaluation loop skips
# any globbed file whose basename is not listed here.
my_list = [
    "jump_skis_005545.jpg",
    "jump_skis_013945.jpg",
    "jump_skis_004492.jpg",
    "jump_skis_005088.jpg",
    "jump_skis_012922.jpg",
    "jump_skis_009285.jpg",
    "jump_skis_012324.jpg",
    "jump_skis_003753.jpg",
    "jump_skis_012808.jpg",
    "jump_skis_004900.jpg",
    "jump_skis_002172.jpg",
    "jump_skis_004884.jpg",
    "jump_skis_008520.jpg",
    "jump_skis_000754.jpg",
    "jump_skis_000818.jpg",
    "jump_skis_002040.jpg",
    "jump_skis_002294.jpg",
    "jump_skis_002377.jpg",
    "jump_skis_004530.jpg",
    "jump_skis_004916.jpg",
    "jump_skis_006213.jpg",
    "jump_skis_006389.jpg",
    "jump_skis_007326.jpg",
    "jump_skis_007740.jpg",
    "jump_skis_008793.jpg",
    "jump_skis_011708.jpg",
    "jump_skis_011720.jpg",
    "jump_skis_011869.jpg",
    "jump_skis_012044.jpg",
    "jump_skis_013063.jpg",
]

In [9]:

import glob
import os

# Directory holding the exocentric training images for the current action/object pair.
# NOTE(review): the dataset root is hard-coded here although AGD20K_PATH is imported
# from config — confirm they agree before switching to the constant.
exo_url = f"/root/AGD20K/Seen/trainset/exocentric/{action}/{object_name}"
search_pattern = os.path.join(exo_url, "*.jpg")

# Collect every file path matching the pattern. glob returns entries in a
# filesystem-dependent order, so sort them to make the evaluation order (and the
# preview below) reproducible across runs and machines.
jpg_files = sorted(glob.glob(search_pattern))
print(len(jpg_files))
jpg_files[:2]

140


['/root/AGD20K/Seen/trainset/exocentric/jump/skis/jump_skis_000084.jpg',
 '/root/AGD20K/Seen/trainset/exocentric/jump/skis/jump_skis_000397.jpg']

In [11]:
# Evaluate the selected ego image against each whitelisted exocentric reference.
# The ego sample is the same for every reference, so resolve it once up front
# instead of on every iteration.
action = sample_info["action"]
object_name = sample_info["object"]
image_path = get_actual_path(sample_info["image_path"])
gt_path = get_gt_path(image_path)

for exo_img_path in jpg_files:
    exo_best_path = exo_img_path
    exo_filename = os.path.basename(exo_best_path)

    # Filter first so skipped files produce no log output. (The former
    # `exo_best_path is None` check was unreachable: basename() above would
    # already have raised on None.)
    if exo_filename not in my_list:
        continue

    print(f"Action : {action}, Object : {object_name} image_name : {image_path.split('/')[-1]}")

    # Query the model with the exocentric reference image.
    results_exo_best = affordance_grounding(model, action, object_name, image_path, gt_path, exo_best_path)
    metrics_exo_best = results_exo_best['metrics']

    # Only record and print metrics when the call produced any.
    if metrics_exo_best:
        metrics_tracker_exo_best.update(metrics_exo_best)
        metrics_tracker_exo_best.print_metrics(metrics_exo_best, image_path.split('/')[-1])

    # Count missing GT files (once per processed reference image).
    if not os.path.exists(gt_path):
        missing_gt += 1

    print("*** End  ", "*"*150)
    print("\n\n")

Action : jump, Object : skis image_name : skis_002829.jpg
Action : jump, Object : skis image_name : skis_002829.jpg
Action : jump, Object : skis image_name : skis_002829.jpg
Action : jump, Object : skis image_name : skis_002829.jpg
Action : jump, Object : skis image_name : skis_002829.jpg
Action : jump, Object : skis image_name : skis_002829.jpg
Action : jump, Object : skis image_name : skis_002829.jpg
Action : jump, Object : skis image_name : skis_002829.jpg
Processing image: Action: jump, Object: skis, Image path: /root/AGD20K/Seen/testset/egocentric/jump/skis/skis_002829.jpg, GT path: /root/AGD20K/Seen/testset/GT/jump/skis/skis_002829.png, Image exists: True, GT exists: True
exo file name : jump_skis_000754.jpg / exo_path


final points :[[140, 100], [150, 700], [150, 1200], [380, 100], [390, 700], [390, 1200]]

Metrics for with_exo_best skis_002829.jpg:
 with_exo_best Current - KLD: 1.3951 | SIM: 0.2941 | NSS: 1.2060

Cumulative with_exo_best  Averages over 8 samples:
Average - KLD: 1.5075 | SIM: 0.2755 | NSS: 1.0650

*** End   ******************************************************************************************************************************************************



Action : jump, Object : skis image_name : skis_002829.jpg
Processing image: Action: jump, Object: skis, Image path: /root/AGD20K/Seen/testset/egocentric/jump/skis/skis_002829.jpg, GT path: /root/AGD20K/Seen/testset/GT/jump/skis/skis_002829.png, Image exists: True, GT exists: True
exo file name : jump_skis_000818.jpg / exo_path
final points :[[150, 100], [150, 600], [150, 1100], [390, 100], [390, 600], [390, 1100]]

Metrics for with_exo_best skis_002829.jpg:
 with_exo_best Current - KLD: 1.8208 | SIM: 0.2245 | NSS: 0.6445

Cumulat

In [12]:
import glob
import os

# Action/object pair evaluated by the following cells.
action = "push"
object_name = "bicycle"

# Directory holding the exocentric training images for that pair.
# NOTE(review): the dataset root is hard-coded here although AGD20K_PATH is imported
# from config — confirm they agree before switching to the constant.
exo_url = f"/root/AGD20K/Seen/trainset/exocentric/{action}/{object_name}"
search_pattern = os.path.join(exo_url, "*.jpg")

# Collect every file path matching the pattern. glob returns entries in a
# filesystem-dependent order, so sort them to make the evaluation order (and the
# preview below) reproducible across runs and machines.
jpg_files = sorted(glob.glob(search_pattern))
print(len(jpg_files))
jpg_files[:2]

59


['/root/AGD20K/Seen/trainset/exocentric/push/bicycle/push_bicycle_000800.jpg',
 '/root/AGD20K/Seen/trainset/exocentric/push/bicycle/push_bicycle_000820.jpg']

In [13]:
# Whitelist of exocentric push/bicycle image filenames, built from their numeric
# ids (zero-padded to six digits); order matches the original hand-written list.
my_list = [
    f"push_bicycle_{image_id:06d}.jpg"
    for image_id in (
        820, 1280, 1798, 1822, 3570, 4366,
        4740, 4885, 5388, 6220, 6504, 6549,
        6768, 7122, 7511, 7828, 8083, 8897,
        8917, 9558, 10074, 10251, 10286, 10959,
        11146, 11700, 12603, 12840, 13392, 13710,
    )
]

In [14]:
# Debug inspection: shows the value `exo_img_path` was left with by the most
# recently executed `for exo_img_path in jpg_files` loop.
exo_img_path

'/root/AGD20K/Seen/trainset/exocentric/jump/skis/jump_skis_013945.jpg'

In [15]:
# Locate the selected sample for the push/bicycle pair and expose it as `sample_info`.
for pair_key, sample_info in data["selected_samples"].items():
    if sample_info["object"] == "bicycle" and sample_info["action"] == "push":
        print(pair_key, sample_info)
        break
else:
    # Without this guard a failed search leaves `sample_info` bound to the last
    # (unrelated) entry, and the evaluation loop would silently use the wrong sample.
    raise LookupError("no selected sample with action='push' and object='bicycle'")

push_bicycle {'action': 'push', 'object': 'bicycle', 'image_path': '${AGD20K_PATH}/Seen/testset/egocentric/push/bicycle/bicycle_002432.jpg'}


In [16]:
# Evaluate the selected ego image against each whitelisted exocentric reference.
# The ego sample is the same for every reference, so resolve it once up front
# instead of on every iteration.
action = "push"
object_name = "bicycle"
image_path = get_actual_path(sample_info["image_path"])
gt_path = get_gt_path(image_path)

for exo_img_path in jpg_files:
    exo_best_path = exo_img_path
    exo_filename = os.path.basename(exo_best_path)

    # Filter first so skipped files produce no log output. (The former
    # `exo_best_path is None` check was unreachable: basename() above would
    # already have raised on None.)
    if exo_filename not in my_list:
        continue

    print(f"Action : {action}, Object : {object_name} image_name : {image_path.split('/')[-1]}")

    # Query the model with the exocentric reference image.
    results_exo_best = affordance_grounding(model, action, object_name, image_path, gt_path, exo_best_path)
    metrics_exo_best = results_exo_best['metrics']

    # Only record and print metrics when the call produced any.
    if metrics_exo_best:
        metrics_tracker_exo_best.update(metrics_exo_best)
        metrics_tracker_exo_best.print_metrics(metrics_exo_best, image_path.split('/')[-1])

    # Count missing GT files (once per processed reference image).
    if not os.path.exists(gt_path):
        missing_gt += 1

    print("*** End  ", "*"*150)
    print("\n\n")

Action : push, Object : bicycle image_name : bicycle_002432.jpg
Action : push, Object : bicycle image_name : bicycle_002432.jpg
Processing image: Action: push, Object: bicycle, Image path: /root/AGD20K/Seen/testset/egocentric/push/bicycle/bicycle_002432.jpg, GT path: /root/AGD20K/Seen/testset/GT/push/bicycle/bicycle_002432.png, Image exists: True, GT exists: True
exo file name : push_bicycle_000820.jpg / exo_path


final points :[[430, 35], [240, 260], [470, 350]]

Metrics for with_exo_best bicycle_002432.jpg:
 with_exo_best Current - KLD: 2.0108 | SIM: 0.2434 | NSS: 1.1542

Cumulative with_exo_best  Averages over 38 samples:
Average - KLD: 1.6999 | SIM: 0.2730 | NSS: 1.0537

*** End   ******************************************************************************************************************************************************



Action : push, Object : bicycle image_name : bicycle_002432.jpg
Action : push, Object : bicycle image_name : bicycle_002432.jpg
Processing image: Action: push, Object: bicycle, Image path: /root/AGD20K/Seen/testset/egocentric/push/bicycle/bicycle_002432.jpg, GT path: /root/AGD20K/Seen/testset/GT/push/bicycle/bicycle_002432.png, Image exists: True, GT exists: True
exo file name : push_bicycle_001280.jpg / exo_path
final points :[[405, 78], [240, 260], [45, 290]]

Metrics for with_exo_best bicycle_002432.jpg:
 with_exo_best Current - KLD: 1.7954 | SIM: 0.2415 | NSS:

In [17]:
# Scratch arithmetic expression; it does not use any notebook state.
3+5

8