In [1]:
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
from image_processor import ImageProcessor
from model_handler import LlavaNextHandler
from evaluation import  get_evaluation

# ---- Run configuration: everything a reader might want to tune ----
MODEL_PATH = "/llava-hf/llava-v1.6-mistral-7b-hf"
DATASET_PATH = "/home/baojiedama/ROPE"
DATA_TYPE = "validation"  # 'train' or 'validation'
SETTING = "default"       # one of "default", "single", "student-forcing", "teacher-forcing"
# The annotation file is read from inside the split directory
# (<DATASET_PATH>/<DATA_TYPE>/merged_mixed_data.json), so DATA_TYPE must be
# part of the path; joining DATASET_PATH with the file name alone points at
# the dataset root instead.
DATA_JSON = os.path.join(DATASET_PATH, DATA_TYPE, "merged_mixed_data.json")

# Build the model wrapper from the checkpoint at MODEL_PATH, requesting the
# "cuda" device map.
model_handler = LlavaNextHandler(
    model_name="llavanext",
    model_size="7b",
    model_path=MODEL_PATH,
    device_map="cuda"
)
# Image pre-processing helper; later cells call its preprocess_image method.
image_processor = ImageProcessor()

# Evaluation class: get_evaluation returns the class matching SETTING, which
# is then instantiated with the model wrapper, the image processor, the
# dataset root and the split name.
EvalClass = get_evaluation(SETTING)
evaluator = EvalClass(model_handler, image_processor, DATASET_PATH, DATA_TYPE)

  warn(


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [5]:
import json
from PIL import Image
# Annotation file of the selected split. It is derived from DATASET_PATH and
# DATA_TYPE (set in the configuration cell) rather than repeating absolute
# paths here, so this cell can no longer silently override the configuration.
DATA_JSON = os.path.join(DATASET_PATH, DATA_TYPE, "merged_mixed_data.json")

# 1. Read the annotations and take a single entry for a smoke test.
with open(DATA_JSON, "r", encoding="utf-8") as f:
    dataset = json.load(f)

# Fail with a clear message instead of a bare IndexError/StopIteration
# when the file holds no entries.
if not dataset:
    raise ValueError(f"No entries found in {DATA_JSON}")

# The file is either a list of entries or a dict whose values are lists of
# entries; in both cases take the very first entry.
if isinstance(dataset, list):
    first_entry = dataset[0]
else:
    first_entry = next(iter(dataset.values()))[0]
print("first_entry['folder']:", first_entry["folder"])
# 2. Build a real prompt for this entry.
prompt = evaluator._generate_prompt(first_entry.get("data_source", ""))

# 3. Load the image.
# Strip every leading slash: os.path.join discards DATASET_PATH when the
# second component is absolute, so the folder must be relative.
folder = first_entry["folder"].lstrip("/")
image_path = os.path.join(DATASET_PATH, folder)
# Swap only the file extension from .jpg to .png. A plain str.replace would
# also rewrite "jpg" inside directory names or the dataset root.
# NOTE(review): presumably the images on disk are stored as PNG - confirm.
path_stem, path_ext = os.path.splitext(image_path)
if path_ext == ".jpg":
    image_path = path_stem + ".png"
print("Image path:", image_path)
image = Image.open(image_path)
processed_image = image_processor.preprocess_image(image)

# 4. Call the model and print its output.
# NOTE(review): in the recorded run of this cell the prompt was printed and
# then a ValueError about tokenizer.chat_template not being set was raised,
# presumably from inside generate_response - confirm; nothing below produced
# output in that run.
print("PROMPT:", prompt)
output = model_handler.generate_response(prompt, processed_image)
print("MODEL OUTPUT:", output)

# 5. If there is post-processing (e.g. segmentation / regex), print that too.
# Best-effort: any failure in this step is reported rather than raised.
try:
    segmented = evaluator.segment_classes(output)
    print("SEGMENTED CLASSES:", segmented)
except Exception as e:
    print("SEGMENT FAILED:", e)


first_entry['folder']: /validation/image/mixed-images/bbox/ADE/ADE_val_00000285.png
Image path: /home/baojiedama/ROPE/validation/image/mixed-images/bbox/ADE/ADE_val_00000285.png
PROMPT: Given the classes: plant, window, glass, windshield, vase, mirror, tree, ceiling, cabinet, rock, person, bag, chair, door, light, food, arm, base, bottle, brand, grass, box, pole, license plate, curtain, plate, mountain, table, head, building, balcony, shelf, pillow, column, shutter, flowerpot, leg, apron, sign, picture, cushion, flower, drawer, wheel, roof, book, price tag, car, rim, handle. There are five red bounding boxes in this image. For each object within the red bounding boxes, identify its class from the list. Provide the class names in the format: 'obj1: <class1>, obj2: <class2>, obj3: <class3>, obj4: <class4>, obj5: <class5>', with no additional words or punctuation. For example: obj1: class, obj2: class, obj3: class, obj4: class, obj5: class. Replace class with the actual names of the class

ValueError: Cannot use chat template functions because tokenizer.chat_template is not set and no template argument was passed! For information about writing templates and setting the tokenizer.chat_template attribute, please see the documentation at https://huggingface.co/docs/transformers/main/en/chat_templating

In [None]:
# ========== Load data ==========
import json

# Parse the whole annotation file referenced by DATA_JSON.
with open(DATA_JSON, "r") as json_file:
    dataset = json.load(json_file)

# Accumulators handed to the evaluator in the evaluation loop:
# a mapping for per-image answers and a list of five zero-initialised counters.
photo2answer = dict()
acc_list = [0 for _ in range(5)]

In [None]:
# ========== Main evaluation loop ==========
import pprint

# Every entry is passed to the evaluator together with the two accumulators.
# NOTE(review): presumably process_entry updates acc_list and photo2answer
# in place - confirm against the evaluator implementation.
for entry in dataset:
    evaluator.process_entry(entry, acc_list, photo2answer)

# ========== Report metrics ==========
metrics = evaluator.calculate_metrics(acc_list)
pprint.pprint(metrics)

# ========== Save results ==========
# Metrics first, then the per-image predictions, each as indented JSON.
for out_name, payload in (
    ("moh_llava_results.json", metrics),
    ("moh_llava_pred.json", photo2answer),
):
    with open(out_name, "w") as out_file:
        json.dump(payload, out_file, indent=2)