In [None]:
from pathlib import Path
import sys
sys.path.append("..")

from unimemrag.main import (
    RagPipelineConfig,
    create_retriever,
    load_viquae_dataset,
    index_text_corpus,
    index_image_corpus,
    build_rag_bundle,
    evaluate_bundle_with_model,
)


# Pipeline settings for this run: the collection name, the MMLongBench NIAH
# counting-image file to load, the image root folder, and the top_k value that
# is later forwarded to build_rag_bundle.
# NOTE(review): both paths are relative, so they resolve against the kernel's
# working directory — confirm the notebook is launched from the expected folder.
cfg = RagPipelineConfig(
    collection_name="mmlongbench_vrag",
    dataset_file="benchmark/MMLongBench/mmlb_data/NIAH/counting-image_test_K8_dep3.jsonl",
    image_root="benchmark/MMLongBench/mmlb_image",
    top_k=5,
)

In [None]:
import os

# Drop any inherited HTTP(S) proxy settings so requests to local services are
# sent directly. Both lower- and upper-case spellings are cleared because HTTP
# clients (urllib, requests) honour either one; clearing only the lower-case
# names would leave an exported HTTP_PROXY / HTTPS_PROXY in effect.
for _proxy_var in ("http_proxy", "https_proxy", "HTTP_PROXY", "HTTPS_PROXY"):
    os.environ.pop(_proxy_var, None)

# Set the bypass list under both spellings too, so a pre-existing lower-case
# `no_proxy` with a different value cannot take precedence over this one.
os.environ["NO_PROXY"] = "localhost,127.0.0.1"
os.environ["no_proxy"] = "localhost,127.0.0.1"

# create_retriever returns a (retriever, embedding model, store) triple built from cfg.
retriever, embed_model, store = create_retriever(cfg)

In [None]:
# Load a small sample (10 records) of the dataset file named in cfg.
dataset = load_viquae_dataset(
    cfg.dataset_file,
    max_samples=10,
)

# Index the text entries of the loaded samples into the store.
index_text_corpus(
    dataset,
    embedder=embed_model,
    store=store,
    batch_size=cfg.text_batch_size,
)

# Index the image entries; image paths are resolved relative to cfg.image_root.
# NOTE(review): resolution relative to image_root is presumed from the argument
# name — confirm in index_image_corpus.
index_image_corpus(
    dataset,
    embedder=embed_model,
    store=store,
    image_root=Path(cfg.image_root),
)

In [10]:
# Display the first loaded sample to check which fields each record carries.
dataset[0]

{'id': 'mm-niah-0',
 'question': 'Please help me collect the number of this owl: \n<image>\n in each image in the above document, for example: [x, x, x...]. The summation is not required, and the numbers in [x, x, x...] represent the counted number of the given owl in each image. Only output the results in JSON format without any explanation.',
 'answer': [1],
 'positive_ctxs': [{'type': 'image',
   'text': '<image>',
   'image': 'obelics_paste/count-image/c962d22ba9594882b0d38cfbee6c836f7c046c68baa7eebf41fe1c2a90d1921b_1_owl.jpg',
   'len': 488,
   'has_ans': True,
   'nid': 0}],
 'ctxs': [{'type': 'text',
   'text': 'Tesa Green soaks in the satisfaction of a 100-mile victory at the finish line. Photo: Kristi Mayo/Mile 90 Photography',
   'len': 37,
   'has_ans': None,
   'nid': None},
  {'type': 'text',
   'text': 'Morris won his second Hawk Hundred at 2:16 a.m., followed by a trickle of 24 other runners throughout the night and into the next day. Kopp, who led for much of the race, 

In [None]:
# Build the RAG bundle from the loaded samples, using the retriever created
# above and the top_k / image_root values from cfg.
bundle = build_rag_bundle(
    dataset,
    retriever=retriever,
    image_root=Path(cfg.image_root),
    top_k=cfg.top_k,
)

In [None]:
# Number of bundled examples. `bundle` is indexed by key elsewhere in this
# notebook (bundle["data"][0]), so if it is a plain dict, len(bundle) would
# report the number of top-level keys rather than the number of examples.
len(bundle["data"])

In [9]:
# Display the first bundled example.
# NOTE(review): the recorded output shows an empty 'image_list' and the string
# 'None' for the retrieved doc id and title — confirm that image retrieval and
# document ids/titles are populated as intended for this dataset.
bundle["data"][0]

{'id': 'mm-niah-0',
 'context': 'Document (Title: None): Tesa Green soaks in the satisfaction of a 100-mile victory at the finish line. Photo: Kristi Mayo/Mile 90 Photography',
 'question': 'Please help me collect the number of this owl: \n<image>\n in each image in the above document, for example: [x, x, x...]. The summation is not required, and the numbers in [x, x, x...] represent the counted number of the given owl in each image. Only output the results in JSON format without any explanation.',
 'answer': [1],
 'image_list': [],
 'retrieved_doc_ids': ['None'],
 'retrieved_scores': [0.4223017015230683],
 'retrieved_titles': ['None'],
 'original_image': None,
 'original_question': None}

In [None]:
from argparse import Namespace
from MMLongBench.vlm_model import load_LLM


# load_LLM is given an argparse-style namespace, so every option is spelled out
# by hand here, one per line.
# NOTE(review): do_sample=False is set alongside temperature/top_p; the sampling
# values are presumably ignored in that case — confirm in load_LLM.
args = Namespace(
    # Checkpoint location (relative to the kernel's working directory).
    model_name_or_path="../../ckpts/Qwen2.5-VL-7B-Instruct",
    # Decoding parameters.
    temperature=0.7,
    top_p=0.9,
    input_max_length=32768,
    generation_max_length=2048,
    generation_min_length=0,
    do_sample=False,
    stop_newline=False,
    use_chat_template=False,
    # Model loading / runtime switches.
    no_torch_compile=False,
    no_bf16=False,
    load_in_8bit=False,
    rope_theta=None,
    use_yarn=False,
    offload_state_dict=False,
    do_prefill=False,
    attn_implementation=None,
    # Image handling options (left unset).
    image_resize=None,
    max_image_num=None,
    max_image_size=None,
    # API-related options.
    api_sleep=None,
    image_detail="auto",
)

# Instantiate the model described by the namespace above.
model = load_LLM(args)

In [None]:
# Evaluate the loaded model on the bundle and keep the resulting report.
# NOTE(review): the dataset was loaded with max_samples=10, so max_examples=20
# presumably just caps at whatever is available — confirm.
report = evaluate_bundle_with_model(
    model,
    bundle,
    num_workers=16,
    max_examples=20,
)

In [None]:
# Display a single entry (index 7) from the report's "results" list.
report["results"][7]

In [None]:
# Display the "ctxs" entries of the loaded sample at index 6.
dataset[6]["ctxs"]