In [None]:
# Remove the lowercase proxy variables and exempt localhost before anything else is
# imported (presumably so the local vector store is reached directly — confirm).
# Only the lowercase names are removed; uppercase HTTP_PROXY/HTTPS_PROXY are left untouched.
import os
os.environ.pop("http_proxy", None)
os.environ.pop("https_proxy", None)
os.environ["NO_PROXY"] = "localhost,127.0.0.1"

# Add the parent directory to the import path (presumably where `config` and
# `unimemrag` live — confirm). Re-running the cell appends the entry again.
import sys
sys.path.append("..")

from config import Config
from unimemrag.retriever import ClipEmbedding
from unimemrag.memory_forest.memory_forest import MemoryForestStore

# Collection config, CLIP embedder loaded from a local checkpoint directory, and the
# forest store sized from the embedder's reported dimension.
cfg = Config(collection="memtree")
embed_model = ClipEmbedding(model_name="../../ckpts/clip-vit-base-patch32")
memforest_store = MemoryForestStore(cfg, vector_size=embed_model.dim)

In [None]:
# Proxy reset repeated from the first cell (presumably so this cell also works when
# run on its own — confirm). Must stay ahead of the imports below.
import os
os.environ.pop("http_proxy", None)
os.environ.pop("https_proxy", None)
os.environ["NO_PROXY"] = "localhost,127.0.0.1"
import json
from pathlib import Path
from itertools import islice
from tqdm.auto import tqdm
# The sys.path tweak is disabled here, so the `unimemrag` imports below rely on the
# path entry added by an earlier cell (or on the package being installed).
# import sys
# sys.path.append("..")
from unimemrag.memory_forest.memory_forest import build_tree, iter_wiki_dict
from unimemrag.utils.image_cache import download_images_for_kb, load_image_cache, replace_payload_image_urls, save_image_cache

# Knowledge base: one JSON document read fully into memory.
KB_PATH = Path("../../benchmark/infoseek/wiki_text/wiki_100_dict_v4.json")
KB = json.loads(KB_PATH.read_text(encoding="utf-8"))

# Image cache: a directory plus a JSON index file stored inside it.
IMAGE_CACHE_DIR = Path('../../benchmark/infoseek/wiki_text/images_100k')
IMAGE_CACHE_INDEX = IMAGE_CACHE_DIR / "image_cache_index.json"

# Reuse the saved index when it is non-empty; otherwise download the images and
# write a fresh index.
image_cache = load_image_cache(IMAGE_CACHE_INDEX)
if image_cache:
    print(f"Loaded {len(image_cache)} cached entries from {IMAGE_CACHE_INDEX}")
else:
    image_cache = download_images_for_kb(KB, IMAGE_CACHE_DIR, max_workers=64, resume=True)
    save_image_cache(image_cache, IMAGE_CACHE_INDEX)

def localize_payload(payload):
    """Run ``replace_payload_image_urls`` on a shallow copy of ``payload``.

    The copy is made with ``dict(payload)`` so the caller's top-level mapping is
    not the object handed to the helper. The module-level ``image_cache`` is
    passed as the lookup (presumably mapping remote URLs to cached files —
    confirm against ``unimemrag.utils.image_cache``).
    """
    payload_copy = dict(payload)
    return replace_payload_image_urls(payload_copy, image_cache)


# Walk the knowledge base behind a progress bar; the stream is capped at len(KB) items.
total = len(KB)
iterator = tqdm(
    islice(iter_wiki_dict(KB), total),
    total=total,
    desc="Building Trees",
)

# One tree per (wiki_url, payload) pair, built from the localized payload.
trees = []
for wiki_url, payload in iterator:
    payload = localize_payload(payload)
    trees.append(build_tree(wiki_url, payload))

In [None]:
# Ingest the trees built above into the store, embedding with `embed_model`.
# Tuning knobs are gathered in one mapping so they can be read at a glance.
ingest_options = {
    "batch_size": 256,
    "text_workers": 16,
    "image_workers": 16,
    "alpha": 0.1,
    "show_progress": True,
}
memforest_store.ingest_trees(trees, embed_model, **ingest_options)

In [1]:
"""
Loading tree index from memforest store
"""

# Add the parent directory to the import path before the project imports below.
import sys
sys.path.append('..')

# Remove the lowercase proxy variables and exempt localhost before the store's
# client is created further down.
import os
os.environ.pop("http_proxy", None)
os.environ.pop("https_proxy", None)
os.environ["NO_PROXY"] = "localhost,127.0.0.1"

from config import Config
from unimemrag.embedding.models.ClipEmbedding import ClipEmbedding
from unimemrag.memory_forest import MemoryForestStore

# Re-open the "memtree" collection for querying.
# NOTE(review): ClipEmbedding() is built with defaults here, while the ingestion cell
# passes model_name="../../ckpts/clip-vit-base-patch32" and imports the class from
# unimemrag.retriever — confirm both resolve to the same model so `dim` matches.
cfg = Config(collection="memtree")
embed_model = ClipEmbedding()
memforest_store = MemoryForestStore(cfg, vector_size=embed_model.dim)

  from .autonotebook import tqdm as notebook_tqdm
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
  self.client = QdrantClient(


In [2]:
# Multimodal query: a text question plus a local image, keeping three hits at each
# of the root, event and leaf levels.
# NOTE(review): alpha is 0 here but 0.1 at ingestion — confirm that is intended.
retrieve_options = {
    "query_text": "who invent this object?",
    "query_image": "../atm-machine.jpg",
    "root_top_k": 3,
    "event_top_k": 3,
    "leaf_top_k": 3,
    "alpha": 0,
}
results = memforest_store.retrieve(embed_model, **retrieve_options)

In [3]:
# Display the leaf hits of the first returned tree (a mapping from a parent id to a
# list of hits, as the recorded output shows).
first_tree = results[0]
first_tree.leaves

{'f7c6f00d-afa9-5019-80df-50972a913a06': [RetrievalHit(id='ac861bd1-4481-5d10-885a-4f2c0ffb220e', score=0.65269524, payload={'modality': 'text', 'node_type': 'leaf', 'tree_id': 'https://en.wikipedia.org/wiki/Automated_teller_machine', 'parent_id': 'f7c6f00d-afa9-5019-80df-50972a913a06', 'content': 'ATMs were originally developed as cash dispensers, and have evolved to provide many other bank-related functions: Paying routine bills, fees, and taxes (utilities, phone bills, social security, legal fees, income taxes, etc.) Printing or ordering bank statements Updating passbooks Cash advances Cheque Processing Module Paying (in full or partially) the credit balance on a card linked to a specific current account. Transferring money between linked accounts (such as transferring between accounts) Deposit currency recognition, acceptance, and recycling In some countries, especially those which benefit from a fully integrated cross-bank network (e.g.: Multibanco in Portugal), ATMs include many 

In [4]:
# Print every retrieved tree: its root topic, then for each event (section) the
# title, the section image list, and the text of the leaf hits under that section.
for tree in results:
    print("=== Root:", tree.root.payload.get("topic"))
    for event in tree.events:
        # `or {}` also covers a payload that stores "metadata": None; a plain
        # .get("metadata", {}) would return None and break the lookups below.
        meta = event.payload.get("metadata") or {}
        # Fall back to the event summary when no section title is recorded.
        title = meta.get("section_title") or event.payload.get("summary")
        # Leaf hits are keyed by the id of the event they belong to.
        section_chunks = [
            leaf.payload["content"]
            for leaf in tree.leaves.get(event.id, [])
            if "content" in leaf.payload
        ]
        section_text = "\n".join(section_chunks)
        print(f"\nSection: {title}")
        print("Images:", meta.get("section_images", []))
        print("Text:", section_text)

=== Root: Automated teller machine

Section: Uses
Images: ['../../benchmark/infoseek/wiki_text/images_100k/https_en_wikipedia_org_wiki_Automated_teller_machine/0018_d206a7de64b8.jpg', '../../benchmark/infoseek/wiki_text/images_100k/https_en_wikipedia_org_wiki_Automated_teller_machine/0019_fbe752e8b774.jpg', '../../benchmark/infoseek/wiki_text/images_100k/https_en_wikipedia_org_wiki_Automated_teller_machine/0020_22218a09a72d.jpg', '../../benchmark/infoseek/wiki_text/images_100k/https_en_wikipedia_org_wiki_Automated_teller_machine/0021_626ff9e96c26.png']
Text: ATMs were originally developed as cash dispensers, and have evolved to provide many other bank-related functions: Paying routine bills, fees, and taxes (utilities, phone bills, social security, legal fees, income taxes, etc.) Printing or ordering bank statements Updating passbooks Cash advances Cheque Processing Module Paying (in full or partially) the credit balance on a card linked to a specific current account. Transferring mone

In [None]:
# memforest_store.clear_collection()

In [None]:
# Exact point count for one of the store's collections, via the store's client.
# NOTE(review): `roof_collection` may be a typo for `root_collection` (the retrieve
# call uses root/event/leaf levels) — confirm the attribute name on MemoryForestStore.
memforest_store.client.count(memforest_store.roof_collection, exact=True)
# Same count for the leaf collection (disabled).
# memforest_store.client.count(memforest_store.leaf_collection, exact=True)

In [None]:
# For each retrieved tree, show the root topic followed by its raw event hits.
for res in results:
    root_topic = res.root.payload["topic"]
    print(root_topic)
    print(res.events)

In [3]:
from unimemrag.vlm.QwenVL import QwenVL

# Load the Qwen2.5-VL checkpoint from the local ckpts directory.
# NOTE(review): the recorded output warns that `torch_dtype` is deprecated in favour
# of `dtype`, and that Flash Attention 2 is used without an explicit torch dtype —
# check whether the QwenVL wrapper accepts a concrete dtype.
vlm_options = dict(
    model_path="../../ckpts/Qwen2.5-VL-7B-Instruct",
    torch_dtype="auto",
    device_map="auto",
    attn_implementation="flash_attention_2",
)
vlm = QwenVL(**vlm_options)

`torch_dtype` is deprecated! Use `dtype` instead!
You are attempting to use Flash Attention 2 without specifying a torch dtype. This might lead to unexpected behaviour
Loading checkpoint shards: 100%|██████████| 5/5 [00:06<00:00,  1.39s/it]
The image processor of type `Qwen2VLImageProcessor` is now loaded as a fast processor by default, even if the model checkpoint was saved with a slow processor. This is a breaking change and may produce slightly different outputs. To continue using the slow processor, instantiate this class with `use_fast=False`. Note that this behavior will be extended to all models in a future release.


In [4]:
# Use the first retrieved tree as the source of the prompt context.
result = results[0]

def format_context(tree_result, top_sections=3, max_chars=1024):
    """Render one retrieved tree as a plain-text context block.

    Layout: a header (topic, tree id, alignment score), then for each of the
    first ``top_sections`` events a ``Section:`` line, an optional preview line
    and up to two ``Paragraph N:`` lines taken from that event's leaf hits.

    Args:
        tree_result: object exposing ``root`` and ``events`` (items with a
            ``payload`` dict; events also have ``id``), ``tree_id``, and
            ``leaves`` (mapping of event id to a list of hits with ``payload``).
        top_sections: number of leading events to include.
        max_chars: character budget; a falsy or non-positive value disables
            truncation.

    Returns:
        The stripped context string. When it exceeds ``max_chars`` it is cut to
        that length and, if the cut text contains a newline, everything after
        the last newline is dropped so no partial line remains.
    """
    root_payload = tree_result.root.payload
    root_meta = root_payload.get("metadata", {}) or {}
    topic = root_payload.get("topic") or root_meta.get("source_url", "n/a")
    score = root_meta.get("alignment_best_score", "n/a")

    parts = [
        f"Topic: {topic}",
        f"Tree ID: {tree_result.tree_id}",
        f"Alignment score: {score}",
        "",
    ]

    for section in tree_result.events[:top_sections]:
        section_payload = section.payload
        section_meta = section_payload.get("metadata", {}) or {}
        summary = section_payload.get("summary")

        heading = section_meta.get("section_title") or summary or "Unknown section"
        parts.append(f"Section: {heading}")

        preview = (section_meta.get("section_preview") or summary or "").strip()
        if preview:
            parts.append(preview)

        # Numbering follows the hit's position, so an empty hit leaves a gap.
        hits = tree_result.leaves.get(section.id, [])[:2]
        for rank, hit in enumerate(hits, start=1):
            text = (hit.payload.get("content") or "").strip()
            if text:
                parts.append(f"Paragraph {rank}: {text}")
        parts.append("")

    context = "\n".join(parts).strip()

    over_budget = bool(max_chars) and max_chars > 0 and len(context) > max_chars
    if not over_budget:
        return context

    head = context[:max_chars]
    last_break = head.rfind("\n")
    return head if last_break == -1 else head[:last_break]

# Flatten the selected tree into the text context that goes into the prompt.
context = format_context(result, top_sections=3)
print("formated_context:", context)
# Must be a plain string: a trailing comma after the literal turns `question` into a
# 1-tuple, and the f-string below would then embed the tuple's repr in the prompt.
question = "Who built this object that has not cash dispensing features?"

# Chat messages: a system instruction, then a user turn carrying the query image
# and the context + question text.
messages = [
    {"role": "system", "content": [
        {"type": "text", "text": "You are a helpful science expert. Answer the question based on the context provided."}
    ]},
    {"role": "user", "content": [
        {"type": "image", "image": "../atm-machine.jpg"},
        {"type": "text",  "text": f"Here's the contexts:\n{context}\n\nNow, answer the question:\n{question}"}
    ]}
]
print(messages)

# temperature=0.7 is passed to the model, so answers can differ between runs.
answer = vlm.chat(messages, max_new_tokens=32768, temperature=0.7)
print(answer)

formated_context: Topic: Automated teller machine
Tree ID: https://en.wikipedia.org/wiki/Automated_teller_machine
Alignment score: 0.32619166374206543

Section: Uses
ATMs were originally developed as cash dispensers, and have evolved to provide many other bank-related functions: 
Paying routine bills, fees, and taxes (utilities, phone bills, social security, legal fees, income taxes, etc.)
Printing or ordering bank statements
Updating passbooks
Cash advances
Cheque Processing Module
Paying (in full or partially) the credit balance on a card linked to a specific current account.
Transferring money between linked accounts (such as transferring between accounts)
Deposit cu
[{'role': 'system', 'content': [{'type': 'text', 'text': 'You are a helpful science expert. Answer the question based on the context provided.'}]}, {'role': 'user', 'content': [{'type': 'image', 'image': '../atm-machine.jpg'}, {'type': 'text', 'text': "Here's the contexts:\nTopic: Automated teller machine\nTree ID: http

In [None]:
import json

# Load the raw knowledge-base JSON into `data` for ad-hoc inspection.
WIKI_KB_JSON = "../../benchmark/infoseek/wiki_text/wiki_100_dict_v4.json"
with open(WIKI_KB_JSON, "r", encoding="utf-8") as kb_file:
    data = json.load(kb_file)

In [None]:
# Preview the first six entries (indices 0 through 5): the available fields, then
# the title and image URL list of each.
for idx, (wiki_key, entry) in enumerate(data.items()):
    field_names = list(entry.keys())
    print(f"[{idx}] {wiki_key} -> {field_names}")
    entry_title = entry.get('title', [])
    entry_urls = entry.get('image_urls', [])
    print(f"[{idx}] {wiki_key} -> topic: {entry_title} image_urls: {entry_urls}")
    if idx >= 5:
        break

In [None]:
# Remove the lowercase proxy variables and exempt localhost before the imports below.
import os
os.environ.pop("http_proxy", None)
os.environ.pop("https_proxy", None)
os.environ["NO_PROXY"] = "localhost,127.0.0.1"

# Add the parent directory to the import path for the project import that follows.
import sys
sys.path.append('..')

from unimemrag.embedding.models.ClipEmbedding import ClipEmbedding
import numpy as np

# CLIP embedder built with the class defaults (no explicit checkpoint path).
clip = ClipEmbedding()

In [None]:
# Remove the lowercase proxy variables and exempt localhost before the imports below.
import os
os.environ.pop("http_proxy", None)
os.environ.pop("https_proxy", None)
os.environ["NO_PROXY"] = "localhost,127.0.0.1"

# Add the parent directory to the import path for the project import that follows.
import sys
sys.path.append("..")
from unimemrag.retriever import ClipEmbedding

# CLIP embedder loaded from the local checkpoint directory.
embed_model = ClipEmbedding(model_name="../../ckpts/clip-vit-base-patch32")

# Score one topic string against a list of image URLs with the CLIP embedder.
# NOTE(review): the topic is "Reflecting telescope" but the URLs look like parasol
# mushroom photos (Macrolepiota procera) — presumably a deliberate mismatch to see
# how low the scores go; confirm.
topic = "Reflecting telescope"
image_urls = [
    "https://upload.wikimedia.org/wikipedia/commons/c/c1/ParasolMushroom.JPG",
    "https://upload.wikimedia.org/wikipedia/commons/8/8c/Parasol-1.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/5/54/Glawlen_y_Bwgan_%28Macrolepiota_procera%29.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/a/a4/Macrolepiota-procera.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/d/d3/Macrolepiota_procera_fungus%2C_Woodfidley%2C_New_Forest_-_geograph.org.uk_-_261237.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/4/48/Macrolepiota_procera_2011_G1.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/c/cf/Parasol-Macrolepiota-procera.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/6/6a/Macrolepiota_procera_2013_G1.jpg",
    "http://upload.wikimedia.org/wikipedia/commons/a/ab/Parasol_mushroom.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/d/da/Breaded_parasol_mushroom.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/1/1b/Edible_fungi_in_basket_2019_G2.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/f/f3/Unopened_parasol_mushroom.jpg"
    ]


text_embed = embed_model.embed_texts([topic])[0]      # (dim,)
image_embeds = embed_model.embed_images(image_urls)   # (N, dim)

# One dot product per image; this equals cosine similarity only if both embeddings
# are L2-normalized — TODO confirm against ClipEmbedding.
scores = image_embeds @ text_embed
for url, score in zip(image_urls, scores):
    print(f"{url} -> {score:.4f}")