In [1]:
import os
import json
import datasets
from tqdm import tqdm
from memorag import Model
from functools import partial
from functools import partial
from transformers.utils import logging
from torch.utils.data import DataLoader
from longbench_utils import DATASET2CATEGORY, scorer, DATASET2PROMPT, DATASET2MAXNEWTOKENS, makedirs, FileLogger, DefaultDataCollator

# Notebook-wide logger. `logging` here is `transformers.utils.logging` (see the imports above),
# not the standard-library module, hence `get_logger` rather than `getLogger`.
logger = logging.get_logger(__name__)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from hg_rag import BAAIEmbeddingModel, SBertEmbeddingModel, QwenQAModel, QwenSummarizationModel
# NOTE: the trailing commas on the next two assignments make `sum_model` and `qa_model`
# one-element tuples, not model objects. Later cells compensate by indexing them
# (`sum_model[0]`, `qa_model[0]`), so the commas cannot be dropped without updating those uses.
# NOTE(review): the model locations are absolute, machine-specific paths.
sum_model=QwenSummarizationModel(model_name='/home/rt/data/model/Qwen/Qwen2.5-7B-Instruct'), 
qa_model=QwenQAModel(model_name='/home/rt/data/model/Qwen/Qwen2.5-7B-Instruct'), 
# embedding_model=BAAIEmbeddingModel(model_path='/home/rt/data/model/BAAI/bge-m3')
# Unlike the two assignments above, this one has no trailing comma, so `emb_model` is the model object itself.
emb_model=SBertEmbeddingModel(model_name='/home/rt/data/model/sentence-transformers/multi-qa-mpnet-base-cos-v1')

Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00,  6.25it/s]
Device set to use cuda:1
Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00,  6.26it/s]
Device set to use cuda
2025-01-02 20:08:56,294 - Use pytorch device_name: cuda
2025-01-02 20:08:56,295 - Load pretrained SentenceTransformer: /home/rt/data/model/sentence-transformers/multi-qa-mpnet-base-cos-v1


In [3]:
def process_longbench(data, indices, tokenizer, max_length=3500, truncate_from_middle=True):
    """Filter and truncate one batch of benchmark rows (batched `map` callback).

    Rows whose dataset name is not one of the supported tasks are dropped, so the
    returned columns may be shorter than the input batch.

    Args:
        data: Mapping of column name to list; the 'input', 'context' and 'dataset'
            columns are read.
        indices: Row indices aligned with the rows of `data`.
        tokenizer: Object exposing `encode(text)` (optionally accepting
            `add_special_tokens=False`) and `decode(tokens)`.
        max_length: Token budget for the context; `None` disables truncation.
        truncate_from_middle: When True, an over-long context keeps
            `max_length // 2` tokens from the start and the same number from the end.
            When False, the context is re-encoded and only its last `max_length`
            tokens are kept.

    Returns:
        dict with the list columns 'context', 'question', 'dataset', 'index' and
        'length' (token count of the final context as reported by `tokenizer.encode`).
    """
    # Tasks whose 'input' field is used as the question.
    qa_datasets = ('narrativeqa', 'qasper', 'multifieldqa_en', 'hotpotqa', '2wikimqa', 'musique', 'qmsum')
    # Tasks that are processed with an empty question.
    no_question_datasets = ('gov_report', 'multi_news')

    outputs = {'context': [], 'question': [], "dataset": [], "index": [], "length": []}

    # `query` rather than `input`, so the builtin is not shadowed.
    for query, context, dataset, index in zip(data['input'], data['context'], data['dataset'], indices):
        # Normalise names carrying an "_e" suffix to their base task name.
        if dataset.endswith("_e"):
            dataset = dataset[:-2]

        if dataset in qa_datasets:
            question = query
        elif dataset in no_question_datasets:
            question = ""
        else:
            # Unsupported task: drop the row.
            continue

        if max_length is not None:
            if truncate_from_middle:
                try:
                    tokenized_context = tokenizer.encode(context, add_special_tokens=False)
                except Exception:
                    # Fall back for tokenizers that reject the keyword. `Exception`
                    # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
                    tokenized_context = tokenizer.encode(context)
                if len(tokenized_context) > max_length:
                    half = int(max_length / 2)
                    # Explicit start offset: `tokens[-half:]` would return the whole
                    # sequence when half == 0 instead of an empty tail.
                    tail_start = len(tokenized_context) - half
                    context = tokenizer.decode(tokenized_context[:half]) + tokenizer.decode(tokenized_context[tail_start:])
            else:
                tokenized_context = tokenizer.encode(context)
                # Same pitfall as above: `tokens[-0:]` is the full list, so compute the offset.
                keep_from = max(len(tokenized_context) - max_length, 0)
                context = tokenizer.decode(tokenized_context[keep_from:])

        length = len(tokenizer.encode(context))

        outputs["context"].append(context)
        outputs["question"].append(question)
        outputs["dataset"].append(dataset)
        outputs["index"].append(index)
        outputs["length"].append(length)

    return outputs

In [4]:
# Base directory under which the evaluation cell writes one `<dataset_name>.json` file per dataset.
output_dir = "./results/longbench/"

# Datasets the evaluation loop iterates over; the trailing comment keeps alternative selections.
dataset_names = ['hotpotqa',] # ['narrativeqa', 'qasper', 'hotpotqa'], ['narrativeqa', 'qasper', 'multifieldqa_en', 'hotpotqa', '2wikimqa', 'musique'] 
# raw_dataset = datasets.load_dataset("json", data_files=f'/home/rt/data/MemoRAG/THUDM/LongBench/data/{dataset_names[0]}.jsonl', split="train")
# Load the benchmark JSON as a single "train" split. The path is relative,
# so it resolves against the notebook's working directory.
raw_dataset = datasets.load_dataset("json", data_files='../dataset/TommyChien/MemoRAG-data/longbench.json', split="train")

In [6]:
# Truncation settings forwarded to process_longbench.
max_length = 100000
truncate_from_middle = True

# Pre-bind the tokenizer and truncation options so that `map` only has to supply
# the batch and its row indices. `qa_model` is a one-element tuple, hence the `[0]`.
process_fn = partial(
    process_longbench,
    tokenizer=qa_model[0].tokenizer,
    max_length=max_length,
    truncate_from_middle=truncate_from_middle,
)

# Replace the raw columns with the processed ones, then group rows by task name
# so each dataset can be evaluated separately.
dataset = raw_dataset.map(
    process_fn,
    batched=True,
    num_proc=32,
    with_indices=True,
    remove_columns=raw_dataset.column_names,
)
groupby_dataset = dataset.to_pandas().groupby("dataset")

In [8]:
# Imported once per cell run rather than once per sample inside the generation loop.
from hg_rag import RetrievalAugmentation, RetrievalAugmentationConfig

# Latest score per dataset name.
metrics = {}
result_dir = ''
result_dir = os.path.join(output_dir, result_dir)

for i, dataset_name in enumerate(dataset_names):
    logger.info(f"Evaluating {dataset_name} ({i + 1} / {len(dataset_names)})...")

    result_path = os.path.join(result_dir, f"{dataset_name}.json")

    # Rows of the processed dataset that belong to this task.
    dataset = datasets.Dataset.from_pandas(groupby_dataset.get_group(dataset_name), preserve_index=False)

    data_collator = DefaultDataCollator(padding_side="left")
    dataloader = DataLoader(
        dataset,
        batch_size=1,
        collate_fn=data_collator,
    )

    indices = []
    preds = []
    memory_results = []
    _prompt = DATASET2PROMPT[dataset_name]
    task_max_new_token = DATASET2MAXNEWTOKENS[dataset_name]

    # The loop counter was unused and shadowed the outer `i`, so it is dropped.
    for x in tqdm(dataloader, desc="Generating"):
        x.pop("dataset")
        # batch_size is 1, so take the single entry of the batch.
        index = x.pop("index")[0]

        # A fresh retrieval pipeline is built for every sample because each sample
        # brings its own context. `sum_model` / `qa_model` are one-element tuples
        # (see the model-loading cell), hence the `[0]`.
        RAC = RetrievalAugmentationConfig(
            summarization_model=sum_model[0],
            qa_model=qa_model[0],
            # embedding_model=BAAIEmbeddingModel(model_path='/home/rt/data/model/BAAI/bge-m3')
            embedding_model=emb_model,
            tb_summarization_length=50,
        )
        RA = RetrievalAugmentation(config=RAC)
        # NOTE(review): the recorded run aborted on the third sample with
        # "ValueError: n_neighbors must be greater than 1". It appears to come from
        # tree building, but the traceback was not captured. Decide whether a failing
        # sample should be skipped or scored as an empty prediction before catching it.
        RA.add_documents(x["context"][0])  # persist_path='./db3'
        output = RA.answer_question(question=x["question"][0], prompt_template=_prompt, gen_max_tokens=task_max_new_token)

        print(output)
        output = [output]

        index = index.tolist()
        preds.extend(output)
        if isinstance(index, list):
            indices.extend(index)
        else:
            # single process
            indices.append(index)

        # Scoring and the result file are refreshed after every sample, so partial
        # results survive a crash at the cost of rescoring all predictions so far.
        raw_dataset_subset = raw_dataset[indices]
        answers = raw_dataset_subset["answers"]
        lengths = raw_dataset_subset["length"]
        all_classes = []
        score = scorer(dataset_name, preds, answers, all_classes)

        logger.info(f"{dataset_name}: {score}")
        metrics[dataset_name] = score

        # `makedirs` comes from longbench_utils; presumably it creates the parent
        # directory and returns the path (confirm against the helper).
        with open(makedirs(result_path), "w", encoding="utf-8") as f:
            # First line: the score; then one JSON record per prediction.
            f.write(json.dumps(score, ensure_ascii=False) + "\n")
            # Distinct loop name so the current sample's `index` is not overwritten.
            for sample_index, pred in zip(indices, preds):
                sample = raw_dataset[sample_index]
                # The full context is omitted from the saved record.
                del sample["context"]
                sample["pred"] = pred
                f.write(json.dumps(sample, ensure_ascii=False) + "\n")

2025-01-02 20:15:59,522 - Evaluating hotpotqa (1 / 1)...
Generating:   0%|          | 0/200 [00:00<?, ?it/s]2025-01-02 20:15:59,630 - Successfully initialized TreeBuilder with Config 
        TreeBuilderConfig:
            Tokenizer: <Encoding 'cl100k_base'>
            Max Tokens: 100
            Num Layers: 5
            Threshold: 0.5
            Top K: 5
            Selection Mode: top_k
            Summarization Length: 50
            Summarization Model: <hg_rag.All_Models.QwenSummarizationModel object at 0x7f1c1489a590>
            Embedding Models: {'EMB': <hg_rag.All_Models.SBertEmbeddingModel object at 0x7f1c14898820>}
            Cluster Embedding Model: EMB
        
        Reduction Dimension: 10
        Clustering Algorithm: RAPTOR_Clustering
        Clustering Parameters: {}
        
2025-01-02 20:15:59,632 - Successfully initialized ClusterTreeBuilder with Config 
        TreeBuilderConfig:
            Tokenizer: <Encoding 'cl100k_base'>
            Max Tokens: 100
    

collapsed_tree
{'node_index': 3, 'layer_number': 0, 'parent_index': 155}
{'node_index': 28, 'layer_number': 0, 'parent_index': 144}
{'node_index': 26, 'layer_number': 0, 'parent_index': 149}
{'node_index': 14, 'layer_number': 0, 'parent_index': 150}
{'node_index': 0, 'layer_number': 0, 'parent_index': 141}
{'node_index': 2, 'layer_number': 0, 'parent_index': 155}
{'node_index': 21, 'layer_number': 0, 'parent_index': 159}
{'node_index': 13, 'layer_number': 0, 'parent_index': 159}
{'node_index': 29, 'layer_number': 0, 'parent_index': 150}
{'node_index': 9, 'layer_number': 0, 'parent_index': 141}
{'node_index': 8, 'layer_number': 0, 'parent_index': 158}
{'node_index': 1, 'layer_number': 0, 'parent_index': 141}
{'node_index': 12, 'layer_number': 0, 'parent_index': 145}
{'node_index': 4, 'layer_number': 0, 'parent_index': 156}
{'node_index': 19, 'layer_number': 0, 'parent_index': 151}
{'node_index': 11, 'layer_number': 0, 'parent_index': 145}
{'node_index': 10, 'layer_number': 0, 'parent_in

2025-01-02 20:16:57,379 - hotpotqa: 3.85
Generating:   0%|          | 1/200 [00:57<3:11:32, 57.75s/it]2025-01-02 20:16:57,382 - Successfully initialized TreeBuilder with Config 
        TreeBuilderConfig:
            Tokenizer: <Encoding 'cl100k_base'>
            Max Tokens: 100
            Num Layers: 5
            Threshold: 0.5
            Top K: 5
            Selection Mode: top_k
            Summarization Length: 50
            Summarization Model: <hg_rag.All_Models.QwenSummarizationModel object at 0x7f1c1489a590>
            Embedding Models: {'EMB': <hg_rag.All_Models.SBertEmbeddingModel object at 0x7f1c14898820>}
            Cluster Embedding Model: EMB
        
        Reduction Dimension: 10
        Clustering Algorithm: RAPTOR_Clustering
        Clustering Parameters: {}
        
2025-01-02 20:16:57,382 - Successfully initialized ClusterTreeBuilder with Config 
        TreeBuilderConfig:
            Tokenizer: <Encoding 'cl100k_base'>
            Max Tokens: 100
          

To determine which case was brought to court first, we need to examine the details provided:

1. **Miller v. California (1973)**: This case involved the appeal of Larry Flynt's magazine, "Penthouse," which was convicted under California's obscenity laws. Miller's appeal reached the Supreme Court in 1972, where the Court issued its landmark decision in 1973. The case is noted for establishing a new standard for determining obscenity under the First Amendment.

2. **Gates v. Collier (1972)**: This case involves a challenge to conditions in prison, specifically the treatment of prisoners' mail and the censorship practices of prison officials. The case was heard by the Supreme Court in 1972, resulting in a decision in 1973.

Based on the information provided, **Miller v. California** was brought to the Supreme Court first, with oral arguments heard in January 1972 and a decision issued in June 1973. **Gates v. Collier** was also heard by the Supreme Court in 1972 but did not come before th

2025-01-02 20:17:02,931 - Created 193 Leaf Embeddings
2025-01-02 20:17:02,932 - Building All Nodes
2025-01-02 20:17:02,939 - Using Cluster TreeBuilder
2025-01-02 20:17:02,940 - Constructing Layer 0
2025-01-02 20:17:18,932 - Summarization Length: 50
2025-01-02 20:17:19,996 - Node Texts Length: 558, Summarized Text Length: 674
2025-01-02 20:17:21,081 - Node Texts Length: 623, Summarized Text Length: 738
2025-01-02 20:17:22,135 - Node Texts Length: 340, Summarized Text Length: 458
2025-01-02 20:17:23,194 - Node Texts Length: 440, Summarized Text Length: 555
2025-01-02 20:17:24,252 - Node Texts Length: 430, Summarized Text Length: 542
2025-01-02 20:17:25,297 - Node Texts Length: 226, Summarized Text Length: 342
2025-01-02 20:17:26,352 - Node Texts Length: 411, Summarized Text Length: 527
2025-01-02 20:17:27,389 - Node Texts Length: 278, Summarized Text Length: 395
2025-01-02 20:17:28,458 - Node Texts Length: 511, Summarized Text Length: 625
2025-01-02 20:17:29,510 - Node Texts Length: 357,

collapsed_tree
{'node_index': 25, 'layer_number': 0, 'parent_index': 214}
{'node_index': 22, 'layer_number': 0, 'parent_index': 215}
{'node_index': 3, 'layer_number': 0, 'parent_index': 226}
{'node_index': 15, 'layer_number': 0, 'parent_index': 225}
{'node_index': 19, 'layer_number': 0, 'parent_index': 226}
{'node_index': 17, 'layer_number': 0, 'parent_index': 225}
{'node_index': 24, 'layer_number': 0, 'parent_index': 218}
{'node_index': 0, 'layer_number': 0, 'parent_index': 215}
{'node_index': 2, 'layer_number': 0, 'parent_index': 225}
{'node_index': 20, 'layer_number': 0, 'parent_index': 218}
{'node_index': 1, 'layer_number': 0, 'parent_index': 225}
{'node_index': 8, 'layer_number': 0, 'parent_index': 224}
{'node_index': 4, 'layer_number': 0, 'parent_index': 226}
{'node_index': 18, 'layer_number': 0, 'parent_index': 210}
{'node_index': 5, 'layer_number': 0, 'parent_index': 224}
{'node_index': 6, 'layer_number': 0, 'parent_index': 224}
{'node_index': 16, 'layer_number': 0, 'parent_ind

2025-01-02 20:18:19,698 - hotpotqa: 1.92
Generating:   1%|          | 2/200 [02:20<3:58:16, 72.20s/it]2025-01-02 20:18:19,702 - Successfully initialized TreeBuilder with Config 
        TreeBuilderConfig:
            Tokenizer: <Encoding 'cl100k_base'>
            Max Tokens: 100
            Num Layers: 5
            Threshold: 0.5
            Top K: 5
            Selection Mode: top_k
            Summarization Length: 50
            Summarization Model: <hg_rag.All_Models.QwenSummarizationModel object at 0x7f1c1489a590>
            Embedding Models: {'EMB': <hg_rag.All_Models.SBertEmbeddingModel object at 0x7f1c14898820>}
            Cluster Embedding Model: EMB
        
        Reduction Dimension: 10
        Clustering Algorithm: RAPTOR_Clustering
        Clustering Parameters: {}
        
2025-01-02 20:18:19,703 - Successfully initialized ClusterTreeBuilder with Config 
        TreeBuilderConfig:
            Tokenizer: <Encoding 'cl100k_base'>
            Max Tokens: 100
          

The actor who played Phileas Fogg in the 1956 film "Around the World in 80 Days" was Cantinflas. However, the information you're seeking pertains to the 1939 film adaptation of the same story. In the 1939 film, the actor who played Phileas Fogg was Michael Redgrave.

Michael Redgrave starred alongside Gary Cooper in "Around the World in 80 Days," which was based on the novel by Jules Verne. This film was produced by Goldwyn Productions and released in 1939.


2025-01-02 20:18:24,923 - Created 191 Leaf Embeddings
2025-01-02 20:18:24,924 - Building All Nodes
2025-01-02 20:18:24,931 - Using Cluster TreeBuilder
2025-01-02 20:18:24,931 - Constructing Layer 0
2025-01-02 20:18:39,357 - Summarization Length: 50
2025-01-02 20:18:40,408 - Node Texts Length: 388, Summarized Text Length: 504
2025-01-02 20:18:41,473 - Node Texts Length: 377, Summarized Text Length: 491
2025-01-02 20:18:42,553 - Node Texts Length: 460, Summarized Text Length: 574
2025-01-02 20:18:43,635 - Node Texts Length: 639, Summarized Text Length: 754
2025-01-02 20:18:44,712 - Node Texts Length: 558, Summarized Text Length: 673
2025-01-02 20:18:45,777 - Node Texts Length: 464, Summarized Text Length: 580
2025-01-02 20:18:46,854 - Node Texts Length: 569, Summarized Text Length: 686
2025-01-02 20:18:47,945 - Node Texts Length: 658, Summarized Text Length: 774
2025-01-02 20:18:49,022 - Node Texts Length: 644, Summarized Text Length: 760
2025-01-02 20:18:50,081 - Node Texts Length: 416,

ValueError: n_neighbors must be greater than 1