# RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval

In [1]:
# NOTE: An OpenAI API key must be set here for application initialization, even if not in use.
# If you're not utilizing OpenAI models, assign a placeholder string (e.g., "not_used").
import os
#os.environ["OPENAI_API_KEY"] = "your-openai-key"

1) **Building**: RAPTOR recursively embeds, clusters, and summarizes chunks of text to construct a tree with varying levels of summarization from the bottom up. You can create a tree from the text of a document (this notebook reads 'demo/tech_txt.txt') using `RA.add_documents(text)`.

2) **Querying**: At inference time, the RAPTOR model retrieves information from this tree, integrating data across lengthy documents at different abstraction levels. You can perform queries on the tree with `RA.answer_question`.

### Building the tree

In [2]:
from raptor import RetrievalAugmentation 

  from .autonotebook import tqdm as notebook_tqdm
2024-07-05 16:24:18,050 - Loading faiss with AVX2 support.
2024-07-05 16:24:18,631 - Successfully loaded faiss with AVX2 support.


## Using other Open Source Models for Summarization/QA/Embeddings

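If you want to use other models such as Llama or Mistral, you can define your own model classes and use them with RAPTOR. The cells below wrap an OpenAI-style chat-completions HTTP endpoint for summarization and question answering, and use a SentenceTransformer model for embeddings.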

In [3]:
import torch
from raptor import BaseSummarizationModel, BaseQAModel, BaseEmbeddingModel, RetrievalAugmentationConfig
from transformers import AutoTokenizer, pipeline

In [4]:
import os
from getpass import getpass

from huggingface_hub import login

# NOTE(review): this bare assignment is never passed to login(), so it has no effect on
# the call below. Pass `add_to_git_credential=True` to login() if that is what is intended.
add_to_git_credential=True

# SECURITY: never commit an access token to a notebook. The token is read from the
# HF_TOKEN environment variable; if it is not set, the user is prompted and the input is
# hidden. The token that used to be hardcoded on this line was published together with
# the notebook and should be revoked in the Hugging Face account settings.
login(os.environ.get("HF_TOKEN") or getpass("Hugging Face token: "))

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to C:\Users\karth\.cache\huggingface\token
Login successful


In [5]:
import requests

class SummarizationModel(BaseSummarizationModel):
    """Summarization backend that POSTs a chat payload to an HTTP completions endpoint.

    Args:
        url: Endpoint that receives the JSON chat payload.
        timeout: Seconds to wait for the server before the request is abandoned.
            Without a timeout a stalled server would block the notebook forever.
    """

    def __init__(self, url="http://a0221.nhr.fau.de:5000/v1/chat/completions", timeout=300):
        super().__init__()  # Initialize from BaseSummarizationModel if needed
        self.url = url
        self.timeout = timeout
        self.headers = {
            "Content-Type": "application/json"
        }
        self.history = []

    def summarize(self, context, max_tokens=150):
        """Ask the remote model for a summary of ``context``.

        Args:
            context: Text to summarize; it is embedded verbatim in the prompt.
            max_tokens: Upper bound on the generated length, forwarded to the server
                as ``max_tokens`` (assumes the endpoint honours the OpenAI-style
                field — TODO confirm for the deployed server).

        Returns:
            The stripped assistant message on HTTP 200. Otherwise a string starting
            with ``"Error:"`` (non-200 status) or ``"Request error:"`` (transport
            failure raised by ``requests``).
        """
        # Each request is independent: the history holds only the current prompt.
        self.history = [
            {"role": "user", "content": f"Write a summary of the following, including as many key details as possible: {context}:"}
        ]

        # Prepare the data payload
        data = {
            "mode": "instruct",
            "temperature": 0.7,
            "max_tokens": max_tokens,
            "messages": self.history
        }

        # Make the POST request to the specified URL
        try:
            # NOTE(review): verify=False disables TLS certificate verification. It is
            # irrelevant for the default http:// URL but unsafe if an https:// URL is used.
            response = requests.post(
                self.url,
                headers=self.headers,
                json=data,
                verify=False,
                timeout=self.timeout,
            )

            # Check if the response is successful
            if response.status_code == 200:
                payload = response.json()  # parse the body once and reuse it
                print(payload)
                assistant_message = payload['choices'][0]['message']['content']
                print(assistant_message)
                return assistant_message.strip()
            else:
                return f"Error: {response.status_code} {response.text}"
        except requests.exceptions.RequestException as e:
            return f"Request error: {e}"


In [6]:
import requests

class QAModel(BaseQAModel):
    """Question-answering backend that POSTs a chat payload to an HTTP completions endpoint.

    Args:
        url: Endpoint that receives the JSON chat payload.
        timeout: Seconds to wait for the server before the request is abandoned.
            Without a timeout a stalled server would block the notebook forever.
    """

    def __init__(self, url="http://a0221.nhr.fau.de:5000/v1/chat/completions", timeout=300):
        super().__init__()  # Initialize from BaseQAModel if needed
        self.url = url
        self.timeout = timeout
        self.headers = {
            "Content-Type": "application/json"
        }
        self.history = []

    def answer_question(self, context, question):
        """Ask the remote model to answer ``question`` using ``context``.

        Args:
            context: Retrieved text placed in the prompt as the given context.
            question: The question to answer; embedded verbatim in the prompt.

        Returns:
            The stripped assistant message on HTTP 200. Otherwise a string starting
            with ``"Error:"`` (non-200 status) or ``"Request error:"`` (transport
            failure raised by ``requests``).
        """
        # Each request is independent: the history holds only the current prompt.
        self.history = [
            {"role": "user", "content": f"Given Context: {context} Give the best full answer amongst the option to question {question}"}
        ]

        # Prepare the data payload
        data = {
            "mode": "instruct",
            "temperature": 0.7,
            "messages": self.history
        }

        # Make the POST request to the specified URL
        try:
            # NOTE(review): verify=False disables TLS certificate verification. It is
            # irrelevant for the default http:// URL but unsafe if an https:// URL is used.
            response = requests.post(
                self.url,
                headers=self.headers,
                json=data,
                verify=False,
                timeout=self.timeout,
            )

            # Check if the response is successful
            if response.status_code == 200:
                payload = response.json()  # parse the body once and reuse it
                print(payload)
                assistant_message = payload['choices'][0]['message']['content']
                print(assistant_message)
                return assistant_message.strip()
            else:
                return f"Error: {response.status_code} {response.text}"
        except requests.exceptions.RequestException as e:
            return f"Request error: {e}"


In [7]:
from sentence_transformers import SentenceTransformer
class SBertEmbeddingModel(BaseEmbeddingModel):
    """Embedding backend backed by a SentenceTransformer model."""

    def __init__(self, model_name="sentence-transformers/multi-qa-mpnet-base-cos-v1"):
        """Load the SentenceTransformer identified by ``model_name``."""
        encoder = SentenceTransformer(model_name)
        self.model = encoder

    def create_embedding(self, text):
        """Encode ``text`` with the loaded model, with the progress bar disabled."""
        embedding = self.model.encode(text, show_progress_bar=False)
        return embedding


In [8]:
# Bundle the custom summarization, QA and embedding models into one RAPTOR config.
RAC = RetrievalAugmentationConfig(
    summarization_model=SummarizationModel(),
    qa_model=QAModel(),
    embedding_model=SBertEmbeddingModel(),
)

2024-07-05 16:24:56,490 - Load pretrained SentenceTransformer: sentence-transformers/multi-qa-mpnet-base-cos-v1
2024-07-05 16:24:57,857 - Use pytorch device: cpu


In [9]:
# Create the RetrievalAugmentation instance from the custom config (RAC) defined earlier.
RA = RetrievalAugmentation(config=RAC)

2024-07-05 16:25:03,463 - Successfully initialized TreeBuilder with Config 
        TreeBuilderConfig:
            Tokenizer: <Encoding 'cl100k_base'>
            Max Tokens: 100
            Num Layers: 5
            Threshold: 0.5
            Top K: 5
            Selection Mode: top_k
            Summarization Length: 100
            Summarization Model: <__main__.GEMMASummarizationModel object at 0x00000217C71581C0>
            Embedding Models: {'EMB': <__main__.SBertEmbeddingModel object at 0x00000217C7158970>}
            Cluster Embedding Model: EMB
        
        Reduction Dimension: 10
        Clustering Algorithm: RAPTOR_Clustering
        Clustering Parameters: {}
        
2024-07-05 16:25:03,465 - Successfully initialized ClusterTreeBuilder with Config 
        TreeBuilderConfig:
            Tokenizer: <Encoding 'cl100k_base'>
            Max Tokens: 100
            Num Layers: 5
            Threshold: 0.5
            Top K: 5
            Selection Mode: top_k
            

In [10]:
# Path handed to RA.save() once the tree has been built.
SAVE_PATH = "demo/tech_txt_tree_structure"

# Read the whole source document into memory.
with open('demo/tech_txt.txt', mode='r', encoding='utf-8') as file:
    text = file.read()

# Show the first 100 characters as a quick sanity check of what was loaded.
print(text[:100])

# Build the tree from the document text, then save it.
RA.add_documents(text)
RA.save(SAVE_PATH)


, Module name 1750: Advanced Processes Basics I (Advanced Processes)5 ECTS
Course / lectures: Vorle


2024-07-05 16:26:05,493 - Creating Leaf Nodes


In [None]:
# Question to pose against the tree held by RA.
question = "Difference between Masters in computation Engineering and Masters in Artificial Intelligence in terms of credits?"

# RA.answer_question is called with the question only; the result is printed below.
answer = RA.answer_question(question=question)

print("Answer: ", answer)

In [None]:
# Keep a handle on the tree held by RA; print_tree_layers below reads it.
# (The bare `tree.root_nodes` expression that followed was removed: a mid-cell
# expression is evaluated and discarded, so it displayed nothing.)
tree = RA.tree
def print_tree_layers(root_nodes, all_nodes=None):
    """
    Print every node's index and text, layer by layer, starting from the root nodes.

    Level 0 is the set of root nodes; each following level holds the children of
    the nodes printed in the level before it.

    Args:
      root_nodes: A dictionary mapping node index to Node objects; its values form level 0.
      all_nodes: Optional dictionary mapping node index to Node objects, used to resolve
        the indices listed in each node's ``children``. When omitted, the notebook-level
        ``tree.all_nodes`` is used, which is what the function always did before.

    Returns:
      None. All output is written to stdout.
    """
    if all_nodes is None:
        # Backward-compatible fallback to the global tree defined in this notebook.
        all_nodes = tree.all_nodes

    current_layer = list(root_nodes.values())  # Convert root_nodes to a list for iteration
    level = 0
    while current_layer:
        print(f"================= Level {level} ================= ")
        next_layer = []
        for node in current_layer:
            print(f"Index: {node.index}, Text: {node.text}\n")
            for child_index in node.children:
                child = all_nodes.get(child_index)
                # Skip indices that are missing from all_nodes; a None entry would
                # crash on `node.index` when the next level is printed.
                if child is not None:
                    next_layer.append(child)

        current_layer = next_layer
        level += 1

# Print the tree level by level, starting from its root nodes.
print_tree_layers(tree.root_nodes)
