<a href="https://colab.research.google.com/github/DigitalResultsPros/30-Days-Of-React/blob/master/RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
! pip install haystack-ai transformers "huggingface_hub>=0.22.0" sentence-transformers accelerate bitsandbytes

Collecting haystack-ai
  Downloading haystack_ai-2.2.4-py3-none-any.whl (346 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/346.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━[0m [32m174.1/346.9 kB[0m [31m5.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m346.9/346.9 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
Collecting sentence-transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.1/227.1 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.32.1-py3-none-any.whl (314 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m314.1/314.1 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl (119.8 MB)


In [None]:
import getpass, os


# Prompt for the Hugging Face token with getpass (so it is not typed in clear
# text into the notebook) and store it in this process's environment under
# HF_API_TOKEN.
# NOTE(review): presumably the Haystack Hugging Face components read their
# token from this variable -- confirm for the installed haystack-ai version.
os.environ["HF_API_TOKEN"] = getpass.getpass("Your Hugging Face token")

Your Hugging Face token··········


In [None]:
from haystack import Pipeline, Document
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.embedders import SentenceTransformersTextEmbedder, SentenceTransformersDocumentEmbedder
from haystack.components.writers import DocumentWriter
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.retrievers import InMemoryEmbeddingRetriever
from haystack.components.generators import HuggingFaceLocalGenerator
from haystack.components.builders import PromptBuilder
from haystack.components.joiners import DocumentJoiner
from haystack import component
from typing import List, Dict, Any

@component
class DocumentFormatter:
    """Pipeline component that flattens documents into one block of text."""

    @component.output_types(formatted_documents=str)
    def run(self, documents: List[Document]):
        """Join the ``content`` of every document, one per line.

        Args:
            documents: Documents whose ``content`` should be concatenated.

        Returns:
            A dict with the single key ``formatted_documents`` holding the
            newline-separated text (empty string when no documents are given).
        """
        context_text = "\n".join(document.content for document in documents)
        print(f"Formatted documents: {context_text}")  # Debug print
        return {"formatted_documents": context_text}

@component
class QueryAdder:
    """Pipeline component that bundles context text and query for the prompt."""

    @component.output_types(template_variables=Dict[str, Any])
    def run(self, formatted_documents: str, query: str):
        """Package the two prompt inputs into one mapping.

        Args:
            formatted_documents: Context text produced upstream.
            query: The user's question.

        Returns:
            A dict with the single key ``template_variables`` whose value maps
            ``"documents"`` to the context text and ``"query"`` to the question.
        """
        variables = dict(documents=formatted_documents, query=query)
        print(f"Template variables: {variables}")  # Debug print
        return {"template_variables": variables}

class AdvancedRAGSystem:
    """Retrieval-augmented generation over three in-memory document stores.

    The system owns two pipelines:

    * an indexing pipeline (split -> embed -> write) that fills the main store;
    * a query pipeline that embeds the question, retrieves from the main,
      memory and lessons stores, joins the hits, formats them into a prompt
      and passes that prompt to a local Hugging Face generator.

    Nothing in this class writes to ``memory_document_store``; it is only read
    by the query pipeline, so it stays empty unless a caller fills it.
    """

    def __init__(self, embedding_model="sentence-transformers/all-mpnet-base-v2", llm_model="google/flan-t5-small"):
        """Create the document stores and build both pipelines.

        Args:
            embedding_model: Sentence-transformers model name used for both
                document and query embeddings.
            llm_model: Hugging Face model name used by the local generator.
        """
        self.main_document_store = InMemoryDocumentStore()
        self.memory_document_store = InMemoryDocumentStore()
        self.lessons_document_store = InMemoryDocumentStore()

        self.embedding_model = embedding_model
        self.llm_model = llm_model

        self.indexing_pipeline = self._create_indexing_pipeline()
        self.query_pipeline = self._create_query_pipeline()

    def _create_indexing_pipeline(self):
        """Build the split -> embed -> write pipeline targeting the main store."""
        indexing_pipeline = Pipeline()
        indexing_pipeline.add_component("splitter", DocumentSplitter(split_by="word", split_length=200))
        indexing_pipeline.add_component(
            "embedder",
            SentenceTransformersDocumentEmbedder(model=self.embedding_model)
        )
        indexing_pipeline.add_component("writer", DocumentWriter(document_store=self.main_document_store))
        indexing_pipeline.connect("splitter", "embedder")
        indexing_pipeline.connect("embedder", "writer")
        return indexing_pipeline

    def _create_query_pipeline(self):
        """Build the embed -> retrieve (x3) -> join -> prompt -> generate pipeline."""
        query_pipeline = Pipeline()
        query_pipeline.add_component(
            "text_embedder",
            SentenceTransformersTextEmbedder(model=self.embedding_model)
        )
        query_pipeline.add_component("main_retriever", InMemoryEmbeddingRetriever(document_store=self.main_document_store, top_k=3))
        query_pipeline.add_component("memory_retriever", InMemoryEmbeddingRetriever(document_store=self.memory_document_store, top_k=2))
        query_pipeline.add_component("lessons_retriever", InMemoryEmbeddingRetriever(document_store=self.lessons_document_store, top_k=1))

        query_pipeline.add_component("joiner", DocumentJoiner(join_mode="concatenate"))
        query_pipeline.add_component("formatter", DocumentFormatter())
        query_pipeline.add_component("query_adder", QueryAdder())

        # PromptBuilder renders Jinja2 templates, so variables must be written
        # as {{ name }}. Single braces are plain text and would be sent to the
        # model verbatim instead of being replaced by the context and question.
        prompt_template = """
        You are an AI assistant tasked with answering questions based on the given context. Your goal is to provide accurate and helpful responses.

        Context:
        {{ documents }}

        Human: {{ query }}

        Assistant: Let's approach this step-by-step:

        1) First, I'll review the context provided.
        2) Then, I'll identify the relevant information to answer the question.
        3) Finally, I'll formulate a clear and concise answer.

        Based on the context and the question, here's my response:

        """
        query_pipeline.add_component("prompt_builder", PromptBuilder(template=prompt_template))

        query_pipeline.add_component("generator", HuggingFaceLocalGenerator(model=self.llm_model))

        # One query embedding fans out to all three retrievers.
        query_pipeline.connect("text_embedder.embedding", "main_retriever.query_embedding")
        query_pipeline.connect("text_embedder.embedding", "memory_retriever.query_embedding")
        query_pipeline.connect("text_embedder.embedding", "lessons_retriever.query_embedding")
        query_pipeline.connect("main_retriever", "joiner")
        query_pipeline.connect("memory_retriever", "joiner")
        query_pipeline.connect("lessons_retriever", "joiner")
        query_pipeline.connect("joiner.documents", "formatter.documents")
        query_pipeline.connect("formatter.formatted_documents", "query_adder.formatted_documents")
        query_pipeline.connect("query_adder.template_variables", "prompt_builder.template_variables")
        query_pipeline.connect("prompt_builder", "generator")

        return query_pipeline

    def add_documents(self, documents):
        """Split, embed and store ``documents`` in the main document store.

        Args:
            documents: List of ``Document`` objects to index.
        """
        result = self.indexing_pipeline.run({"splitter": {"documents": documents}})
        print(f"Indexing result: {result}")  # Debug print
        print(f"Document store contents: {self.main_document_store.filter_documents()}")  # Debug print

    def add_lesson(self, lesson):
        """Embed ``lesson`` and store it in the lessons document store.

        The lessons store is queried by an embedding retriever, so the lesson
        is embedded before it is written; a document stored without an
        embedding has nothing to be compared against the query vector.

        Args:
            lesson: Lesson text to remember.
        """
        lesson_doc = Document(content=lesson)
        # Reuse the indexing pipeline's embedder so lessons live in the same
        # vector space as the indexed documents and the query embedding.
        embedder = self.indexing_pipeline.get_component("embedder")
        embedder.warm_up()  # loads the model if no pipeline run has done so yet
        embedded_docs = embedder.run(documents=[lesson_doc])["documents"]
        self.lessons_document_store.write_documents(embedded_docs)

    def query(self, query):
        """Run the query pipeline for ``query`` and return its raw result.

        Args:
            query: The user's question.

        Returns:
            The dict returned by ``Pipeline.run``; callers in this file read
            the answer from ``result["generator"]["replies"]``.
        """
        # Fetch once and reuse for both debug listings below.
        stored_docs = self.main_document_store.filter_documents()
        print(f"Documents in store before query: {stored_docs}")  # Debug print

        # Debug: Check if documents have embeddings
        for doc in stored_docs:
            print(f"Document ID: {doc.id}, Has Embedding: {doc.embedding is not None}, Embedding size: {len(doc.embedding) if doc.embedding is not None else 'N/A'}")

        result = self.query_pipeline.run({
            "text_embedder": {"text": query},
            "query_adder": {"query": query}
        })

        # Debug: Print retrieved documents.
        # NOTE(review): the retriever output feeds the joiner, so this key is
        # presumably absent from the final result -- confirm.
        if "main_retriever" in result:
            print(f"Retrieved documents: {result['main_retriever']}")

        print(f"Query result: {result}")  # Debug print
        return result

    def save_state(self, filename):
        """Placeholder: persisting the document stores is not implemented yet."""
        # Implement saving of document stores
        pass

    def load_state(self, filename):
        """Placeholder: restoring the document stores is not implemented yet."""
        # Implement loading of document stores
        pass

# Demo: index one fact, register one lesson, then ask a question.
if __name__ == "__main__":
    rag_system = AdvancedRAGSystem()

    # Index the knowledge-base documents.
    raw_docs = [Document(content="The capital of France is Paris.")]
    rag_system.add_documents(raw_docs)

    # Register a lesson alongside the indexed documents.
    rag_system.add_lesson("Always provide context when discussing geographical facts.")

    # Ask the question and show the first reply, if the generator produced one.
    result = rag_system.query("What is the capital of France?")
    if not result.get("generator") or "replies" not in result["generator"]:
        print("No response generated.")
    else:
        print(result["generator"]["replies"][0])

    # NOTE: nothing in this cell writes to the memory store; only the main and
    # lessons stores receive documents here.

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Indexing result: {'writer': {'documents_written': 1}}
Document store contents: [Document(id=48f2efc93c0a1ccc335639efc5a416440dce3ee857d851757d9d9dfd9a6f7faf, content: 'The capital of France is Paris.', meta: {'source_id': '4552697f972d0d5f3d116fcf78d85ce15fd43314f5312dd064b0d85317d4635b', 'page_number': 1}, embedding: vector of size 768)]
Documents in store before query: [Document(id=48f2efc93c0a1ccc335639efc5a416440dce3ee857d851757d9d9dfd9a6f7faf, content: 'The capital of France is Paris.', meta: {'source_id': '4552697f972d0d5f3d116fcf78d85ce15fd43314f5312dd064b0d85317d4635b', 'page_number': 1}, embedding: vector of size 768)]
Document ID: 48f2efc93c0a1ccc335639efc5a416440dce3ee857d851757d9d9dfd9a6f7faf, Has Embedding: True, Embedding size: 768


config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]



Formatted documents: The capital of France is Paris.
Template variables: {'documents': 'The capital of France is Paris.', 'query': 'What is the capital of France?'}
Query result: {'generator': {'replies': ["I'll review the context."]}}
I'll review the context.


In [None]:
# Usage example
if __name__ == "__main__":
    # Build the system exactly once. Re-creating it after indexing swaps in
    # fresh, empty document stores, and the query then runs with no context.
    rag_system = AdvancedRAGSystem()

    # Define the documents before using them, so the cell also works on a
    # fresh kernel instead of relying on names left over from an earlier cell.
    raw_docs = [Document(content="The capital of France is Paris.")]
    print(raw_docs)

    # Add documents
    rag_system.add_documents(raw_docs)

    # Add a lesson
    rag_system.add_lesson("Always provide context when discussing geographical facts.")

    # Query the system
    result = rag_system.query("What is the capital of France?")
    if "generator" in result and result["generator"] and "replies" in result["generator"]:
        print(result["generator"]["replies"][0])
    else:
        print("No response generated.")

    # NOTE: nothing in this cell writes to the memory store; only the main and
    # lessons stores receive documents here.

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Indexing result: {'writer': {'documents_written': 1}}
Document store contents: [Document(id=48f2efc93c0a1ccc335639efc5a416440dce3ee857d851757d9d9dfd9a6f7faf, content: 'The capital of France is Paris.', meta: {'source_id': '4552697f972d0d5f3d116fcf78d85ce15fd43314f5312dd064b0d85317d4635b', 'page_number': 1}, embedding: vector of size 384)]
[Document(id=4552697f972d0d5f3d116fcf78d85ce15fd43314f5312dd064b0d85317d4635b, content: 'The capital of France is Paris.')]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Indexing result: {'writer': {'documents_written': 1}}
Document store contents: [Document(id=48f2efc93c0a1ccc335639efc5a416440dce3ee857d851757d9d9dfd9a6f7faf, content: 'The capital of France is Paris.', meta: {'source_id': '4552697f972d0d5f3d116fcf78d85ce15fd43314f5312dd064b0d85317d4635b', 'page_number': 1}, embedding: vector of size 384)]
Documents in store before query: []


Batches:   0%|          | 0/1 [00:00<?, ?it/s]



Formatted documents: 
Template variables: {'documents': '', 'query': 'What is the capital of France?'}
Query result: {'generator': {'replies': ['can be used to search for documents']}}
can be used to search for documents


In [None]:
import gc
import torch
from haystack import Pipeline, Document
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.embedders import SentenceTransformersTextEmbedder, SentenceTransformersDocumentEmbedder
from haystack.components.writers import DocumentWriter
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.retrievers import InMemoryEmbeddingRetriever
from haystack.components.generators import HuggingFaceLocalGenerator
from haystack.components.builders import PromptBuilder
from haystack import component
from typing import List, Dict, Any

@component
class DocumentFormatter:
    """Pipeline component that renders documents as a dash-bulleted list."""

    @component.output_types(formatted_documents=str)
    def run(self, documents: List[Document]):
        """Turn each document's ``content`` into a ``- `` bullet line.

        Args:
            documents: Documents to render.

        Returns:
            A dict with the single key ``formatted_documents`` holding the
            bullets joined by newlines (empty string for no documents).
        """
        bullets = (f"- {document.content}" for document in documents)
        bullet_list = "\n".join(bullets)
        print(f"Formatted documents: {bullet_list}")  # Debug print
        return {"formatted_documents": bullet_list}

@component
class QueryAdder:
    """Pipeline component that pairs the formatted context with the query."""

    @component.output_types(template_variables=Dict[str, Any])
    def run(self, formatted_documents: str, query: str):
        """Build the variable mapping consumed by the prompt builder.

        Args:
            formatted_documents: Context text produced upstream.
            query: The user's question.

        Returns:
            A dict with the single key ``template_variables`` whose value maps
            ``"documents"`` to the context text and ``"query"`` to the question.
        """
        prompt_inputs = dict(documents=formatted_documents, query=query)
        print(f"Template variables: {prompt_inputs}")  # Debug print
        return {"template_variables": prompt_inputs}

class AdvancedRAGSystem:
    """Single-store retrieval-augmented generation system.

    An indexing pipeline (split -> embed -> write) fills one in-memory
    document store; a query pipeline embeds the question, retrieves the top
    matches, renders them into a prompt and passes it to a local Hugging Face
    generator. Each public operation ends by running the garbage collector
    and emptying the CUDA cache.
    """

    def __init__(self, embedding_model="Snowflake/snowflake-arctic-embed-l", llm_model="meta-llama/Meta-Llama-3-8B-Instruct"):
        """Create the document store and build both pipelines.

        Args:
            embedding_model: Sentence-transformers model name used for both
                document and query embeddings.
            llm_model: Hugging Face model name used by the local generator.
        """
        self.document_store = InMemoryDocumentStore()
        self.embedding_model = embedding_model
        self.llm_model = llm_model

        self.indexing_pipeline = self._create_indexing_pipeline()
        self.query_pipeline = self._create_query_pipeline()

    def _create_indexing_pipeline(self):
        """Build the split -> embed -> write pipeline for the document store."""
        indexing_pipeline = Pipeline()
        indexing_pipeline.add_component("splitter", DocumentSplitter(split_by="word", split_length=100))
        indexing_pipeline.add_component(
            "embedder",
            SentenceTransformersDocumentEmbedder(model=self.embedding_model)
        )
        indexing_pipeline.add_component("writer", DocumentWriter(document_store=self.document_store))
        indexing_pipeline.connect("splitter", "embedder")
        indexing_pipeline.connect("embedder", "writer")
        return indexing_pipeline

    def _create_query_pipeline(self):
        """Build the embed -> retrieve -> format -> prompt -> generate pipeline."""
        query_pipeline = Pipeline()
        query_pipeline.add_component(
            "text_embedder",
            SentenceTransformersTextEmbedder(model=self.embedding_model)
        )
        query_pipeline.add_component("retriever", InMemoryEmbeddingRetriever(document_store=self.document_store, top_k=3))
        query_pipeline.add_component("formatter", DocumentFormatter())
        query_pipeline.add_component("query_adder", QueryAdder())

        # PromptBuilder renders Jinja2 templates, so variables must be written
        # as {{ name }}. Single braces are plain text and would be sent to the
        # model verbatim instead of being replaced by the context and question.
        prompt_template = """
        Answer the question based on the given context. If you can't answer the question based on the context, say "I don't know".

        Context:
        {{ documents }}

        Question: {{ query }}

        Answer:
        """
        query_pipeline.add_component("prompt_builder", PromptBuilder(template=prompt_template))

        query_pipeline.add_component("generator", HuggingFaceLocalGenerator(model=self.llm_model))

        query_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
        query_pipeline.connect("retriever", "formatter")
        query_pipeline.connect("formatter.formatted_documents", "query_adder.formatted_documents")
        query_pipeline.connect("query_adder.template_variables", "prompt_builder.template_variables")
        query_pipeline.connect("prompt_builder", "generator")

        return query_pipeline

    def add_documents(self, documents):
        """Split, embed and store ``documents``, then release cached memory.

        Args:
            documents: List of ``Document`` objects to index.
        """
        result = self.indexing_pipeline.run({"splitter": {"documents": documents}})
        print(f"Indexing result: {result}")  # Debug print
        print(f"Document store contents: {self.document_store.filter_documents()}")  # Debug print
        gc.collect()
        torch.cuda.empty_cache()

    def query(self, query):
        """Run the query pipeline for ``query`` and return its raw result.

        Args:
            query: The user's question.

        Returns:
            The dict returned by ``Pipeline.run``; callers in this file read
            the answer from ``result["generator"]["replies"]``.
        """
        print(f"Documents in store before query: {self.document_store.filter_documents()}")  # Debug print
        result = self.query_pipeline.run({
            "text_embedder": {"text": query},
            "query_adder": {"query": query}
        })
        print(f"Query result: {result}")  # Debug print
        gc.collect()
        torch.cuda.empty_cache()
        return result

    def clear_memory(self):
        """Delete every stored document and release cached memory."""
        all_docs = self.document_store.filter_documents()
        if all_docs:
            doc_ids = [doc.id for doc in all_docs]
            self.document_store.delete_documents(doc_ids)
        gc.collect()
        torch.cuda.empty_cache()

# Demo: index one fact, ask a question about it, then empty the store.
if __name__ == "__main__":
    rag_system = AdvancedRAGSystem()

    # Index the knowledge-base documents.
    raw_docs = [Document(content="The capital of France is Paris.")]
    rag_system.add_documents(raw_docs)

    # Ask the question and show the first reply, if the generator produced one.
    result = rag_system.query("What is the capital of France?")
    if not result.get("generator") or "replies" not in result["generator"]:
        print("No response generated.")
    else:
        print(result["generator"]["replies"][0])

    # Drop the indexed documents and free cached memory once finished.
    rag_system.clear_memory()

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Indexing result: {'writer': {'documents_written': 1}}
Document store contents: [Document(id=48f2efc93c0a1ccc335639efc5a416440dce3ee857d851757d9d9dfd9a6f7faf, content: 'The capital of France is Paris.', meta: {'source_id': '4552697f972d0d5f3d116fcf78d85ce15fd43314f5312dd064b0d85317d4635b', 'page_number': 1}, embedding: vector of size 384)]
Documents in store before query: [Document(id=48f2efc93c0a1ccc335639efc5a416440dce3ee857d851757d9d9dfd9a6f7faf, content: 'The capital of France is Paris.', meta: {'source_id': '4552697f972d0d5f3d116fcf78d85ce15fd43314f5312dd064b0d85317d4635b', 'page_number': 1}, embedding: vector of size 384)]


config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
import gc
import torch
from haystack import Pipeline, Document
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.embedders import SentenceTransformersTextEmbedder, SentenceTransformersDocumentEmbedder
from haystack.components.writers import DocumentWriter
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.retrievers import InMemoryEmbeddingRetriever
from haystack.components.generators import HuggingFaceLocalGenerator
from haystack.components.builders import PromptBuilder
from haystack import component
from typing import List, Dict, Any

@component
class DocumentFormatter:
    """Pipeline component that lists document contents as ``- `` bullets."""

    @component.output_types(formatted_documents=str)
    def run(self, documents: List[Document]):
        """Render one bullet line per document.

        Args:
            documents: Documents whose ``content`` becomes the bullet text.

        Returns:
            A dict with the single key ``formatted_documents`` containing the
            bullet lines joined by newlines (empty string for no documents).
        """
        lines = ["- " + str(document.content) for document in documents]
        rendered = "\n".join(lines)
        print(f"Formatted documents: {rendered}")  # Debug print
        return {"formatted_documents": rendered}

@component
class QueryAdder:
    """Pipeline component that merges context text and query into one dict."""

    @component.output_types(template_variables=Dict[str, Any])
    def run(self, formatted_documents: str, query: str):
        """Assemble the variables handed to the prompt builder.

        Args:
            formatted_documents: Context text produced upstream.
            query: The user's question.

        Returns:
            A dict with the single key ``template_variables`` whose value maps
            ``"documents"`` to the context text and ``"query"`` to the question.
        """
        payload = dict(documents=formatted_documents, query=query)
        print(f"Template variables: {payload}")  # Debug print
        return {"template_variables": payload}

class AdvancedRAGSystem:
    """Single-store RAG system with a 4-bit quantized local generator.

    An indexing pipeline (split -> embed -> write) fills one in-memory
    document store; a query pipeline embeds the question, retrieves the top
    matches, renders them into a prompt and passes it to a local Hugging Face
    generator loaded with 4-bit quantization settings. Each public operation
    ends by running the garbage collector and emptying the CUDA cache.
    """

    def __init__(self, embedding_model="Snowflake/snowflake-arctic-embed-l", llm_model="meta-llama/Meta-Llama-3-8B-Instruct"):
        """Create the document store and build both pipelines.

        Args:
            embedding_model: Sentence-transformers model name used for both
                document and query embeddings.
            llm_model: Hugging Face model name used by the local generator.
        """
        self.document_store = InMemoryDocumentStore()
        self.embedding_model = embedding_model
        self.llm_model = llm_model

        self.indexing_pipeline = self._create_indexing_pipeline()
        self.query_pipeline = self._create_query_pipeline()

    def _create_indexing_pipeline(self):
        """Build the split -> embed -> write pipeline for the document store."""
        indexing_pipeline = Pipeline()
        indexing_pipeline.add_component("splitter", DocumentSplitter(split_by="word", split_length=100))
        indexing_pipeline.add_component(
            "embedder",
            SentenceTransformersDocumentEmbedder(model=self.embedding_model)
        )
        indexing_pipeline.add_component("writer", DocumentWriter(document_store=self.document_store))
        indexing_pipeline.connect("splitter", "embedder")
        indexing_pipeline.connect("embedder", "writer")
        return indexing_pipeline

    def _create_query_pipeline(self):
        """Build the embed -> retrieve -> format -> prompt -> generate pipeline."""
        query_pipeline = Pipeline()
        query_pipeline.add_component(
            "text_embedder",
            SentenceTransformersTextEmbedder(model=self.embedding_model)
        )
        query_pipeline.add_component("retriever", InMemoryEmbeddingRetriever(document_store=self.document_store, top_k=3))
        query_pipeline.add_component("formatter", DocumentFormatter())
        query_pipeline.add_component("query_adder", QueryAdder())

        # PromptBuilder renders Jinja2 templates, so variables must be written
        # as {{ name }}. Single braces are plain text and would be sent to the
        # model verbatim instead of being replaced by the context and question.
        prompt_template = """
        Answer the question based on the given context. Be direct and use the information provided.

        Context:
        {{ documents }}

        Question: {{ query }}

        Answer: The answer to the question "{{ query }}" based on the given context is
        """
        query_pipeline.add_component("prompt_builder", PromptBuilder(template=prompt_template))

        # NOTE(review): the transformers run log reports `load_in_4bit` as
        # deprecated in favour of passing a `BitsAndBytesConfig` through
        # `quantization_config`; migrate once that import is available here.
        query_pipeline.add_component("generator", HuggingFaceLocalGenerator(
            model=self.llm_model,
            huggingface_pipeline_kwargs={
                "device_map": "auto",
                "model_kwargs": {
                    "load_in_4bit": True,
                    "bnb_4bit_use_double_quant": True,
                    "bnb_4bit_quant_type": "nf4",
                    "bnb_4bit_compute_dtype": torch.bfloat16
                }
            },
            generation_kwargs={"max_new_tokens": 500}
        ))

        query_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
        query_pipeline.connect("retriever", "formatter")
        query_pipeline.connect("formatter.formatted_documents", "query_adder.formatted_documents")
        query_pipeline.connect("query_adder.template_variables", "prompt_builder.template_variables")
        query_pipeline.connect("prompt_builder", "generator")

        return query_pipeline

    def add_documents(self, documents):
        """Split, embed and store ``documents``, then release cached memory.

        Args:
            documents: List of ``Document`` objects to index.
        """
        result = self.indexing_pipeline.run({"splitter": {"documents": documents}})
        print(f"Indexing result: {result}")  # Debug print
        print(f"Document store contents: {self.document_store.filter_documents()}")  # Debug print
        gc.collect()
        torch.cuda.empty_cache()

    def query(self, query):
        """Run the query pipeline for ``query`` and return its raw result.

        Args:
            query: The user's question.

        Returns:
            The dict returned by ``Pipeline.run``; callers in this file read
            the answer from ``result["generator"]["replies"]``.
        """
        print(f"Documents in store before query: {self.document_store.filter_documents()}")  # Debug print
        result = self.query_pipeline.run({
            "text_embedder": {"text": query},
            "query_adder": {"query": query}
        })
        print(f"Query result: {result}")  # Debug print
        gc.collect()
        torch.cuda.empty_cache()
        return result

    def clear_memory(self):
        """Delete every stored document and release cached memory."""
        all_docs = self.document_store.filter_documents()
        if all_docs:
            doc_ids = [doc.id for doc in all_docs]
            self.document_store.delete_documents(doc_ids)
        gc.collect()
        torch.cuda.empty_cache()

# Demo: preload the generator, index one fact, ask about it, then clean up.
if __name__ == "__main__":
    rag_system = AdvancedRAGSystem()

    # Call warm_up() on the generator before indexing or querying.
    rag_system.query_pipeline.get_component("generator").warm_up()

    # Index the knowledge-base documents.
    raw_docs = [Document(content="The capital of France is Paris.")]
    rag_system.add_documents(raw_docs)

    # Ask the question and show the first reply, if the generator produced one.
    result = rag_system.query("What is the capital of France?")
    if not result.get("generator") or "replies" not in result["generator"]:
        print("No response generated.")
    else:
        print(result["generator"]["replies"][0])

    # Drop the indexed documents and free cached memory once finished.
    rag_system.clear_memory()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/84.0k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/107 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/704 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.38k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/712k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Indexing result: {'writer': {'documents_written': 1}}
Document store contents: [Document(id=48f2efc93c0a1ccc335639efc5a416440dce3ee857d851757d9d9dfd9a6f7faf, content: 'The capital of France is Paris.', meta: {'source_id': '4552697f972d0d5f3d116fcf78d85ce15fd43314f5312dd064b0d85317d4635b', 'page_number': 1}, embedding: vector of size 1024)]
Documents in store before query: [Document(id=48f2efc93c0a1ccc335639efc5a416440dce3ee857d851757d9d9dfd9a6f7faf, content: 'The capital of France is Paris.', meta: {'source_id': '4552697f972d0d5f3d116fcf78d85ce15fd43314f5312dd064b0d85317d4635b', 'page_number': 1}, embedding: vector of size 1024)]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Formatted documents: - The capital of France is Paris.
Template variables: {'documents': '- The capital of France is Paris.', 'query': 'What is the capital of France?'}
Query result: {'generator': {'replies': [' {answer}.\n    """\n\n    # Initialize the answer as an empty string\n    answer = ""\n\n    # Check if the query is in the documents\n    if query in documents:\n        # If the query is found, set the answer to the query itself\n        answer = query\n\n    # Check if the query is in the answer\n    elif query in answer:\n        # If the query is found, set the answer to the query itself\n        answer = query\n\n    # If the query is not found, return a message indicating that the query is not found\n    else:\n        answer = "The query \'{query}\' is not found in the given context.".format(query=query)\n\n    # Return the answer\n    return answer\n\n\n# Test the function\ndocuments = ["This is a sample document.", "Another sample document."]\nquery = "sample"\nprint(

In [None]:
if __name__ == "__main__":
    # Start from a fresh system (new, empty document store).
    rag_system = AdvancedRAGSystem()

    # Index the knowledge-base documents.
    raw_docs = [Document(content="The capital of France is Paris.")]
    rag_system.add_documents(raw_docs)

    # Ask the question and show the first reply, if the generator produced one.
    result = rag_system.query("What is the capital of France?")
    if not result.get("generator") or "replies" not in result["generator"]:
        print("No response generated.")
    else:
        print(result["generator"]["replies"][0])

    # Drop the indexed documents and free cached memory once finished.
    rag_system.clear_memory()

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Indexing result: {'writer': {'documents_written': 1}}
Document store contents: [Document(id=48f2efc93c0a1ccc335639efc5a416440dce3ee857d851757d9d9dfd9a6f7faf, content: 'The capital of France is Paris.', meta: {'source_id': '4552697f972d0d5f3d116fcf78d85ce15fd43314f5312dd064b0d85317d4635b', 'page_number': 1}, embedding: vector of size 1024)]


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Documents in store before query: [Document(id=48f2efc93c0a1ccc335639efc5a416440dce3ee857d851757d9d9dfd9a6f7faf, content: 'The capital of France is Paris.', meta: {'source_id': '4552697f972d0d5f3d116fcf78d85ce15fd43314f5312dd064b0d85317d4635b', 'page_number': 1}, embedding: vector of size 1024)]


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Formatted documents: - The capital of France is Paris.
Template variables: {'documents': '- The capital of France is Paris.', 'query': 'What is the capital of France?'}
Query result: {'generator': {'replies': [' """\n\n        # Create a dictionary to store the answers\n        answers = {}\n\n        # Loop through the documents\n        for document in documents:\n            # Extract the title and text from the document\n            title = document["title"]\n            text = document["text"]\n\n            # Split the text into sentences\n            sentences = text.split(". ")\n\n            # Loop through the sentences\n            for sentence in sentences:\n                # Check if the query is in the sentence\n                if query in sentence:\n                    # If the query is in the sentence, extract the answer\n                    answer = sentence.split(":")[1].strip()\n                    # Store the answer in the dictionary\n                    answers[titl