In [11]:
#!pip install pymongo
#!pip install haystack-ai
#!pip install mongodb-atlas-haystack
#!pip install sentence-transformers


In [1]:
import pymongo
import os
from haystack import Pipeline, Document
from haystack.document_stores.types import DuplicatePolicy
from haystack.components.writers import DocumentWriter
from haystack.components.generators import OpenAIGenerator
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack_integrations.document_stores.mongodb_atlas import MongoDBAtlasDocumentStore
from haystack_integrations.components.retrievers.mongodb_atlas import MongoDBAtlasEmbeddingRetriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from pymongo import MongoClient
from getpass import getpass
from haystack.components.generators import OpenAIGenerator


In [2]:
os.environ['MONGO_CONNECTION_STRING'] = 'connstringhere'
os.environ['OPENAI_API_KEY'] = 'apikeyhere'

The below is code that only runs locally in notebook . I disconnected this from mongo as there's a lot of custom logic needed to retrieve non embe

In [25]:

#Create some example documents
doc_embedder = SentenceTransformersDocumentEmbedder(model="intfloat/e5-base-v2")

documents = [
    Document(content="there is is a dog named beau."),
    Document(content="there is a dog named steve."),
    Document(content="there is a cat named greg."),
]


#document_store = MongoDBAtlasDocumentStore(
#    database_name="contentDeliveryApi",
#    collection_name="Article",
#    vector_search_index="bridex",
#)

document_store = InMemoryDocumentStore()
doc_embedder.warm_up()
docs_with_embeddings = doc_embedder.run(documents)
document_store.write_documents(docs_with_embeddings["documents"])

# Define some more components
doc_writer = DocumentWriter(document_store=document_store, policy=DuplicatePolicy.SKIP)
#retriever = MongoDBAtlasEmbeddingRetriever(document_store=document_store)
retriever = InMemoryEmbeddingRetriever(document_store)
query_embedder = SentenceTransformersTextEmbedder(model="intfloat/e5-base-v2")
generator = OpenAIGenerator(model="gpt-4o-mini")

# Pipeline that ingests document for retrieval
indexing_pipe = Pipeline()
indexing_pipe.add_component(instance=doc_embedder, name="doc_embedder")
indexing_pipe.add_component(instance=doc_writer, name="doc_writer")

indexing_pipe.connect("doc_embedder.documents", "doc_writer.documents")
indexing_pipe.run({"doc_embedder": {"documents": documents}})

# Build a RAG pipeline with a Retriever to get documents relevant to 
# the query, a PromptBuilder to create a custom prompt and the OpenAIGenerator (LLM)
prompt_template = """
Given these documents, answer the question.\nDocuments:
{% for doc in documents %}
    {{ doc.content }}
{% endfor %}

\nQuestion: {{question}}
\nAnswer:
"""
rag_pipeline = Pipeline()
rag_pipeline.add_component(instance=query_embedder, name="query_embedder")
rag_pipeline.add_component("retriever", retriever)
rag_pipeline.add_component(instance=PromptBuilder(template=prompt_template), name="prompt_builder")
rag_pipeline.add_component(instance=OpenAIGenerator(), name="llm")
rag_pipeline.connect("query_embedder", "retriever.query_embedding")
rag_pipeline.connect("retriever", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder", "llm")

# Ask a question on the data you just added.
question = "do you see any documents about a dog"
result = rag_pipeline.run(
    {
        "query_embedder": {"text": question},
        "prompt_builder": {"question": question},
    }
)
print(result)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

ID 'b84e57c682405d25454f95b77a440a431c9044da37b05b3134eae9445d404d48' already exists
ID '7b0f2e58545f7c4c00689434aa17d6734a05fcd3b104efc617687e0c328938a5' already exists
ID '3fdd6a6fe41ec4db7e98ff90b05a6e0f56f624a67b275e18ce0549221d18d4fe' already exists


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'llm': {'replies': ['Yes, there are documents about dogs. There are mentions of a dog named Beau and a dog named Steve.'], 'meta': [{'model': 'gpt-4o-mini-2024-07-18', 'index': 0, 'finish_reason': 'stop', 'usage': {'completion_tokens': 22, 'prompt_tokens': 57, 'total_tokens': 79, 'completion_tokens_details': CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), 'prompt_tokens_details': PromptTokensDetails(audio_tokens=0, cached_tokens=0)}}]}}
