[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mongodb-developer/ai-rag-lab-notebooks/blob/main/notebook_template.ipynb)


[![Lab Documentation and Solutions](https://img.shields.io/badge/Lab%20Documentation%20and%20Solutions-purple)](https://mongodb-developer.github.io/ai-rag-lab/)


# Step 1: Install libraries


In [1]:
! pip install -qU pymongo datasets langchain fireworks-ai tiktoken sentence_transformers tqdm

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m980.7 kB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m21.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m471.6/471.6 kB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m19.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.1/95.1 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m30.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m255.2/255.2 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

# Step 2: Setup prerequisites

Replace:

- `<MONGODB_URI>` with your **MongoDB connection string**
- `<FIREWORKS_API_KEY>` with your **Fireworks API key**


In [2]:
import os
from pymongo import MongoClient

In [3]:
# Retain the quotes ("") when pasting the URI
MONGODB_URI = ""
# Initialize a MongoDB Python client
mongodb_client = MongoClient(MONGODB_URI, appname="devrel.workshop.rag")
# Check the connection to the server
mongodb_client.admin.command("ping")

{'ok': 1}

In [4]:
# Retain the quotes ("") when pasting the API key
os.environ["FIREWORKS_API_KEY"] = ""

# Step 3: Load the dataset


In [85]:
import pandas as pd
from datasets import load_dataset
import json

In [88]:
data = pd.read_csv("/content/scraped_articles.csv")
docs = json.loads(data.to_json(orient='records'))

In [89]:
# Check the number of documents in the dataset
len(docs)

8

In [90]:
# Preview a document
# docs[0]
docs
# data = data[data['Scraped Content' != ""]]

[{'Category': 'Packaging Beans',
  'Scraped Content': 'Share:\nTwitter\nFacebook\nPocket\nLinkedIn\nSensorial degradation in green coffee can represent significant financial losses for producers, traders, and roasters. Yet while a certain degree of degradation over time may be inevitable, the material of the coffee packaging can have a significant impact on the shelf life and quality of the beans.\nJute, high-barrier, vacuum: which is really best? How much of an impact does the material actually have on the beans? And how can we measure this? Let’s take a look.\nLee este artículo en español\nMaterial Del Empaque: Cómo Afecta al Café Verde Con el Tiempo\nAn empty, used jute bag with moisture stains. Credit: Ivan Petrich\nReviewing Green Coffee Packaging Materials\nSome of the most common materials are burlap, permeable plastic, high-barrier plastic, and vacuum.\nBurlap\nA canvas made from natural fibres extracted from plants, jute or burlap is the most traditional material used for manu

# Step 4: Chunk up the data


In [91]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from typing import Dict, List

In [92]:
# Separators to split on
separators = ["\n\n", "\n", " ", "", "#", "##", "###"]

📚 https://python.langchain.com/v0.1/docs/modules/data_connection/document_transformers/split_by_token/#tiktoken


In [93]:
# Use the `RecursiveCharacterTextSplitter` text splitter with the `cl100k_base` encoding
# For text data, you typically want to keep 1-2 paragraphs (~200 tokens) in a single chunk
# Chunk overlap of 15-20% of the chunk size is recommended
# Pass the `separators` list above as an argument called `separators`
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    encoding_name="cl100k_base", separators=separators, chunk_size=200, chunk_overlap=30
)

📚 https://api.python.langchain.com/en/latest/character/langchain_text_splitters.character.RecursiveCharacterTextSplitter.html

In [94]:
def get_chunks(doc: Dict, text_field: str) -> List[Dict]:
    """
    Chunk up a document.

    Args:
        doc (Dict): Parent document to generate chunks from.
        text_field (str): Text field to chunk.

    Returns:
        List[Dict]: List of chunked documents.
    """
    # Extract the field to chunk from `doc`
    text = doc[text_field]
    # Split `text` using the appropriate method of the `RecursiveCharacterTextSplitter` class
    # NOTE: `text` is a string
    chunks = text_splitter.split_text(text)

    # Iterate through `chunks` and for each chunk:
    # 1. Create a shallow copy of `doc`, call it `temp`
    # 2. Set the `text_field` field in `temp` to the content of the chunk
    # 3. Append `temp` to `chunked_data`
    chunked_data = []
    for chunk in chunks:
       temp = doc.copy()
       temp[text_field]=chunk
       chunked_data.append(temp)

    return chunked_data

In [95]:
split_docs = []
type(docs)

list

In [96]:
# Iterate through `docs`, use the `get_chunks` function to chunk up the documents based on the "body" field, and add the list of chunked documents to `split_docs` initialized above.
split_docs = []
for doc in docs:
    # print(doc[1]["Scraped Content"])
    chunks = get_chunks(doc, "Scraped Content")
    split_docs.extend(chunks)

In [98]:
print(docs)
type(docs)

[{'Category': 'Packaging Beans', 'Scraped Content': 'Share:\nTwitter\nFacebook\nPocket\nLinkedIn\nSensorial degradation in green coffee can represent significant financial losses for producers, traders, and roasters. Yet while a certain degree of degradation over time may be inevitable, the material of the coffee packaging can have a significant impact on the shelf life and quality of the beans.\nJute, high-barrier, vacuum: which is really best? How much of an impact does the material actually have on the beans? And how can we measure this? Let’s take a look.\nLee este artículo en español\nMaterial Del Empaque: Cómo Afecta al Café Verde Con el Tiempo\nAn empty, used jute bag with moisture stains. Credit: Ivan Petrich\nReviewing Green Coffee Packaging Materials\nSome of the most common materials are burlap, permeable plastic, high-barrier plastic, and vacuum.\nBurlap\nA canvas made from natural fibres extracted from plants, jute or burlap is the most traditional material used for manufa

list

In [99]:
# Check that the length of the list of chunked documents is greater than the length of `docs`
len(split_docs)

79

In [100]:
# Preview one of the items in split_docs- ensure that it is a Python dictionary
split_docs[0]

{'Category': 'Packaging Beans',
 'Scraped Content': 'Share:\nTwitter\nFacebook\nPocket\nLinkedIn\nSensorial degradation in green coffee can represent significant financial losses for producers, traders, and roasters. Yet while a certain degree of degradation over time may be inevitable, the material of the coffee packaging can have a significant impact on the shelf life and quality of the beans.\nJute, high-barrier, vacuum: which is really best? How much of an impact does the material actually have on the beans? And how can we measure this? Let’s take a look.\nLee este artículo en español\nMaterial Del Empaque: Cómo Afecta al Café Verde Con el Tiempo\nAn empty, used jute bag with moisture stains. Credit: Ivan Petrich\nReviewing Green Coffee Packaging Materials\nSome of the most common materials are burlap, permeable plastic, high-barrier plastic, and vacuum.\nBurlap'}

# Step 5: Generate embeddings


In [101]:
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

In [102]:
# Load the `gte-small` model using the Sentence Transformers library
embedding_model = SentenceTransformer("thenlper/gte-small")

📚 https://huggingface.co/thenlper/gte-small

In [103]:
# Define a function that takes a piece of text (`text`) as input, embeds it using the `embedding_model` instantiated above and returns the embedding as a list
# An array can be converted to a list using the `tolist()` method
def get_embedding(text: str) -> List[float]:
    """
    Generate the embedding for a piece of text.

    Args:
        text (str): Text to embed.

    Returns:
        List[float]: Embedding of the text as a list.
    """
    embedding = embedding_model.encode(text)

    return embedding.tolist()

In [104]:
embedded_docs = []

In [105]:
# Add an `embedding` field to each dictionary in `split_docs`
# The `embedding` field should correspond to the embedding of the value of the `body` field
# Use the `get_embedding` function defined above to generate the embedding
# Append the updated dictionaries to `embedded_docs` initialized above.
for doc in tqdm(split_docs):
    doc["embedding"] = get_embedding(doc["Scraped Content"])

    embedded_docs.append(doc)

100%|██████████| 79/79 [00:18<00:00,  4.24it/s]


In [106]:
# Check that the length of `embedded_docs` is the same as that of `split_docs`
len(embedded_docs)

79

# Step 6: Ingest data into MongoDB


In [107]:
# Name of the database -- Change if needed or leave as is
DB_NAME = "mongodb_rag_lab"
# Name of the collection -- Change if needed or leave as is
COLLECTION_NAME = "knowledge_base"
# Name of the vector search index -- Change if needed or leave as is
ATLAS_VECTOR_SEARCH_INDEX_NAME = "vector_index"

📚 https://pymongo.readthedocs.io/en/stable/tutorial.html#getting-a-database

📚 https://pymongo.readthedocs.io/en/stable/tutorial.html#getting-a-collection

In [108]:
# Connect to the collection defined above using the `mongodb_client` defined in Step 2
collection = mongodb_client[DB_NAME][COLLECTION_NAME]

In [109]:
# Bulk delete all existing records from the collection defined above
collection.delete_many({})

DeleteResult({'n': 0, 'electionId': ObjectId('7fffffff00000000000000d7'), 'opTime': {'ts': Timestamp(1728923575, 20), 't': 215}, 'ok': 1.0, '$clusterTime': {'clusterTime': Timestamp(1728923575, 20), 'signature': {'hash': b's\x9c"\x9e\x0c\x10\xc3\x95\x12\x10T9~\xa9\xc2f\xea\xd2\xbe\x12', 'keyId': 7374051199001034755}}, 'operationTime': Timestamp(1728923575, 20)}, acknowledged=True)

📚 https://pymongo.readthedocs.io/en/stable/examples/bulk.html#bulk-insert


In [110]:
# Bulk insert `embedded_docs` into the collection defined above -- should be a one-liner
collection.insert_many(embedded_docs)

print("Data ingestion into MongoDB completed")

Data ingestion into MongoDB completed


# Step 7: Create a vector search index

Follow the instructions in the documentation to create a Vector Search index in the Atlas UI.


# Step 8: Perform semantic search on your data


### Define a vector search function

📚 https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/#fields

📚 https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/#ann-examples (Refer to the "Basic Example")


In [120]:
# Define a function to retrieve relevant documents for a user query using vector search
def vector_search(user_query: str) -> List[Dict]:
    """
    Retrieve relevant documents for a user query using vector search.

    Args:
    user_query (str): The user's query string.

    Returns:
    list: A list of matching documents.
    """

    # Generate embedding for the `user_query` using the `get_embedding` function defined in Step 5
    query_embedding = get_embedding(user_query)

    # Define an aggregation pipeline consisting of a $vectorSearch stage, followed by a $project stage
    # Set the number of candidates to 150 and only return the top 5 documents from the vector search
    # In the $project stage, exclude the `_id` field and include only the `body` field and `vectorSearchScore`
    # NOTE: Use variables defined previously for the `index`, `queryVector` and `path` fields in the $vectorSearch stage
    pipeline = [
      {
          "$vectorSearch": {
              "index": ATLAS_VECTOR_SEARCH_INDEX_NAME,
              "queryVector": query_embedding,
              "path": "embedding",
              "numCandidates": 150,
              "limit": 5,
          }
      },
      {
          "$project": {
              "_id": 0,
              "Scraped Content": 1,
              "score": {"$meta": "vectorSearchScore"}
          }
      }
  ]

    # Execute the aggregation `pipeline` and store the results in `results`
    results = collection.aggregate(pipeline)

    return list(results)

### Run vector search queries


In [121]:
vector_search("What is MCMC Regulations?")

[{'Scraped Content': 'Search\nSearch\nCommodities\nAn Ultimate Guide to Shipping Metals and Minerals\nAn Ultimate Guide to Shipping Generators and Power Systems\nAn Ultimate Guide to Shipping Electrical and Plumbing Equipment\nMost Popular\nAn Insight Into BMW Supply Chain Strategy\nBreaking Down McDonald’s Supply Chain Strategy: A Recipe for Success\nAn Insight into Unilever Supply Chain Strategy\nPrivacy Policy\nCopyright © 2021 Dfreight.org\nSafe Web\nContact Us\nOur customer support team is available for your assistance. You can give us a call or send us an email.\nInfo@Dfreight.org\n402, Emaar Square Bldg 4, Downtown, Dubai\nQuick Menu\nAbout\nContact Us\nCareers\nCopyright © 2021 Dfreight.org',
  'score': 0.9026746153831482},
 {'Scraped Content': 'NATIONAL POLICY OBJECTIVES\nThe Commission has been entrusted to promote the following policy objectives for the postal services industry:\nTo safeguard the provision of affordable and quality universal service;\nTo promote the growth o

In [124]:
vector_search("What are National Policy of MCMC on Postal")

[{'Scraped Content': 'NATIONAL POLICY OBJECTIVES\nThe Commission has been entrusted to promote the following policy objectives for the postal services industry:\nTo safeguard the provision of affordable and quality universal service;\nTo promote the growth of a competitive and innovative postal services industry;\nTo establish Malaysia as a major global centre and hub for postal services;\nTo regulate for the long-term benefit of the user;\nTo promote a\xa0high level of consumer confidence in service delivery by the postal services industry;\nTo ensure the security of postal service industry workers and the security of postal articles and the postal network;\nTo respond to the technical, economic and social environment and needs of consumers; and\nTo ensure fair competition amongst the parties involved in the postal services industry in Malaysia.\nNATIONAL POSTAL STRATEGY\nEffective postal and courier services which meet commercial and social needs of the country are important to both 

# 🦹‍♀️ Combine pre-filtering with vector search

📚 https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-type/#about-the-filter-type

📚 https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/#ann-examples (Refer to the "Filter Example")


### Filter for documents where the content type is `Video`

Modify the vector search index definition (from Step 7) in the Atlas UI to include the `metadata.contentType` field as a `filter` field

In [125]:
# Embed the user query
query_embedding = get_embedding("What is MongoDB Atlas Search?")

In [None]:
# Modify the $vectorSearch stage of the aggregation pipeline defined previously to include a filter for documents where the `metadata.contentType` field has the value "Video"
pipeline = [
    {
        "$vectorSearch": {
            "index": "vector_index",
            "path": "embedding",
            "queryVector": query_embedding,
            "numCandidates": 150,
            "limit": 5,
            "filter": {"metadata.contentType": "Video"}
        }
    },
    {
        "$project": {
            "_id": 0,
            "body": 1,
            "score": {"$meta": "vectorSearchScore"}
        }
    }
]

In [None]:
# Execute the aggregation pipeline and view the results
results = collection.aggregate(pipeline)
list(results)

[{'body': "# The Atlas Search 'cene: Season 1\n\n# The Atlas Search 'cene: Season 1\n\nWelcome to the first season of a video series dedicated to Atlas Search!  This series of videos is designed to guide you through the journey from getting started and understanding the concepts, to advanced techniques.\n\n## What is Atlas Search?\n\n[Atlas Search][1] is an embedded full-text search in MongoDB Atlas that gives you a seamless, scalable experience for building relevance-based app features. Built on Apache Lucene, Atlas Search eliminates the need to run a separate search system alongside your database.\n\nBy integrating the database, search engine, and sync mechanism into a single, unified, and fully managed platform, Atlas Search is the fastest and easiest way to build relevance-based search capabilities directly into applications.",
  'score': 0.9609129428863525},
 {'body': '[1]: https://www.mongodb.com/atlas/search\n  [2]: https://www.mongodb.com/developer/videos/what-is-atlas-search-q

### Filter on documents which have been updated on or after `2024-05-19` and where the content type is `Tutorial`

Modify the index definition (from Step 7) in the Atlas UI to include the `metadata.contentType` and `updated` fields as `filter` fields

In [None]:
# Embed the user query
query_embedding = get_embedding("What is MongoDB Atlas Search?")

In [None]:
# Modify the $vectorSearch stage of the aggregation pipeline defined previously to include a filter for documents where
# the `metadata.contentType` field has the value "Tutorial"
# AND
# the `updated` field is greater than or equal to "2024-05-19"
pipeline = [
    {
        "$vectorSearch": {
            "index": "vector_index",
            "path": "embedding",
            "queryVector": query_embedding,
            "numCandidates": 150,
            "limit": 5,
            "filter": {
                "metadata.contentType": "Tutorial",
                "updated": {"$gte": "2024-05-19"}
            }
        }
    },
    {
        "$project": {
            "_id": 0,
            "body": 1,
            "score": {"$meta": "vectorSearchScore"}
        }
    }
]

In [None]:
# Execute the aggregation pipeline and view the results
results = collection.aggregate(pipeline)
list(results)

[{'body': '# How to Model Your Documents for Vector Search\n\nAtlas Vector Search was recently released, so let’s dive into a tutorial on how to properly model your documents when utilizing vector search to revolutionize your querying capabilities!\n\n## Data modeling normally in MongoDB\n\nVector search is new, so let’s first go over the basic ways of modeling your data in a MongoDB document before continuing on into how to incorporate vector embeddings. \n\nData modeling in MongoDB revolves around organizing your data into documents within various collections. Varied projects or organizations will require different ways of structuring data models due to the fact that successful data modeling depends on the specific requirements of each application, and for the most part, no one document design can be applied for every situation. There are some commonalities, though, that can guide the user. These are:',
  'score': 0.952588677406311},
 {'body': '# How to Deploy MongoDB Atlas with AWS 

# Step 9: Build the RAG application


### Instantiate a chat model


In [126]:
from fireworks.client import Fireworks

In [127]:
# Initializing the Fireworks AI client and the model string
fw_client = Fireworks()
model = "accounts/fireworks/models/llama-v3-8b-instruct"

### Define a function to create the chat prompt

In [131]:
# Define a function to create the user prompt for our RAG application
def create_prompt(user_query: str) -> str:
    """
    Create a chat prompt that includes the user query and retrieved context.

    Args:
        user_query (str): The user's query string.

    Returns:
        str: The chat prompt string.
    """
    # Retrieve the most relevant documents for the `user_query` using the `vector_search` function defined in Step 8
    context = vector_search(user_query)

    # Join the retrieved documents into a single string, where each document is separated by two new lines ("\n\n")
    context = "\n\n".join([doc.get('Scraped Content') for doc in context])
    # Prompt consisting of the question and relevant context to answer it
    prompt = f"Answer the question based only on the following context. If the context is empty, say I DON'T KNOW\n\nContext:\n{context}\n\nQuestion:{user_query}"
    return prompt

### Define a function to answer user queries

📚 https://docs.fireworks.ai/guides/querying-text-models#chat-completions-api

In [132]:
# Define a function to answer user queries using Fireworks' Chat Completion API
def generate_answer(user_query: str) -> None:
    """
    Generate an answer to the user query.

    Args:
        user_query (str): The user's query string.
    """
    # Use the `create_prompt` function above to create a chat prompt
    prompt = create_prompt(user_query)

    # Use the `prompt` created above to populate the `content` field in the chat message
    response = fw_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )
    # Print the final answer
    print(response.choices[0].message.content)

### Query the RAG application


In [133]:
generate_answer("Packaging coffee bean tips?")

According to the context, here are some packaging coffee bean tips:

* Use mailer boxes for convenience and protection of your coffee pouches and cans.
* Use sturdy exterior boxes to protect the beans and ensure they reach their destination in optimal condition.
* Store coffee beans in an airtight container to increase the shelf life of the beans.
* Store ground coffee in an unopened bag for 2-4 months, roasted whole coffee beans in a vacuum-sealed bag for 3-5 months, and whole coffee beans in an airtight container for 12 months.
* Protect coffee beans from moisture and light to prevent mould, off-flavours, and shorter shelf life.
* Use degassing valves to allow freshly roasted coffee beans to release carbon dioxide without compromising the package and bean quality.

These tips are aimed at enhancing the unboxing experience, elevating the brand's perception, and simplifying the shipping process and reducing the carbon footprint.


In [134]:
generate_answer("What did I just ask you?")

I DON'T KNOW


# 🦹‍♀️ Re-rank retrieved results


In [135]:
from sentence_transformers import CrossEncoder

In [136]:
rerank_model = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1")

OSError: We couldn't connect to 'https://huggingface.co' to load this file, couldn't find it in the cached files and it looks like mixedbread-ai/mxbai-rerank-xsmall-v1 is not the path to a directory containing a file named config.json.
Checkout your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.

📚 https://huggingface.co/mixedbread-ai/mxbai-rerank-xsmall-v1

In [None]:
# Add a re-ranking step to the following function
def create_prompt(user_query: str) -> str:
    """
    Create a chat prompt that includes the user query and retrieved context.

    Args:
        user_query (str): The user's query string.

    Returns:
        str: The chat prompt string.
    """
    # Retrieve the most relevant documents for the `user_query` using the `vector_search` function defined in Step 8
    context = vector_search(user_query)
    # Extract the "body" field from each document in `context`
    documents = [d.get("Scraped Content") for d in context]
    # Use the `rerank_model` instantiated above to re-rank `documents`
    # Set the `top_k` argument to 5
    reranked_documents = rerank_model.rank(
        user_query, documents, return_documents=True, top_k=5
    )
    # Join the re-ranked documents into a single string, where each document is separated by two new lines ("\n\n")
    context = "\n\n".join([d.get("text", "") for d in reranked_documents])
    # Prompt consisting of the question and relevant context to answer it
    prompt = f"Answer the question based only on the following context. If the context is empty, say I DON'T KNOW\n\nContext:\n{context}\n\nQuestion:{user_query}"
    return prompt

In [None]:
# Note the impact of re-ranking on the generated answer
generate_answer("What are triggers in MongoDB Atlas?")

According to the context, triggers in MongoDB Atlas are a feature that allows you to create a trigger that monitors all changes in a certain collection (in this case, the `test` collection) for `insert`, `update`, and `delete` operations.


# 🦹‍♀️ Return streaming responses

📚 https://docs.fireworks.ai/guides/querying-text-models#streaming


In [None]:
# Define a function to answer user queries in streaming mode using Fireworks' Chat Completion API
def generate_answer(user_query: str) -> None:
    """
    Generate an answer to the user query.

    Args:
        user_query (str): The user's query string.
    """
    # Use the `create_prompt` function defined in Step 9 to create a chat prompt
    prompt = create_prompt(user_query)

    # Use the `prompt` created above to populate the `content` field in the chat message
    # Set the `stream` parameter to True
    response = fw_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        stream=True
    )

    # Iterate through the `response` generator and print the results as they are generated
    for chunk in response:
        if chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")

In [None]:
generate_answer("What is MongoDB Atlas Search?")

Atlas Search is an embedded full-text search in MongoDB Atlas that gives you a seamless, scalable experience for building relevance-based app features.

# Step 10: Add memory to the RAG application


In [137]:
from datetime import datetime

In [138]:
history_collection = mongodb_client[DB_NAME]["chat_history"]

📚 https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.create_index


In [139]:
# Create an index on the key `session_id` for the `history_collection` collection
history_collection.create_index("session_id")

'session_id_1'

### Define a function to store chat messages in MongoDB

📚 https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.insert_one

In [140]:
def store_chat_message(session_id: str, role: str, content: str) -> None:
    """
    Store a chat message in a MongoDB collection.

    Args:
        session_id (str): Session ID of the message.
        role (str): Role for the message. One of `system`, `user` or `assistant`.
        content (str): Content of the message.
    """
    # Create a message object with `session_id`, `role`, `content` and `timestamp` fields
    # `timestamp` should be set the current timestamp
    message = {
        "session_id": session_id,
        "role": role,
        "content": content,
        "timestamp": datetime.now(),
    }
    # Insert the `message` into the `history_collection` collection
    history_collection.insert_one(message)



### Define a function to retrieve chat history from MongoDB

📚 https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.find

📚 https://pymongo.readthedocs.io/en/stable/api/pymongo/cursor.html#pymongo.cursor.Cursor.sort

In [141]:
def retrieve_session_history(session_id: str) -> List:
    """
    Retrieve chat message history for a particular session.

    Args:
        session_id (str): Session ID to retrieve chat message history for.

    Returns:
        List: List of chat messages.
    """
    # Query the `history_collection` collection for documents where the "session_id" field has the value of the input `session_id`
    # Sort the results in increasing order of the values in `timestamp` field
    cursor =  history_collection.find({"session_id": session_id}).sort("timestamp", 1)

    if cursor:
        # Iterate through the cursor and extract the `role` and `content` field from each entry
        # Then format each entry as: {"role": <role_value>, "content": <content_value>}
        messages = [{"role": msg["role"], "content": msg["content"]} for msg in cursor]
    else:
        # If cursor is empty, return an empty list
        messages = []

    return messages

### Handle chat history in the `generate_answer` function

📚 https://docs.fireworks.ai/guides/querying-text-models#chat-completions-api


In [142]:
def generate_answer(session_id: str, user_query: str) -> None:
    """
    Generate an answer to the user's query taking chat history into account.

    Args:
        session_id (str): Session ID to retrieve chat history for.
        user_query (str): The user's query string.
    """
    # Initialize list of messages to pass to the chat completion model
    messages = []

    # Retrieve documents relevant to the user query and convert them to a single string
    context = vector_search(user_query)
    context = "\n\n".join([d.get("Scraped Content", "") for d in context])
    # Create a system prompt containing the retrieved context
    system_message = {
        "role": "system",
        "content": f"Answer the question based only on the following context. If the context is empty, say I DON'T KNOW\n\nContext:\n{context}",
    }
    # Append the system prompt to the `messages` list
    messages.append(system_message)

    # Use the `retrieve_session_history` function to retrieve message history from MongoDB for the session ID `session_id`
    # And add all messages in the message history to the `messages` list
    message_history = retrieve_session_history(session_id)

    messages.extend(message_history)

    # Format the user message in the format {"role": <role_value>, "content": <content_value>}
    # The role value for user messages must be "user"
    # And append the user message to the `messages` list
    user_message = {"role": "user", "content": user_query}


    messages.append(user_message)

    # Call the chat completions API
    response = fw_client.chat.completions.create(model=model, messages=messages)

    # Extract the answer from the API response
    answer = response.choices[0].message.content

    # Use the `store_chat_message` function to store the user message and also the generated answer in the message history collection
    # The role value for user messages is "user", and "assistant" for the generated answer
    store_chat_message(session_id, "user", user_query)
    store_chat_message(session_id, "assistant", answer)

    print(answer)

In [143]:
generate_answer(
    session_id="1",
    user_query="Which type of coffee bean packaging is the best",
)

I DON'T KNOW. The context only provides general information about coffee packaging, its importance, and the factors to consider when choosing a packaging material. It does not specifically mention or recommend a particular type of coffee bean packaging as the best.


In [144]:
generate_answer(
    session_id="1",
    user_query="What did I just ask you?",
)

You asked me which type of coffee bean packaging is the best.
