**Simple RAG: data from MongoDB => embeddings => in-memory vector DB (FAISS)**

In [None]:
# Use a shell command to get this runtime's external IP address, which must be whitelisted in MongoDB Atlas
!curl ipinfo.io/ip

In [None]:
%pip install pymongo langchain langchain-community langchain-core faiss-cpu

In [None]:
%pip install langchain-google-genai google-generativeai huggingface_hub
#langchain-mongodb

In [None]:
# import urllib
# urllib.parse.quote_plus("pwd@123")  #encode special chars in connection string

# Note: the user credentials below have read-only access, and the cluster is a free-tier one.

In [None]:
import os

from pymongo import MongoClient

# MongoDB Atlas connection string.
# Set the MONGODB_URI environment variable to point this notebook at a
# different cluster; when it is unset, the connection string below is used.
# Special characters in the password must be percent-encoded
# (here "@" is written as "%40").
mongodb_uri = os.environ.get(
    "MONGODB_URI",
    "mongodb+srv://user1:pwd%40123@cluster-1.73uqabe.mongodb.net/?retryWrites=true&w=majority&appName=Cluster-1",
)
client = MongoClient(mongodb_uri)

# Database and collection that hold the source documents
db = client['notebook']
collection = db['ai_ml']

# NOTE(review): this only echoes the configured collection name; it is
# presumably not proof that the server is reachable - confirm.
print("Test for collection name: ", collection.name)

In [None]:
# Guide to storing embeddings in MongoDB: https://www.mongodb.com/docs/atlas/ai-integrations/langchain/get-started/

In [None]:
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.agents import initialize_agent, Tool, AgentType
from langchain.docstore.document import Document
from langchain.vectorstores import FAISS

In [None]:
# Fetch every record that has a 'desc' field ({'$exists': True} matches on the
# field's presence) and wrap each description in a Document for embedding.
results = collection.find({'desc': {'$exists': True}})
documents = [Document(page_content=record['desc']) for record in results]

# Bare expression so the notebook shows the list as the cell output.
documents

In [None]:
from google.colab import userdata

# Read the Gemini API key from the Colab secret named GOOGLE_API_KEY and
# register it with the google.generativeai client.
gemini_api_key = userdata.get("GOOGLE_API_KEY")
genai.configure(api_key=gemini_api_key)

# Chat model handed to the agent further down.
llm = ChatGoogleGenerativeAI(
    google_api_key=gemini_api_key,
    model="models/gemini-2.0-flash",
    max_output_tokens=512,
    temperature=0.6,
)

In [None]:
# Embedding model, selected by its Hugging Face model name. It is passed to
# FAISS.from_documents to vectorize the documents loaded from MongoDB.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [None]:
# Build the in-memory FAISS index from the loaded documents.
# An empty document list cannot be indexed and would otherwise surface as an
# opaque error inside the FAISS construction, so fail early with a clear message.
if not documents:
    raise ValueError(
        "No documents with a 'desc' field were loaded from MongoDB; cannot build the FAISS index."
    )
vectorstore = FAISS.from_documents(documents, embeddings)

def search_documents(query, *, k=2, store=None):
    """Return the documents most similar to *query* as a bulleted text block.

    Args:
        query: Free-text search string.
        k: Number of nearest documents to retrieve. Defaults to 2, the value
            that was previously hard-coded.
        store: Object exposing ``similarity_search(query, k=...)``. When
            omitted, the module-level ``vectorstore`` is used.

    Returns:
        A string starting with ``"Relevant documents:"`` followed by one
        ``"- <page_content>"`` line per match.
    """
    # Look up the global at call time so a rebuilt `vectorstore` is picked up.
    index = vectorstore if store is None else store
    matches = index.similarity_search(query, k=k)
    bullets = "\n".join(f"- {match.page_content}" for match in matches)
    return f"Relevant documents:\n{bullets}"

# Wrap the search function as a named tool for the agent; the description is
# the free-text hint attached to the tool.
search_tool = Tool(
    func=search_documents,
    name="VectorStoreLookup",
    description="useful for when you need to find information about a specific context from documents",
)

# Zero-shot ReAct agent that can call the vector-store lookup tool.
agent = initialize_agent(
    tools=[search_tool],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    # Feed malformed LLM replies back to the model for another attempt instead
    # of aborting the whole run with an output-parsing error.
    handle_parsing_errors=True,
)

In [None]:
from langchain.prompts import PromptTemplate

# Prompt text wrapped around the user's question. It points the agent at the
# VectorStoreLookup tool and has a single placeholder, {question}.
template = """
   Use the VectorStoreLookup tool if you need more info to answer the question at the end.
   Question: {question}
"""
# Build the PromptTemplate from the string above; the chain supplies `question`.
prompt = PromptTemplate.from_template(template)

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda

# Pipeline: raw question -> prompt -> plain text -> agent -> answer string.
chain = (
    # Map the raw input string onto the template's `question` variable.
    {"question": RunnablePassthrough()}
    | prompt
    # The prompt emits a PromptValue object; convert it to plain text so the
    # agent receives the rendered prompt rather than the object's repr.
    | RunnableLambda(lambda prompt_value: prompt_value.to_string())
    | agent
    # The agent returns a dict; keep only the final answer.
    | RunnableLambda(lambda result: result['output'])
    | StrOutputParser()
)

# Run the chain on a sample question and print both sides of the exchange.
question = "What is AI and DL?"
answer = chain.invoke(question)
print(f"Question: {question}")
print(f"Answer: {answer}")