In [None]:
from langchain_openai import OpenAIEmbeddings
from langchain_mongodb import MongoDBAtlasVectorSearch
from pymongo import MongoClient
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
import openai
import os, pprint
from functools import lru_cache
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
# Merge the variables from a local .env file into the process environment, then
# read the settings this notebook needs. Anything that is not set reads as None.
load_dotenv()

OPENAI_KEY = os.environ.get("OPENAI_API_KEY")
MONGODB_URI = os.environ.get("MONGODB_URI")

# Names of the MongoDB database, collection and vector-search index.
db_name = os.environ.get("MONGODB_DATABASE")
collection_name = os.environ.get("MONGODB_TEMPUSER")
vector_search_idx = os.environ.get("MONGODB_VECTOR_INDEX_TEMPUSER")

In [None]:
# Connect to MongoDB and resolve the database and collection handles that the
# following cells operate on.
client = MongoClient(host=MONGODB_URI)
db = client[db_name]
collection = db[collection_name]

def get_user_embeddings(session_id):
    """Fetch the stored embeddings for one session.

    Looks up a single document in the module-level ``collection`` whose
    ``session_id`` field equals the argument.

    Args:
        session_id: Value matched against the ``session_id`` field.

    Returns:
        The document's ``"embeddings"`` value, or ``None`` when no document
        matches or the matching document has no ``"embeddings"`` key.
    """
    record = collection.find_one({"session_id": session_id})
    # Guard clauses: no (or an empty) document and a missing key both yield None.
    if not record:
        return None
    if "embeddings" not in record:
        return None
    return record["embeddings"]

# Look up the embeddings for one specific, hard-coded session. The id is taken on
# trust: if it matches nothing, session_embeddings is simply None.
session_embeddings = get_user_embeddings(session_id="_HrwNQCNinc_Ki6kMT3vkw")

In [None]:
# Vector store over the MongoDB collection. Query text is embedded with OpenAI and
# document vectors are read from the "embeddings" field.
# The collection must already be set up for vector search under the given index.
vector_search = MongoDBAtlasVectorSearch(
    collection=collection,
    embedding=OpenAIEmbeddings(disallowed_special=()),
    embedding_key="embeddings",
    index_name=vector_search_idx,
)

# Retriever returning at most 5 documents whose relevance score is >= 0.75.
# "score_threshold" is only applied by the "similarity_score_threshold" search
# type; with plain "similarity" it is not used to filter results, so weakly
# related documents would still be fed into the prompt.
retriever = vector_search.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 5, "score_threshold": 0.75},
)


In [None]:
# Prompt text for the RAG chain. It has two placeholders: {context} and {question}.
# The instructions tell the model to answer only from the supplied context and to
# admit when it does not know.
template = """
Use the following pieces of context to answer the question at the end.
If you don't know the answer or if it is not provided in the context, just say that you don't know, don't try to make up an answer.
If the answer is in the context, don't say mentioned in the context.
Please provide a detailed explanation and if applicable, give examples or historical context.
{context}
Question: {question}
"""

# Build the prompt object from the template string above.
custom_rag_prompt = PromptTemplate.from_template(template)
# Chat model created with no arguments, i.e. the library's default settings.
llm = ChatOpenAI()

def format_docs(docs):
    """Concatenate the text of several documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in input order, separated by one blank
        line; an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    texts = [item.page_content for item in docs]
    return separator.join(texts)

# RAG pipeline, read top to bottom:
#   1. the input is piped through the retriever and format_docs to fill {context},
#      while RunnablePassthrough() supplies the input itself as {question};
#   2. custom_rag_prompt renders the prompt from those two values;
#   3. llm produces the reply;
#   4. StrOutputParser() converts the reply to a plain string.
rag_chain = (
   {"context": retriever | format_docs, "question": RunnablePassthrough()}
   | custom_rag_prompt
   | llm
   | StrOutputParser()
)

In [None]:
# Upper bound on the number of distinct questions kept in the answer cache.
MAX_CACHE_SIZE = 100


@lru_cache(maxsize=MAX_CACHE_SIZE)
def cached_query(question):
    """Answer ``question`` through the RAG chain, memoising the result.

    Repeated calls with an identical question are served from an LRU cache
    holding up to ``MAX_CACHE_SIZE`` entries instead of invoking the chain again.

    Args:
        question: Passed to ``rag_chain.invoke``; must be hashable because it
            doubles as the cache key.

    Returns:
        Whatever ``rag_chain.invoke(question)`` returns.
    """
    return rag_chain.invoke(question)

# ANSI escape codes used to colour the printed output.
RED = '\033[91m'
GREEN = '\033[92m'
YELLOW = '\033[93m'
RESET = '\033[0m'


# Ask one question through the cached RAG chain.
question = "What's my name?"
answer = cached_query(question)

print(f"{YELLOW}Cache Info: {cached_query.cache_info()}{RESET}")
print(f"{RED}Question: {question}{RESET}")
print(f"{GREEN}Answer: {answer}{RESET}")

# Show the documents the retriever returns for the same question. Retrievers are
# runnables, so call invoke(); get_relevant_documents() is deprecated.
documents = retriever.invoke(question)
print("\nSource documents:")
pprint.pprint(documents)

In [5]:
# Experiment: set up a second MongoDB Atlas vector store.

import os, pprint
from uuid import uuid4

from dotenv import load_dotenv
from langchain_core.documents import Document
from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch
from langchain_openai import OpenAIEmbeddings
from pymongo import MongoClient

# Load .env before any of the environment variables below are read.
load_dotenv()

OPENAI_KEY = os.environ.get("OPENAI_API_KEY")
MONGODB_URI = os.environ.get("MONGODB_URI")

DB_NAME = os.environ.get("MONGODB_DATABASE")
COLLECTION_NAME = os.environ.get("MONGODB_VECTORS_COURSEEVALUATION_DOCS")
# NOTE(review): the index variable is named after TEMPUSER_DOC while the collection
# variable is named after COURSEEVALUATION_DOCS - confirm the index belongs to
# this collection.
ATLAS_VECTOR_SEARCH_INDEX_NAME = os.environ.get("MONGODB_VECTOR_INDEX_TEMPUSER_DOC")

embeddings = OpenAIEmbeddings()
client = MongoClient(MONGODB_URI)

# Collection handle that the vector store reads from and writes to.
MONGODB_COLLECTION = client[DB_NAME][COLLECTION_NAME]

vector_store = MongoDBAtlasVectorSearch(
    collection=MONGODB_COLLECTION,
    embedding=embeddings,
    index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
    relevance_score_fn="cosine",
)



In [None]:
# Sanity-check the loaded configuration. The connection string itself is
# deliberately not echoed: a MongoDB URI usually embeds the username and password,
# and anything printed here is stored in the notebook's saved output.
print(
    "MONGODB_URI set" if MONGODB_URI else "MONGODB_URI missing",
    DB_NAME,
    COLLECTION_NAME,
    ATLAS_VECTOR_SEARCH_INDEX_NAME,
)

In [None]:
def _sample_doc(text, source):
    """Build a Document with ``text`` as its content and ``source`` as its only metadata."""
    return Document(page_content=text, metadata={"source": source})


# Ten sample documents drawn from three kinds of source: tweet, news, website.
document_1 = _sample_doc("I had chocalate chip pancakes and scrambled eggs for breakfast this morning.", "tweet")
document_2 = _sample_doc("The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.", "news")
document_3 = _sample_doc("Building an exciting new project with LangChain - come check it out!", "tweet")
document_4 = _sample_doc("Robbers broke into the city bank and stole $1 million in cash.", "news")
document_5 = _sample_doc("Wow! That was an amazing movie. I can't wait to see it again.", "tweet")
document_6 = _sample_doc("Is the new iPhone worth the price? Read this review to find out.", "website")
document_7 = _sample_doc("The top 10 soccer players in the world right now.", "website")
document_8 = _sample_doc("LangGraph is the best framework for building stateful, agentic applications!", "tweet")
document_9 = _sample_doc("The stock market is down 500 points today due to fears of a recession.", "news")
document_10 = _sample_doc("I have a bad feeling I am going to get deleted :(", "tweet")

documents = [
    document_1, document_2, document_3, document_4, document_5,
    document_6, document_7, document_8, document_9, document_10,
]

# Insert the batch into the vector store; the cell displays the size of the
# value that add_documents returns.
result = vector_store.add_documents(documents=documents)
len(result)

In [None]:
# Best single match for "amazing", restricted to documents whose source is "tweet".
results = vector_store.similarity_search(
    query="amazing",
    k=1,
    pre_filter={"source": {"$eq": "tweet"}},
)
len(results)  # bare expression in the middle of a cell: evaluated, not displayed
for hit in results:
    print(f"* {hit.page_content} [{hit.metadata}]")


# results = vector_store.similarity_search(
#     "LangChain provides abstractions to make working with LLMs easy", k=2
# )
# for res in results:
#     print(f"* {res.page_content} [{res.metadata}]")

In [None]:
# Closest match for "tomorrow?" together with its similarity score.
results = vector_store.similarity_search_with_score("tomorrow?", k=1)
print(results)
for res, score in results:
    # ".3f" limits the score to three decimal places; a bare "3f" would only set a
    # minimum field width of 3 and still print six decimals.
    print(f"* [SIM={score:.3f}] {res.page_content} [{res.metadata}]")

In [7]:
# Clean-up: delete_many with an empty filter removes every document in the
# collection, not only the sample documents inserted earlier, and then prints how
# many documents were removed.
# NOTE(review): destructive - confirm the collection holds nothing that must be
# kept before running this cell.
result = MONGODB_COLLECTION.delete_many({})
print(result.deleted_count)

362
