# Introduction

In this tutorial, we'll demonstrate how to leverage a sample dataset stored in Azure Cosmos DB for MongoDB vCore to ground OpenAI models. We'll do this by taking advantage of Azure Cosmos DB for MongoDB vCore's [vector similarity search](https://learn.microsoft.com/azure/cosmos-db/mongodb/vcore/vector-search) functionality. In the end, we'll create an interactive chat session with the GPT-3.5 completions model to answer questions about Azure services informed by our dataset. This process is known as Retrieval Augmented Generation, or RAG.

This tutorial borrows some code snippets and example data from the Azure Cognitive Search Vector Search demo.

# Preliminaries <a class="anchor" id="preliminaries"></a>
First, let's start by installing the packages that we'll need later. 

In [None]:
# ! pip install openai
# ! pip install pymongo
# ! pip install python-dotenv
# ! pip install azure-storage-blob
# ! pip install json  # NOTE: not needed -- json is part of the Python standard library
# ! pip install ijson
# from tenacity import retry, wait_random_exponential, stop_after_attempt

In [None]:
import json
import ijson
from dotenv import dotenv_values
import pymongo
#from azure.storage.blob import BlobServiceClient
from openai import AzureOpenAI
from langchain_openai import AzureOpenAI as AOAI
from langchain.chains import RetrievalQA
from langchain_openai import AzureChatOpenAI
from langchain_community.chat_message_histories import MongoDBChatMessageHistory
from langchain.memory.buffer import ConversationBufferMemory

from langchain_community.document_loaders import JSONLoader # Check if really need
from langchain_community.vectorstores.azure_cosmos_db import (
    AzureCosmosDBVectorSearch,
    CosmosDBSimilarityType,
    CosmosDBVectorSearchType,
)

from langchain_openai import AzureOpenAIEmbeddings

Please use the example.env as a template to provide the necessary keys and endpoints in your own .env file.
Make sure to modify the env_name accordingly. 

In [None]:

# Specify the name of the .env file that holds the keys and endpoints.
env_name = "fabcondemo.env" # following example.env template change to your own .env file name
# Read the settings from that file into `config`.
# NOTE: the name `config` is rebound near the end of this notebook to a
# session-configuration dict, so re-run this cell before reading settings again.
config = dotenv_values(env_name)

# Azure Cosmos DB for MongoDB vCore settings and client.
mongo_conn = config['mongo_connection_string']
mongo_database = config['mongo_database_name']
mongo_collection = config['mongo_collection_name']
mongo_cache_collection = config['mongo_cache_collection_name']
mongo_client = pymongo.MongoClient(mongo_conn)

# Azure OpenAI endpoint, credentials, and deployment/model names.
openai_endpoint = config['openai_endpoint']
openai_key = config['openai_key']
openai_version = config['openai_version']
openai_embeddings_deployment = config['openai_embeddings_deployment']
openai_embeddings_model = config['openai_embeddings_model']
openai_embeddings_dimensions = int(config['openai_embeddings_dimensions'])
openai_completions_deployment = config['openai_completions_deployment']
openai_completions_model = config['openai_completions_model']

from langchain_community.cache import AzureCosmosDBSemanticCache
from langchain.globals import set_llm_cache
import urllib 

# Embedding vector size used by the embedding requests and index creation below.
# NOTE(review): hard-coded even though `openai_embeddings_dimensions` is read
# from the .env file above -- confirm the two values agree.
dimensions = 1536


In [None]:
# Azure OpenAI SDK client, used for direct embedding requests.
openai_client = AzureOpenAI(
    azure_endpoint=openai_endpoint,
    api_key=openai_key,
    api_version=openai_version,
)

# LangChain embeddings wrapper handed to the vector store and the semantic cache.
azure_openai_embeddings: AzureOpenAIEmbeddings = AzureOpenAIEmbeddings(
    azure_deployment=openai_embeddings_deployment,
    model=openai_embeddings_model,
    azure_endpoint=openai_endpoint,
    api_key=openai_key,
    dimensions=dimensions,
)

#  Set up the MongoDB vCore database and collection

In [None]:
# Handle to the database named in the .env file.
db = mongo_client.get_database(mongo_database)

# Handle to the document collection named in the .env file.
collection = db.get_collection(mongo_collection)
# Handle to the cache collection (currently disabled).
# cache = db[mongo_cache_collection]


# Define a function to generate embeddings

This is used to vectorize the user input for the vector search.

In [None]:
#@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(10))
def generate_embeddings(text):
    """Return the embedding vector for ``text`` from Azure OpenAI.

    The request goes through the module-level ``openai_client`` and asks for
    ``dimensions`` values per vector. Only the embedding of the first item in
    the response is returned.
    """
    # The SDK parameter is named ``model``, but the deployment name is what gets passed.
    result = openai_client.embeddings.create(
        model=openai_embeddings_deployment,
        input=text,
        dimensions=dimensions,
    )
    first_item = result.data[0]
    return first_item.embedding


In [None]:
# Smoke-test the embedding helper: show the first five values and the vector length.
emb = generate_embeddings("test")
emb[:5], len(emb)

# Testing vector search w/ LangChain

In [None]:
# Build the LangChain vector store on top of the existing collection.
# `from_connection_string` is a classmethod, so it is called on the class
# itself. The names of the fields holding the text and the embedding are
# passed through as constructor keyword arguments instead of being patched
# onto private attributes after construction.
vectorstore = AzureCosmosDBVectorSearch.from_connection_string(
    connection_string=mongo_conn,
    namespace=f"{mongo_database}.{mongo_collection}",
    embedding=azure_openai_embeddings,
    text_key="content",
    embedding_key="contentVector",
)

# Kept only so that any existing reference to `cdb` still resolves.
cdb = vectorstore

In [None]:
# If you want to reset the index
#collection.drop_indexes()

In [None]:
# Parameters for the vector index (HNSW kind, cosine similarity).
kind = CosmosDBVectorSearchType.VECTOR_HNSW
similarity_algorithm = CosmosDBSimilarityType.COS
num_lists = 100
m = 16
ef_construction = 64
# Defined alongside the other settings but not passed to create_index below.
ef_search = 40
score_threshold = 0.1

vectorstore.create_index(
    num_lists=num_lists,
    dimensions=dimensions,
    similarity=similarity_algorithm,
    kind=kind,
    m=m,
    ef_construction=ef_construction,
)

In [None]:
# Test vector search and document retrieval.
# Requests k=10 results for the query and passes a score threshold of 0.6
# (a literal; the `score_threshold` variable defined earlier is not used here).
vectorstore.similarity_search("Azure SQL databases", k=10, score_threshold=0.6)


# Adding RAG to chain

In [None]:
# Wrap the vector store as a similarity retriever configured with k=5.
qa_retriever = vectorstore.as_retriever(search_kwargs={"k": 5}, search_type="similarity")

In [None]:
from langchain.prompts import PromptTemplate

# Prompt for the RAG chain: the retrieved context is substituted for
# {context} and the user's question for {question}.
prompt_template = """Use the following pieces of context to answer the question at the end. Always list out at least 3 options if the user asks for information about Azure. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
"""
PROMPT = PromptTemplate(input_variables=["context", "question"], template=prompt_template)

In [None]:


# RAG chain: documents from `qa_retriever` are combined with the "stuff"
# chain type and sent to the chat deployment using PROMPT.
qa = RetrievalQA.from_chain_type(
    llm=AzureChatOpenAI(
        azure_deployment=openai_completions_deployment,
        azure_endpoint=openai_endpoint,
        api_key=openai_key,
        api_version=openai_version,
    ),
    retriever=qa_retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": PROMPT},
    return_source_documents=True,
)

In [None]:
# Ask a question through the RAG chain. `invoke` is used rather than calling
# the chain object directly, which LangChain has deprecated; it also matches
# the `.invoke` calls used at the end of this notebook.
docs = qa.invoke({"query": "Tell me about NoSQL or nonrelational databases in Azure"})
print(docs["result"])  # the generated answer
print(docs["source_documents"])  # present because the chain was built with return_source_documents=True
docs

# Adding semantic caching to RAG

In [None]:
# Rebuild the retriever with a wider result set (k=10) for the cached chain.
qa_retriever = vectorstore.as_retriever(search_kwargs={"k": 10}, search_type="similarity")

# RAG chain backed by the completions-style LLM with caching switched on.
sem_qa = RetrievalQA.from_chain_type(
    llm=AOAI(
        azure_endpoint=openai_endpoint,
        api_key=openai_key,
        api_version=openai_version,
        # NOTE(review): deployment name is hard-coded here instead of using
        # openai_completions_deployment from the .env file -- confirm intent.
        azure_deployment="completions",
        cache=True,
        n=2,
        best_of=2,
    ),
    retriever=qa_retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": PROMPT},
    return_source_documents=True,
)

# sem_qa = RetrievalQA.from_chain_type(
#     llm= AzureChatOpenAI(azure_endpoint=openai_endpoint,
#                         api_key=openai_key,
#                         api_version=openai_version,
#                         azure_deployment=openai_completions_deployment,
#                         cache=True,n=3),
#     # llm = llm,
#     chain_type="stuff",
#     retriever=qa_retriever,
#     return_source_documents=True,
#     chain_type_kwargs={"prompt": PROMPT},
# )

In [None]:
# Settings for the semantic cache's vector index.
# These assignments rebind the module-level names used by the earlier
# index-creation cell (e.g. `kind` switches from HNSW to IVF).
num_lists = 1
dimensions = 1536
similarity_algorithm = CosmosDBSimilarityType.COS
kind = CosmosDBVectorSearchType.VECTOR_IVF
m = 16
ef_construction = 64
ef_search = 40
score_threshold = 0.98

# Semantic cache stored in Cosmos DB, using the same embeddings object as the
# vector store.
# NOTE(review): the database and collection names are hard-coded literals
# rather than `mongo_database` / `mongo_cache_collection` from the .env file
# -- confirm this is intended.
sem_cache = AzureCosmosDBSemanticCache(
        cosmosdb_connection_string=mongo_conn,
        cosmosdb_client=None,
        embedding=azure_openai_embeddings,
        database_name="ExampleDB",
        collection_name="chatcache",
        num_lists=num_lists,
        similarity=similarity_algorithm,
        kind=kind,
        dimensions=dimensions,
        m=m,
        ef_construction=ef_construction,
        ef_search=ef_search,
        score_threshold=score_threshold)

# Register the cache as LangChain's global LLM cache.
set_llm_cache(
    sem_cache
)

In [None]:
#db['cache_collection'].index_information()
# Reset helper: drop the indexes on the 'chatcache' collection, then the
# collection itself.
# NOTE(review): `db` is the database named by `mongo_database`, while the
# semantic cache above is configured with database_name="ExampleDB" -- this
# only clears the cache if those are the same database.
db['chatcache'].drop_indexes()
db.drop_collection('chatcache')

In [None]:
# First run of the question through the cached chain. `invoke` replaces the
# deprecated direct call on the chain object.
sem_qa.invoke({"query": 'Tell me about NoSQL or nonrelational databases in Azure'})

In [None]:
# Same question again, to compare against the first run now that a cache is
# configured. `invoke` replaces the deprecated direct call on the chain object.
sem_qa.invoke({"query": 'Tell me about NoSQL or nonrelational databases in Azure'})

# Everything after this line is experimental and a work in progress.

In [None]:
# Chat history for session "test_session", stored in the "chat_histories"
# collection of the configured database.
chat_message_history = MongoDBChatMessageHistory(
    connection_string=mongo_conn,
    database_name=mongo_database,
    collection_name="chat_histories",
    session_id="test_session",
)

# Seed the history with one user message and one AI message.
chat_message_history.add_user_message("Hello")
chat_message_history.add_ai_message("Hi")

In [None]:
# Display the messages currently held by the chat history object.
chat_message_history.messages

In [None]:
from langchain.prompts import PromptTemplate

# Prompt variant that adds a {history} placeholder alongside {context} and
# {question}. This rebinds both `prompt_template` and `PROMPT` defined earlier.
prompt_template = """Use the following pieces of context to answer the question at the end. Always list out at least 3 options if the user asks for information about Azure. 

{context},

This is the history of previous conversations. Use this to help answer the question.

{history},

If you don't know the answer, just say that you don't know, don't try to make up an answer.

Question: {question}
"""
# Built with from_template, without an explicit input_variables list.
PROMPT = PromptTemplate.from_template(template=prompt_template) #, input_variables=["context", "history", "question"])

# from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
# p = ChatPromptTemplate.from_messages(
#     [
#         ("system", "You are a helpful assistant."),
#         MessagesPlaceholder(variable_name="history"),
#         ("human", "{question}"),
#     ] ]
# )

In [None]:
from langchain.chains import ConversationalRetrievalChain


In [None]:
#from langchain.chains

# Retriever for the history-aware chain (k=5).
qa_retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

# Chat model used by the history-aware chain.
hist_llm = AzureChatOpenAI(azure_endpoint=openai_endpoint,
                        api_key=openai_key,
                        api_version=openai_version,
                        azure_deployment=openai_completions_deployment)

# Message store for session "test_session" in the "chat_histories" collection.
mongo_history = MongoDBChatMessageHistory(
    session_id="test_session",
    connection_string=mongo_conn,
    database_name=mongo_database,
    collection_name="chat_histories")

# Buffer memory backed by the MongoDB message store; its contents are exposed
# under the 'history' key.
# NOTE: this rebinds `chat_message_history`, which earlier in the notebook
# referred to a MongoDBChatMessageHistory instance.
# NOTE(review): the memory reads its input from 'question' and its output
# from 'result' -- verify these match the keys of whichever chain it is
# attached to below.
chat_message_history = ConversationBufferMemory(
    chat_memory = mongo_history,
    input_key='question',
    memory_key='history',
    llm=hist_llm,
    output_key='result',
    return_messages=True)

# Retrieval chain using the history-aware PROMPT.
# NOTE(review): the same memory object is passed both as the chain's `memory`
# and inside `chain_type_kwargs` -- confirm both attachments are intended.
chathist_qa = RetrievalQA.from_chain_type(hist_llm,
    #chain_type="stuff",
    retriever=qa_retriever,
    return_source_documents=True,
    memory= chat_message_history,
    chain_type_kwargs={"prompt": PROMPT, "verbose" :True, "memory": chat_message_history})

In [None]:
# Settings for the semantic cache's vector index (score threshold is .99 in
# this cell).
num_lists = 1
dimensions = 1536
similarity_algorithm = CosmosDBSimilarityType.COS
kind = CosmosDBVectorSearchType.VECTOR_IVF
m = 16
ef_construction = 64
ef_search = 40
score_threshold = .99

# Semantic cache stored in Cosmos DB.
# NOTE(review): the database and collection names are hard-coded literals
# rather than values from the .env file -- confirm this is intended.
sem_cache = AzureCosmosDBSemanticCache(
        cosmosdb_connection_string=mongo_conn,
        cosmosdb_client=None,
        embedding=azure_openai_embeddings,
        database_name="ExampleDB",
        collection_name="chatcache",
        num_lists=num_lists,
        similarity=similarity_algorithm,
        kind=kind,
        dimensions=dimensions,
        m=m,
        ef_construction=ef_construction,
        ef_search=ef_search,
        score_threshold=score_threshold)

# The cache is registered and then immediately replaced with None, so after
# this cell runs no global LLM cache is set.
set_llm_cache(
    sem_cache
)
set_llm_cache(None)

In [None]:
# Test case for a chain with only history/memory (no retrieval, no semantic cache).
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory

# Chat prompt: a fixed system message, then the stored conversation under
# "history", then the new user question.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant."),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{question}"),
    ]
)

# Chat model for this chain. It is constructed once; an earlier duplicate
# construction without `temperature` was overwritten before use and is removed.
hist_llm = AzureChatOpenAI(
    azure_endpoint=openai_endpoint,
    api_key=openai_key,
    api_version=openai_version,
    azure_deployment=openai_completions_deployment,
    temperature=0,
)

chain = prompt | hist_llm

# Wrap the chain so each call reads from and writes to a per-session message
# history kept in the "chat_histories" collection.
chain_with_history = RunnableWithMessageHistory(
    chain,
    lambda session_id: MongoDBChatMessageHistory(
        session_id=session_id,
        connection_string=mongo_conn,
        database_name=mongo_database,
        collection_name="chat_histories",
    ),
    input_messages_key="question",
    history_messages_key="history",
)

# Session selector passed to each invoke call below.
# NOTE: this rebinds `config`, which earlier held the values loaded from the
# .env file; re-run the settings cell before reading .env values again.
config = {"configurable": {"session_id": "<SESSION_ID>"}}

# Testing memory. This chain has only Memory, no RAG and no Semantic caching

In [None]:
# First turn: state a name so the next turn can check whether it is recalled.
chain_with_history.invoke({"question": "My name is James."}, config=config)


In [None]:
# Second turn with the same session config: asks the model to recall the name
# given in the previous turn.
chain_with_history.invoke({"question": "What did I tell you was my name? Explain it"}, config=config)
