# Introduction

In this tutorial, we'll demonstrate how to leverage a sample dataset stored in Azure Cosmos DB for MongoDB vCore to ground OpenAI models. We'll do this by taking advantage of Azure Cosmos DB for MongoDB vCore's [vector similarity search](https://learn.microsoft.com/azure/cosmos-db/mongodb/vcore/vector-search) functionality. In the end, we'll create an interactive chat session with the GPT-3.5 completions model to answer questions about Azure services informed by our dataset. This process is known as Retrieval Augmented Generation, or RAG.

This tutorial borrows some code snippets and example data from the Azure Cognitive Search Vector Search demo.

# Preliminaries <a class="anchor" id="preliminaries"></a>
First, let's start by installing the packages that we'll need later. 

In [None]:
# ! pip install gradio
# ! pip install langchain
# ! pip install langchain_community
# ! pip install langchain_openai
# ! pip install openai
# ! pip install pymongo
# ! pip install python-dotenv

In [None]:
from dotenv import dotenv_values
import gradio as gr
from langchain.chains import ConversationalRetrievalChain
from langchain.globals import set_llm_cache
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain_community.cache import AzureCosmosDBSemanticCache
from langchain_community.chat_message_histories import MongoDBChatMessageHistory
from langchain_community.vectorstores.azure_cosmos_db import (
    AzureCosmosDBVectorSearch,
    CosmosDBSimilarityType,
    CosmosDBVectorSearchType)
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
import pymongo

Please use the example.env as a template to provide the necessary keys and endpoints in your own .env file.
Make sure to modify the env_name accordingly. 

In [None]:
# Name of the .env file that holds the connection settings.
# Start from the example.env template and change this to your own file name.
env_name = "fabcondemo.env"
config = dotenv_values(env_name)

# --- Azure Cosmos DB for MongoDB: connection string, database and collections ---
mongo_conn = config["mongo_connection_string"]
mongo_database = config["mongo_database_name"]
mongo_collection = config["mongo_collection_name"]
mongo_semcache = config["mongo_semcache_collection_name"]
mongo_chat_history = config["mongo_chathistory_collection_name"]
# Name of the document field that stores the embedding vector
mongo_vector_property = config["mongo_vector_property_name"]

# --- Azure OpenAI: endpoint, credentials and API version ---
openai_endpoint = config["openai_endpoint"]
openai_key = config["openai_key"]
openai_version = config["openai_version"]

# Embeddings deployment/model; the dimension count is stored as text in the
# .env file, so it is converted to an int here.
openai_embeddings_deployment = config["openai_embeddings_deployment"]
openai_embeddings_model = config["openai_embeddings_model"]
openai_embeddings_dimensions = int(config["openai_embeddings_dimensions"])

# Completions (chat) deployment/model
openai_completions_deployment = config["openai_completions_deployment"]
openai_completions_model = config["openai_completions_model"]

In [None]:
# Embeddings client shared by the vector store and the semantic cache.
# The API version is passed explicitly, the same way the chat client receives
# it: the settings are read with dotenv_values() into `config`, so nothing in
# this notebook exports an API version for the client to fall back on.
azure_openai_embeddings = AzureOpenAIEmbeddings(
    azure_deployment=openai_embeddings_deployment,
    api_key=openai_key,
    api_version=openai_version,
    azure_endpoint=openai_endpoint,
    model=openai_embeddings_model,
    dimensions=openai_embeddings_dimensions)

# Azure Cosmos DB for MongoDB connections

In [None]:
# Connect to the cluster and select the working database
mongo_client = pymongo.MongoClient(mongo_conn)
database = mongo_client[mongo_database]

# Collection handles, in order: movie documents, semantic-cache entries,
# persisted chat history
movies, cache, chathistory = (
    database[name]
    for name in (mongo_collection, mongo_semcache, mongo_chat_history)
)

# Vector Search w/ LangChain

In [None]:
# Vector store wrapper around the movie collection. The constructor takes the
# pymongo Collection object itself (the `movies` handle), not the collection's
# name, so the handle is passed here instead of the `mongo_collection` string.
cdb = AzureCosmosDBVectorSearch(
    collection=movies,
    embedding=azure_openai_embeddings)

# from_connection_string is an alternate constructor, so it is called on the
# class rather than through the `cdb` instance. The namespace is
# "<database>.<collection>"; `embedding_key` names the field holding the
# vector and `text_key` the field whose text is returned ("overview").
vectorstore = AzureCosmosDBVectorSearch.from_connection_string(
    connection_string=mongo_conn,
    namespace=f"{mongo_database}.{mongo_collection}",
    embedding=azure_openai_embeddings,
    embedding_key=mongo_vector_property,
    text_key="overview")

In [None]:
# Create a vector index in your vector store (optional)

# num_lists = 100
# similarity_algorithm = CosmosDBSimilarityType.COS
# kind = CosmosDBVectorSearchType.VECTOR_HNSW
# m = 16
# ef_construction = 64
# ef_search = 40
# score_threshold = 0.7

# vectorstore.create_index(
#     num_lists, openai_embeddings_dimensions, similarity_algorithm, kind, m, ef_construction
# )

In [None]:
# Smoke test for retrieval: request the 5 nearest documents for a sample
# query, with a score threshold of 0.2. Being the last expression in the
# cell, the result is displayed by the notebook.
vectorstore.similarity_search_with_score(
    "Buzz Lightyear",
    k=5,
    score_threshold=0.2,
)

# Add RAG with Semantic Caching

In [None]:
# Retriever that returns the 10 most similar documents for each question
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 10})

# Chat model. The deployment name comes from the .env settings (like the
# embeddings deployment) instead of a hard-coded string.
llm = AzureChatOpenAI(
    azure_endpoint=openai_endpoint,
    api_key=openai_key,
    api_version=openai_version,
    azure_deployment=openai_completions_deployment,
    cache=True,
    n=1)

# Prompt for the answer-generation step. It has to be defined before the chain
# is built; previously `PROMPT` was referenced without ever being assigned,
# which made this cell fail with a NameError.
PROMPT = PromptTemplate(
    template="""
You are an upbeat AI assistant who is excited to help answer questions.
You can use this context

{context},

or this chat history

{chat_history},

to answer this question.

Question: {question}
If you don't know the answer, just say that you don't know, don't try to make up an answer.
""",
    input_variables=["context", "question", "chat_history"])

# Retrieval chain: the retrieved documents are "stuffed" into PROMPT, and the
# source documents are returned alongside the answer.
sem_qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    combine_docs_chain_kwargs={"prompt": PROMPT})

In [None]:
# Index settings for the collection that backs the semantic cache
kind = CosmosDBVectorSearchType.VECTOR_IVF
similarity_algorithm = CosmosDBSimilarityType.COS
num_lists = 1
dimensions = 1536  # not passed below; the cache receives openai_embeddings_dimensions
m = 16
ef_construction = 64
ef_search = 40
score_threshold = 1.0

# Semantic cache stored in the Cosmos DB cache collection; it connects via the
# connection string (no pre-built client is supplied).
sem_cache = AzureCosmosDBSemanticCache(
    cosmosdb_connection_string=mongo_conn,
    cosmosdb_client=None,
    embedding=azure_openai_embeddings,
    database_name=mongo_database,
    collection_name=mongo_semcache,
    num_lists=num_lists,
    similarity=similarity_algorithm,
    kind=kind,
    dimensions=openai_embeddings_dimensions,
    m=m,
    ef_construction=ef_construction,
    ef_search=ef_search,
    score_threshold=score_threshold,
)

# Install it through LangChain's set_llm_cache
set_llm_cache(sem_cache)

In [None]:
# Reset the semantic cache between test runs:
# first remove the collection's indexes, then drop the collection itself.
cache.drop_indexes()
database.drop_collection(cache.name)

In [None]:
%%time
# Test call to LLM, no history
res = sem_qa.invoke(({'question':'Tell me about movies with Buzz Lightyear', 'chat_history': []})) 
print(res['answer'])

In [None]:
%%time
# Test call to LLM, no history, utilizing semantic cache
res = sem_qa.invoke(({'question':'Tell me about movies with Buzz Lightyear', 'chat_history': []})) 
print(res['answer'])

# Gradio / UI integration

In [None]:
# Template text for the chatbot. It exposes three placeholders:
# {context}, {chat_history} and {question}.
prompt_template = """
You are an upbeat AI assistant who is excited to help answer questions. 
You can use this context

{context},

or this chat history

{chat_history},

to answer this question. 

Question: {question}
If you don't know the answer, just say that you don't know, don't try to make up an answer.
"""

# Prompt object handed to the chatbot chain
chatbot_prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question", "chat_history"],
)

In [None]:
# Retriever that returns the 10 most similar documents for each question
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 10})

# Chat model for the chatbot. The deployment name is taken from the .env
# settings (openai_completions_deployment) rather than being hard-coded, so it
# follows the configuration the same way the embeddings deployment does.
llm = AzureChatOpenAI(
    azure_endpoint=openai_endpoint,
    api_key=openai_key,
    api_version=openai_version,
    azure_deployment=openai_completions_deployment,
    cache=True,
    n=1)

# Retrieval chain used by the UI: retrieved documents are "stuffed" into
# chatbot_prompt, and the source documents are returned with the answer.
chatbot_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    combine_docs_chain_kwargs={"prompt": chatbot_prompt})



In [None]:
# Index settings for the semantic cache used by the chatbot
kind = CosmosDBVectorSearchType.VECTOR_IVF
similarity_algorithm = CosmosDBSimilarityType.COS
num_lists = 1
dimensions = 1536  # not passed below; the cache receives openai_embeddings_dimensions
m = 16
ef_construction = 64
ef_search = 40
score_threshold = 0.999

# Semantic cache stored in the Cosmos DB cache collection; it connects via the
# connection string (no pre-built client is supplied).
sem_cache = AzureCosmosDBSemanticCache(
    cosmosdb_connection_string=mongo_conn,
    cosmosdb_client=None,
    embedding=azure_openai_embeddings,
    database_name=mongo_database,
    collection_name=mongo_semcache,
    num_lists=num_lists,
    similarity=similarity_algorithm,
    kind=kind,
    dimensions=openai_embeddings_dimensions,
    m=m,
    ef_construction=ef_construction,
    ef_search=ef_search,
    score_threshold=score_threshold,
)

# Install it through LangChain's set_llm_cache
set_llm_cache(sem_cache)

In [None]:
# Chat history persisted in the chat-history collection under a fixed session id
mongo_message_history = MongoDBChatMessageHistory(
    session_id="test_session",
    connection_string=mongo_conn,
    database_name=mongo_database,
    collection_name=mongo_chat_history)

# Memory wrapper over the persisted history; used below to read back the most
# recent messages as message objects.
conversational_memory = ConversationBufferMemory(
    chat_memory=mongo_message_history,
    memory_key='chat_history',
    return_messages=True)

# On-screen transcript for the Gradio chat window. This is kept separately
# from the persisted history; only the last 6 persisted messages are sent to
# the chain on each turn.
hist = []
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, chat_history):
        """Answer one user message and update the transcript.

        Args:
            user_message: Text submitted from the textbox.
            chat_history: Current value of the chatbot widget as supplied by
                Gradio. It is not used; the transcript is tracked in `hist`.

        Returns:
            A pair: an update that empties the textbox, and the transcript to
            show in the chatbot widget.
        """
        # Ask the QA chain, giving it the last 6 persisted messages as history.
        # NOTE(review): temperature is passed to invoke() rather than to the
        # AzureChatOpenAI constructor; confirm it actually reaches the model.
        response = chatbot_chain.invoke(
            {"question": user_message,
             "chat_history": conversational_memory.buffer_as_messages[-6:]},
            temperature=0.2)
        answer = response['answer']

        # Record the exchange on screen and in the persisted history
        hist.append(["User: "+user_message, "Chatbot: "+answer])
        mongo_message_history.add_user_message(user_message)
        mongo_message_history.add_ai_message(answer)
        return gr.update(value=""), hist

    def clear_chat():
        """Empty the on-screen transcript and blank the chatbot widget.

        Previously only the widget was blanked while `hist` kept every earlier
        exchange, so the full old transcript reappeared on the next message.
        The persisted history is left untouched.
        """
        hist.clear()
        return None

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
    clear.click(clear_chat, None, chatbot, queue=False)

In [None]:
# Start the Gradio app built above, with launch()'s debug flag enabled
demo.launch(debug=True)


# Everything after this line is experimental and a work in progress.

In [None]:
# Handle on the persisted chat history for the fixed "test_session" session
chat_message_history = MongoDBChatMessageHistory(
    session_id="test_session",
    connection_string=mongo_conn,
    database_name=mongo_database,
    collection_name=mongo_chat_history,
)

# Examples of adding messages by hand (left disabled):
# chat_message_history.add_user_message("My name is James")
# chat_message_history.add_ai_message("Hi, James!")
# chat_message_history.add_message()

In [None]:
# Buffer memory backed by the persisted history; exposes it under the
# 'chat_history' key and returns message objects rather than a single string.
conversational_memory = ConversationBufferMemory(
    chat_memory=chat_message_history,
    memory_key='chat_history',
    return_messages=True,
)

In [None]:
# Clean up: drop the chat-history collection, then the semantic-cache collection
for _collection in (chathistory, cache):
    database.drop_collection(_collection)