# RAG with Azure Cosmos DB for MongoDB vCore
You can run this notebook after successfully running the "RAG - Cosmos for Mongo - create embeddings" notebook.

In [None]:
from dotenv import load_dotenv
import pandas as pd
from IPython.display import display, HTML, JSON, Markdown
import os

# Populate the process environment via python-dotenv before reading any setting.
load_dotenv()

# --- Azure OpenAI: deployment names, one per model family ---
# Each value is None when the corresponding variable is not set.
OPENAI_GPT35_DEPLOYMENT_NAME = os.environ.get("OPENAI_GPT35_DEPLOYMENT_NAME")
OPENAI_GPT4_DEPLOYMENT_NAME = os.environ.get("OPENAI_GPT4_DEPLOYMENT_NAME")
OPENAI_GPT4V_DEPLOYMENT_NAME = os.environ.get("OPENAI_GPT4V_DEPLOYMENT_NAME")
OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME = os.environ.get("OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME")
OPENAI_DALLE_DEPLOYMENT_NAME = os.environ.get("OPENAI_DALLE_DEPLOYMENT_NAME")

# --- Azure OpenAI: service endpoint and API key ---
OPENAI_DEPLOYMENT_ENDPOINT = os.environ.get("OPENAI_DEPLOYMENT_ENDPOINT")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# --- Cosmos DB (Mongo API): connection string plus index/database/collection names ---
COSMOS_MONGO_CONNECTION_STRING = os.environ.get("COSMOS_MONGO_CONNECTION_STRING")
COSMOS_INDEX_NAME = os.environ.get("COSMOS_INDEX_NAME")
COSMOS_DBNAME = os.environ.get("COSMOS_DBNAME")
COSMOS_COLLECTION_NAME = os.environ.get("COSMOS_COLLECTION_NAME")

In [None]:
from langchain_openai import AzureOpenAIEmbeddings
from tenacity import retry, wait_random_exponential, stop_after_attempt

In [None]:
# Embedding client shared by calc_embeddings() and the vector store below.
# The deployment name is passed as both the deployment and the model name.
# NOTE(review): no api_key / api_version is passed here, so the client presumably
# picks them up from environment variables - confirm your .env provides them.
embeddingmodel = AzureOpenAIEmbeddings(
    azure_endpoint=OPENAI_DEPLOYMENT_ENDPOINT,
    deployment=OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME,
    model=OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME,
    chunk_size=1,
)

In [None]:
# The tenacity decorator retries the call with a randomized exponential wait
# (between 1 and 20 seconds) and gives up after 6 attempts, so throttling
# responses from the embeddings endpoint do not fail the notebook immediately.
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def calc_embeddings(text):
    """Return the embedding of ``text`` computed by ``embeddingmodel``.

    Args:
        text: The string to embed. Newlines are replaced with spaces before
            the text is sent to the model.

    Returns:
        The value returned by ``embeddingmodel.embed_query`` for the
        flattened text.

    Raises:
        tenacity.RetryError: Presumably raised once all 6 attempts have failed,
            because the decorator does not set ``reraise=True`` - confirm
            against the tenacity version in use. Note that any exception
            raised in the body (not only throttling errors) triggers a retry.
    """
    # Replace newlines, which can negatively affect embedding quality.
    return embeddingmodel.embed_query(text.replace("\n", " "))

In [None]:
from pymongo import MongoClient
from langchain_community.vectorstores.azure_cosmos_db import (
    AzureCosmosDBVectorSearch,
    CosmosDBSimilarityType,
)

# Connect to the Cosmos DB account through its MongoDB connection string.
client: MongoClient = MongoClient(COSMOS_MONGO_CONNECTION_STRING)

_INGEST_HINT = 'Run the "RAG - Cosmos for Mongo - create embeddings" notebook first.'

# Indexing the client / database only returns a lazy handle: nothing is created
# on the server until data is written. This notebook only reads, so report
# honestly whether the database and collection already exist instead of
# claiming they were created.
db = client[COSMOS_DBNAME]
if COSMOS_DBNAME in client.list_database_names():
    print("Using database: '{}'.\n".format(COSMOS_DBNAME))
else:
    print("Database '{}' was not found. {}\n".format(COSMOS_DBNAME, _INGEST_HINT))

collection = db[COSMOS_COLLECTION_NAME]
if COSMOS_COLLECTION_NAME in db.list_collection_names():
    print("Using collection: '{}'.\n".format(COSMOS_COLLECTION_NAME))
else:
    print("Collection '{}' was not found. {}\n".format(COSMOS_COLLECTION_NAME, _INGEST_HINT))

In [None]:
# Wrap the existing collection and its vector index so it can be queried
# with the same embedding model that is passed in here.
vectorstore = AzureCosmosDBVectorSearch(
    collection, embeddingmodel, index_name=COSMOS_INDEX_NAME
)

# Perform a similarity search between a query and the ingested documents.
question = "Why does the coffin prepared for Queequeg become Ishmael's life buoy once the Pequod sinks?"
docs = vectorstore.similarity_search(question)

# An empty result would otherwise surface as a bare "list index out of range";
# keep the same exception type but explain the likely cause.
if not docs:
    raise IndexError(
        "The similarity search returned no documents. Check that the collection "
        "contains ingested documents and that COSMOS_INDEX_NAME refers to an "
        "existing vector index."
    )

# Only the first returned document is used as context for the answer.
answer = docs[0].page_content
print(answer)


In [None]:
# use GPT to answer a question based on the question and the answer from our similarity search
from openai import AzureOpenAI
# Azure OpenAI client used for chat completions; endpoint and key come from the
# configuration cell, and the API version is pinned explicitly.
clientOpenAI = AzureOpenAI(
    api_version="2023-05-15",
    api_key=OPENAI_API_KEY,
    azure_endpoint=OPENAI_DEPLOYMENT_ENDPOINT,
)

def call_openAI(text, model=None, temperature=0.7, max_tokens=800):
    """Send a chat-completion request and return the reply text.

    Args:
        text: The chat messages to send, passed through unchanged as the
            ``messages`` argument (despite the name, this is the message
            list, e.g. ``[{"role": "user", "content": "..."}]``).
        model: Deployment name to call. Defaults to
            ``OPENAI_GPT35_DEPLOYMENT_NAME`` when omitted or ``None``.
        temperature: Sampling temperature forwarded to the API (default 0.7).
        max_tokens: Upper bound on generated tokens forwarded to the API
            (default 800).

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    response = clientOpenAI.chat.completions.create(
        # Resolve the default at call time so the configured deployment is used.
        model=OPENAI_GPT35_DEPLOYMENT_NAME if model is None else model,
        messages=text,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
    )

    return response.choices[0].message.content

In [None]:
# Combine the user's question with the passage retrieved by the similarity
# search so the model answers from that passage.
prompt = "Question: {}\nInformation: {}".format(question, answer)

# Prepare the chat messages: a system instruction plus the user prompt.
messages = [
    {
        "role": "system",
        "content": "You are a HELPFUL assistant answering users questions. Answer the question using the provided information and do not add anything else.",
    },
    {"role": "user", "content": prompt},
]

result = call_openAI(messages)

# The reply is plain text / Markdown rather than HTML, so render it as Markdown:
# this keeps paragraphs and lists intact instead of collapsing them into one line.
display(Markdown(result))