## Load the vector db

In [None]:
import os

from dotenv import load_dotenv, find_dotenv
from langchain_openai import AzureOpenAIEmbeddings
from langchain_community.vectorstores.azuresearch import AzureSearch


# Load variables from a .env file (located with find_dotenv) so the
# os.environ lookups below can see them.
load_dotenv(find_dotenv())

# Embedding client bound to the "text-embedding-ada-002" Azure deployment.
embedding_model = AzureOpenAIEmbeddings(
    api_version="2023-05-15",
    azure_deployment="text-embedding-ada-002",
)

# Azure AI Search connection settings come from the environment. A missing
# variable yields None here and is passed through to AzureSearch unchanged.
search_endpoint = os.environ.get("AZURE_SEARCH_ENDPOINT")
search_key = os.environ.get("AZURE_SEARCH_KEY")

# Vector store over the "ef_docs" index; text is embedded through
# embedding_model.embed_query.
vector_store = AzureSearch(
    index_name="ef_docs",
    azure_search_endpoint=search_endpoint,
    azure_search_key=search_key,
    embedding_function=embedding_model.embed_query,
)

## Set up the RAG pipeline

In [None]:
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
from langchain_openai import AzureChatOpenAI
from langchain.chains import ConversationalRetrievalChain


# Conversation memory: the buffer stores turns in `message_history` and exposes
# them to the chain under the "chat_history" key as message objects.
# output_key="answer" names the chain output the memory should record, since
# the chain below also returns source documents.
message_history = ChatMessageHistory()
conversation_buffer_memory = ConversationBufferMemory(
    chat_memory=message_history,
    memory_key="chat_history",
    output_key="answer",
    return_messages=True,
)

# Chat model: the "gpt-35-turbo" deployment, run at temperature 0.0.
chat_model = AzureChatOpenAI(
    temperature=0.0,
    api_version="2023-12-01-preview",
    deployment_name="gpt-35-turbo",
)

# Retrieval chain: documents fetched from the vector store are combined with
# the "stuff" chain type, and the retrieved documents are returned alongside
# the answer.
docs_retriever = vector_store.as_retriever()
rag_chain = ConversationalRetrievalChain.from_llm(
    llm=chat_model,
    retriever=docs_retriever,
    chain_type="stuff",
    memory=conversation_buffer_memory,
    return_source_documents=True,
)

## Test the RAG chain with an example query

In [None]:
# Ask a sample question. The chain's memory supplies "chat_history", so
# "question" is the only input that has to be passed in.
example_question = "What tests should we run on machine learning code?"
result = rag_chain.invoke({"question": example_question})
# Bare expression so the notebook renders the full result mapping.
result

In [None]:
from IPython.display import Markdown, display

# Show the question as plain text and the answer rendered as Markdown.
print(f"QUESTION: {result['question']}")
print("ANSWER:")
answer_markdown = Markdown(result["answer"])
display(answer_markdown)

# Blank separator line, then one printed entry per retrieved source document.
print("\nSOURCE DOCUMENTS:")
retrieved_sources = result["source_documents"]
for doc in retrieved_sources:
    print(doc)