In [2]:
from langchain.vectorstores import ElasticsearchStore
from langchain.embeddings.openai import OpenAIEmbeddings

# Name of the Elasticsearch index the vector store is bound to.
# NOTE(review): `os` is used below but is not imported in the cells visible here —
# presumably an earlier cell imports it; confirm.
index_name = "test_index"

# Embedding client; the API key is read from the environment rather than hardcoded.
embeddings = OpenAIEmbeddings(
    openai_api_key=os.environ["OPENAI_API_KEY"],
)

# Vector store handle; cloud id and API key are likewise taken from the environment.
vector_store = ElasticsearchStore(
    index_name=index_name,
    embedding=embeddings,
    es_cloud_id=os.environ["ELASTICSEARCH_CLOUD_ID"],
    es_api_key=os.environ["ELASTICSEARCH_API_KEY"],
)

In [3]:
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel

# Retriever over the vector store: similarity search returning the 5 best hits.
# ("mmr" is the other search type noted as worth trying.)
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=5),
)

# Chat model that writes the final answer (temperature fixed at 0).
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# Prompt template for the answer-generation step. It has two placeholders:
# {context} (the retrieved text) and {question} (the user's question).
# NOTE(review): the phrase "or don't have to use any context" in the template reads
# oddly — presumably "or have no relevant context" was intended; confirm before
# rewording, since any change to this text changes what the model is told.
prompt = ChatPromptTemplate.from_template(
"""
You are a friendly assistant for question-answering tasks about business reports of different companies. Use the following pieces of retrieved context to answer the question. 
If you don't know the answer or don't have to use any context, just say that you don't know. Be as verbose and educational in your response as possible.

context: {context}
Question: {question}
Answer:

"""
)

def format_docs(docs):
    """Concatenate the ``page_content`` of every document, separated by a blank line.

    Args:
        docs: iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string; empty when ``docs`` yields nothing.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)


# Answer-generation stage. Input is a dict carrying "context" (a list of documents)
# and "question"; the document list is replaced by its formatted text, the prompt is
# filled in, the model is called and its reply is parsed to a plain string.
rag_chain_from_docs = (
    RunnablePassthrough.assign(
        context=lambda inputs: format_docs(inputs["context"])
    )
    | prompt
    | llm
    | StrOutputParser()
)

# Full pipeline. The incoming question is fed to the retriever (-> "context") and
# passed through unchanged (-> "question"); the generated text is then added under
# "answer", so the retrieved documents stay available alongside the answer.
rag_chain_with_source = RunnableParallel(
    context=retriever,
    question=RunnablePassthrough(),
).assign(answer=rag_chain_from_docs)

In [7]:
# Run the full RAG chain (retrieval + answer generation) on a sample question.
# Despite its name, `answer` holds the whole result dict: the retrieved documents
# under "context", the original "question", and the generated text under "answer".
answer = rag_chain_with_source.invoke("What is the name of the CEO of McKinsey?")

In [8]:
# Display the raw result dict, including the retrieved Document objects under 'context'.
answer

{'context': [Document(page_content='Many McKinsey colleagues provided helpful input \nand\xa0advice, including Lukasz\xa0Abramowicz, Navtez\xa0Bal, \nJim\xa0Banaszak, Jose Luis\xa0Blanco, João\xa0Pedro\xa0Branco, \nOlivier\xa0Cazeaux, Shankar\xa0Chandrasekaran, \nRoberto\xa0Charron, Rocco\xa0Colasante, Silvia\xa0Costa, \nOlivier d’Hossche, Arlindo Eira\xa0Filho, Fabio\xa0Ferri, \nNicklas\xa0Garemo, Joao\xa0Goncalves, Jason\xa0Green, \nDavide\xa0Gronchi, Tony\xa0Hansen, Jeff\xa0Hart, TG\xa0Jayanth, \nIvan\xa0Jelic, Priyanka\xa0Kamra, Kate\xa0Kang, Vikram\xa0Kapur, \nAmit\xa0Khera, Jan\xa0Koeleman, Mark\xa0Kuvshinikov, \nAntoine\xa0Lagasse, Alison\xa0Lai, Adi\xa0Leviatan, \nCarsten\xa0Lotz, Tim\xa0McManus, Parker\xa0Meeks, \nCarlos\xa0Mendes, Gerhard\xa0Nel, Kevin\xa0Nobels, \nRobert\xa0Palter, Prakash\xa0Parbhoo, Matthew\xa0Parsons, \nNikhil\xa0Patel, Shannon\xa0Peloquin, Frederic\xa0Remond, \nStuart\xa0Shilson, Suveer\xa0Sinha, Erik\xa0Sjodin, \nTiago\xa0Sousa, Venkataramamoorthy\xa0Sr

# Basic interface (using Gradio)

In [9]:
# Define chat interface
import time
import gradio as gr

with gr.Blocks() as demo:
    # Widgets: conversation display, question input box and a reset button.
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def _format_sources(docs):
        """Build the "relevant documentation" footer appended to every answer.

        One line per retrieved document, in the form "<source> page <page>".
        Metadata keys that are absent are shown as "unknown" so that a document
        without 'source' or 'page' cannot fail the whole chat turn with KeyError.
        """
        lines = ["relevant documentation:"]
        for doc in docs:
            meta = getattr(doc, "metadata", None) or {}
            lines.append(
                f"{meta.get('source', 'unknown')} page {meta.get('page', 'unknown')}"
            )
        return "\n".join(lines) + "\n"

    def respond(question, chat_history):
        """Answer one user question and append the exchange to the chat history.

        Args:
            question: text submitted through the textbox.
            chat_history: current value of the chatbot component, a list of
                (user_message, bot_message) tuples; ``None`` is treated as empty.

        Returns:
            A tuple ``("", chat_history)``: the empty string clears the textbox
            and the history refreshes the chatbot display.
        """
        chat_history = chat_history or []

        # Nothing to ask: skip the retrieval/LLM round trip entirely.
        if not question or not question.strip():
            return "", chat_history

        # perf_counter is monotonic, so the measurement is immune to clock changes.
        start_time = time.perf_counter()

        result = rag_chain_with_source.invoke(question)
        answer = result["answer"] + "\n\n" + _format_sources(result["context"])

        # Append user message and response to chat history
        chat_history.append((question, answer))

        runtime = time.perf_counter() - start_time
        print("---------Response runtime:", runtime, "seconds")  # Print the runtime

        return "", chat_history

    # Submitting the textbox runs respond() with the textbox and chatbot values
    # and writes its two return values back to those same components.
    msg.submit(respond, [msg, chatbot], [msg, chatbot], queue=False)
    # The Clear button resets the chatbot display by sending it None.
    clear.click(lambda: None, None, chatbot, queue=False)

# Start the Gradio server. The recorded output shows the server came up, so this
# guard passed when the cell was run in the notebook.
# NOTE(review): with debug=True the call appears to keep the cell busy until it is
# interrupted (the recorded output ends with a keyboard interruption) — confirm
# against the Gradio launch() documentation.
if __name__ == "__main__":
    demo.launch(debug=True)

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


---------Response runtime: 5.386956691741943 seconds
Keyboard interruption in main thread... closing server.
