In [None]:
#!pip install langchain unstructured openai chromadb Cython tiktoken pypdf lark patool

# Retrieval stage:

## 1- we'll load the vector store.
## 2- retrieve the most relevant splits (of documents).
## 3- combine the context (the most relevant splits) with a LangChain prompt.
## 4- pass the prompt containing the context to the LLM (GPT 3.5 turbo) to get the answer.

### Note:
    We added a memory to make the chatbot "conversational" which will follow these steps:
    1- get the new question from the user, pass it to the LLM with the conversation memory.
    2- if it is a follow-up question, the LLM will paraphrase it as a standalone question.
    3- pass the new paraphrased question to the retriever and continue with the steps listed above, starting from step 2.
    
![Langchain_second_phase.JPG](./images/Langchain_second_phase.JPG)

## Imports & environment setup

In [None]:
import os

from langchain.chains import ConversationalRetrievalChain # chain from LangChain which will use the memory and the saved docs
from langchain.chat_models import ChatOpenAI # chat model wrapper used as the LLM of the chain
from langchain.embeddings.openai import OpenAIEmbeddings # to get the embeddings for our docs
from langchain.memory import ConversationBufferMemory # memory for paraphrasing the follow-up question
from langchain.vectorstores import Chroma # dealing with the DB which we saved the docs embedding in
# Keep a key that is already exported in the environment instead of overwriting
# it with the placeholder. The placeholder is only a fallback and must be
# replaced with a real key; prefer exporting OPENAI_API_KEY before launching
# the notebook so the secret is never saved inside the notebook file.
os.environ.setdefault("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")

### prepare the embedding function & model 

In [None]:
# Embedding function handed to the vector store below.
embedding = OpenAIEmbeddings()

# Step 1 of the retrieval stage: open the Chroma vector store that lives in
# the given persist directory.
vectordb = Chroma(
    embedding_function=embedding,
    persist_directory="path_to_save_docs_after_vectorizing/chroma/",
)

In [None]:
# Chat model used by the chain. ChatOpenAI is imported in the imports cell at
# the top of the notebook together with the other LangChain dependencies.
llm_name = "gpt-3.5-turbo"
# temperature=0 is presumably chosen to keep the answers as repeatable as
# possible -- adjust if more varied wording is wanted.
llm = ChatOpenAI(model_name=llm_name, temperature=0)

### Create an instance for the chatbot memory

In [None]:
# Conversation memory that is attached to the chain further down.
memory = ConversationBufferMemory(
    return_messages=True,        # keep the history as message objects
    input_key="question",        # same key the chain is called with
    memory_key="chat_history",   # key under which the history is stored
)

### putting it all together

In [None]:
# Wire the LLM, the vector-store retriever and the conversation memory into a
# single conversational retrieval chain.
chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vectordb.as_retriever(),
    memory=memory,
    # Per the LangChain docs: with False, the paraphrased question is used for
    # retrieval only and the original question is passed to the answering step.
    rephrase_question=False,
    verbose=True,
)

### Helper function
#### We noticed that the chain returns a string for the first message and a dictionary for the rest of the conversation; this helper prints the final answer in both cases.

In [None]:
def printing_results(result):
  """Print the final answer contained in a chain result.

  Args:
    result: Either the answer itself as a string, or a mapping that holds
      the answer under the "answer" key.

  Raises:
    KeyError: If a mapping is passed that has no "answer" key.
  """
  # isinstance (instead of an exact type comparison) also sends str
  # subclasses to the plain-text branch rather than indexing them with
  # "answer", which would raise a TypeError.
  if isinstance(result, str):
    print(result)
  else:
    print(result["answer"])

### Testing the chatbot

In [None]:
# Ask a first question; the chain is called with the "question" key and the
# returned result is printed through the helper.
question = "what are the different classifications for sunspots?"
result = chain.run({"question": question})
printing_results(result)