In [None]:
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
from langchain.prompts import PromptTemplate
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains.conversation.memory import ConversationBufferMemory
from langchain.chains import LLMChain
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain

## Setting the LLM

In [None]:
# Read the OpenAI API key from a local file so it never appears in the notebook.
# The value is stripped because text files usually end with a newline; without
# `.strip()` that newline would become part of the key passed to every client.
with open("openai_api.txt", "r") as f:
    OPENAI_API = f.read().strip()

# Completion-style model (`OpenAI` wrapper).
llm = OpenAI(
    model_name = "gpt-3.5-turbo-instruct",
    openai_api_key = OPENAI_API
)

# Chat model (`ChatOpenAI` wrapper).
chat_llm = ChatOpenAI(
    model_name = "gpt-3.5-turbo",
    openai_api_key = OPENAI_API
)

# Embedding model; used further down to embed the document chunks into Chroma.
embedding_llm = OpenAIEmbeddings(
    model = "text-embedding-ada-002",
    openai_api_key = OPENAI_API
)

## Chat Over Documents with Chat History

The difference between this chain and the `RetrievalQA` chain is that this one accepts a chat history, which makes follow-up questions possible.

In [None]:
# Build the vector store (`docsearch`) that the retrievers below are created from.

# 1. Load the raw document from disk.
loader = TextLoader("state_of_the_union.txt")
docs = loader.load()

# 2. Split it using chunk_size=500 and no overlap between chunks.
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=500)
text = text_splitter.split_documents(docs)

# 3. Embed the chunks with `embedding_llm` and index them in Chroma.
docsearch = Chroma.from_documents(embedding=embedding_llm, documents=text)

In [None]:
# Conversation memory. It is stored under the "chat_history" key and configured
# with return_messages=True.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

In [None]:
# Baseline chain: only `llm` is supplied (no separate condense model), and the
# conversation history is delegated to the `memory` object.
qa = ConversationalRetrievalChain.from_llm(
    memory=memory,
    retriever=docsearch.as_retriever(),
    llm=llm,
)

## Using a different model for condensing the question

This chain has two steps:
1. It condenses the current question and the chat history into a standalone question. This is necessary to create a standalone vector to use for retrieval.
2. It does retrieval and then answers the question using retrieval augmented generation with a separate model.

We can use separate models for those steps, i.e., a cheaper model for step 1 and a more powerful model for step 2.

### 1. Condenses with Different Model

In [None]:
## Setting a `get history` function

def get_chat_history(inputs: tuple) -> str:
    """Render a chat history as a single newline-separated string.

    Args:
        inputs: Iterable of ``(human, ai)`` pairs, one per past turn.
            Despite the ``tuple`` annotation, this notebook passes a list
            of such pairs.

    Returns:
        One ``"Human:<human>\\nAI:<ai>"`` entry per turn, joined by
        newlines; an empty string when there are no turns.
    """
    return "\n".join(f"Human:{human}\nAI:{ai}" for human, ai in inputs)

In [None]:
# Chain with two models: `llm` condenses the follow-up into a standalone
# question, `chat_llm` is the main model. History is formatted by the
# `get_chat_history` helper defined above.
qa = ConversationalRetrievalChain.from_llm(
    get_chat_history=get_chat_history,
    condense_question_llm=llm,
    retriever=docsearch.as_retriever(),
    llm=chat_llm,
)

# No memory object is attached to this chain, so we keep the chat history ourselves.
chat_history = list()

In [None]:
# First turn: `chat_history` is passed in explicitly alongside the question.
query = "What Advengers do?"
results = qa(dict(question=query, chat_history=chat_history))
print(results)

In [None]:
# Record the previous turn as a (question, answer) pair, then ask the follow-up.
chat_history.append((query, results["answer"]))

query = "Why can I trust them?"
results = qa(dict(question=query, chat_history=chat_history))
print(results)

In [None]:
# Store the last exchange in the history before the next question.
chat_history.append((query, results["answer"]))

query = "What are they awards?"
results = qa(dict(question=query, chat_history=chat_history))
print(results)

In [None]:
# Store the last exchange, then ask a question that refers back to earlier turns.
chat_history.append((query, results["answer"]))

query = "To who I am reffering as 'they'?"
results = qa(dict(question=query, chat_history=chat_history))
print(results)

### 2. RAG with Different Model

In [None]:
# Custom prompt for the question-condensing step. It takes the `chat_history`
# and `question` variables and ends with the cue for the standalone question.
custom_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. At the end of standalone question add this 'Answer the question in German language.' If you do not know the answer reply with 'I am sorry'.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)

# Chain that uses the custom condense prompt and keeps its history in `memory`.
qa = ConversationalRetrievalChain.from_llm(
    memory=memory,
    condense_question_prompt=CUSTOM_QUESTION_PROMPT,
    retriever=docsearch.as_retriever(),
    llm=chat_llm,
)

In [None]:
# This chain was built with `memory`, so only the question is passed here
# (no explicit `chat_history` key).
query = "What did the president say about Ketanji Brown Jackson"
result = qa({"question": query})

# Display the answer as the cell output; previously the result was computed
# and then discarded, so the cell showed nothing.
result["answer"]

In [None]:
# Follow-up turn on the memory-backed chain: again only the question is passed.
query = "Did he mention who she succeeded"
result = qa({"question": query})

# Display the answer as the cell output; previously the result was computed
# and then discarded, so the cell showed nothing.
result["answer"]

## Different Combine Documents Types

We can also use different types of combine-documents chains with the `ConversationalRetrievalChain`.

### ConversationalRetrievalChain with `map_reduce`


In [None]:
# Piece 1: chain that turns (chat history + follow-up) into a standalone
# question, using the stock CONDENSE_QUESTION_PROMPT.
question_generator = LLMChain(prompt=CONDENSE_QUESTION_PROMPT, llm=llm)

# Piece 2: question-answering chain loaded with chain_type="map_reduce".
doc_chain = load_qa_chain(llm, chain_type="map_reduce")

# Assemble the chain from its parts instead of going through `from_llm`.
chain = ConversationalRetrievalChain(
    combine_docs_chain=doc_chain,
    question_generator=question_generator,
    retriever=docsearch.as_retriever(),
)

# Start from an empty history and ask a single question.
chat_history = list()
query = "What did the president say about Ketanji Brown Jackson"
result = chain(dict(question=query, chat_history=chat_history))

result["answer"]

## ConversationalRetrievalChain with Question Answering with sources

In [None]:
# Piece 1: chain that turns (chat history + follow-up) into a standalone
# question, using the stock CONDENSE_QUESTION_PROMPT.
question_generator = LLMChain(prompt=CONDENSE_QUESTION_PROMPT, llm=llm)

# Piece 2: QA-with-sources chain loaded with chain_type="map_reduce".
doc_chain = load_qa_with_sources_chain(chain_type="map_reduce", llm=llm)

# Assemble the chain from its parts instead of going through `from_llm`.
chain = ConversationalRetrievalChain(
    combine_docs_chain=doc_chain,
    question_generator=question_generator,
    retriever=docsearch.as_retriever(),
)

# Start from an empty history and ask a single question.
chat_history = list()
query = "What did the president say about Ketanji Brown Jackson"
result = chain(dict(question=query, chat_history=chat_history))

result["answer"]