In [None]:
from operator import itemgetter

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
# from langchain_openai import ChatOpenAI
from langchain_together.embeddings import TogetherEmbeddings

from langchain.schema import format_document
from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string
from langchain_core.runnables import RunnableParallel

from langchain.prompts.prompt import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.memory import ChatMessageHistory

from helpers import get_vectorstore, save_response_to_markdown_file

In [None]:
from models import get_together_mix, get_together_quen
# Alternative model, kept for reference; assign it to ACTIVE_LLM to switch.
# together_mix = get_together_mix()
# Single LLM instance shared by every chain built later in this notebook
# (it is piped into both the question-condensing and the answering steps).
ACTIVE_LLM = get_together_quen()
def get_retriever(filename, context_size = "2k"):
    """Build a retriever over the stored embeddings that belong to ``filename``.

    Args:
        filename: Name of the source document (e.g. ``"chromosome.pdf"``). It is
            only used to derive the vector-store location, which is the name
            without its extension followed by ``"-embeddings"``.
        context_size: Size suffix inserted into the embedding model name
            ``togethercomputer/m2-bert-80M-{context_size}-retrieval``.

    Returns:
        The object returned by ``as_retriever()`` on the vector store that
        ``get_vectorstore`` hands back.
    """
    # Local import keeps this cell self-contained; the notebook's import cell
    # does not bring in ``os``.
    import os

    model = f"togethercomputer/m2-bert-80M-{context_size}-retrieval"
    embedder = TogetherEmbeddings(model=model)

    # Strip the real extension, whatever its length. Slicing off a fixed four
    # characters only works for three-letter extensions and silently corrupts
    # names such as "paper" or "notes.json".
    stem, _extension = os.path.splitext(filename)
    local_vector_path = f"{stem}-embeddings"

    # documents=None: presumably tells the helper to load an existing store
    # from local_vector_path rather than build one — TODO confirm in helpers.
    vectorstore = get_vectorstore(documents = None, embedder = embedder, local_vector_path = local_vector_path)
    return vectorstore.as_retriever()

In [None]:
# Source document whose embeddings back the retriever; get_retriever derives
# the vector-store location from this name.
PDF_FILENAME = "chromosome.pdf"
# Module-level retriever reused by both chains defined further down.
retriever = get_retriever(PDF_FILENAME, context_size="2k")

In [None]:
# BACKGROUND_OG = "Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language."
# Prompt that rewrites a follow-up question into a standalone one.
# Expects two variables: {chat_history} and {question}.
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

# Prompt for the final answer. Expects {context} (the retrieved text) and
# {question} (the standalone question).
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
ANSWER_PROMPT = ChatPromptTemplate.from_template(template)

# Per-document formatting used when combining retrieved documents: each
# document is rendered as just its page_content.
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")

In [None]:
def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    """Render each document with ``document_prompt`` and join the results.

    Args:
        docs: Iterable of documents to render.
        document_prompt: Prompt passed to ``format_document`` for every document.
        document_separator: String placed between consecutive rendered documents.

    Returns:
        One string containing all rendered documents, in input order.
    """
    return document_separator.join(
        format_document(document, document_prompt) for document in docs
    )

# Step 1: produce a dict with a single key, "standalone_question".
# The incoming dict is passed through with its "chat_history" entry replaced by
# the string form from get_buffer_string, then fed to the condense prompt, the
# LLM, and finally parsed to a plain string.
_inputs = RunnableParallel(
    standalone_question=RunnablePassthrough.assign(
        chat_history=lambda x: get_buffer_string(x["chat_history"])
    )
    | CONDENSE_QUESTION_PROMPT
    | ACTIVE_LLM
    | StrOutputParser(),
)
# Step 2: build the variables ANSWER_PROMPT needs.
# "context" is the retriever output for the standalone question, merged into one
# string by _combine_documents; "question" is the standalone question itself.
_context = {
    "context": itemgetter("standalone_question") | retriever | _combine_documents,
    "question": lambda x: x["standalone_question"],
}
# Full pipeline: condense question -> retrieve context -> answer prompt -> LLM.
# The chain output is whatever ACTIVE_LLM returns (no output parser is applied).
conversational_qa_chain = _inputs | _context | ANSWER_PROMPT | ACTIVE_LLM

In [None]:
def get_conversational_qa_response(question: str, chat_history: list = None):
    """Run ``conversational_qa_chain`` for one question and return its output.

    Args:
        question: The user's (possibly follow-up) question.
        chat_history: Earlier turns of the conversation as a list of messages,
            e.g. ``ChatMessageHistory().messages``. ``None`` means there is no
            prior conversation.

    Returns:
        Whatever ``conversational_qa_chain.invoke`` returns.
    """
    # The chain hands "chat_history" to get_buffer_string, which expects a
    # sequence of messages. The "no history" default therefore has to be an
    # empty list, not a ChatMessageHistory container object.
    conversational_chat_history = [] if chat_history is None else chat_history
    return conversational_qa_chain.invoke(
        {
            "question": question,
            "chat_history": conversational_chat_history,
        }
    )

# Seed a short conversation (one user turn, one AI turn) so the follow-up
# question below has something to refer back to.
history = ChatMessageHistory()
history.add_user_message("Only answer Yes or No. Does this paper discuss chromosomes?")
history.add_ai_message("Yes")

# Follow-up that only makes sense given the history above.
query = "Tell me more about the specific types."
# Pass the list of messages (history.messages), not the history object itself.
response = get_conversational_qa_response(question = query, chat_history = history.messages)

In [None]:
# Conversation memory for the second chain. Messages are returned as message
# objects; the "question" input and the "answer" output are what get stored
# when save_context is called.
memory = ConversationBufferMemory(
    return_messages=True, input_key="question", output_key="answer"
)
# First we add a step to load memory
# This adds a "chat_history" key to the input object, filled from the
# "history" entry of memory.load_memory_variables
loaded_memory = RunnablePassthrough.assign(
    chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
)
# Now we calculate the standalone question
# (chat_history is converted to a string with get_buffer_string before it is
# inserted into the condense prompt)
standalone_question = {
    "standalone_question": {
        "question": lambda x: x["question"],
        "chat_history": lambda x: get_buffer_string(x["chat_history"]),
    }
    | CONDENSE_QUESTION_PROMPT
    | ACTIVE_LLM
    | StrOutputParser(),
}
# Now we retrieve the documents
# ("question" from here on is the standalone question, not the original input)
retrieved_documents = {
    "docs": itemgetter("standalone_question") | retriever,
    "question": lambda x: x["standalone_question"],
}
# Now we construct the inputs for the final prompt
final_inputs = {
    "context": lambda x: _combine_documents(x["docs"]),
    "question": itemgetter("question"),
}
# And finally, we do the part that returns the answers
# The result dict carries both the LLM output ("answer") and the retrieved
# documents ("docs") so callers can inspect the sources.
answer = {
    "answer": final_inputs | ANSWER_PROMPT | ACTIVE_LLM,
    "docs": itemgetter("docs"),
}
# And now we put it all together!
final_chain = loaded_memory | standalone_question | retrieved_documents | answer

In [None]:
# Dump everything currently held in `memory` to a markdown file: the content of
# each stored message followed by a blank line.
# Memory is only filled by explicit memory.save_context(...) calls; the chain
# never writes to it on its own.
# NOTE(review): in top-to-bottom order this cell runs before the first
# save_context call, so the history may still be empty here — confirm the
# intended cell order.
msgs = memory.load_memory_variables({})['history']
text = "".join(msg.content + "\n\n" for msg in msgs)

save_response_to_markdown_file(text, "response.md")

In [None]:
# Ask one question through the memory-backed chain.
QUESTION = "Elaborate on this enzyme."
inputs = {"question": QUESTION}
# result is a dict with "answer" (the LLM output) and "docs" (retrieved documents).
result = final_chain.invoke(inputs)
# final_chain only reads from memory, so the exchange has to be stored by hand.
# NOTE(review): .content assumes ACTIVE_LLM returns a message object rather
# than a plain string — confirm against models.get_together_quen.
memory.save_context(inputs, {"answer": result["answer"].content})

In [None]:
# Store the latest exchange in memory, but only if it is not already the most
# recent entry. The cell that invokes final_chain saves the same exchange, so
# an unconditional save here would record the turn twice whenever the notebook
# is run top to bottom.
_stored_messages = memory.load_memory_variables({})["history"]
_answer_text = result["answer"].content
if not _stored_messages or _stored_messages[-1].content != _answer_text:
    memory.save_context(inputs, {"answer": _answer_text})

In [None]:
# Persist the answer from the stateless chain (`response` is set in the cell
# that calls get_conversational_qa_response).
# NOTE(review): no filename is passed here, unlike the other call in this
# notebook — presumably the helper has a default destination; confirm in helpers.
save_response_to_markdown_file(response.content)

In [None]:
# Print the conversation transcript held in memory, one line at a time.
# A dedicated loop variable is used instead of rebinding `msgs`, which an
# earlier cell uses for the list of message objects.
for transcript_line in memory.buffer_as_str.split("\n"):
    print(transcript_line)

In [None]:
# Show the whole conversation transcript in one go. print() is used so that
# newlines render as line breaks rather than as escaped "\n" in a string repr.
print(memory.buffer_as_str)