In [75]:
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.vectorstores.faiss import FAISS
from langchain.storage import LocalFileStore
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.document_loaders import UnstructuredFileLoader
from langchain.chat_models import ChatOpenAI

from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import CharacterTextSplitter

# Chat model used by both the per-document map step and the final answer
# step defined further down in this cell.
chat = ChatOpenAI(temperature=0.1)

# Conversation history; return_messages=True makes it come back as message
# objects, which is what a MessagesPlaceholder expects.
memory = ConversationBufferMemory(return_messages=True)

# Source document that will be indexed.
loader = UnstructuredFileLoader("./files/1984.txt")

# Split on newlines, with chunk size and overlap measured by the tiktoken
# encoder rather than by raw character count.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

# Load the file and cut it into chunks in a single call.
docs = loader.load_and_split(text_splitter=splitter)

# Embedding model used to vectorize the chunks.
embeddings = OpenAIEmbeddings()

# Wrap the embedding model with a local file-backed cache under ./cache/.
cache_dir = LocalFileStore("./cache/")
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
)

# Build the FAISS index from the chunks and expose it as a retriever.
vectorstore = FAISS.from_documents(docs, cached_embeddings)
retriever = vectorstore.as_retriever()

# Map-step prompt: applied to ONE retrieved chunk at a time. The model is
# asked to copy out the relevant passages, or to answer '' if none apply.
map_doc_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim. If there is no relevant text, return : ''
            ------
            {context}
            """,
    ),
    ("human", "{question}"),
])

# Prompt piped into the chat model; invoked once per retrieved chunk.
map_doc_chain = map_doc_prompt | chat

def map_docs(inputs):
    """Run the map step over every retrieved document.

    Each document is sent through ``map_doc_chain`` together with the
    question, and the extracts the model returns are merged into a single
    context string for the final answer prompt.

    Args:
        inputs: Mapping with two keys:
            "documents" - iterable of objects exposing ``page_content``.
            "question"  - the user's question, passed to every map call.

    Returns:
        The relevant extracts joined by blank lines, in the same order as
        the input documents. Replies that are empty or are just the ``''``
        "nothing relevant" placeholder are left out. Returns ``""`` when
        nothing relevant was found.
    """
    question = inputs["question"]
    requests = [
        {"context": doc.page_content, "question": question}
        for doc in inputs["documents"]
    ]

    # batch() runs the per-document calls concurrently instead of one
    # blocking round-trip after another, and returns results in input order.
    responses = map_doc_chain.batch(requests)

    # The map prompt instructs the model to answer '' when a chunk holds
    # nothing relevant; forwarding those placeholders would only add noise
    # to the final prompt's context.
    relevant = [
        response.content
        for response in responses
        if response.content.strip() not in ("", "''")
    ]
    return "\n\n".join(relevant)

# Retrieval + map step: the incoming question is sent to the retriever and
# also passed through unchanged, then both are handed to map_docs.
map_chain = (
    {"documents": retriever, "question": RunnablePassthrough()}
    | RunnableLambda(map_docs)
)

# Reduce-step prompt: the merged extracts go into the system message, the
# stored conversation is spliced in via the "history" placeholder, and the
# current question comes last.
final_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        """
            Given the following extracted parts of a long document and a question, create a final answer.
            If you don't know the answer, just say that you don't know. Don't try to make up an answer.
            ------
            {context}
            """,
    ),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{question}"),
])


def load_memory(_):
    """Return the stored conversation history.

    The single positional argument is ignored; it exists only so the
    function can be plugged into ``RunnablePassthrough.assign``, which calls
    it with the current chain input.
    """
    variables = memory.load_memory_variables({})
    return variables["history"]

# Full pipeline: build the context with the map chain, attach the stored
# history, render the final prompt, and ask the chat model.
chain = (
    {"context": map_chain, "question": RunnablePassthrough()}
    | RunnablePassthrough.assign(history=load_memory)
    | final_prompt
    | chat
)

def invoke_chain(question):
    """Ask the chain a question, record the exchange, and print the reply.

    Args:
        question: The user's question as a plain string.

    The question and the reply's text are saved to ``memory`` so that later
    calls see them as history. The reply object itself is printed; nothing
    is returned.
    """
    answer = chain.invoke(question)

    # Store the turn only after the chain has produced an answer.
    user_turn = {"input": question}
    ai_turn = {"output": answer.content}
    memory.save_context(user_turn, ai_turn)

    print(answer)


In [76]:
# First question of the session; the exchange is saved to memory by invoke_chain.
invoke_chain("Is Aaronson guilty?")

content='Yes, according to the information provided, Aaronson was guilty of the crimes he was charged with.'


In [77]:
# Follow-up question: "he" is not named here, so the final prompt relies on
# the history saved by the previous call to resolve it.
# NOTE(review): the retriever still receives only this raw question, without history.
invoke_chain("What message did he write in the table?")

content='He wrote "2+2=5" in the dust on the table.'


In [78]:
# Third question; the two earlier exchanges are included in the prompt as history.
invoke_chain("Who is Julia?")

content="Julia is a character mentioned in the text provided. She is someone who was with the main character during a moment at the Ministry of Love, and there are photographs of her and the main character that were shown to him as part of the Thought Police's surveillance."
