In [43]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.memory import ConversationBufferMemory

llm = ChatOpenAI(temperature=0.1)

cache_dir = LocalFileStore("./.cache/")

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n", 
    chunk_size=1000,
    chunk_overlap=100
)      

loader = UnstructuredFileLoader("./files/1984_chapter_three.txt")
docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings, cache_dir
)

vectorstore = FAISS.from_documents(docs, cached_embeddings)
retriever = vectorstore.as_retriever()

memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

def load_memory(_):
    chat_history = memory.load_memory_variables({})["chat_history"]

    # chat_history가 딕셔너리 형태일 경우, 리스트로 변환
    if isinstance(chat_history, dict):
        chat_history = chat_history.get("chat_history", [])

    # chat_history가 None일 경우 빈 리스트 반환
    return chat_history if isinstance(chat_history, list) else []


map_doc_prompt = ChatPromptTemplate.from_messages([
    ("system",  
        """
        Use the following context of a long document to see if any of the text is relevant to answer the question.
        Return any relevant text verbatim
        -------
        {context}
        """ 
    ),
    ("human", "{question}")
])
    
map_doc_chain = map_doc_prompt | llm

def map_docs(inputs):
    documents = inputs["documents"]
    return "\n\n".join(doc.page_content for doc in documents)

map_chain = {
    "documents": retriever, 
    "question": RunnablePassthrough()
} | RunnableLambda(map_docs)

final_prompt = ChatPromptTemplate.from_messages([
    ("system", 
        """
        Given the following extracted parts of a long documents and a question, create a final answer.
        If you don't know the answer, just say that you don't know. Don't try to make up an answer.
        -------
        {context}
        """
    ),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}")
])

A = RunnableLambda(load_memory)
chain = {"chat_history":A, "context": map_chain, "question": RunnablePassthrough()} | final_prompt | llm 

def invoke_chain(question):
    response = chain.invoke(question)
    memory.save_context(
        inputs={"input":question}, 
        outputs={"output":response.content}
    )
    print(response)
invoke_chain("Was Aaronson guilty of a crime?")
invoke_chain("What message did he write in the table?")
invoke_chain("Who is Julia?")


content='Yes, according to the extracted text, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 3099, 'total_tokens': 3126, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-b6e99aa6-a114-4e07-8118-790a2ec2e993-0'
content='He traced with his finger in the dust on the table the equation "2+2=5."' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 21, 'prompt_tokens': 3564, 'total_tokens': 3585, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens'

In [44]:
print(memory.load_memory_variables({})["chat_history"])

[HumanMessage(content='Was Aaronson guilty of a crime?', additional_kwargs={}, response_metadata={}), AIMessage(content='Yes, according to the extracted text, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.', additional_kwargs={}, response_metadata={}), HumanMessage(content='What message did he write in the table?', additional_kwargs={}, response_metadata={}), AIMessage(content='He traced with his finger in the dust on the table the equation "2+2=5."', additional_kwargs={}, response_metadata={}), HumanMessage(content='Who is Julia?', additional_kwargs={}, response_metadata={}), AIMessage(content='Julia is a character in the text who is involved in a romantic relationship with the protagonist, Winston.', additional_kwargs={}, response_metadata={})]
