In [3]:
import time
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.storage import LocalFileStore

from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.memory import ConversationBufferMemory
from langchain.vectorstores import FAISS
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda


loader = UnstructuredFileLoader("./files/document.txt")
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)
documents = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()
cache_dir = LocalFileStore("./.cache/")
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)
vectorstore = FAISS.from_documents(documents, cached_embeddings)

retriever = vectorstore.as_retriever(search_type="mmr")

llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],
)

memory = ConversationBufferMemory(
    llm=llm,
    memory_key="chat_history",
    return_messages=True
)


basic_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        """
        You are a helpful assistant here to assist the user.
        The user may ask questions and provide previous conversation history with you. 
        Your task is to answer the current question by leveraging both the provided context and the conversation history. 
        If you don’t know the answer, simply say you don’t know—please avoid making up information. 
        Use only the information from the context and chat history to formulate your response.
        """
    ),
    ("human", "Conversation history:\n{chat_history}\n\nCurrent question: {question}")
])

circuit_search_prompt = ChatPromptTemplate.from_messages([
        (
            "system",
            """
            Use the following excerpt from a longer document to determine if any of the text is relevant for answering the question. 
            Return any relevant text verbatim. If there is no relevant text, return an empty string.
            -------
            {context}
            """,
        ),
        ("human", "{question}"),
])

repeat_chain = circuit_search_prompt | llm

def process_document(document, question):
    time.sleep(30)
    result = repeat_chain.invoke({"context": document.page_content, "question": question})
    return result.content


def map_docs(inputs):
    documents = inputs["documents"]
    question = inputs["question"]

    results = []
    for doc in documents:
        results.append(process_document(doc, question))
    return "\n\n".join(results)

map_chain = {
    "documents": retriever,
    "question": RunnablePassthrough(),
} | RunnableLambda(map_docs)

final_chain = {
    "context": map_chain,
    "question": RunnablePassthrough(),
    "chat_history": lambda _: memory.load_memory_variables({})["chat_history"]
} | basic_prompt | llm


questions = ["Aaronson 은 유죄인가요?", "그가 테이블에 어떤 메시지를 썼나요?", "Julia 는 누구인가요?"]

for question in questions:
    print(f"질문: {question}")
    response = final_chain.invoke(question)
    print(f"답변: {response}\n")
    memory.save_context({"question": question}, {"response": str(response)})



질문: Aaronson 은 유죄인가요?
""''No. I believe it. I KNOW that you will fail. There is something in the universe--I don't know, some spirit, some principle--that you will never overcome.''""""죄에 대한 정보가 없기 때문에 Aaronson이 유죄인지 여부에 대해 알 수 없습니다. 추가적인 정보가 필요합니다.답변: content='죄에 대한 정보가 없기 때문에 Aaronson이 유죄인지 여부에 대해 알 수 없습니다. 추가적인 정보가 필요합니다.'

질문: 그가 테이블에 어떤 메시지를 썼나요?
이 텍스트에는 그가 테이블에 어떤 메시지를 썼다는 내용이 포함되어 있지 않습니다. 따라서 관련된 텍스트는 없습니다.그는 테이블에 다음과 같은 메시지를 썼습니다: 

"FREEDOM IS SLAVERY" 

그리고 그 아래에 썼습니다: 

"TWO AND TWO MAKE FIVE" 

마지막으로 그는 다음을 썼습니다: 

"GOD IS POWER"''Tell me,' he said, 'how soon will they shoot me?''''The worst thing in the world,' said O'Brien, 'varies from individual to individual. It may be burial alive, or death by fire, or by drowning, or by impalement, or fifty other deaths. There are cases where it is some quite trivial thing, not even fatal.' ''죄에 대한 정보가 없기 때문에 Aaronson이 유죄인지 여부에 대해 알 수 없습니다. 추가적인 정보가 필요합니다. 테이블에 어떤 메시지를 썼는지에 대한 정보는 제공되지 않았습니다.답변: content='죄에 대한 정보가 없기 때문에 Aaronson이 유