In [1]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import RunnablePassthrough
from langchain.memory import ConversationBufferMemory


# Chat model that produces the final answers. The temperature is kept low so
# that answers stay close to the retrieved text rather than varying per run.
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0.1)



In [2]:
# Loader for the source document that the rest of the notebook splits,
# embeds, and answers questions about.
txt_loader = UnstructuredFileLoader("./files/document.txt")

In [3]:
# Splitter that breaks text on newlines and measures chunk size / overlap
# with a tiktoken encoder instead of raw character counts.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=600,
    chunk_overlap=100,
    separator="\n",
)

# Load the file first, then cut the loaded documents into chunks.
txt_docs = splitter.split_documents(txt_loader.load())

In [4]:
# On-disk byte store used to persist computed embedding vectors.
cache_dir = LocalFileStore("./.cache/")

embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
)

# Wrap the embedding model so that vectors for already-seen texts are read
# from the cache instead of being recomputed. The cache is namespaced by the
# model name: without a namespace, vectors cached under one model would be
# silently reused if the embedding model is later changed.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

In [5]:
# Embed every chunk (through the cache-backed embedder) and index the vectors in FAISS.
vectorstore = FAISS.from_documents(
    documents=txt_docs,
    embedding=cached_embeddings,
)

# Retriever view over the index; the chain uses it to fetch context for a question.
retriever = vectorstore.as_retriever()

In [6]:
# Conversation memory that returns the stored turns as message objects.
# The key is spelled out because both the prompt's MessagesPlaceholder and
# load_memory() look the turns up under "history".
memory = ConversationBufferMemory(memory_key="history", return_messages=True)

In [7]:
# System instructions (in Korean): answer the user's question from the given
# document, look for the relevant passages, and say "I don't know" instead of
# inventing an answer. The retrieved text is substituted for {context}.
system_template = """
        당신은 주어진 문서를 바탕으로 사용자의 질문에 답하는 AI입니다.
        다음 문서에서 질문과 관련된 내용을 찾아주세요.
        주어진 문서를 통해 알 수 없는 정보일 경우, 답변을 지어내지 말고 모른다고 답변하세요.
        -------
        {context}
        """

# Message order: system instructions, prior conversation turns, then the new question.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_template),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{question}"),
    ]
)



In [8]:
def load_memory(_):
    """Return the conversation turns currently stored in ``memory``.

    The single argument is the value the chain passes to each entry of its
    input mapping; it is ignored because the history does not depend on it.
    """
    stored = memory.load_memory_variables({})
    return stored["history"]


def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the list of Document objects would be interpolated
    into the prompt as its Python repr (metadata, escaped newlines and all)
    rather than as readable text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Each entry of the mapping receives the raw question string:
#   context  -> retrieved chunks, flattened to plain text
#   history  -> previous conversation turns from memory
#   question -> the question itself, passed through unchanged
chain = (
    {
        "context": retriever | format_docs,
        "history": load_memory,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
)


def get_response(question):
    """Answer ``question`` with the chain and record the exchange in memory.

    Args:
        question: The user's question as a string.

    Returns:
        The text content of the model's reply.
    """
    answer = chain.invoke(question).content
    # Store the turn so that later questions see it as conversation history.
    memory.save_context({"input": question}, {"output": answer})
    return answer

In [9]:
# First turn: the memory is still empty, so only the retrieved context is used.
get_response("Is Aaronson guilty?")



'According to the document, Winston believes that Aaronson, along with Jones and Rutherford, is guilty of the crimes they were charged with. However, he also reflects that he had never seen the photograph that disproved their guilt and that it had never existed, implying that their guilt may have been fabricated.'

In [10]:
# Follow-up turn: "he" is not named in the question itself; the earlier
# exchange is supplied to the model through the conversation history.
get_response("What message did he write in the table?")



'Winston traced "2+2=5" in the dust on the table.'

In [11]:
# Third turn, asked on top of the two exchanges already stored in memory.
get_response("Who is Julia?")



'Julia is a character in the document who has a romantic relationship with Winston. She is someone he loves deeply, and their connection is significant to the themes of rebellion and personal loyalty against the oppressive regime they live under.'

In [12]:
# Inspect what the memory has accumulated: each get_response() call above
# saved one human message and one AI message.
memory.load_memory_variables({})

{'history': [HumanMessage(content='Is Aaronson guilty?'),
  AIMessage(content='According to the document, Winston believes that Aaronson, along with Jones and Rutherford, is guilty of the crimes they were charged with. However, he also reflects that he had never seen the photograph that disproved their guilt and that it had never existed, implying that their guilt may have been fabricated.'),
  HumanMessage(content='What message did he write in the table?'),
  AIMessage(content='Winston traced "2+2=5" in the dust on the table.'),
  HumanMessage(content='Who is Julia?'),
  AIMessage(content='Julia is a character in the document who has a romantic relationship with Winston. She is someone he loves deeply, and their connection is significant to the themes of rebellion and personal loyalty against the oppressive regime they live under.')]}