In [None]:
from langchain_openai import ChatOpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import UnstructuredFileLoader
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import RunnablePassthrough, RunnableParallel
from langchain.memory import ConversationBufferMemory

# Chat model that produces the final answers (low temperature: 0.1).
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.1)
# On-disk byte store under ./.cache/document; used below as the embedding cache.
cache_dir = LocalFileStore("./.cache/document")
# Splitter that breaks text on newlines into chunks of up to 600 with an
# overlap of 100; built via from_tiktoken_encoder, so lengths are counted
# with the tiktoken encoder rather than in characters.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100
)

# Load the source document and split it into chunks with `splitter`.
loader = UnstructuredFileLoader("./files/document.txt")
docs = loader.load_and_split(text_splitter=splitter)
# Wrap the OpenAI embeddings with a cache backed by `cache_dir`.
# NOTE(review): presumably this avoids re-embedding unchanged chunks on
# re-runs -- confirm against the CacheBackedEmbeddings documentation.
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)
# Build a FAISS index over the chunks and expose it as a retriever.
vectorstore = FAISS.from_documents(docs, cached_embeddings)
retriever = vectorstore.as_retriever()

# Prompt layout: a system message carrying the retrieved {context}, then the
# prior conversation injected through the "history" placeholder, then the
# user's current {question}.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant. Answer questions using only the following context. If you don't know the answer just say you don't know, don't make it up:\n\n{context}"),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{question}")
])

# Conversation buffer; return_messages=True makes the stored history come back
# as message objects, which is what MessagesPlaceholder consumes.
memory = ConversationBufferMemory(return_messages=True)

def load_memory(_):
    """Return the conversation history currently held in ``memory``.

    The single positional argument is ignored; accepting it lets the
    function be called with one argument, as a chain step is.
    """
    stored = memory.load_memory_variables({})
    return stored["history"]

def format_docs(docs):
    """Concatenate the ``page_content`` of each document into one string.

    Documents are separated by a blank line; an empty input yields ``""``.
    """
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

# Full pipeline. The input question is given to three branches whose results
# fill the prompt variables:
#   context  -> retrieved documents joined into one string by format_docs
#   question -> the input passed through unchanged
#   history  -> prior messages read from memory by load_memory
# The resulting dict is rendered by `prompt` and sent to `llm`.
chain = RunnableParallel(
    {"context": retriever | format_docs, 
     "question": RunnablePassthrough(),
     "history": load_memory}
) | prompt | llm

def invoke_chain(question):
    """Run ``chain`` on ``question`` and record the exchange in ``memory``.

    The question is stored under the "input" key and the answer text under
    the "output" key. Returns the answer text (the ``content`` of the chain
    result).
    """
    answer = chain.invoke(question).content
    memory.save_context({"input": question}, {"output": answer})
    return answer

# Ask a question through the chain and print the answer text.
response = invoke_chain("Is Aaronson guilty?")
print(response)

According to Winston, Aaronson is guilty of the crimes he was charged with, as he has accepted everything the Party states, including that he had never seen the photograph that disproved their guilt.


In [57]:
# Follow-up question: turns saved by earlier invoke_chain calls are passed to
# the prompt as history alongside this question.
response = invoke_chain("What message did he write in the table?")
print(response)

Winston traced "2+2=5" in the dust on the table.


In [58]:
# Another question in the same conversation; the exchange is appended to memory.
response = invoke_chain("Who is Julia?")
print(response)

Julia is a character that Winston loves. She is someone with whom he shares a romantic relationship, and he feels a deep connection to her, especially in moments of despair and longing.


In [60]:
# Inspect the conversation stored in memory (the cell displays the value).
memory.load_memory_variables({})['history']

[HumanMessage(content='Is Aaronson guilty?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='According to Winston, Aaronson is guilty of the crimes he was charged with, as he has accepted everything the Party states, including that he had never seen the photograph that disproved their guilt.', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='What message did he write in the table?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Winston traced "2+2=5" in the dust on the table.', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Who is Julia?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Julia is a character that Winston loves. She is someone with whom he shares a romantic relationship, and he feels a deep connection to her, especially in moments of despair and longing.', additional_kwargs={}, response_metadata={})]