In [110]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import RunnablePassthrough
from langchain.memory import ConversationBufferMemory

# Chat model used to answer questions; a low temperature keeps answers close
# to the retrieved context.
llm = ChatOpenAI(temperature=0.1)

# Source document to index.
loader = UnstructuredFileLoader("./doc.txt")

# Token-based splitter (sizes are measured with the tiktoken encoder):
# chunks of up to 3500 tokens that overlap by 500, broken on newlines.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=500,
    chunk_size=3500,
    separator="\n",
)

# Load the file and split it into chunked documents in one step.
docs = loader.load_and_split(text_splitter=splitter)



In [111]:
# Embedding model used to vectorize the document chunks.
embeddings = OpenAIEmbeddings()

# On-disk byte store that holds previously computed embedding vectors.
cache_dir = LocalFileStore("./.cache")

# Wrap the embedding model so vectors already present in the cache are reused.
# `namespace` prefixes every cache key with the embedding model name; without
# it, the same text embedded by a different model would hit (and return) the
# stale vector stored by the previous model.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

# Index the chunked documents in a Chroma vector store using the cached embedder.
vectorstore = Chroma.from_documents(docs, cached_embeddings)

In [112]:
# Conversation buffer shared by the whole notebook. It is stored under the
# "history" key and returned as message objects (not one flattened string).
memory = ConversationBufferMemory(
    memory_key="history",
    return_messages=True,
)

def load_memory(_):
    """Return the conversation history currently stored in ``memory``.

    The single positional argument is ignored; it exists so the function can
    be called with one input the way the chain step that uses it requires.
    """
    stored_variables = memory.load_memory_variables({})
    return stored_variables["history"]

In [113]:
# Retrieve the 5 most similar chunks for each question.
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})


def format_docs(documents):
    """Join the text of the retrieved documents into one context string.

    Without this step the prompt's ``{context}`` slot receives the Python repr
    of a list of Document objects (metadata, quoting, escaped newlines) rather
    than the plain passage text.

    Args:
        documents: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` of every document, separated by blank lines.
        An empty iterable yields an empty string.
    """
    return "\n\n".join(document.page_content for document in documents)


prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant. Answer questions using only the following context. If you don't know the answer just say you don't know, don't make it up:\n\n{context}"),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{question}")
])

# The dict fans the incoming question out to three prompt inputs:
#   context  - retrieved chunks, flattened to plain text by format_docs
#   question - the raw question, passed through unchanged
#   history  - prior messages loaded from memory
chain = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
    "history": load_memory,
} | prompt | llm

def invoke_chain(question):
    """Run ``chain`` on ``question`` and record the exchange in ``memory``.

    Args:
        question: The user's question, passed to ``chain.invoke`` as-is.

    Returns:
        The object returned by ``chain.invoke``; its ``content`` attribute is
        what gets stored as the assistant's side of the exchange.
    """
    answer = chain.invoke(question)

    # Persist this turn so later calls see it via the "history" prompt input.
    user_turn = {"input": question}
    assistant_turn = {"output": answer.content}
    memory.save_context(user_turn, assistant_turn)

    return answer

In [114]:
# First turn of the conversation; invoke_chain stores the question and answer in memory.
invoke_chain("Is Aaronson guilty?")

AIMessage(content='Yes, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.')

In [115]:
# Follow-up that uses a pronoun ("he"). The prompt receives the chat history,
# but the retriever is queried with this question string alone.
invoke_chain("What message did he write on the table?")

AIMessage(content="I don't know.")

In [116]:
# Third turn; this exchange is also appended to memory by invoke_chain.
invoke_chain("Who is Julia?")

AIMessage(content='Julia is a character who plays a significant role in the story.')

In [117]:
# Inspect the conversation history accumulated in memory by the calls above.
memory.load_memory_variables({})

{'history': [HumanMessage(content='Is Aaronson guilty?'),
  AIMessage(content='Yes, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.'),
  HumanMessage(content='What message did he write on the table?'),
  AIMessage(content="I don't know."),
  HumanMessage(content='Who is Julia?'),
  AIMessage(content='Julia is a character who plays a significant role in the story.')]}