In [3]:
from langchain.chat_models import ChatOpenAI
from langchain.storage import LocalFileStore
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores.faiss import FAISS
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough

# On-disk byte store used to persist computed embeddings between runs.
cache_dir = LocalFileStore("./.cache/")


# Chat model that will answer the questions.
llm = ChatOpenAI(temperature=0.1)

# Split on newlines; chunk_size / chunk_overlap are measured in tiktoken
# tokens (not characters) because the splitter is built with
# from_tiktoken_encoder.
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

loader = UnstructuredFileLoader("./files/chapter_one.docx")

# Load the file and cut it into chunks with the splitter above.
documents = loader.load_and_split(text_splitter=text_splitter)

embeddings = OpenAIEmbeddings()

# Wrap the embedder so each chunk is embedded once and then read back from
# cache_dir. The namespace ties cache entries to the embedding model: without
# it, switching models would silently reuse vectors produced by the old one.
# NOTE: entries written before the namespace was added are not reused; they
# are simply re-embedded on the next run.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

# Build the in-memory FAISS index over the chunk embeddings.
vectorstore = FAISS.from_documents(
    documents=documents,
    embedding=cached_embeddings,
)

retriever = vectorstore.as_retriever()
# Backward-compatible alias for the original (misspelled) name, in case other
# cells still reference it.
retriver = retriever


def _format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the prompt would receive the Python repr of a list of
    Document objects (metadata, escaped newlines and all) instead of the
    plain passage text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
You are a helpful assistant. Answer questions using only the context. If you don't know the answer, just say you don't know, don't make it up:\n\n{context}
""",
        ),
        ("human", "{question}"),
    ]
)

# The input question is sent to both branches of the dict: the retriever
# branch looks up matching chunks and flattens them to text for {context},
# while RunnablePassthrough forwards the question unchanged for {question}.
chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
)


chain.invoke("Describe Victory Mansions")

AIMessage(content='Victory Mansions is a building where Winston Smith resides. It has glass doors and a hallway that smells of boiled cabbage and old rag mats. The flat is located on the seventh floor, and the lift is often not working. There is a large colored poster of an enormous face, with the caption "BIG BROTHER IS WATCHING YOU," tacked to the wall. The building is described as grimy and surrounded by rotting houses and bombed sites.')