In [4]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA

# Chat model that writes the answers; constructed with the library defaults.
llm = ChatOpenAI()

# On-disk byte store used to persist embedding vectors between runs.
cache_dir = LocalFileStore("./.cache/")

# Split on newlines, measuring chunk length with the tiktoken encoder.
# Consecutive chunks overlap by 100 so text near a chunk boundary is
# present in both neighbouring chunks.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

loader = UnstructuredFileLoader("./files/chapter_one.docx")

# Load the document and cut it into chunks in a single step.
docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

# Wrap the embedder so vectors for already-seen text are read from
# `cache_dir` instead of being requested again.
# The namespace keys the cache by embedding model: without it, vectors
# produced by one model would be silently reused after switching to another
# model, because the cache key would depend on the text alone.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

# Index every chunk in a Chroma vector store, embedding through the cache.
vectorstore = Chroma.from_documents(docs, cached_embeddings)

# Retrieval QA chain: the retriever fetches chunks relevant to the question
# and the "refine" chain type builds the answer from them with `llm`.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="refine",
    retriever=vectorstore.as_retriever(),
)

# Another question to try: "Where does Winston live?"
# Left as a bare expression so a notebook displays the returned answer.
chain.run("Describe Victory Mansions")


'In George Orwell\'s "1984," Victory Mansions is depicted as a dilapidated and oppressive apartment building where the protagonist, Winston Smith, resides. The building is described as having a grim atmosphere, with the hallway smelling of boiled cabbage and old rag mats. The living conditions are poor, with unreliable amenities such as the dysfunctional lift and intermittent electricity cuts as part of the Party\'s economy drive. The building is adorned with propaganda posters, prominently featuring the face of Big Brother with the ominous slogan "BIG BROTHER IS WATCHING YOU," symbolizing the constant surveillance and control imposed by the Party. Winston\'s physical discomfort, such as his varicose ulcer and the meagerness of his body, further emphasize the bleak and restrictive environment of Victory Mansions. Residents like Winston must navigate this oppressive setting under the ever-watchful eye of the Party, adding to the sense of suffocation and fear in their daily lives.'