In [8]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA

# Retrieval-QA walkthrough: load a .docx, split it, embed the chunks through
# a disk-backed cache, index them in Chroma, and ask one question.

# Chat model used by the QA chain (constructed with its defaults).
llm = ChatOpenAI()

# Local file store that backs the embedding cache.
cache_dir = LocalFileStore(root_path="./.cache/")

# Splitter built via the tiktoken-encoder constructor; splits on newlines
# with the chunk size / overlap given below.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=100,
    chunk_size=600,
    separator="\n",
)

# Load the source document and cut it into chunks in one step.
loader = UnstructuredFileLoader(file_path="./files/chapter_one.docx")
docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

# Check whether an embedding is already stored in the cache: if it is, it is
# fetched from the cache; if it is not, it is saved to the cache.
# NOTE(review): no `namespace` is passed here; LangChain's docs suggest
# setting one (e.g. the embedding model name) so that caches for different
# embedding models do not collide -- confirm whether that matters for this
# store before changing it, since it would change the cache keys.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings=embeddings,
    document_embedding_cache=cache_dir,
)

# Index the chunks; embeddings go through the cache-backed wrapper.
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=cached_embeddings,
)

# "stuff" is the chain type in use. The other chain types that were tried
# here and left disabled are "refine", "map_reduce" and "map_rerank".
retriever = vectorstore.as_retriever()
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

# Kept as the final bare expression so the notebook echoes the answer.
chain.run("Describe Victory Mansions")

'Victory Mansions is a dilapidated apartment building where Winston Smith, the protagonist of George Orwell\'s novel "1984," resides. The building is described as run-down and in poor condition, with cramped living spaces, shabby furniture, and no modern amenities. It is a bleak and depressing place that reflects the oppressive and austere atmosphere of the society depicted in the novel.'