In [1]:
from dotenv import load_dotenv

# Load settings from a .env file into the process environment before any
# API client is constructed.
# NOTE(review): presumably this is where the OpenAI API key comes from -- confirm.
# Kept as the cell's last expression so its return value is displayed.
load_dotenv()

True

In [2]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA

# Chat model that writes the final answer; constructed with library defaults.
llm = ChatOpenAI()

# On-disk byte store that backs the embedding cache.
cache_dir = LocalFileStore("./.cache/")

# Split on newlines; chunk_size / chunk_overlap are measured with the
# tiktoken encoder (tokens) rather than in raw characters.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator='\n',
    chunk_size=600,
    chunk_overlap=100,
)

# Load the source document and cut it into chunks in a single step.
loader = UnstructuredFileLoader("./files/chapter_one.docx")
docs = loader.load_and_split(text_splitter=splitter)
embeddings = OpenAIEmbeddings()

# Look each text up in cache_dir first and only call OpenAIEmbeddings on a
# cache miss.
# The namespace keys cache entries by embedding model, so vectors written by
# one model are never served for another after the model is changed.
# Entries cached before the namespace was added are not reused; they are
# simply re-embedded once.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

# Embed every chunk (through the cache) and index the vectors in Chroma.
vectorstore = Chroma.from_documents(docs, cached_embeddings)

# A retriever is a generic interface for fetching documents; it can be backed
# by a vector store, a cloud service, and so on.
# chain_type="stuff" puts all retrieved documents into a single prompt.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)

# Last expression of the cell: the answer string is shown as the cell output.
chain.run("Where does Winston live?")

'Winston lives in Victory Mansions.'

In [4]:
# Follow-up question against the RetrievalQA chain built in the earlier cell;
# `chain` must already exist in the kernel for this cell to run.
chain.run("Describe Victory Mansions.")

"Victory Mansions is the building where Winston Smith lives in George Orwell's novel 1984. It is described as a dilapidated and run-down apartment complex located in London, the chief city of Airstrip One. The hallways of Victory Mansions have a smell of boiled cabbage and old rag mats. The building is seven floors high, and Winston, who is thirty-nine years old and has a varicose ulcer, struggles to climb the stairs. The elevator is rarely working, and the electric current is cut off during daylight hours as part of the economy drive. The exterior of the building is in disrepair, with rotting nineteenth-century houses nearby, their windows patched with cardboard and their roofs covered in corrugated iron. The building is adorned with posters featuring the face of Big Brother and slogans of the Party. Overall, Victory Mansions represents the bleak and oppressive living conditions under the totalitarian regime of Oceania."

'Victory Mansions is a building located in Airstrip One, the chief city of Oceania. It is described as having glass doors and a hallway that smells of boiled cabbage and old rag mats. The building is seven floors high, and the protagonist, Winston Smith, lives on one of the upper floors. The flat is accessed by stairs, as the lift is often not working due to the electricity being cut off during daylight hours. The building is not well-maintained, with rotting nineteenth-century houses and bombed sites surrounding it. The exterior walls of Victory Mansions are plastered with posters, including one depicting an enormous face with the caption "BIG BROTHER IS WATCHING YOU." Overall, Victory Mansions is portrayed as a rundown and gloomy place.'