In [15]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.storage import LocalFileStore

from dotenv import load_dotenv

# Load environment variables from the project's env file before any client
# is constructed (presumably this is where the OpenAI API key lives).
load_dotenv('./env/.env')

# llm = ChatOpenAI(
#     model_name="gpt-3.5-turbo",
#     temperature=0.1,
# )

# On-disk byte store used to persist computed embeddings between runs.
cache_dir = LocalFileStore("./.cache/")

# Split on newlines into chunks of at most 600 characters, with 100 characters
# of overlap between neighbouring chunks.
# `separators` expects a list of strings; the bare string "\n" only worked
# because iterating a one-character string yields that same character.
splitter = RecursiveCharacterTextSplitter(
    separators=["\n"],
    chunk_size=600,
    chunk_overlap=100,
)

# Load the source document and split it into chunked Document objects.
loader = UnstructuredFileLoader("./files/chapter_one.docx")
docs = loader.load_and_split(text_splitter=splitter)

# Check the cache for existing embeddings first; anything missing is computed
# with the underlying embedder and then stored in the cache.
embeddings = OpenAIEmbeddings()

# NOTE(review): no `namespace` is passed, so cache entries are presumably not
# separated per embedding model -- confirm before switching models.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

# Embed every chunk (through the cache) and index the vectors in Chroma.
vectorstore = Chroma.from_documents(docs, cached_embeddings)






In [11]:
# Look up the documents in the stored vector store that are related to a
# given search query: the query is compared against the stored document
# vectors and the most similar documents are returned.
# NOTE(review): "windston" looks like a typo for "Winston" -- left unchanged
# so the query matches the recorded output; confirm and correct if intended.
query = "where does windston live"
result = vectorstore.similarity_search(query)
print(result)

[Document(page_content='\nWinston kept his back turned to the telescreen. It was safer, though, as he well knew, even a back can be revealing. A kilometre away the Ministry of Truth, his place of work, towered vast and white above the grimy landscape. This, he thought with a sort of vague distaste -- this was London, chief city of Airstrip One, itself the third most populous of the provinces of Oceania. He tried to squeeze out some childhood memory that should tell him whether London had always been quite like this. Were there always these vistas of rotting nineteenth-century houses, their sides shored up with baulks of timber, their windows patched with cardboard and their roofs with corrugated iron, their crazy garden walls sagging in all directions? And the bombed sites where the plaster dust swirled in the air and the willow-herb straggled over the heaps of rubble; and the places where the bombs had cleared a larger patch and there had sprung up sordid colonies of wooden dwellings 