In [18]:
import re
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough

from dotenv import load_dotenv

# Load environment variables from the project's env file before any client is
# built (presumably this is where the OpenAI API key lives - confirm).
load_dotenv('./env/.env')

# Chat model used to answer questions; a low temperature keeps the answers
# close to deterministic.
llm = ChatOpenAI(temperature=0.1, model_name="gpt-4o-mini")

# File-system backed byte store rooted at ./.cache/; it is handed to
# CacheBackedEmbeddings below as the embedding cache.
cache_dir = LocalFileStore("./.cache/")

# Split on newlines into chunks of at most ~600 characters, with 100
# characters of overlap between neighbouring chunks.
# NOTE: `separators` must be a *list* of strings. A bare string is iterated
# character by character, which only happens to work for a one-character
# separator such as "\n" and silently misbehaves for anything longer.
splitter = RecursiveCharacterTextSplitter(
    separators=["\n"],
    chunk_size=600,
    chunk_overlap=100,
)

# Load the source document and split it into chunks in one step.
loader = UnstructuredFileLoader("./files/chapter_one.docx")
docs = loader.load_and_split(text_splitter=splitter)

# Wrap the OpenAI embedder with a cache: embeddings already present in the
# cache store are reused, and missing ones are computed and then stored.
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings=embeddings,
    document_embedding_cache=cache_dir,
)

# Embed every chunk (through the cache) and index the vectors in Chroma.
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=cached_embeddings,
)

# A retriever takes a single string and returns a list of documents.
retriver = vectorstore.as_retriever()

# Two-message prompt: the system message carries the instructions plus the
# retrieved {context}; the human message carries the user's {question}.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. "
            "Answer questions using only the following context. "
            "If you don't know the answer just say you don't know, "
            "don't make it up:\n\n{context}",
        ),
        ("human", "{question}"),
    ]
)

def _format_docs(documents):
    """Join the text of the retrieved documents into one context string."""
    return "\n\n".join(document.page_content for document in documents)


# The leading dict fans the single input string out to both entries:
#   - "context": the retriever looks up matching documents, and their text is
#     joined into plain prose. Without this step the prompt would receive the
#     Python repr of a list of Document objects (metadata, escaped newlines).
#   - "question": RunnablePassthrough forwards the input unchanged
#     (here "Describe Victory Mansions").
# The filled-in prompt is then sent to the chat model.
chain = (
    {
        "context": retriver | _format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
)

# invoke -> the argument is used both as the retriever query (producing the
# context) and as the question; the model's reply is the cell's result.
chain.invoke("Describe Victory Mansions")




음 tags=['Chroma', 'CacheBackedEmbeddings'] vectorstore=<langchain.vectorstores.chroma.Chroma object at 0x320588650>


AIMessage(content='Victory Mansions is a building from which one can see all four of the major Ministries in London. It is not described in detail in the provided context, but it serves as a vantage point to observe the surrounding architecture, which is dwarfed by the size of the Ministries. The context does not provide further information about its characteristics or significance.')