In [3]:
# --- 0. Dependencies (install once if missing) ---
# pip install langchain langchain-openai langchain-community chromadb tiktoken beautifulsoup4

from langchain.chains import LLMChain, RetrievalQA
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# 1) Fetch the source document
RAW_URL = "/".join(
    (
        "https://gist.githubusercontent.com",
        "serranoarevalo",
        "5acf755c2b8d83f1707ef266b82ea223",
        "raw",
        "",  # keeps the trailing slash of the raw-gist URL
    )
)
loader = WebBaseLoader(RAW_URL)
docs = loader.load()  # list of Document objects

# 2) Split the document into overlapping chunks
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
chunks = splitter.split_documents(docs)

# 3) Embed the chunks and index them in a Chroma vector store
embeddings = OpenAIEmbeddings()
vectordb = Chroma.from_documents(documents=chunks, embedding=embeddings)
retriever = vectordb.as_retriever(search_kwargs=dict(k=4))  # k=4 results requested per query

# 4) "Stuff" combine-documents chain: every retrieved chunk is rendered with
#    doc_prompt, the results are concatenated into {context}, and the whole
#    thing is sent to the LLM in a single prompt.
doc_prompt = PromptTemplate(
    input_variables=["page_content"],
    template="{page_content}",  # insert each chunk's text verbatim
)

# Prompt for the answering LLM. Besides the stuffed documents ({context}) and
# the user's question ({question}) it exposes a {chat_history} slot so that
# earlier turns are visible to the model; follow-up questions that use a
# pronoun ("what did *he* write ...") depend on this.
qa_prompt = PromptTemplate(
    input_variables=["chat_history", "context", "question"],
    template=(
        "Answer the question using only the context below. "
        "If the context does not contain the answer, say that you don't know.\n\n"
        "Conversation so far:\n{chat_history}\n\n"
        "Context:\n{context}\n\n"
        "Question: {question}\n"
        "Answer:"
    ),
)

# 5) RAG QA with ConversationBufferMemory
# The memory is attached to the combine-documents chain, not to RetrievalQA:
# RetrievalQA hands only `input_documents` and `question` to the inner chain,
# so memory on the outer chain would be recorded but never reach the prompt.
# - input_key="question" names the input to store as the human turn (the
#   chain also receives `input_documents`, so the key must be explicit).
# - return_messages stays at its default (False) so the history is rendered
#   as plain text, which is what a string PromptTemplate expects.
memory = ConversationBufferMemory(memory_key="chat_history", input_key="question")

# StuffDocumentsChain has no `llm` field; the model must be wrapped in an
# LLMChain (model + prompt) and passed as `llm_chain`.
combine_docs_chain = StuffDocumentsChain(
    llm_chain=LLMChain(
        llm=ChatOpenAI(model_name="gpt-4o-mini", temperature=0),
        prompt=qa_prompt,
    ),
    document_prompt=doc_prompt,
    document_variable_name="context",  # must match the {context} slot in qa_prompt
    memory=memory,
)

rag_qa = RetrievalQA(
    retriever=retriever,
    combine_documents_chain=combine_docs_chain,
    verbose=True,
)

# 6) Run the queries in order; later questions rely on the stored history.
questions = [
    "Aaronson 은 유죄인가요?",
    "그가 테이블에 어떤 메시지를 썼나요?",
    "Julia 는 누구인가요?",
]

for q in questions:
    # RetrievalQA takes the question under "query" and returns the answer under "result".
    result = rag_qa.invoke({"query": q})
    print(f"Q: {q}\nA: {result['result']}\n")


ImportError: Could not import chromadb python package. Please install it with `pip install chromadb`.