In [4]:
import os
from dotenv import load_dotenv
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI 


load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [5]:
# Groq API를 사용하는 ChatOpenAI 인스턴스 생성
llm = ChatOpenAI(
    #api_key=OPENAI_API_KEY,
    base_url="https://api.groq.com/openai/v1",  # Groq API 엔드포인트
    model="meta-llama/llama-4-scout-17b-16e-instruct",
    #model="mistral-saba-24b",
    temperature=0
)

### Project 4-1. OpenAI에서 Ollama Qwen3로 RAG 시스템 변경하기

In [6]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama import ChatOllama
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

print("==> 1. 문서 로딩 → PDF 읽기...")
loader = PyPDFLoader('../data/tutorial-korean.pdf')
documents = loader.load()
print(f"  총 {len(documents)}페이지 로드 완료")

print("==> 2. 문서 분할 → 작은 청크로 나누기")
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,        # 로컬 모델에 맞게 조금 더 작게!
    chunk_overlap=150,     # 중복 조금만 남기기
    separators=["\n\n", "\n", ".", " ", ""]
)
chunks = text_splitter.split_documents(documents)
print(f"  {len(chunks)}개 청크 생성 완료")

print("==> 3. 벡터화 → 임베딩으로 변환")
EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    model_kwargs={'device': 'cpu'},  # GPU면 'cuda'
    encode_kwargs={'normalize_embeddings': True}
)

print("==> 4. 저장 → FAISS 벡터스토어에 저장")
vectorstore = FAISS.from_documents(chunks, embeddings)

print("===> 5. 검색 → 질문과 유사한 문서 찾기")
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4}  # 최적화: 4개만 반환
)

print("===> 6. 생성 → LLM으로 답변 생성")
llm = ChatOllama(
    model="qwen3:1.7b",
    base_url="http://localhost:11434",
    temperature=0.1,
    num_predict=1500
)

# ↓↓↓ RAG QA 체인 구성 및 사용 예시 ↓↓↓
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
)

question = "Qwen3로 바꾼 RAG 시스템의 특징은?"
result = qa({"query": question})

print(result["result"])


==> 1. 문서 로딩 → PDF 읽기...
  총 39페이지 로드 완료
==> 2. 문서 분할 → 작은 청크로 나누기
  76개 청크 생성 완료
==> 3. 벡터화 → 임베딩으로 변환
==> 4. 저장 → FAISS 벡터스토어에 저장
===> 5. 검색 → 질문과 유사한 문서 찾기
===> 6. 생성 → LLM으로 답변 생성
<think>
Okay, let's see. The user is asking about the characteristics of the RAG system rewritten with Qwen3. But wait, the context provided is about BlueJ, a Java development environment, and some installation instructions. There's no mention of RAG or Qwen3 in the given text.

So, the user might be referring to a different topic. The context is about programming tutorials, BlueJ, Java classes, compiling, and installing BlueJ. There's no information about RAG (Retrieval-Augmented Generation) systems or Qwen3. Since the question is outside the provided context, I should inform the user that I don't have the necessary information to answer their question.
</think>

I don't know the answer to this question because the provided context is about BlueJ, a Java development environment, and does not mention anyt