<a href="https://colab.research.google.com/github/Jieunlee-svg/automation/blob/main/%5B%EC%B1%97%EB%B4%87_%ED%95%99%EC%8A%B5_%EC%8B%A4%ED%8C%A8%5D_250830_%EB%B2%A1%ED%84%B0_%EB%8D%B0%EC%9D%B4%ED%84%B0%EB%B2%A0%EC%9D%B4%EC%8A%A4_%EC%97%B0%EB%8F%99_%EB%AC%B4%EB%A3%8C_%EB%9D%BC%EC%9D%B4%EB%B8%8C%EB%9F%AC%EB%A6%AC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# 0) 충돌 가능 패키지 제거
!pip uninstall -y -q google-generativeai google-ai-generativelanguage google-genai google-cloud-aiplatform

# 1) 호환 버전 "강제" 설치(의존성 무시)
!pip install -q --no-deps "google-ai-generativelanguage==0.6.15"
!pip install -q --no-deps "google-generativeai==0.8.4"

# 2) LangChain 어댑터가 필요하면
!pip install -q langchain-google-genai

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-generativeai 0.8.4 requires google-ai-generativelanguage==0.6.15, but you have google-ai-generativelanguage 0.6.18 which is incompatible.[0m[31m
[0m

In [None]:
# 임베딩 (로컬 실행)
!pip install -Uqq sentence-transformers

# 벡터 DB
!pip install -Uqq chromadb

# LangChain 본체 + 커뮤니티 모듈
!pip install -Uqq langchain langchain-community

# PDF 텍스트 추출
!pip install -Uqq pypdf

# 토큰 계산
!pip install -Uqq tiktoken

# Gemini API 연결 (LangChain ↔ Gemini)
!pip install -Uqq langchain-google-genai

In [None]:
# Environment setup: make GOOGLE_API_KEY available to the Gemini clients.
# The key is never written into the notebook source. If it is already present
# in the environment it is reused; otherwise it is requested interactively
# with getpass, which does not echo the typed value into the cell output.
import os
from getpass import getpass

if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass("GOOGLE_API_KEY: ")

In [None]:
#PDF 텍스트 추출
from pypdf import PdfReader

def load_pdf(path):
    """Return the text of every page of the PDF at ``path`` as one string.

    The file is opened with ``PdfReader``; each page's ``extract_text()``
    result is taken in page order and the pieces are joined with newlines.
    """
    pdf = PdfReader(path)
    return "\n".join([pg.extract_text() for pg in pdf.pages])

from google.colab import drive

# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')

# NOTE(review): this path points directly under /content, not under the
# /content/drive mount point above. Confirm the PDF is uploaded to the
# runtime itself, or adjust the path to the Drive location.
pdf_path = "/content/[자료] 오픈채팅방 자주묻는 질문 - 시트2.pdf"
text = load_pdf(pdf_path)

# Preview only the first 500 characters of the extracted text.
print(text[:500])

In [None]:
# Document splitting (chunking)
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunks of up to 500 units with 100 units of overlap between neighbours;
# length is measured with the built-in len(), i.e. in characters.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100, length_function=len)

# `docs` is the list of text chunks produced from the extracted PDF text.
docs = splitter.split_text(text)
print(len(docs), "chunks")

In [None]:
from sentence_transformers import SentenceTransformer
from langchain_community.embeddings import HuggingFaceEmbeddings

# Load the embedding model.
# The source PDF name and the question asked of it later in this notebook are
# Korean, so a multilingual sentence-embedding model is used here. The
# previously used all-MiniLM-L6-v2 is an English-trained model and gives weak
# similarity scores for Korean text, which hurts retrieval quality.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
)

# Embed every chunk once up front.
# NOTE(review): the vector store built later embeds the texts again through
# `embedding_model`; `doc_embeddings` is kept for inspection / other cells.
doc_embeddings = embedding_model.embed_documents(docs)

In [None]:
# Store the chunks in ChromaDB
import chromadb
from langchain_community.vectorstores import Chroma

# Initialise a local ChromaDB client.
chroma_client = chromadb.Client()

# Build the "my_docs" collection from the text chunks; vectors are produced
# by `embedding_model`.
vectorstore = Chroma.from_texts(
    docs,
    embedding_model,
    collection_name="my_docs",
    client=chroma_client,
)

In [None]:
# Load the Gemini 2.5 Flash model
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI

# Hand the API key from the environment to the google.generativeai client.
# Indexing os.environ raises KeyError when GOOGLE_API_KEY is not set.
genai.configure(
    api_key=os.environ["GOOGLE_API_KEY"],
)

# LangChain chat-model wrapper for Gemini.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
)

In [None]:
# Build the RAG chain
from langchain.chains import RetrievalQA

# Retriever over the vector store, configured with k=3 results per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# Retrieval-augmented QA chain that also returns the retrieved documents.
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)

# Ask a sample question against the indexed document.
query = "이 문서에서 결제 방법은 무엇인가요?"
result = qa_chain.invoke(query)

# Show the answer plus the first 50 characters of each retrieved chunk.
source_previews = [src.page_content[:50] for src in result["source_documents"]]
print("Answer:", result["result"])
print("Sources:", source_previews)