<a href="https://colab.research.google.com/github/JSJeong-me/AI-Innovation-2024/blob/main/NLP/4-5-RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langchain langchain_community openai chromadb tiktoken --quiet

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.document_loaders import TextLoader

In [None]:
import os

from google.colab import userdata

# Read the key from Colab's secret store once and publish it through the
# environment; later cells construct the OpenAI clients without passing a key.
# (The previous extra userdata.get(...) call discarded its result.)
os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')
api_key = os.getenv("OPENAI_API_KEY")

In [None]:
!mkdir data
!wget https://github.com/JSJeong-me/GPT-Web/raw/main/images/paul_graham_essay.txt -O ./data/paul_graham_essay.txt

In [None]:
# Step 1: Load the document
# Put the text file you want to analyze here.
# No visible cell creates the "-cat" variant (the download cell saves
# paul_graham_essay.txt), so use it when present and otherwise fall back to the
# downloaded file instead of failing on a missing path.
essay_path = './data/paul_graham_essay-cat.txt'
if not os.path.exists(essay_path):
    essay_path = './data/paul_graham_essay.txt'
loader = TextLoader(essay_path)
documents = loader.load()


In [None]:
# Number of documents returned by the loader.
len(documents)

In [None]:
# Preview the first 100 characters of the first document's text.
documents[0].page_content[:100]

In [None]:
# Step 2: Split the text
from langchain.text_splitter import CharacterTextSplitter  # import the CharacterTextSplitter class

# Split the document into chunks targeting about 1000 characters each.
# chunk_size is measured in characters by default, not tokens. The previous value
# of 100000 contradicted the stated size of 1000 and left the essay essentially
# unsplit, which defeats chunk-level retrieval.
# NOTE: `documents` is rebound here, so re-running this cell splits the
# already-split chunks rather than the originally loaded document.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(documents)

In [None]:
# Number of chunks produced by the splitting step.
len(documents)

In [None]:
# Step 2: Create the embeddings and the Chroma vector store
# No key is passed explicitly here.
embeddings = OpenAIEmbeddings()

In [None]:
# Embed the chunks and index them in a Chroma vector store.
vectorstore = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
)

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI

# Step 3: Configure the OpenAI LLM
# The model is created through ChatOpenAI (the wrapper for chat models); an
# earlier attempt with OpenAI(model="gpt-4") used the incorrect API for the model.
llm = ChatOpenAI(
    model="gpt-4.1",
    temperature=0.0,
    top_p=0.3,
)

In [None]:
# Step 4: Build the retrieval + QA chain
# The chain answers with `llm`, using chunks fetched by the vector store's retriever.
# Chain type is "stuff"; alternatives noted by the author: map reduce, rerank.
qa = RetrievalQA.from_chain_type(
    retriever=vectorstore.as_retriever(),
    chain_type="stuff",
    llm=llm,
)

In [None]:

# Step 5: Answer a question
# The question text is stored in `query` for the QA chain call in the next cell.
query = "How many legs does a cat have?"

In [None]:

# Chain.run is deprecated; invoke takes the input dict (RetrievalQA's input key
# is "query") and returns a dict whose "result" entry holds the answer text.
answer = qa.invoke({"query": query})["result"]

In [None]:

# Display the answer produced by the QA chain.
print(f"Answer: {answer}")


In [None]:
# Step 5: Answer a question
query = "Who is the author of the essay?"
# Chain.run is deprecated; invoke takes the input dict (RetrievalQA's input key
# is "query") and returns a dict whose "result" entry holds the answer text.
answer = qa.invoke({"query": query})["result"]
print(f"Answer: {answer}")

In [None]:
# Step 5: Answer a question
# Fixed the typo in the question text ("ofessay") that was being sent to the model.
query = "What is the summary of the essay?"
# Chain.run is deprecated; invoke takes the input dict (RetrievalQA's input key
# is "query") and returns a dict whose "result" entry holds the answer text.
answer = qa.invoke({"query": query})["result"]
print(f"Answer: {answer}")