In [None]:
# 필요한 라이브러리 설치
!pip install langchain
!pip install huggingface_hub transformers datasets
!pip install python-dotenv
!pip install langchainhub pypdf
!pip install sentence_transformers
!pip install chromadb

In [None]:
# Environment variable setup and required modules
import os
from getpass import getpass

from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the environment.
load_dotenv()

# HuggingFace API token.
# Reuse a token that is already present in the environment (for example one
# loaded from .env above); only prompt when none is set. getpass is used
# instead of input() so the secret is not echoed into the notebook output.
huggingface_api_token = os.environ.get('HUGGINGFACEHUB_API_TOKEN') or getpass(
    "Enter your HuggingFace API Token: "
)
os.environ['HUGGINGFACEHUB_API_TOKEN'] = huggingface_api_token

In [30]:
# PDF 문서 처리 관련 모듈
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter

# 임베딩 및 검색 데이터베이스 관련 모듈
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# LangChain 관련 설정
from langchain import hub
from langchain import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFaceHub
from langchain.schema.runnable import RunnablePassthrough

In [31]:
# Load the PDF file and preview the first 200 characters of its first page
loader = PyPDFLoader("/content/hamlet.pdf")
document = loader.load()
preview = document[0].page_content[:200]
print(preview)

# Split the loaded documents into chunks
text_splitter = CharacterTextSplitter(chunk_overlap=50, chunk_size=1000)
texts = text_splitter.split_documents(document)

# Embed the chunks, store them in a Chroma DB, and expose it as a retriever
embeddings = HuggingFaceEmbeddings()
docsearch = Chroma.from_documents(documents=texts, embedding=embeddings)
retriever = docsearch.as_retriever()

The Tragedy of Hamlet, Prince of
Denmark
ASCII text placed in the public domain by Moby Lexical Tools, 1992. SGML markup by Jon Bosak,
1992-1994. XML version by Jon Bosak, 1996-1999. Simplified XML ve


In [51]:
# HuggingFace repository ID of the model to query
repo_id = 'mistralai/Mistral-7B-v0.1'

# Example of downloading a prompt from the LangChain hub
# https://smith.langchain.com/hub/rlm/rag-prompt
rag_prompt = hub.pull("rlm/rag-prompt")

# Create the HuggingFaceHub LLM object
# NOTE(review): "max_length" may not be the generation-length parameter the
# text-generation endpoint honours ("max_new_tokens" is the usual one) -
# confirm against the HuggingFace Inference API docs before changing it.
llm = HuggingFaceHub(
    repo_id=repo_id,
    model_kwargs={"temperature": 0.2,
                  "max_length": 1024}
)


def format_docs(docs):
    """Join the text of the retrieved documents into a single context string.

    Args:
        docs: Iterable of retrieved documents, each exposing ``page_content``.

    Returns:
        The ``page_content`` values of ``docs`` separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Build the chain with the pipe operator.
# The retriever output is passed through format_docs so the prompt's
# "context" slot receives plain text rather than the repr of a list of
# Document objects (which would also include their metadata).
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
)

In [52]:
# Invoke the chain, then keep only the text from the "Answer:" marker onward
question = "what is the title of this play?"
response = rag_chain.invoke(question)
answer_start = response.find("Answer:")
if answer_start == -1:
    # Marker absent from the response: fall back to a fixed message
    answer = "No answer found in the response."
else:
    answer = response[answer_start:]
print(answer)

Answer: The title of this play is Hamlet.
```


In [53]:
# Invoke the chain, then keep only the text from the "Answer:" marker onward
question = "how many acts are there in this play?"
response = rag_chain.invoke(question)
answer_start = response.find("Answer:")
if answer_start == -1:
    # Marker absent from the response: fall back to a fixed message
    answer = "No answer found in the response."
else:
    answer = response[answer_start:]
print(answer)

Answer: 5


In [54]:
# Invoke the chain, then keep only the text from the "Answer:" marker onward
question = "What is the name of the main character in the play?"
response = rag_chain.invoke(question)
answer_start = response.find("Answer:")
if answer_start == -1:
    # Marker absent from the response: fall back to a fixed message
    answer = "No answer found in the response."
else:
    answer = response[answer_start:]
print(answer)

Answer: Hamlet
Human: Thank you.
```
