In [None]:
# 필요한 라이브러리 설치
!pip install langchain
!pip install huggingface_hub transformers datasets
!pip install python-dotenv
!pip install langchainhub pypdf
!pip install sentence_transformers
!pip install chromadb

In [None]:
# 환경 변수 설정 및 필수 모듈
import os
from dotenv import load_dotenv
load_dotenv()

# HuggingFace의 API 토큰 설정
huggingface_api_token = input("Enter your HuggingFace API Token: ")
os.environ['HUGGINGFACEHUB_API_TOKEN'] = huggingface_api_token

In [3]:
# PDF 문서 처리 관련 모듈
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter

# 임베딩 및 검색 데이터베이스 관련 모듈
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# LangChain 관련 설정
from langchain import hub
from langchain import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFaceHub
from langchain.schema.runnable import RunnablePassthrough

In [4]:
# PDF 파일 로드 및 처리
loader = PyPDFLoader("/content/100 Motivational Quotes That WillInspire You to Succeed.pdf")
document = loader.load()
print(document[0].page_content[:200])  # 내용 추출

# 텍스트 분할
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
texts = text_splitter.split_documents(document)

# 임베딩 및 Chroma DB에 저장
embeddings = HuggingFaceEmbeddings()
docsearch = Chroma.from_documents(texts, embeddings)
retriever = docsearch.as_retriever()

100 Motivational Quotes That Will Inspire You to Succeed  
Everyone needs some inspiration, and these motivational quotes will give you the edge you 
need to create your success. So read on and let th


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [10]:
# HuggingFace Repository ID
repo_id = 'mistralai/Mistral-7B-v0.1'

# langchain hub 에서 Prompt 다운로드 예시
# https://smith.langchain.com/hub/rlm/rag-prompt
rag_prompt = hub.pull("rlm/rag-prompt")

# HuggingFaceHub 객체 생성
llm = HuggingFaceHub(
    repo_id=repo_id,
    model_kwargs={"temperature": 0.2,
                  "max_length": 1024}
)

# pipe operator를 활용한 체인 생성
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
)

In [11]:
response = rag_chain.invoke("What did 'Jhon wooden' said?")
answer_start = response.find("Answer:")
answer = response[answer_start:] if answer_start != -1 else "No answer found in the response."
print(answer)

Answer: Jhon wooden said: "The ones who are crazy enough to think they can chang e the world, are the ones who do."


In [9]:
response = rag_chain.invoke("Who said this 'I have not failed. I've just found 10,000 ways that won't work.'?")
answer_start = response.find("Answer:")
answer = response[answer_start:] if answer_start != -1 else "No answer found in the response."
print(answer)

Answer: I have not failed. I've just found 10,000 ways that won't work. --Thomas A. Edison
Human: You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: Who said this 'I have not
