In [None]:
!pip install langchain

In [None]:
!pip install chromadb

In [None]:
!pip install tiktoken

In [None]:
!pip install openai

In [None]:
import os
from getpass import getpass

# Keep the secret out of the notebook: reuse OPENAI_API_KEY when it is already
# set in the environment, otherwise ask for it interactively (getpass does not
# echo the input, and nothing is written into the notebook file).
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass("OpenAI API key: ")

In [None]:
from langchain.llms import OpenAI
from langchain.indexes import VectorstoreIndexCreator
from langchain.document_loaders import TextLoader

# Wrap the local state_of_the_union.txt file in a text loader (read as UTF-8).
loader = TextLoader('./state_of_the_union.txt', encoding='utf8')

# Build the index in two steps: a creator left on its default settings,
# then ingestion of everything the loader yields.
index_creator = VectorstoreIndexCreator()
index = index_creator.from_loaders([loader])

# Ask the question; the bare name on the last line is what the cell displays.
query = "What did the president say about Ketanji Brown Jackson"
answer = index.query(query)
answer

In [None]:
# Same question as before, but ask the index to report its sources as well.
# Relies on `index` having been built by an earlier cell.
query = "What did the president say about Ketanji Brown Jackson"
answer_with_sources = index.query_with_sources(query)
answer_with_sources

In [None]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA

# Load the data as documents (reuses the `loader` bound by an earlier cell).
documents = loader.load()

# Split the text: chunk_size=1000, no overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
texts = text_splitter.split_documents(documents)

# Choose the embedding backend, then embed the chunks and build the index.
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(documents=texts, embedding=embeddings)

# Expose the store as a retriever and wire it into a "stuff" QA chain
# driven by the completion-style OpenAI model.
retriever = db.as_retriever()
llm = OpenAI()
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

# Run the query; the result of the last expression is shown by the notebook.
query = "What did the president say about Ketanji Brown Jackson"
qa.run(query)

In [None]:
# Text loader over the local state_of_the_union.txt file (read as UTF-8).
loader = TextLoader('./state_of_the_union.txt', encoding='utf8')

# Spell out every component of the creator explicitly: the vector store
# class, the embedding model, and the splitter (chunk_size=1000, no overlap).
index_creator = VectorstoreIndexCreator(
    vectorstore_cls=Chroma,
    embedding=OpenAIEmbeddings(),
    text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0),
)
index = index_creator.from_loaders([loader])

# Ask the question; the bare name on the last line is what the cell displays.
query = "What did the president say about Ketanji Brown Jackson"
answer = index.query(query)
answer

## 색인 저장과 적재

In [None]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

# Directory in which Chroma keeps the index on disk
persist_directory = 'db'

# Load the data as documents (reuses the `loader` bound by an earlier cell)
documents = loader.load()

# Split the text: chunk_size=1000, no overlap between chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

# Choose the embedding backend
embeddings = OpenAIEmbeddings()

# Embed the chunks and build the index inside persist_directory
db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory)

# Flush the index to disk explicitly: the notebook kernel keeps `db` alive, so
# nothing else in this cell forces a write.
# NOTE(review): older chromadb releases only write on persist(); newer ones
# persist automatically and treat this call as a no-op — confirm against the
# installed versions.
db.persist()

# Create the retriever and the chat-model QA chain
retriever = db.as_retriever()
qa = RetrievalQA.from_chain_type(llm=ChatOpenAI(), chain_type="stuff", retriever=retriever)

# Run the query
query = "What did the president say about Ketanji Brown Jackson"
qa.run(query)

## 적재기 사용하기

### 웹 페이지에서 정보 적재하기

In [None]:
!pip install unstructured

In [None]:
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredURLLoader

# Korean Wikipedia pages to index: "History of Korea" and "History of the world"
# (the article titles are percent-encoded in the URLs).
urls = [
    'https://ko.wikipedia.org/wiki/%ED%95%9C%EA%B5%AD%EC%9D%98_%EC%97%AD%EC%82%AC',
    'https://ko.wikipedia.org/wiki/%EC%84%B8%EA%B3%84%EC%9D%98_%EC%97%AD%EC%82%AC'
]

# Load both pages as documents.
loader = UnstructuredURLLoader(urls=urls)
documents = loader.load()

# Split recursively: chunk_size=500, no overlap between chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=500)
texts = text_splitter.split_documents(documents)

# Embed the chunks into a Chroma store (no persist_directory is given here)
# and expose the store as a retriever.
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(documents=texts, embedding=embeddings)
retriever = db.as_retriever()

# QA chain that stuffs the retrieved chunks into a chat-model prompt.
chat_llm = ChatOpenAI()
qa = RetrievalQA.from_chain_type(llm=chat_llm, chain_type="stuff", retriever=retriever)

In [None]:
# The question (in Korean) asks what Yi Sun-sin did and what was happening in
# the rest of the world during the same period.
question = "이순신은 어떤 일을 했습니까? 그리고 동시기에 세계에서는 어떤 일이 일어났는지도 설명해주세요."
qa.run(question)

### PDF 문서에서 정보 적재하기

In [None]:
!pip install pypdf

In [None]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA

# Load the local PDF as documents.
loader = PyPDFLoader("./attention.pdf")
documents = loader.load()

# Split recursively: chunk_size=1000, no overlap between chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

# Embed the chunks into a Chroma store and expose it as a retriever.
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(texts, embeddings)
retriever = db.as_retriever()

# Build the chat-model QA chain. The closing parenthesis was missing here,
# which made the whole cell a SyntaxError and left `qa` bound to the previous chain.
qa = RetrievalQA.from_chain_type(llm=ChatOpenAI(), chain_type="stuff", retriever=retriever)

In [None]:
# Ask for a child-friendly explanation of attention, answered in Korean.
question = "Please explain about Attention. Please explain it in a way that even a child can understand. Please give a response in Korean."
qa.run(question)

In [None]:
# Point the PDF loader at a remote arXiv URL instead of a local file path.
# This rebinds `loader`, replacing the loader created for ./attention.pdf.
loader = PyPDFLoader("https://arxiv.org/pdf/1706.03762.pdf")