## 장문의 외부 데이터와의 연계 체험하기

In [None]:
!pip install llama-index

In [None]:
!git clone https://github.com/run-llama/llama_index.git

In [None]:
# List the example datasets contained in the cloned repository.
!ls llama_index/docs/docs/examples/data/

In [None]:
!cp -r llama_index/docs/docs/examples/data/paul_graham data

In [None]:
import os

# Publish the OpenAI credential through the process environment.
# The value below is a placeholder ("the API key you were issued");
# replace it with a real key before running the later cells.
os.environ.update({'OPENAI_API_KEY': "발급받은 API 키"})

In [None]:
!pip install openai

In [None]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

# Read every file found in ./data, then build a vector index over the
# resulting documents.
documents = SimpleDirectoryReader(input_dir='data').load_data()
index = VectorStoreIndex.from_documents(documents=documents)

In [None]:
# Wrap the index in a query engine and ask a question about the loaded text.
query_engine = index.as_query_engine()
response = query_engine.query(
    "What did the author do growing up?"
)
print(response)

In [None]:
# Save the index to disk. "./storage" is the library's default location,
# spelled out here so it visibly matches the directory the index is
# reloaded from.
index.storage_context.persist(persist_dir="./storage")

In [None]:
from llama_index.core import StorageContext, load_index_from_storage

# Rebuild the storage context from the directory the index was persisted to.
storage_context = StorageContext.from_defaults(persist_dir="./storage")
# Load the index back from that storage context.
index = load_index_from_storage(storage_context)
# Recreate the query engine from the reloaded index; otherwise the queries
# that follow would keep using the engine built from the earlier in-memory
# index and the reloaded one would never be exercised.
query_engine = index.as_query_engine()

In [None]:
# Ask the question in Korean and request that the answer be in Korean.
response = query_engine.query(
    "저자는 어떤 성장 과정을 거쳐왔나요? 한국어로 답변 부탁드립니다."
)
print(response)

In [None]:
# Ask the question in English while still requesting a Korean answer.
response = query_engine.query(
    "What did the author do growing up? Please give a response in Korean."
)
print(response)

In [None]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

# Build a second, independent index from the files in ./data2.
documents = SimpleDirectoryReader(input_dir='data2').load_data()
index2 = VectorStoreIndex.from_documents(documents=documents)

In [None]:
# Save the second index to its own directory instead of the default location.
index2.storage_context.persist(persist_dir='index_constitution')

In [None]:
# Query engine backed by the second index (built from ./data2).
query_engine2 = index2.as_query_engine()

In [None]:
# Ask for a list of the important rights held by the people, together with
# the article number in which each one appears.
response = query_engine2.query(
    "국민이 가진 권리 중 중요한 것을 목록으로 나열해주세요. 그것이 몇 조에 있는 항목인지도 알려주세요."
)
print(response)

## 적재기 사용하기

### 웹 페이지에서 정보 적재하기

In [None]:
from llama_index.core import VectorStoreIndex, download_loader

# Look up the web-page reader class by name and instantiate it.
# NOTE(review): download_loader appears to be deprecated in recent
# llama-index releases in favour of installing the reader package
# directly - confirm against the installed version.
BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")
loader = BeautifulSoupWebReader()

# Fetch the page(s) to index; the reader takes a list of URLs.
documents = loader.load_data(
    urls=[
        'https://www.law.go.kr/법령/대한민국헌법',
    ]
)

In [None]:
# Index the downloaded web documents and expose them through a query engine.
indexw = VectorStoreIndex.from_documents(documents=documents)
query_engine_w = indexw.as_query_engine()

In [None]:
# Ask (in Korean) for a summary of the indexed web page.
response = query_engine_w.query(
    "요약을 부탁합니다."
)
print(response)

### PDF 문서에서 정보 적재하기

In [None]:
!pip install pypdf

In [None]:
from pathlib import Path

from llama_index.core import VectorStoreIndex
# Import the PDF reader directly rather than resolving it at runtime through
# the deprecated download_loader() helper. The class lives in the
# llama-index-readers-file package, which the llama-index bundle installs.
from llama_index.readers.file import PDFReader

# Parse the local PDF into documents ready for indexing.
loader = PDFReader()
documents = loader.load_data(file=Path('./attention.pdf'))

In [None]:
# Index the parsed PDF documents and expose them through a query engine.
index_pdf = VectorStoreIndex.from_documents(documents=documents)
query_engine_pdf = index_pdf.as_query_engine()

In [None]:
# Request a child-friendly summary of the PDF.
response = query_engine_pdf.query(
    "Please summarize this. Please explain it in a way that even a child can understand."
)
print(response)

In [None]:
# Request a child-friendly explanation of attention heads from the PDF.
response = query_engine_pdf.query(
    "Explain about attention heads. Please explain it in a way that even a child can understand."
)
print(response)