In [None]:
# Install the third-party packages used by this notebook into the kernel's
# environment. -U upgrades each one to the newest release; no versions are
# pinned, so results may differ between runs made at different times.
%pip install -U wikipedia boto3 llama-index-llms-bedrock llama-index-graph-stores-neptune llama-index-embeddings-bedrock llama-index-readers-file


In [None]:
import wikipedia
import os

# Download one Japanese-Wikipedia article and save its text under ./data,
# the directory the document-loading cell reads from.
wikipedia.set_lang("ja")
state = "ドラゴンボール"  # title of the Wikipedia page to fetch
directory = "./data"
short_summary = False  # True: save only the summary; False: save the full content

os.makedirs(directory, exist_ok=True)
try:
    # Look the page up once and reuse the object instead of repeating the
    # same lookup for the title and again for the body text.
    page = wikipedia.page(state)
    title = page.title.lower().replace(" ", "_")
    content = (page.summary if short_summary else page.content).strip()
    filename = os.path.join(directory, f"{title}.txt")
    with open(filename, "w", encoding="utf-8") as f:
        f.write(content)
    print(f"Saving {filename}")
except Exception as exc:
    # Best effort: do not abort the notebook, but say what actually failed
    # (lookup, network, or file write) instead of a bare "Error".
    print(f"Error: {type(exc).__name__}: {exc}")

In [None]:
import os

import boto3
import wikipedia
from IPython.display import Markdown, display
from llama_index.core import (
    KnowledgeGraphIndex,
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)
from llama_index.embeddings.bedrock import BedrockEmbedding
from llama_index.graph_stores.neptune import (
    NeptuneAnalyticsGraphStore,
    NeptuneDatabaseGraphStore,
)
from llama_index.llms.bedrock import Bedrock

# Build one Bedrock runtime client pinned to us-east-1 and hand it to both
# the LLM and the embedding model. Without client=..., each wrapper creates
# its own client from the ambient AWS configuration and the region chosen
# here is silently ignored.
session = boto3.Session()
config = boto3.session.Config(region_name='us-east-1')
client = session.client('bedrock-runtime', config=config)

llm = Bedrock(
    model="anthropic.claude-3-sonnet-20240229-v1:0",
    client=client,
)

embed_model = BedrockEmbedding(
    model="amazon.titan-embed-text-v1",
    client=client,
)

# Register the models and chunk size as the LlamaIndex global defaults.
Settings.llm = llm
Settings.embed_model = embed_model
Settings.chunk_size = 512

In [None]:
# Load the files saved under ./data as the documents to index.
documents = SimpleDirectoryReader(
    input_dir="./data",
).load_data()

In [None]:
# A host/port pair addresses a Neptune Database cluster endpoint, which is
# what NeptuneDatabaseGraphStore connects to. NeptuneAnalyticsGraphStore is
# addressed by a graph identifier instead and has no host/port parameters.
graph_store = NeptuneDatabaseGraphStore(
    host="<Neptuneクラスターの書き込みエンドポイント名>",
    port=8182,
)

In [None]:
# Use the Neptune graph store as the backing store for the index.
storage_context = StorageContext.from_defaults(
    graph_store=graph_store,
)

# Build the knowledge-graph index from the loaded documents, passing
# max_triplets_per_chunk=2 to limit extraction per chunk.
index = KnowledgeGraphIndex.from_documents(
    documents, storage_context=storage_context, max_triplets_per_chunk=2
)

In [None]:
storage_context = StorageContext.from_defaults(
    graph_store=graph_store,
)

# This takes a fair amount of time.
# NOTE(review): this rebinds `index`, replacing the one the previous cell
# built with max_triplets_per_chunk=2 — confirm that both builds are intended.
index = KnowledgeGraphIndex.from_documents(
    documents, storage_context=storage_context
)

In [None]:
# Write the index's storage context to ./persist on local disk.
index.storage_context.persist(persist_dir="./persist")

In [None]:
# Ask the index a question and render the answer as Markdown.
query_engine = index.as_query_engine()
response = query_engine.query("悟空に友達とお嫁さんはいるの？")
display(Markdown(f"{response}"))

In [None]:
# Ask a second question with a freshly created query engine and render the
# answer as Markdown.
query_engine = index.as_query_engine()
response = query_engine.query("悟空がカメハウスで共に暮らしていたのは誰？")
display(Markdown(f"{response}"))