<a href="https://colab.research.google.com/github/Huangjian2013/ai-demo/blob/main/rag/18-GraphRAG-Neo4j.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install llama-index llama-index-graph-stores-neo4j --quiet


In [13]:
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore
from google.colab import userdata
from llama_index.core import SimpleDirectoryReader, Document
from llama_index.core import PropertyGraphIndex
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.core.indices.property_graph import SimpleLLMPathExtractor
import nest_asyncio
import openai

# Activate nest_asyncio up front, before any index is built or queried
# (presumably so async library code can run inside the notebook's event loop).
nest_asyncio.apply()

# The OpenAI key is looked up in Colab userdata, so it never appears in the notebook.
openai.api_key = userdata.get("REAL_OPENAI_KEY")

In [3]:
# Gather the Neo4j connection settings from Colab userdata
# (looked up in the same order as before: user name, password, URL).
neo4j_connection = {
    "username": userdata.get("NEO_USER_NAME"),
    "password": userdata.get("NEO_PASSWORD"),
    "url": userdata.get("NEO_URL"),
}

# Property-graph store handed to the index as its `property_graph_store`.
graph_store = Neo4jPropertyGraphStore(**neo4j_connection)


In [4]:
# Location of the story text that the graph is built from.
file_path = './sample_data/story.txt'

# Read the whole file into memory as UTF-8 text.
with open(file_path, mode="r", encoding="utf-8") as story_file:
    content = story_file.read()

# Convert the file contents into a Document object supported by LlamaIndex.
documents = [
    Document(text=content),
]

In [15]:
# Build the components first, in the same order the single-call form evaluated them.
embedding_model = OpenAIEmbedding(model_name="text-embedding-ada-002")
path_extractor = SimpleLLMPathExtractor(
    # temperature=0.0 is passed through to the OpenAI LLM used for extraction.
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.0),
)

# Create the property-graph index from the loaded documents, using `graph_store`
# as the property graph store and showing progress bars while it runs.
index = PropertyGraphIndex.from_documents(
    documents,
    embed_model=embedding_model,
    kg_extractors=[path_extractor],
    property_graph_store=graph_store,
    show_progress=True,
)

Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]



Extracting paths from text:   0%|          | 0/1 [00:00<?, ?it/s][A[A

Extracting paths from text: 100%|██████████| 1/1 [00:02<00:00,  2.96s/it]


Generating embeddings:   0%|          | 0/1 [00:00<?, ?it/s][A[A

Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  1.50it/s]


Generating embeddings:   0%|          | 0/1 [00:00<?, ?it/s][A[A

Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  2.07it/s]


In [16]:
# include_text controls whether source text is included in returned nodes
# (default True); it is switched off here.
retriever = index.as_retriever(include_text=False)

# Ask the graph who lives in the town and keep the retrieved nodes.
nodes = retriever.retrieve("小镇里面都住着谁")

# Print each retrieved node's text on its own line.
for hit in nodes:
    print(hit.text)

小镇 -> 生活着 -> 一群人
张医生 -> 是 -> 镇上的老朋友
林叔 -> 是 -> 咖啡馆老板
丽莎 -> 是 -> 赵老板的女儿


In [18]:
# Query engine over the same index, this time with include_text switched on.
query_engine = index.as_query_engine(include_text=True)

# Ask for the names of everyone living in the town.
response = query_engine.query("小镇里面都住着谁?列出他们的名字")

# print() converts the response with str() itself, so no explicit cast is needed.
print(response)

林叔、小林、丽莎、赵老板、吴昊、吴倩、陈立、王晓蓉、张医生
