In [2]:
import spacy
from neo4j import GraphDatabase, RoutingControl

In [3]:
# !python -m spacy download zh_core_web_sm -i https://pypi.tuna.tsinghua.edu.cn/simple
# !pip install neo4j

In [4]:
# 加载Spacy模型
nlp = spacy.load("zh_core_web_sm")

# 示例文本
# text = "甄嬛，原名甄玉嬛，嫌玉字俗气而改名甄嬛，为汉人甄远道之女，后被雍正赐姓钮祜禄氏，抬旗为满洲上三旗，获名“钮祜禄·甄嬛”。"
text = """
甄嬛，原名甄玉嬛，嫌玉字俗气而改名甄嬛，为汉人甄远道之女，后被雍正赐姓钮祜禄氏，抬旗为满洲上三旗，获名“钮祜禄·甄嬛”。
同沈眉庄、安陵容参加选秀，因容貌酷似纯元皇后而被选中。入宫后面对华妃的步步紧逼，沈眉庄被冤、安陵容变心，从偏安一隅的青涩少女变成了能引起血雨腥风的宫斗老手。
雍正发现年氏一族的野心后令其父甄远道剪除，甄嬛也于后宫中用她的连环巧计帮皇帝解决政敌，故而深得雍正爱待。几经周折，终于斗垮了嚣张跋扈的华妃。
甄嬛封妃时遭皇后宜修暗算，被皇上嫌弃，生下女儿胧月后心灰意冷，自请出宫为尼。然得果郡王爱慕，二人相爱，得知果郡王死讯后立刻设计与雍正再遇，风光回宫。
此后甄父冤案平反、甄氏复起，她也生下双生子，在滴血验亲等各种阴谋中躲过宜修的暗害，最后以牺牲自己亲生胎儿的方式扳倒了幕后黑手的皇后。
但雍正又逼甄嬛毒杀允礼，以测试甄嬛真心，并让已经生产过孩子的甄嬛去准格尔和亲。甄嬛遂视皇帝为最该毁灭的对象，大结局道尽“人类的一切争斗，皆因统治者的不公不义而起”，并毒杀雍正。
四阿哥弘历登基为乾隆，甄嬛被尊为圣母皇太后，权倾朝野，在如懿传中安度晚年。
"""

# 对文本进行NER
doc = nlp(text)
persons = [ent.text for ent in doc.ents if ent.label_ == "PERSON"]

# 提取人物属性
attributes = {"甄嬛": {"属性": ["美丽", "聪明"], "父亲": "甄远孙"}}

# 关系抽取
relations = {"甄嬛-皇帝": "特殊关系"}

print("提取的人物：", persons)
# print("人物属性：", attributes)
# print("人物关系：", relations)


提取的人物： ['钮祜禄·甄嬛', '王爱慕', '王死讯', '滴血验亲']


## Neo4j
from to 的关系是：from 的 relation 是 to 。from是主人公，to 是他的朋友、父亲等等。

In [5]:
from neo4j import GraphDatabase, RoutingControl


URI      = "neo4j+s://5bb87071.databases.neo4j.io"
username = "neo4j"
password = "PPcEmlN5ognHV-YG0J9Puj0FzCwdiXsElSbaHugN9xA"
AUTH     = (username, password)

def clear_all(driver):
    # 删除所有节点和关系
    driver.execute_query(
        "MATCH (n)"
        "DETACH DELETE n"
    )
    print('Neo4j 数据库已被重置清空。')

def add_friend(driver, name, friend_name):
    driver.execute_query(
        "MERGE (a:Person {name: $name}) "
        "MERGE (friend:Person {name: $friend_name}) "
        "MERGE (a)-[:KNOWS]->(friend)",
        name=name, friend_name=friend_name, database_="neo4j",
    )


def print_friends(driver, name):
    records, _, _ = driver.execute_query(
        "MATCH (a:Person)-[:KNOWS]->(friend) WHERE a.name = $name "
        "RETURN friend.name ORDER BY friend.name",
        name=name, database_="neo4j", routing_=RoutingControl.READ,
    )
    for record in records:
        print(record["friend.name"])


with GraphDatabase.driver(URI, auth=AUTH) as driver:
    # add_friend(driver, "Arthur", "Guinevere")
    # add_friend(driver, "Arthur", "Lancelot")
    # add_friend(driver, "Arthur", "Merlin")
    clear_all(driver)
    
    print_friends(driver, "Arthur")

Neo4j 数据库已被重置清空。


In [6]:
# 定义Neo4j会话
driver = GraphDatabase.driver(URI, auth=(username, password))
session = driver.session()

clear_all(driver)

# 导入人物属性
with open('./Persons.csv', 'r', encoding='gbk') as file:
    lines = file.readlines()[1:]  # 跳过CSV的表头行
    for line in lines:
        values = line.strip().split(',')
        name, gender, alias = values
        print(f'name: {name}, gender: {gender}, alias: {alias}')
        cypher_query = f"CREATE (:Character {{name: '{name}', gender: '{gender}', alias: '{alias}'}})"
        session.run(cypher_query)

# 导入人物关系
with open('./Relationship.csv', 'r', encoding='gbk') as file:
    lines = file.readlines()[1:]  # 跳过CSV的表头行
    for line in lines:
        from_id, to_id, relation = line.strip().split(',')
        cypher_query = f"MATCH (from:Character {{name: '{from_id}'}}) " \
                       f"MATCH (to:Character {{name: '{to_id}'}}) " \
                       f"CREATE (from)-[:{relation}]->(to)"
        session.run(cypher_query)

def query_alias(driver, person_name):
    with driver.session() as session:
        result = session.run("MATCH (person:Character {name: $person_name})"
                                "RETURN person.alias AS alias",
                                person_name=person_name)
        alias = result.single()["alias"]

        return alias
        
def query_relationship(driver, person_name, relationship_type):
    with driver.session() as session:
        query = f"MATCH (person:Character {{name: '{person_name}'}})-[:{relationship_type}]->(related) RETURN related.name AS related_name"
        result = session.run(query)
        related_names = [record["related_name"] for record in result]
        return related_names
    
def query_person_info(driver, person_name):
    with driver.session() as session:
        result = session.run("MATCH (person:Character {name: $person_name})"
                                "RETURN person.name AS name, person.gender AS gender, person.alias AS alias",
                                person_name=person_name)
        record = result.single()
        if record:
            return {
                "name": record["name"],
                "gender": record["gender"],
                "alias": record["alias"]
            }
        else:
            return None
    
person_name = '甄嬛'
relationship_type = '女儿'
related_names = query_relationship(driver, person_name, relationship_type)
if related_names:
    print(f"{person_name}的{relationship_type}是: {', '.join(related_names)}")
else:
    print(f"{person_name}没有{relationship_type}")

aliases = query_alias(driver, person_name)
if aliases:
    print(f"{person_name}的别名: {aliases}")
else:
    print(f"{person_name}没有别名")

person_info = query_person_info(driver, person_name)
if person_info:
    print("人物信息:")
    print(f"姓名: {person_info['name']}")
    print(f"性别: {person_info['gender']}")
    print(f"别名: {person_info['alias']}")
else:
    print(f"{person_name}不存在")

# 关闭会话和驱动
session.close()
driver.close()


Neo4j 数据库已被重置清空。
name: 甄嬛, gender: 女, alias: 熹贵妃
name: 雍正帝, gender: 男, alias: 皇上
name: 皇后, gender: 女, alias: 景仁宫娘娘
name: 华妃, gender: 女, alias: 华贵妃
name: 沈眉庄, gender: 女, alias: 沈贵人
name: 安陵容, gender: 女, alias: 安贵人
name: 果郡王, gender: 男, alias: 爱新觉罗·允礼
name: 温实初, gender: 男, alias: 太医
name: 甄远道, gender: 男, alias: 
name: 胧月公主, gender: 女, alias: 
甄嬛的女儿是: 胧月公主
甄嬛的别名: 熹贵妃
人物信息:
姓名: 甄嬛
性别: 女
别名: 熹贵妃


### 封装

In [7]:
from neo4j import GraphDatabase

class Neo4jManager:
    def __init__(self, uri, username, password):
        self.driver = GraphDatabase.driver(uri, auth=(username, password))
    
    def close(self):
        self.driver.close()

    def import_characters(self, csv_path):
        with open(csv_path, 'r', encoding='gbk') as file:
            lines = file.readlines()[1:]
            with self.driver.session() as session:
                for line in lines:
                    name, gender, alias = line.strip().split(',')
                    session.run("CREATE (:Character {name: $name, gender: $gender, alias: $alias})",
                                name=name, gender=gender, alias=alias)

    def import_relationships(self, csv_path):
        with open(csv_path, 'r', encoding='gbk') as file:
            lines = file.readlines()[1:]
            with self.driver.session() as session:
                for line in lines:
                    from_id, to_id, relation = line.strip().split(',')
                    cypher_query = f"MATCH (from:Character {{name: '{from_id}'}}) " \
                                f"MATCH (to:Character {{name: '{to_id}'}}) " \
                                f"CREATE (from)-[:{relation}]->(to)"
                    session.run(cypher_query)

    def clear_database(self):
        with self.driver.session() as session:
            session.run("MATCH (n) DETACH DELETE n")

    def query_status(self):
        with self.driver.session() as session:
            result     = session.run("MATCH (n) RETURN COUNT(n) AS node_count")
            node_count = result.single()["node_count"]
            return f"Number of nodes in the database: {node_count}"
        
    def query_alias(self, person_name):
        with self.driver.session() as session:
            result = session.run("MATCH (person:Character {name: $person_name})"
                                    "RETURN person.alias AS alias",
                                    person_name=person_name)
            alias = result.single()["alias"]

            return alias
            
    def query_relationship(self, person_name, relationship_type):
        with self.driver.session() as session:
            query = f"MATCH (person:Character {{name: '{person_name}'}})-[:{relationship_type}]->(related) \
                        RETURN related.name AS related_name"
            result        = session.run(query)
            related_names = [record["related_name"] for record in result]
            return related_names
        
    def query_person_info(self, person_name):
        with self.driver.session() as session:
            result = session.run("MATCH (person:Character {name: $person_name})"
                                    "RETURN person.name AS name, person.gender AS gender, person.alias AS alias",
                                    person_name=person_name)
            record = result.single()
            if record:
                return {
                    "name": record["name"],
                    "gender": record["gender"],
                    "alias": record["alias"]
                }
            else:
                return None


uri      = "neo4j+s://5bb87071.databases.neo4j.io"
username = "neo4j"
password = "PPcEmlN5ognHV-YG0J9Puj0FzCwdiXsElSbaHugN9xA"

manager = Neo4jManager(uri, username, password)
manager.clear_database()
manager.import_characters('./Persons.csv')
manager.import_relationships('./Relationship.csv')
status = manager.query_status()
print(status)

person_name = '甄嬛'
relationship_type = '父亲'
related_names = manager.query_relationship(person_name, relationship_type)
if related_names:
    print(f"{person_name}的{relationship_type}是: {', '.join(related_names)}")
else:
    print(f"{person_name}没有{relationship_type}")

aliases = manager.query_alias(person_name)
if aliases:
    print(f"{person_name}的别名: {aliases}")
else:
    print(f"{person_name}没有别名")

person_info = manager.query_person_info(person_name)
if person_info:
    print("人物信息:")
    print(f"姓名: {person_info['name']}")
    print(f"性别: {person_info['gender']}")
    print(f"别名: {person_info['alias']}")
else:
    print(f"{person_name}不存在")

manager.close()


Number of nodes in the database: 10
甄嬛的父亲是: 甄远道
甄嬛的别名: 熹贵妃
人物信息:
姓名: 甄嬛
性别: 女
别名: 熹贵妃


## langchain


In [8]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import GraphCypherQAChain
from langchain.graphs import Neo4jGraph

uri      = "neo4j+s://5bb87071.databases.neo4j.io"
username = "neo4j"
password = "PPcEmlN5ognHV-YG0J9Puj0FzCwdiXsElSbaHugN9xA"

graph = Neo4jGraph(
    url=uri, username=username, password=password
)

# graph.refresh_schema()
print(graph.get_schema)


        Node properties are the following:
        [{'properties': [{'property': 'gender', 'type': 'STRING'}, {'property': 'name', 'type': 'STRING'}, {'property': 'alias', 'type': 'STRING'}], 'labels': 'Character'}]
        Relationship properties are the following:
        []
        The relationships are the following:
        ['(:Character)-[:爱人]->(:Character)', '(:Character)-[:朋友]->(:Character)', '(:Character)-[:女儿]->(:Character)', '(:Character)-[:父亲]->(:Character)', '(:Character)-[:妻子]->(:Character)', '(:Character)-[:妃子]->(:Character)', '(:Character)-[:丈夫]->(:Character)', '(:Character)-[:敌人]->(:Character)']
        


In [9]:
chain = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0), graph=graph, verbose=True
)

# openai.api_key = 'sk-5vOFyAXiUcLGkRKV60D2D7E6Ee6e408f8d366f4c1013C004'
# # 更换OpenAI接口的host
# openai.api_base = "https://api.akm.pw/v1" #在这里设置即可,需要特别注意这里的/v1是必须的，否则报错。前面的地址注意替换即可。

import os 
from dotenv import load_dotenv, find_dotenv, dotenv_values 
# os.environ["OPENAI_API_KEY"] = "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"
os.environ['HTTPS_PROXY']    = 'http://127.0.0.1:7890'
os.environ["HTTP_PROXY"]     = 'http://127.0.0.1:7890'

# 读取本地的环境变量 
env_vars = dotenv_values('.env')
# 获取环境变量 OPENAI_API_KEY
openai_api_key = env_vars['OPENAI_API_KEY']
print(openai_api_key)

# 获取环境变量的值
api_key = os.environ.get('OPENAI_API_KEY')

# 打印环境变量的值
print(api_key)

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
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


In [10]:
chain.run("甄嬛的女儿是谁？")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c1:Character {name: '甄嬛'})-[:女儿]->(c2:Character)
RETURN c2.name[0m
Full Context:
[32;1m[1;3m[{'c2.name': '胧月公主'}][0m

[1m> Finished chain.[0m


'甄嬛的女儿是胧月公主。'

In [11]:
chain.run("皇后的敌人是谁？")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (queen:Character {name: '皇后'})-[:敌人]->(enemy:Character)
RETURN enemy.name[0m
Full Context:
[32;1m[1;3m[{'enemy.name': '甄嬛'}][0m

[1m> Finished chain.[0m


'皇后的敌人是甄嬛。'

In [12]:
chain.run("甄嬛的孩子是谁？")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (zhenhuan:Character {name: '甄嬛'})-[:女儿]->(child:Character)
RETURN child.name[0m
Full Context:
[32;1m[1;3m[{'child.name': '胧月公主'}][0m

[1m> Finished chain.[0m


'甄嬛的孩子是胧月公主。'