In [1]:
import spacy

nlp = spacy.load("zh_core_web_md")

def extract_entity_relations(text):
    doc = nlp(text)
    entities = []
    for ent in doc.ents:
        entities.append((ent.text, ent.label_))

    relations = []
    for sent in doc.sents:
        for token in sent:
            if token.dep_ in ("ROOT", "nsubj", "dobj"):
                subject = ""
                object = ""
                for child in token.children:
                    if child.dep_ == "nsubj":
                        subject = child.text
                    elif child.dep_ == "dobj":
                        object = child.text
                if subject and object:
                    relations.append((subject, token.text, object))

    return entities, relations


In [3]:
test_cases = [
    ("《红楼梦》的作者是曹雪芹。", [("红楼梦", "WORKS_OF_ART"), ("曹雪芹", "PERSON")], [("红楼梦", "作者", "曹雪芹")]),
    ("北京是中国的首都。", [("北京", "GPE"), ("中国", "GPE")], [("北京", "首都", "中国")]),
    ("李娜是中国的著名网球运动员。", [("李娜", "PERSON"), ("中国", "GPE"), ("网球", "SPORTS")], [("李娜", "运动员", "网球")]),
    ("苹果公司的总部在加利福尼亚。", [("苹果公司", "ORG"), ("加利福尼亚", "GPE")], [("苹果公司", "总部所在地", "加利福尼亚")]),
    ("《三体》是刘慈欣的科幻小说。", [("三体", "WORKS_OF_ART"), ("刘慈欣", "PERSON"), ("科幻小说", "LITERATURE")], [("三体", "作者", "刘慈欣")]),
]

for text, expected_entities, expected_relations in test_cases:
    entities, relations = extract_entity_relations(text)
    print("Text: ", text)
    print("Entities: ", entities)
    print("Expected entities: ", expected_entities)
    print("Relations: ", relations)
    print("Expected relations: ", expected_relations)
    print("\n")


Text:  《红楼梦》的作者是曹雪芹。
Entities:  [('红楼梦', 'WORK_OF_ART'), ('曹雪芹', 'PERSON')]
Expected entities:  [('红楼梦', 'WORKS_OF_ART'), ('曹雪芹', 'PERSON')]
Relations:  []
Expected relations:  [('红楼梦', '作者', '曹雪芹')]


Text:  北京是中国的首都。
Entities:  [('北京', 'GPE'), ('中国', 'GPE')]
Expected entities:  [('北京', 'GPE'), ('中国', 'GPE')]
Relations:  []
Expected relations:  [('北京', '首都', '中国')]


Text:  李娜是中国的著名网球运动员。
Entities:  [('李娜', 'PERSON'), ('中国', 'GPE')]
Expected entities:  [('李娜', 'PERSON'), ('中国', 'GPE'), ('网球', 'SPORTS')]
Relations:  []
Expected relations:  [('李娜', '运动员', '网球')]


Text:  苹果公司的总部在加利福尼亚。
Entities:  [('加利福尼亚', 'GPE')]
Expected entities:  [('苹果公司', 'ORG'), ('加利福尼亚', 'GPE')]
Relations:  [('总部', '在', '加利福尼亚')]
Expected relations:  [('苹果公司', '总部所在地', '加利福尼亚')]


Text:  《三体》是刘慈欣的科幻小说。
Entities:  [('三体', 'WORK_OF_ART'), ('刘慈欣', 'PERSON')]
Expected entities:  [('三体', 'WORKS_OF_ART'), ('刘慈欣', 'PERSON'), ('科幻小说', 'LITERATURE')]
Relations:  []
Expected relations:  [('三体', '作者', '刘慈欣')]


