In [1]:
from py2neo import Graph,Node
import os 
import json
from tqdm import tqdm

In [2]:
data_dir = './data'
entities_path = os.path.join(data_dir,'entities3.json')
relation_path = os.path.join(data_dir,'relations3.json')

In [3]:
with open(entities_path,'r',encoding='utf-8') as f:
    entities = json.load(f)
with open(relation_path,'r',encoding='utf-8') as f:
    relations = json.load(f)

In [4]:
print(len(entities))
print(len(relations))

12730
12


In [5]:
def transform_rel_name(relation_name):
    place_1 = relation_name.find('同义词')
    if place_1 != -1:
        place_2 = relation_name.find('/')
        relation_name = relation_name[place_1+4:place_2]
    return relation_name    

In [6]:
name_map = {
    '临床表现': '症状',
    '治疗后症状': '症状',
    '侵及周围组织转移的症状': '症状',
    '药物治疗': '好评药品',
    '实验室检查': '诊断检查',
    '影像学检查': '诊断检查',
    '辅助检查': '诊断检查',
    '组织学检查': '诊断检查',
    '内窥镜检查': '诊断检查',
    '筛查': '诊断检查',
    '并发症': '并发症',
    '就诊科室': '所属科室'
    }

In [31]:
# rel_name = transform_rel_name('同义词(药物/药物)')
# rel_name = transform_rel_name('预防')
# print(rel_name)

In [7]:
graph = Graph(
            host="127.0.0.1",  # neo4j 搭载服务器的ip地址，ifconfig可获取到
            port=7687,  # neo4j 服务器监听的端口号
            user="neo4j",  # 数据库user name，如果没有更改过，应该是neo4j
            password="zhuo123*",name='medicalgraph') #name 要改成自己的图谱的名字

In [8]:
def search_eneity(label,name):
    query_1 = "match(n:{}) where n.名称='{}' return n.名称".format(label,name) #先match，如果存在就不进行create
    try:
        result = graph.run(query_1).data()
    except:
        return 'error'
    if len(result) > 0:
        return 'exist'
    else:
        query_2 = "match(n) where n.名称='{}' return n.名称".format(name) #如果名字存在就还是使用原有图谱的东西
        try:
            result_2 = graph.run(query_2).data()
        except:
            return 'error'
        if len(result_2) > 0:
            return 'exist'
        else:
            return 'noexist'

In [9]:
# search_eneity('药物','中等剂量 ICS')
# search_eneity('疾病','藏毛囊肿')

In [10]:
def search_relation(p,q,rel_name):
    query = "match(n)-[r]-(m) where n.名称='{}' and r.名称='{}' and m.名称='{}' return n.名称,r.名称,m.名称".format(
        p,rel_name,q
    )
    try:
        result = graph.run(query).data()
    except:
        return 'error'
    # print(result)
    if len(result) > 0:
        return 'exist'
    else:
        return 'notexist'

In [11]:
# search_relation('藏毛囊肿','白菜肉末粥','推荐食谱')

In [12]:
def create_node(label,name): #创建节点
    n = Node(label) 
    n['名称'] = name
    graph.create(n)

In [13]:
def create_rel(start_entity_type,end_entity_type, p, q,rel_name):
    query = "match(p:%s),(q:%s) where p.名称='%s'and q.名称='%s' create (p)-[rel:%s {名称:'%s'}]->(q)" % (
                start_entity_type,end_entity_type, p, q,rel_name,rel_name)
    try:
        graph.run(query)
    except Exception as e:
        print(e)

In [14]:
for entity in tqdm(entities,total=len(entities),desc='entity',ncols=80):
    name = entity['name']
    label = entity['label']
    flag = search_eneity(label,name)
    # print(result)
    # # break
    if flag == 'exist' or flag == 'error': #如果节点已经存在就不创建
        continue
    else:
        create_node(label,name)

entity: 100%|█████████████████████████████| 12730/12730 [03:16<00:00, 64.64it/s]


In [15]:
for relation in tqdm(relations,total=len(relations),desc='relation',ncols=80):
    start_entity_type=relation['start_entity_type']
    end_entity_type=relation['end_entity_type']
    rel_type=relation['rel_type']
    rel_name=relation['rel_name']
    rels=relation['rels']
    # print(start_entity_type,end_entity_type,rel_type)
    for rel in rels:
        p=rel['start_entity_name']
        q=rel['end_entity_name']
        p_flag = search_eneity(start_entity_type,p) #检测是否存在实体
        q_flag = search_eneity(end_entity_type,q) #检测是否有实体
        # print(p,q,p_flag,q_flag)
        if p_flag == 'error' or q_flag == 'error':
            continue
        if p_flag == 'exist' and q_flag == 'exist':
            rel_flag = search_relation(p,q,rel_name) #两者检测是否有关系
            if rel_flag == 'exist' or rel_flag=='error':
                continue
            else:
                create_rel(start_entity_type,end_entity_type,p,q,rel_name) #创建关系
                continue
        elif p_flag == 'noexist': #如果存在entity不存在的状况那就先创建
            create_node(start_entity_type,p)
        elif q_flag == 'noexist':
            create_node(end_entity_type,q) 
        create_rel(start_entity_type,end_entity_type,p,q,rel_name)    

relation: 100%|█████████████████████████████████| 12/12 [08:54<00:00, 44.51s/it]


多了10067个实体，15253条关系

54732 个 实体 533303 个关系

62196 个实体 543669条关系