In [1]:
import ahocorasick
from py2neo import Graph

In [2]:
def check_words(wds, sent):
    """Return True if at least one entry of ``wds`` occurs inside ``sent``.

    Each entry is tested with the ``in`` operator, so for string inputs this
    is a substring test. An empty ``wds`` yields False.
    """
    return any(keyword in sent for keyword in wds)

In [3]:
def _read_wordlist(path):
    """Return the non-blank lines of the file at ``path``, whitespace-stripped."""
    # NOTE(review): no explicit encoding is passed, so the platform default is
    # used; confirm the encoding of the dictionary files before pinning one.
    with open(path) as handle:
        return [line.strip() for line in handle if line.strip()]


def classifier(question):
    """Find the medical entities in ``question`` and the kinds of question asked.

    Entity terms are loaded from the ``dict/*.txt`` word lists and located in
    the question with an Aho-Corasick automaton. A matched term that is a
    proper substring of another matched term is discarded. Question kinds are
    then derived from trigger keywords combined with the entity types found.

    Args:
        question: the user's question text.

    Returns:
        ``{}`` when no dictionary term occurs in the question; otherwise a
        dict with two keys:
        ``'args'`` maps each matched term to its list of entity types, and
        ``'question_types'`` is the list of question-kind labels (possibly
        empty).
    """
    # Entity dictionaries. The key order fixes the order of the type tags
    # attached to a term that appears in several dictionaries.
    lexicon = {
        'disease': _read_wordlist("dict/disease.txt"),
        'department': _read_wordlist("dict/department.txt"),
        'check': _read_wordlist("dict/check.txt"),
        'drug': _read_wordlist("dict/drug.txt"),
        'food': _read_wordlist("dict/food.txt"),
        'symptom': _read_wordlist("dict/symptom.txt"),
        'producer': _read_wordlist("dict/producer.txt"),
    }
    deny_words = _read_wordlist("dict/deny.txt")

    # term -> entity types. Built in one pass per dictionary instead of
    # scanning every list for every term.
    word_dict = {}
    for entity_type, words in lexicon.items():
        for word in set(words):
            word_dict.setdefault(word, []).append(entity_type)

    # Automaton over every known term; the payload carries the term itself.
    actree = ahocorasick.Automaton()
    for index, word in enumerate(word_dict):
        actree.add_word(word, (index, word))
    actree.make_automaton()

    # Trigger keywords for each kind of question.
    symptom_qwds = ['症状', '表征', '现象', '症候', '表现']
    cause_qwds = ['原因','成因', '为什么', '怎么会', '怎样才', '咋样才', '怎样会', '如何会', '为啥', '为何', '如何才会', '怎么才会', '会导致', '会造成']
    acompany_qwds = ['并发症', '并发', '一起发生', '一并发生', '一起出现', '一并出现', '一同发生', '一同出现', '伴随发生', '伴随', '共现']
    food_qwds = ['饮食', '饮用', '吃', '食', '伙食', '膳食', '喝', '菜' ,'忌口', '补品', '保健品', '食谱', '菜谱', '食用', '食物','补品']
    drug_qwds = ['药', '药品', '用药', '胶囊', '口服液', '炎片']
    prevent_qwds = ['预防', '防范', '抵制', '抵御', '防止','躲避','逃避','避开','免得','逃开','避开','避掉','躲开','躲掉','绕开',
                         '怎样才能不', '怎么才能不', '咋样才能不','咋才能不', '如何才能不',
                         '怎样才不', '怎么才不', '咋样才不','咋才不', '如何才不',
                         '怎样才可以不', '怎么才可以不', '咋样才可以不', '咋才可以不', '如何可以不',
                         '怎样才可不', '怎么才可不', '咋样才可不', '咋才可不', '如何可不']
    lasttime_qwds = ['周期', '多久', '多长时间', '多少时间', '几天', '几年', '多少天', '多少小时', '几个小时', '多少年']
    cureway_qwds = ['怎么治疗', '如何医治', '怎么医治', '怎么治', '怎么医', '如何治', '医治方式', '疗法', '咋治', '怎么办', '咋办', '咋治']
    cureprob_qwds = ['多大概率能治好', '多大几率能治好', '治好希望大么', '几率', '几成', '比例', '可能性', '能治', '可治', '可以治', '可以医']
    easyget_qwds = ['易感人群', '容易感染', '易发人群', '什么人', '哪些人', '感染', '染上', '得上']
    check_qwds = ['检查', '检查项目', '查出', '检查', '测出', '试出']
    cure_qwds = ['治疗什么', '治啥', '治疗啥', '医治啥', '治愈啥', '主治啥', '主治什么', '有什么用', '有何用', '用处', '用途',
                      '有什么好处', '有什么益处', '有何益处', '用来', '用来做啥', '用来作甚', '需要', '要']

    # Every dictionary term found in the question, in match order.
    region_wds = [word for _end, (_index, word) in actree.iter(question)]
    # Drop a term when it is a proper substring of another matched term.
    stop_wds = {
        inner
        for inner in region_wds
        for outer in region_wds
        if inner != outer and inner in outer
    }
    medical_dict = {
        word: word_dict.get(word) for word in region_wds if word not in stop_wds
    }
    if not medical_dict:
        return {}

    data = {'args': medical_dict}
    # All entity types present in the question.
    types = {tag for tags in medical_dict.values() for tag in tags}
    # A negation word flips food questions from "may eat" to "must avoid".
    denied = check_words(deny_words, question)

    # (trigger keywords, required entity type, resulting question type),
    # evaluated in this order; each rule fires at most once.
    rules = [
        (symptom_qwds, 'disease', 'disease_symptom'),
        (symptom_qwds, 'symptom', 'symptom_disease'),
        (cause_qwds, 'disease', 'disease_cause'),
        (acompany_qwds, 'disease', 'disease_acompany'),
        (food_qwds, 'disease',
         'disease_not_food' if denied else 'disease_do_food'),
        (food_qwds + cure_qwds, 'food',
         'food_not_disease' if denied else 'food_do_disease'),
        # Recommend drugs for a disease.
        (drug_qwds, 'disease', 'disease_drug'),
        # Which diseases a drug treats.
        (cure_qwds, 'drug', 'drug_disease'),
        # Examinations needed for a disease.
        (check_qwds, 'disease', 'disease_check'),
        # Diseases associated with a known examination.
        (check_qwds + cure_qwds, 'check', 'check_disease'),
        # Prevention of a disease.
        (prevent_qwds, 'disease', 'disease_prevent'),
        # Treatment duration of a disease.
        (lasttime_qwds, 'disease', 'disease_lasttime'),
        # Treatment methods for a disease.
        (cureway_qwds, 'disease', 'disease_cureway'),
        # Probability of curing a disease.
        (cureprob_qwds, 'disease', 'disease_cureprob'),
        # Population susceptible to a disease.
        (easyget_qwds, 'disease', 'disease_easyget'),
    ]
    question_types = [
        question_type
        for keywords, entity_type, question_type in rules
        if entity_type in types and check_words(keywords, question)
    ]

    # Nothing specific was asked: fall back to a description of the disease,
    # or to the diseases associated with the symptom.
    if not question_types:
        if 'disease' in types:
            question_types = ['disease_desc']
        elif 'symptom' in types:
            question_types = ['symptom_disease']

    data['question_types'] = question_types
    return data

In [4]:
def build_entitydict(args):
    """Invert a term -> entity-types mapping into entity-type -> terms.

    Args:
        args: dict mapping each term to an iterable of its entity types.

    Returns:
        dict mapping each entity type to the list of terms carrying it, in
        the order the terms are encountered.
    """
    grouped = {}
    for term, labels in args.items():
        for label in labels:
            grouped.setdefault(label, []).append(term)
    return grouped

In [5]:
# Cypher templates per question type. ``{0}`` receives the (escaped) entity
# name. Where a type has two templates, all queries of the first template are
# emitted before those of the second.
_CYPHER_TEMPLATES = {
    # Cause of a disease.
    'disease_cause': (
        "MATCH (m:Disease) where m.name = '{0}' return m.name, m.cause",
    ),
    # Prevention measures for a disease.
    'disease_prevent': (
        "MATCH (m:Disease) where m.name = '{0}' return m.name, m.prevent",
    ),
    # Treatment duration of a disease.
    'disease_lasttime': (
        "MATCH (m:Disease) where m.name = '{0}' return m.name, m.cure_lasttime",
    ),
    # Cure probability of a disease.
    'disease_cureprob': (
        "MATCH (m:Disease) where m.name = '{0}' return m.name, m.cured_prob",
    ),
    # Treatment methods of a disease.
    'disease_cureway': (
        "MATCH (m:Disease) where m.name = '{0}' return m.name, m.cure_way",
    ),
    # Susceptible population of a disease.
    'disease_easyget': (
        "MATCH (m:Disease) where m.name = '{0}' return m.name, m.easy_get",
    ),
    # General description of a disease.
    'disease_desc': (
        "MATCH (m:Disease) where m.name = '{0}' return m.name, m.desc",
    ),
    # Symptoms of a disease.
    'disease_symptom': (
        "MATCH (m:Disease)-[r:has_symptom]->(n:Symptom) where m.name = '{0}' return m.name, r.name, n.name",
    ),
    # Diseases showing a symptom.
    'symptom_disease': (
        "MATCH (m:Disease)-[r:has_symptom]->(n:Symptom) where n.name = '{0}' return m.name, r.name, n.name",
    ),
    # Complications of a disease (the relationship is followed both ways).
    'disease_acompany': (
        "MATCH (m:Disease)-[r:acompany_with]->(n:Disease) where m.name = '{0}' return m.name, r.name, n.name",
        "MATCH (m:Disease)-[r:acompany_with]->(n:Disease) where n.name = '{0}' return m.name, r.name, n.name",
    ),
    # Foods to avoid with a disease.
    'disease_not_food': (
        "MATCH (m:Disease)-[r:no_eat]->(n:Food) where m.name = '{0}' return m.name, r.name, n.name",
    ),
    # Foods suggested for a disease.
    'disease_do_food': (
        "MATCH (m:Disease)-[r:do_eat]->(n:Food) where m.name = '{0}' return m.name, r.name, n.name",
        "MATCH (m:Disease)-[r:recommand_eat]->(n:Food) where m.name = '{0}' return m.name, r.name, n.name",
    ),
    # Diseases for which a food should be avoided.
    'food_not_disease': (
        "MATCH (m:Disease)-[r:no_eat]->(n:Food) where n.name = '{0}' return m.name, r.name, n.name",
    ),
    # Diseases for which a food is suggested.
    'food_do_disease': (
        "MATCH (m:Disease)-[r:do_eat]->(n:Food) where n.name = '{0}' return m.name, r.name, n.name",
        "MATCH (m:Disease)-[r:recommand_eat]->(n:Food) where n.name = '{0}' return m.name, r.name, n.name",
    ),
    # Drugs commonly used for a disease.
    'disease_drug': (
        "MATCH (m:Disease)-[r:common_drug]->(n:Drug) where m.name = '{0}' return m.name, r.name, n.name",
        "MATCH (m:Disease)-[r:recommand_drug]->(n:Drug) where m.name = '{0}' return m.name, r.name, n.name",
    ),
    # Diseases a drug is used for.
    'drug_disease': (
        "MATCH (m:Disease)-[r:common_drug]->(n:Drug) where n.name = '{0}' return m.name, r.name, n.name",
        "MATCH (m:Disease)-[r:recommand_drug]->(n:Drug) where n.name = '{0}' return m.name, r.name, n.name",
    ),
    # Examinations needed for a disease.
    'disease_check': (
        "MATCH (m:Disease)-[r:need_check]->(n:Check) where m.name = '{0}' return m.name, r.name, n.name",
    ),
    # Diseases that need a given examination.
    'check_disease': (
        "MATCH (m:Disease)-[r:need_check]->(n:Check) where n.name = '{0}' return m.name, r.name, n.name",
    ),
}


def _cypher_quote(value):
    """Escape backslashes and single quotes for a single-quoted Cypher string."""
    # Backslashes first, so the ones added for quotes are not doubled again.
    return str(value).replace('\\', '\\\\').replace("'", "\\'")


def sql_transfer(question_type, entities):
    """Build the Cypher queries answering ``question_type`` for ``entities``.

    Args:
        question_type: question-kind label such as ``'disease_cause'``.
        entities: iterable of entity names, or None/empty.

    Returns:
        A list of Cypher query strings, one per template and entity. The list
        is empty when ``entities`` is None/empty or ``question_type`` is not a
        known label.
    """
    if not entities:
        return []

    templates = _CYPHER_TEMPLATES.get(question_type, ())
    # Names go inside single-quoted literals, so a quote or backslash in a
    # name must be escaped or it would terminate/alter the literal.
    names = [_cypher_quote(entity) for entity in entities]
    return [template.format(name) for template in templates for name in names]

In [6]:
def parser(res_classify):
    """Turn a classification result into per-question-type query bundles.

    Args:
        res_classify: dict with an ``'args'`` mapping (term -> entity types)
            and a ``'question_types'`` list.

    Returns:
        A list of ``{'question_type': ..., 'sql': [...]}`` dicts, one for each
        question type that produced at least one query.
    """
    # Entity type whose terms are substituted into each question type's query.
    entity_for = {
        'disease_symptom': 'disease',
        'symptom_disease': 'symptom',
        'disease_cause': 'disease',
        'disease_acompany': 'disease',
        'disease_not_food': 'disease',
        'disease_do_food': 'disease',
        'food_not_disease': 'food',
        'food_do_disease': 'food',
        'disease_drug': 'disease',
        'drug_disease': 'drug',
        'disease_check': 'disease',
        'check_disease': 'check',
        'disease_prevent': 'disease',
        'disease_lasttime': 'disease',
        'disease_cureway': 'disease',
        'disease_cureprob': 'disease',
        'disease_easyget': 'disease',
        'disease_desc': 'disease',
    }

    entity_dict = build_entitydict(res_classify['args'])
    bundles = []
    for question_type in res_classify['question_types']:
        entity_type = entity_for.get(question_type)
        if entity_type is None:
            # Unrecognised label: nothing to query.
            continue
        statements = sql_transfer(question_type, entity_dict.get(entity_type))
        if statements:
            bundles.append({'question_type': question_type, 'sql': statements})
    return bundles

In [7]:
# Maximum number of distinct values listed in one answer.
_ANSWER_LIMIT = 20

# question type -> (row key of the subject, row key of the listed values,
# sentence template taking (subject, joined values)).
_SIMPLE_ANSWERS = {
    'disease_symptom': ('m.name', 'n.name', '{0}的症状包括：{1}'),
    'symptom_disease': ('n.name', 'm.name', '症状{0}可能染上的疾病有：{1}'),
    'disease_cause': ('m.name', 'm.cause', '{0}可能的成因有：{1}'),
    'disease_prevent': ('m.name', 'm.prevent', '{0}的预防措施包括：{1}'),
    'disease_lasttime': ('m.name', 'm.cure_lasttime', '{0}治疗可能持续的周期为：{1}'),
    'disease_cureprob': ('m.name', 'm.cured_prob', '{0}治愈的概率为（仅供参考）：{1}'),
    'disease_easyget': ('m.name', 'm.easy_get', '{0}的易感人群包括：{1}'),
    'disease_desc': ('m.name', 'm.desc', '{0},熟悉一下：{1}'),
    'disease_not_food': ('m.name', 'n.name', '{0}忌食的食物包括有：{1}'),
    'disease_drug': ('m.name', 'n.name', '{0}通常的使用的药品包括：{1}'),
    'drug_disease': ('n.name', 'm.name', '{0}主治的疾病有{1},可以试试'),
    'disease_check': ('m.name', 'n.name', '{0}通常可以通过以下方式检查出来：{1}'),
    'check_disease': ('n.name', 'm.name', '通常可以通过{0}检查出来的疾病有{1}'),
}


def _distinct(values, limit=_ANSWER_LIMIT):
    """Return the first ``limit`` distinct non-None values, in first-seen order."""
    return list(dict.fromkeys(v for v in values if v is not None))[:limit]


def answer_prettify(question_type, answers):
    """Render query result rows as a reply sentence for ``question_type``.

    Values are de-duplicated in first-seen order and capped at 20 per list,
    so the same rows always give the same sentence. ``None`` values are
    skipped.

    Args:
        question_type: question-kind label such as ``'disease_symptom'``.
        answers: list of result rows, each a dict keyed by the returned column
            names (``'m.name'``, ``'n.name'``, ``'r.name'``, ``'m.cause'``...).

    Returns:
        The reply sentence, or ``''`` when there are no rows, the question
        type is unknown, or every value to be listed is ``None``.
    """
    if not answers:
        return ''
    # NOTE(review): the subject is always taken from the first row, so rows
    # belonging to several entities are reported under one name.
    first = answers[0]

    if question_type in _SIMPLE_ANSWERS:
        subject_key, value_key, template = _SIMPLE_ANSWERS[question_type]
        values = _distinct(row[value_key] for row in answers)
        if not values:
            return ''
        return template.format(first[subject_key], '；'.join(values))

    if question_type == 'disease_cureway':
        # Each row's cure_way is itself joined with ';' before listing.
        values = _distinct(
            ';'.join(row['m.cure_way'])
            for row in answers
            if row['m.cure_way'] is not None
        )
        if not values:
            return ''
        return '{0}可以尝试如下治疗：{1}'.format(first['m.name'], '；'.join(values))

    if question_type == 'disease_acompany':
        subject = first['m.name']
        # The subject may be on either end of the relationship; list the
        # names on both ends except the subject itself.
        both_ends = [row['n.name'] for row in answers] + [row['m.name'] for row in answers]
        values = _distinct(name for name in both_ends if name != subject)
        return '{0}的并发症包括：{1}'.format(subject, '；'.join(values))

    if question_type == 'disease_do_food':
        do_desc = _distinct(row['n.name'] for row in answers if row['r.name'] == '宜吃')
        recommand_desc = _distinct(row['n.name'] for row in answers if row['r.name'] == '推荐食谱')
        return '{0}宜食的食物包括有：{1}\n推荐食谱包括有：{2}'.format(
            first['m.name'], ';'.join(do_desc), ';'.join(recommand_desc))

    if question_type in ('food_not_disease', 'food_do_disease'):
        values = _distinct(row['m.name'] for row in answers)
        if not values:
            return ''
        if question_type == 'food_not_disease':
            template = '患有{0}的人最好不要吃{1}'
        else:
            template = '患有{0}的人建议多试试{1}'
        return template.format('；'.join(values), first['n.name'])

    # Unknown question type.
    return ''

In [8]:
def search(sqls):
    """Run each query bundle against the graph database and format the replies.

    Args:
        sqls: list of dicts, each with a ``'question_type'`` label and a
            ``'sql'`` list of query strings.

    Returns:
        The list of non-empty reply sentences, one per bundle at most.
    """
    graph = Graph("http://localhost:7474", auth=("neo4j", "test"))
    replies = []
    for bundle in sqls:
        question_type = bundle['question_type']
        # Pool the rows of every query belonging to this question type.
        rows = []
        for statement in bundle['sql']:
            rows.extend(graph.run(statement).data())
        reply = answer_prettify(question_type, rows)
        if reply:
            replies.append(reply)
    return replies

In [9]:
def chatbot(question):
    """Answer ``question`` end to end: classify, build queries, search, format.

    The classification result is printed as a side effect.

    Returns:
        The reply sentences joined by newlines, or a fixed fallback message
        when the question could not be classified or nothing was found.
    """
    fallback = "没能理解您的意思"
    # Find which dictionary terms and question kinds the sentence contains.
    res_classify = classifier(question)
    print(res_classify)
    if res_classify:
        # Build the queries, run them, and turn the rows into sentences.
        replies = search(parser(res_classify))
        if replies:
            return '\n'.join(replies)
    return fallback

In [12]:
# Demo run: ask one hard-coded question and print the bot's reply.
# The commented-out line below is the interactive variant (reads the question
# from stdin).
# question = input("请问您有什么问题呢？")
answer = chatbot("肺病的形状是什么")
print("客服机器人:", answer)

{'args': {'肺病': ['disease']}, 'question_types': ['disease_desc']}
客服机器人: 没能理解您的意思
