### 快速的基于模板的对话机器人


### 获得是否匹配

In [14]:
def is_variable(pat):
    """Return True if *pat* is a single-token variable such as ``?X``.

    A variable is a ``?`` followed by one or more alphabetic characters.
    A bare ``?`` is a literal token, not an unnamed variable.
    """
    # str.isalpha() is False for an empty string, which rejects a bare '?'.
    return pat.startswith('?') and pat[1:].isalpha()

In [20]:
def pat_match(pattern, saying):
    """Report whether *saying* agrees with *pattern* up to the first variable.

    Both arguments are lists of tokens. Literal tokens are compared pairwise
    from the left; as soon as a variable token (see ``is_variable``) is
    reached the pattern is accepted and the remaining tokens are not examined.

    Returns:
        True if a variable is reached, or if both lists are used up together
        after matching literally; False otherwise.
    """
    # Running out of tokens used to raise IndexError on pattern[0]/saying[0].
    if not pattern:
        return not saying
    if is_variable(pattern[0]):
        return True
    if not saying or pattern[0] != saying[0]:
        return False
    return pat_match(pattern[1:], saying[1:])

In [3]:
# The literal tokens "I want" agree and ?X is reached, so the pattern is accepted.
pat_match('I want ?X'.split(), 'I want holiday'.split())

True

In [21]:
# Matching is decided at ?X; the trailing "here" tokens are not compared.
pat_match('I want ?X here'.split(),'I want holiday here'.split())

True

### 获得匹配的变量

In [22]:
def pat_match(pattern, saying):
    """Return the binding of the first variable in *pattern*.

    Literal tokens are compared pairwise from the left. When a variable token
    (see ``is_variable``) is reached, it is paired with the token at the same
    position in *saying* and the search stops.

    Returns:
        A ``(variable, token)`` tuple, or False when a literal token differs
        or either list runs out before a variable is found.
    """
    # Running out of tokens used to raise IndexError on pattern[0]/saying[0].
    if not pattern or not saying:
        return False
    if is_variable(pattern[0]):
        return pattern[0], saying[0]
    if pattern[0] != saying[0]:
        return False
    return pat_match(pattern[1:], saying[1:])

In [23]:
# Token lists for the next cell; the pattern has one more token than the saying.
pattern = 'I want ?X here'.split()
saying = 'I want holiday'.split()

In [24]:
# The function returns at the first variable, so the extra "here" is never reached.
pat_match(pattern, saying)

('?X', 'holiday')

#### 如果具备两个匹配的变量

In [40]:
def pat_match(pattern, saying):
    """Match two token lists and collect every variable binding.

    Tokens are compared pairwise from the left. A variable token (see
    ``is_variable``) binds the token at the same position in *saying*; a
    literal token must be equal to it. Matching stops as soon as either list
    is used up.

    Returns:
        A list of ``(variable, token)`` tuples in pattern order (possibly
        empty), or False if a literal token differs.
    """
    if not pattern or not saying:
        return []
    head, word = pattern[0], saying[0]
    if is_variable(head):
        bindings = [(head, word)]
    elif head == word:
        bindings = []
    else:
        return False
    rest = pat_match(pattern[1:], saying[1:])
    # A later mismatch must fail the whole match; concatenating it onto the
    # bindings used to raise TypeError (list + False).
    if rest is False:
        return False
    return bindings + rest

In [17]:
# Two variables: each one binds the token found at its own position.
pat_match("?X greater than ?Y".split(), "3 greater than 2".split())

[('?X', '3'), ('?Y', '2')]

#### 建立两个函数，把解析出来的结果变成一个字典，依据字典按照定义方式进行替换

In [25]:
def pat_to_dict(patterns):
    """Build a ``{variable: value}`` dict from an iterable of pairs.

    When a variable occurs more than once, the last pair wins.
    """
    bindings = {}
    for name, value in patterns:
        bindings[name] = value
    return bindings

In [26]:
def subsitite(rule, parsed_rules):
    """Replace every token of *rule* that has a binding in *parsed_rules*.

    Args:
        rule: sequence of tokens (e.g. a split response template).
        parsed_rules: dict mapping variable tokens to their replacements.

    Returns:
        A new list with bound tokens replaced and all other tokens kept.
    """
    # Iterative on purpose: the recursive version sliced the list at every
    # step and hit the recursion limit on long token lists.
    return [parsed_rules.get(token, token) for token in rule]

In [19]:
# Capture the binding of ?X from a concrete sentence.
got_pattern = pat_match("I want ?X".split(), "I want iphone".split())

In [20]:
# Display the captured bindings.
got_pattern

[('?X', 'iphone')]

In [21]:
# Fill the ?X slot of a response template with the captured binding.
subsitite("What if you mean if you got a ?X".split(), pat_to_dict(got_pattern))

['What', 'if', 'you', 'mean', 'if', 'you', 'got', 'a', 'iphone']

#### 可以实现基于模板的对话生成

In [22]:
# Rule table: each key is a pattern with ?variables, each value lists the
# candidate response templates that may reuse those variables.
defined_patterns = {
    "I need ?X": ["Image you will get ?X soon", "Why do you need ?X ?"], 
    "My ?X told me something": ["Talk about more about your ?X", "How do you think about your ?X ?"]
}

In [26]:
import random

In [32]:
def get_response(saying, rules):
    """Answer *saying* using the first pattern in *rules* that matches it.

    Args:
        saying: the user's sentence as a whitespace-separated string.
        rules: dict mapping a pattern string to a list of response templates.

    Returns:
        One randomly chosen template with its variables filled in, or None
        when no pattern matches.
    """
    for template, candidates in rules.items():
        bindings = pat_match(template.split(), saying.split())
        # Compare against False explicitly: an empty binding list is a match.
        if bindings == False:
            continue
        chosen = random.choice(candidates)
        filled = subsitite(chosen.split(), pat_to_dict(bindings))
        return " ".join(filled)

In [29]:
# Matches the "I need ?X" rule; one of its two templates is picked at random.
get_response('I need iPhone', defined_patterns)

'Image you will get iPhone soon'

In [37]:
# Matches the "My ?X told me something" rule with ?X bound to "mother".
get_response('My mother told me something',defined_patterns)

'How do you think about your mother ?'

### 由逐字逐句匹配变成匹配多个

In [8]:
def is_pattern_segment(pattern):
    """Return True if *pattern* is a segment variable such as ``?*P``.

    A segment variable is ``?*`` followed by one or more alphabetic
    characters. A bare ``?*`` is not accepted.
    """
    # str.isalpha() is False for an empty string, which rejects a bare '?*'.
    return pattern.startswith('?*') and pattern[2:].isalpha()

In [2]:
# "?*p" has the segment prefix followed by an alphabetic name.
is_pattern_segment('?*p')

True

In [3]:
from collections import defaultdict

In [6]:
def segment_match(pattern, saying):
    """Bind the leading segment variable of *pattern* to a prefix of *saying*.

    ``pattern[0]`` is expected to be a segment variable (``?*X``). The token
    right after it acts as an anchor: the segment ends at the first position
    in *saying* where the anchor occurs and ``is_match`` accepts what follows.

    Returns:
        ``((name, tokens), index)`` where *name* is the variable rewritten as
        ``?X``, *tokens* is the captured prefix and *index* is the position of
        the anchor in *saying*; or the module-level ``fail`` value when no
        anchor position works.
    """
    name = pattern[0].replace('?*', '?')
    remainder = pattern[1:]
    if not remainder:
        # Nothing follows the segment, so it takes every remaining token.
        return (name, saying), len(saying)
    anchor, lookahead = remainder[0], remainder[1:]
    for position, word in enumerate(saying):
        if word != anchor:
            continue
        if is_match(lookahead, saying[position + 1:]):
            return (name, saying[:position]), position
    # No anchor position was found: the segment cannot be matched.
    return fail

In [7]:
def is_match(rest, saying):
    """Check that the literal tokens at the start of *rest* occur in *saying*.

    Tokens are compared pairwise from the left. The check succeeds once both
    lists are used up, or as soon as a token of *rest* contains a
    non-alphabetic character (which is how a variable such as ``?*X`` is
    recognised here); everything after that point is left to the caller.

    Returns:
        True if the lookahead is consistent, False otherwise.
    """
    if not rest:
        # Leftover saying tokens after the pattern ended are a mismatch.
        return not saying
    if not all(a.isalpha() for a in rest[0]):
        return True
    # An exhausted saying cannot supply the literal token still required;
    # indexing it used to raise IndexError.
    if not saying or rest[0] != saying[0]:
        return False
    return is_match(rest[1:], saying[1:])

In [54]:
# The saying contains no "is" token, so the loop finds no anchor for the segment.
# NOTE(review): the recorded output below looks like it came from an earlier
# version of segment_match; confirm by re-running the cell.
segment_match('?*P is very good'.split(),"My dog and my cat are very good".split())

(('?P', ['My', 'dog', 'and', 'my', 'cat', 'are', 'very', 'good']), 8)

In [61]:
# "is" at index 5 anchors the segment, so ?P captures the five tokens before it.
segment_match('?*P is very good'.split(),"My dog and my cat is very good".split())

(('?P', ['My', 'dog', 'and', 'my', 'cat']), 5)

In [11]:
# Sentinel returned when a pattern cannot be matched.
fail = [True, None]


def pat_match_with_seg(pattern, saying):
    """Match *saying* against *pattern*, supporting ``?X`` and ``?*X``.

    ``?X`` binds exactly one token, ``?*X`` binds a run of tokens (found by
    ``segment_match``) and any other token must be equal to the token in
    *saying*. Matching stops as soon as either list is used up.

    Returns:
        A list of bindings: ``(name, token)`` for single variables and
        ``(name, [tokens])`` for segments. The ``fail`` sentinel is returned
        when the pattern does not match.
    """
    if not pattern or not saying:
        return []
    pat = pattern[0]
    if is_variable(pat):  # ?X
        bindings, consumed = [(pat, saying[0])], 1
    elif is_pattern_segment(pat):  # ?*X
        # Call segment_match once and reuse its result.
        result = segment_match(pattern, saying)
        if result == fail:
            return fail
        match, consumed = result
        bindings = [match]
    elif pat == saying[0]:
        bindings, consumed = [], 1
    else:
        return fail
    rest = pat_match_with_seg(pattern[1:], saying[consumed:])
    # Propagate a later failure instead of concatenating the sentinel into
    # the bindings, which made a failed match look like a success.
    if rest == fail:
        return fail
    return bindings + rest

In [56]:
# Two segment variables separated by the literal run "is very good and".
pat_match_with_seg("?*P is very good and ?*X".split(),"My dog is very good and my cat is cute".split())

[('?P', ['My', 'dog']), ('?X', ['my', 'cat', 'is', 'cute'])]

In [16]:
# Sample rule table (pattern -> candidate responses). It is not referenced by
# any other cell in the visible part of the notebook.
response_pair = {
    'I need ?X': [
        "Why do you neeed ?X"
    ],
    "I dont like my ?X": ["What bad things did ?X do for you?"]
}

In [52]:
# A segment variable at the end of the pattern takes every remaining token.
pat_match_with_seg('I need ?*X'.split(), "I need an iphone".split())

[('?X', ['an', 'iphone'])]

In [62]:
# "hello" never occurs in the saying, so the fail sentinel is returned.
pat_match_with_seg('?*X hello ?*Y'.split(),"phone I want".split())

[True, None]

In [27]:
# The pat_to_dict defined so far keeps the captured token list as it is, so a
# list object is substituted for ?X.
subsitite("Why do you neeed ?X".split(), pat_to_dict(pat_match_with_seg('I need ?*X'.split(), 
                  "I need an iPhone".split())))

['Why', 'do', 'you', 'neeed', ['an', 'iPhone']]

In [28]:
def pat_to_dict(patterns):
    """Build a ``{variable: value}`` dict from an iterable of pairs.

    Values that are lists (segment captures) are joined with single spaces;
    every other value is stored unchanged.
    """
    bindings = {}
    for name, captured in patterns:
        if isinstance(captured, list):
            captured = ' '.join(captured)
        bindings[name] = captured
    return bindings

In [29]:
# With the redefined pat_to_dict the captured tokens are joined into one string.
subsitite("Why do you neeed ?X".split(), pat_to_dict(pat_match_with_seg('I need ?*X'.split(), 
                  "I need an iPhone".split())))

['Why', 'do', 'you', 'neeed', 'an iPhone']

In [35]:
# ?X is bound to the tokens before "hello"; nothing follows "hello" in the
# saying, so ?Y receives no binding.
subsitite("Hi ?X how do you do?".split(), pat_to_dict(pat_match_with_seg("?*X hello ?*Y".split(), "I am Mike, hello".split())))

['Hi', 'I am Mike,', 'how', 'do', 'you', 'do', '?Y', '?']

### Task 1

#### 编写一个程序, get_response(saying, response_rules)输入是一个字符串 + 我们定义的 rules，例如上边我们所写的 pattern， 输出是一个回答。

In [36]:
# Task 1 rule table: patterns with segment variables mapped to candidate
# response templates.
rules = {
    "?*X hello ?*Y": ["Hi, how do you do?"],
    "I was ?*X": ["Were you really ?X ?", "I already knew you were ?X ."]
}

In [41]:
import random

In [63]:
def get_response(saying, response_rules):
    """Print a response for every rule in *response_rules* matching *saying*.

    Args:
        saying: the user's sentence as a whitespace-separated string.
        response_rules: dict mapping a pattern string to a list of response
            templates.

    For each matching pattern one template is chosen at random, its variables
    are filled in and the result is printed. Nothing is returned.
    """
    for template, candidates in response_rules.items():
        result = pat_match_with_seg(template.split(), saying.split())
        if result == fail:
            continue
        # Use the templates of the rule table passed in; the global ``rules``
        # was read here before, which ignored the argument.
        chosen = random.choice(candidates)
        response = " ".join(subsitite(chosen.split(), pat_to_dict(result)))
        print(response)

In [66]:
# Matches the "?*X hello ?*Y" rule, whose only template has no variables.
get_response("I am Mike, hello", rules)

Hi, how do you do?


In [65]:
# Matches the "I was ?*X" rule with ?X bound to the tokens after "was".
get_response("I was an actor", rules)

I already knew you were an actor .


### Task 2

#### 将以上程序改写为能够支持中文输入的模式（提示：jieba分词）

In [2]:
import random
import re
from collections import defaultdict

import jieba

In [30]:
# Chinese rule table: patterns are written without spaces and are tokenized
# by dealInput (jieba) before matching; values are candidate responses.
rules = {
    '?*x我?*z梦见?*y':['真的吗? --- ?y', '你在醒着的时候，以前想象过?y吗？', '你以前梦见过?y吗'],
    '?*x所有人?*y': ['我确定不是人人都是', '你能想到一点特殊情况吗？', '例如谁？', '你看到的其实只是一小部分人'],
    '?*x总是?*y': ['你能想到一些其他情况吗?', '例如什么时候?', '你具体是说哪一次？', '真的---总是吗？'],
    '?*x一直?*y': ['你能想到一些其他情况吗?', '例如什么时候?', '你具体是说哪一次？', '真的---总是吗？'],
    '?*x或许?*y': ['你看起来不太确定']
}

In [None]:
def is_pattern_segment(pattern):
    """Return True if *pattern* is a segment variable such as ``?*x``.

    A segment variable is ``?*`` followed by one or more alphabetic
    characters. A bare ``?*`` is not accepted.
    """
    # str.isalpha() is False for an empty string, which rejects a bare '?*'.
    return pattern.startswith('?*') and pattern[2:].isalpha()
def is_variable(pat):
    """Return True if *pat* is a single-token variable such as ``?x``.

    A variable is a ``?`` followed by one or more alphabetic characters.
    A bare ``?`` is a literal token, not an unnamed variable.
    """
    # str.isalpha() is False for an empty string, which rejects a bare '?'.
    return pat.startswith('?') and pat[1:].isalpha()

In [None]:
def segment_match(pattern, saying):
    """Bind the leading segment variable of *pattern* to a prefix of *saying*.

    ``pattern[0]`` is expected to be a segment variable (``?*x``). The token
    right after it acts as an anchor: the segment ends at the first position
    in *saying* where the anchor occurs and ``is_match`` accepts what follows.

    Returns:
        ``((name, tokens), index)`` where *name* is the variable rewritten as
        ``?x``, *tokens* is the captured prefix and *index* is the position of
        the anchor in *saying*; or the module-level ``fail`` value when no
        anchor position works.
    """
    name = pattern[0].replace('?*', '?')
    remainder = pattern[1:]
    if not remainder:
        # Nothing follows the segment, so it takes every remaining token.
        return (name, saying), len(saying)
    anchor, lookahead = remainder[0], remainder[1:]
    for position, word in enumerate(saying):
        if word != anchor:
            continue
        if is_match(lookahead, saying[position + 1:]):
            return (name, saying[:position]), position
    # No anchor position was found: the segment cannot be matched.
    return fail

In [None]:
def is_match(rest, saying):
    """Check that the literal tokens at the start of *rest* occur in *saying*.

    Tokens are compared pairwise from the left. The check succeeds once both
    lists are used up, or as soon as a token of *rest* contains a
    non-alphabetic character (which is how a variable such as ``?*x`` is
    recognised here); everything after that point is left to the caller.

    Returns:
        True if the lookahead is consistent, False otherwise.
    """
    if not rest:
        # Leftover saying tokens after the pattern ended are a mismatch.
        return not saying
    if not all(a.isalpha() for a in rest[0]):
        return True
    # An exhausted saying cannot supply the literal token still required;
    # indexing it used to raise IndexError.
    if not saying or rest[0] != saying[0]:
        return False
    return is_match(rest[1:], saying[1:])

In [None]:
# Sentinel returned when a pattern cannot be matched.
fail = [True, None]


def pat_match_with_seg(pattern, saying):
    """Match *saying* against *pattern*, supporting ``?x`` and ``?*x``.

    ``?x`` binds exactly one token, ``?*x`` binds a run of tokens (found by
    ``segment_match``) and any other token must be equal to the token in
    *saying*. Matching stops as soon as either list is used up.

    Returns:
        A list of bindings: ``(name, token)`` for single variables and
        ``(name, [tokens])`` for segments. The ``fail`` sentinel is returned
        when the pattern does not match.
    """
    if not pattern or not saying:
        return []
    pat = pattern[0]
    if is_variable(pat):  # ?x
        bindings, consumed = [(pat, saying[0])], 1
    elif is_pattern_segment(pat):  # ?*x
        # Call segment_match once and reuse its result.
        result = segment_match(pattern, saying)
        if result == fail:
            return fail
        match, consumed = result
        bindings = [match]
    elif pat == saying[0]:
        bindings, consumed = [], 1
    else:
        return fail
    rest = pat_match_with_seg(pattern[1:], saying[consumed:])
    # Propagate a later failure instead of concatenating the sentinel into
    # the bindings, which made a failed match look like a success.
    if rest == fail:
        return fail
    return bindings + rest

In [23]:
def dealInput(pat):
    """Tokenize *pat* with jieba and return the tokens joined by spaces.

    After the join, a variable marker appears in pieces (``? * x`` or
    ``? x``). The pieces are glued back together so that a later
    ``str.split()`` yields ``?*x`` / ``?x`` as a single token. Any name made
    of ASCII letters is handled, not only x, y and z.
    """
    pat = ' '.join(jieba.cut(pat))
    # NOTE(review): assumes jieba emits a multi-letter name as one token;
    # confirm before relying on names longer than one letter.
    # Segment markers first; afterwards they contain no "? name" piece.
    pat = re.sub(r'\? \* ([A-Za-z]+)', r'?*\1', pat)
    return re.sub(r'\? ([A-Za-z]+)', r'?\1', pat)

In [12]:
# Segment matching on jieba tokens: ?x takes the tokens before the anchor.
segment_match(dealInput("?*x如果?*y").split(), list(jieba.cut("假如如果")))

(('?x', ['假如']), 1)

In [15]:
# Literal prefix, then two segment variables separated by a literal token.
pat_match_with_seg(dealInput("你希望?*x去?*y").split(),list(jieba.cut("你希望小明去写作业")))

[('?x', ['小明']), ('?y', ['写', '作业'])]

In [16]:
# The literal part between the two segments includes a full-width comma.
pat_match_with_seg(dealInput("?*x，我去了趟?*y").split(), list(jieba.cut("妈妈，我去了趟北京")))

[('?x', ['妈妈']), ('?y', ['北京'])]

In [24]:
# Three segment variables in a single pattern.
pat_match_with_seg(dealInput("?*x我?*z梦见?*y").split(),list(jieba.cut("妈妈我好像梦见爸爸了")))

[('?x', ['妈妈']), ('?z', ['好像']), ('?y', ['爸爸', '了'])]

In [19]:
def pat_to_dict_ch(patterns):
    """Build a ``{variable: value}`` dict from an iterable of pairs.

    Values that are lists (segment captures) are concatenated without a
    separator; every other value is stored unchanged.
    """
    bindings = {}
    for name, captured in patterns:
        if isinstance(captured, list):
            captured = ''.join(captured)
        bindings[name] = captured
    return bindings

In [18]:
def subsitite(rule, parsed_rules):
    """Replace every token of *rule* that has a binding in *parsed_rules*.

    Args:
        rule: sequence of tokens (e.g. a split response template).
        parsed_rules: dict mapping variable tokens to their replacements.

    Returns:
        A new list with bound tokens replaced and all other tokens kept.
    """
    # Iterative on purpose: the recursive version sliced the list at every
    # step and hit the recursion limit on long token lists.
    return [parsed_rules.get(token, token) for token in rule]

In [112]:
# Substitute the ?y capture into a tokenized reply template.
subsitite(dealInput("你真的去了趟?y").split(),pat_to_dict_ch(pat_match_with_seg(dealInput("?*x我去了趟?*y").split(), list(jieba.cut("妈妈，我去了趟北京")))))

['你', '真的', '去', '了', '趟', '北京']

In [36]:
# Same flow with the three-variable pattern; only ?y appears in the reply.
subsitite(dealInput("你以前梦见过?y吗").split(),pat_to_dict_ch(pat_match_with_seg(dealInput("?*x我?*z梦见?*y").split(),list(jieba.cut("我好像又梦见白雪公主")))))

['你', '以前', '梦见', '过', '白雪公主', '吗']

In [37]:
def get_response(saying, pat_dict):
    """Print a response for every pattern in *pat_dict* matching *saying*.

    Args:
        saying: the user's sentence as an unsegmented string.
        pat_dict: dict mapping a pattern string to a list of response
            templates.

    Pattern, saying and template are tokenized with jieba. For each matching
    pattern one template is chosen at random, filled in and printed without
    separators. Nothing is returned.
    """
    for template, candidates in pat_dict.items():
        pattern_tokens = dealInput(template).split()
        result = pat_match_with_seg(pattern_tokens, list(jieba.cut(saying)))
        if result == fail:
            continue
        reply_tokens = dealInput(random.choice(candidates)).split()
        filled = subsitite(reply_tokens, pat_to_dict_ch(result))
        print(''.join(filled))
        

In [54]:
# Expected to hit the "梦见" rule; the reply reuses the ?y capture.
get_response("我好像又梦见会飞的大象",rules)

真的吗?---会飞的大象


In [42]:
# Expected to hit the "所有人" rule, whose replies contain no variables.
get_response("所有人都会编程",rules)

例如谁？


In [50]:
# Expected to hit the "总是" rule.
get_response("她总是哭",rules)

真的---总是吗？


In [53]:
# Expected to hit the "或许" rule, which has a single reply.
get_response("可能或许我能约她",rules)

你看起来不太确定


### Task 4

#### 1.这样的程序有什么优点？有什么缺点？你有什么可以改进的方法吗？
<p>优点：这样的程序速度快、运行时间短，基于关键词匹配从已有的规则库中随机选取回答，响应迅速。缺点：回答单一，对用户输入的要求高；对于含义相似但措辞不同的问题无法识别，只有严格匹配规则的输入才能得到回答。</p>

####  2.什么是数据驱动？数据驱动在这个程序里如何体现？
<p>数据驱动，个人理解为机器基于数据完成决策的一系列过程：机器从数据中获得“信息”，再根据这些信息完成决策。在该程序中，我们预先定义好匹配规则，将每一句输入与规则库进行匹配，再从匹配到的规则中随机选择一个回答作为输出。整个过程都建立在数据驱动的基础上，如果没有定义好的规则，这个程序就无法运行。
</p>

#### 3.数据驱动与 AI 的关系是什么？
<p>
    在 AI 中，数据驱动非常重要：没有数据驱动，机器就无法学习。AI 不断地从数据中提取特征进行学习，模型的训练就是这样一个学习的过程。
</p>