# 基于模式匹配的对话机器人实现

## Pattern Match

In [206]:
import random

In [207]:
def is_variable(pat):
    return pat.startswith('?') and all(s.isalpha() for s in pat[1:])

In [208]:
is_variable('?happy')

True

In [209]:
is_variable('happy')

False

In [210]:
def pat_match(pattern,saying):
    if is_variable(pattern[0]):
        print(pattern[0])
        return True
    else:
        if pattern[0] != saying[0]: return False
        else:
            return pat_match(pattern[1:],saying[1:])

In [211]:
pat_match('I want ?X'.split(),'I want holiday'.split())

?X


True

In [212]:
pat_match('I want ?X'.split(),'I like holiday'.split())

False

## 获得匹配的变量

In [213]:
def is_variable(pat):
    return pat.startswith('?') and all(s.isalpha() for s in pat[1:])

In [214]:
def pat_match(pattern, saying):
    if is_variable(pattern[0]):
        return pattern[0],saying[0]
    else:
        if pattern[0]!=saying[0]: return False
        else:
            return pat_match(pattern[1:],saying[1:])

In [215]:
pat_match('I want ?X'.split(),'I want holiday'.split())

('?X', 'holiday')

In [216]:
pat_match('?Z equals ?Y'.split(),'3+2 equals 2+2'.split())

('?Z', '3+2')

In [217]:
def is_variable(pat):
    return pat.startswith('?') and all(s.isalpha() for s in pat[1:])

In [218]:
def pat_match(pattern, saying):
    if not pattern or not saying: return []
    
    if is_variable(pattern[0]):
        return [(pattern[0],saying[0])]+pat_match(pattern[1:],saying[1:])
    else:
        if pattern[0]!=saying[0]: return []
        else:
            return pat_match(pattern[1:],saying[1:])

In [219]:
pat_match('?Z equals ?Y'.split(),'3+2 equals 2+2'.split())

[('?Z', '3+2'), ('?Y', '2+2')]

## 新建两个函数
1.结果转为一个dictionary
2.根据这个dictionary依照自定义的方式替换

In [220]:
def pat_to_dict(patterns):
    return {k : v for k ,v in patterns}

In [221]:
pat_to_dict(pat_match('?Z equals ?Y'.split(),'3+2 equals 2+2'.split()))

{'?Y': '2+2', '?Z': '3+2'}

In [222]:
def subsitite(rule,parsed_rules):
    if not rule: return []
    return [parsed_rules.get(rule[0],rule[0])]+subsitite(rule[1:],parsed_rules)

In [223]:
got_patterns = pat_match('I want ?X'.split(),'I want iphone'.split())

In [224]:
subsitite("What if you mean if you got a ?X".split(),pat_to_dict(got_patterns))

['What', 'if', 'you', 'mean', 'if', 'you', 'got', 'a', 'iphone']

In [225]:
John_pat = pat_match('?P needs ?X'.split(),'John needs resting'.split())

In [226]:
' '.join(subsitite("What if you mean if you got a ?X".split(),pat_to_dict(got_patterns)))

'What if you mean if you got a iphone'

In [227]:
subsitite("Why does ?P need ?X ?".split(),pat_to_dict(John_pat))

['Why', 'does', 'John', 'need', 'resting', '?']

In [228]:
' '.join(subsitite("Why does ?P need ?X ?".split(),pat_to_dict(John_pat)))

'Why does John need resting ?'

In [229]:
defined_patterns={
    "I need ?X":["Image you will get ?X soon","Why do you need ?X ?"],
    "My ?X told me something":["Talk about more about your ?X","How do you think about your ?X ?"]
}

In [271]:
def get_response(saying): 
    
#     1.首先提取出saying对应define_patterns 的key值
#     2.随机选择该key值对应的value
#     3.将对应value值?X用subsitite()替换掉
    
    
    pattern=[]
    for i in defined_patterns:
        k = pat_match(i.split(), saying.split())
        if k:
            response = random.choice(defined_patterns[i])
            return ' '.join(subsitite(response.split(),pat_to_dict(k)))
            
#     遍历怎么找到对应的？==>不为[] 就返回
#     转换为字典

In [272]:
get_response('I need iPhone')

'Why do you need iPhone ?'

In [232]:
def is_pattern_segment(pattern):
    return pattern.startswith('?*') and all(a.isalpha() for a in pattern[2:])

In [233]:
is_pattern_segment('?*P')

True

In [234]:
from collections import defaultdict

In [235]:
fail = [True,None]

def pat_match_with_seg(pattern,saying):
    if not pattern or not saying:return []
    
    pat = pattern[0]
    
    if is_variable(pat): #多个变量的情况
        return [(pat, saying[0])] + pat_match_with_seg(pattern[1:], saying[1:])
    elif is_pattern_segment(pat):
        match, index = segment_match(pattern, saying)
        return [match] + pat_match_with_seg(pattern[1:], saying[index:])
    elif  pat == saying[0]:
        return pat_match_with_seg(pattern[1:],saying[1:])
    else:
        return fail
    

In [279]:
def segment_match(pattern, saying):
    seg_pat, rest = pattern[0], pattern[1:]
    seg_pat = seg_pat.replace('?*', '?')
    
    if not rest: return (seg_pat, saying), len(saying)
    
    for i, token in enumerate(saying): #enumerate() 函数用于将一个可遍历的数据对象(如列表、元组或字符串)组合为一个索引序列，同时列出数据和数据下标，一般用在 for 循环当中。
        if rest[0] == token and is_match(rest[1:], saying[(i + 1):]):
            return(seg_pat, saying[:i]), i
    return (seg_pat, saying), len(saying)

def is_match(rest, saying):
    if not rest and not saying:
        return True
    if not all(a.isalpha() for a in rest[0]):
        return True
    if rest[0]!= saying[0]:
        return False
    return is_match(rest[1:], saying[1:])

In [237]:
segment_match('?*P is very good'.split(),'My dog and my cat is very good'.split())

(('?P', ['My', 'dog', 'and', 'my', 'cat']), 5)

In [238]:
segment_match('?*P is very good and ?*X'.split(),'My dog is very good and my cat is very cute'.split())

(('?P', ['My', 'dog']), 2)

In [239]:
pat_match_with_seg('?*P is very good and ?*X'.split(),'My dog is very good and my cat is very cute'.split())

[('?P', ['My', 'dog']), ('?X', ['my', 'cat', 'is', 'very', 'cute'])]

In [240]:
response_pair = {
    'I need ?X':[
        'Why do you need ?X'
    ],
    "I don't like my ?X":[
        'What bad things did ?X do for you?'
    ]
}

In [241]:
pat_match_with_seg('I need ?*X'.split(),'I need an iPhone'.split())

[('?X', ['an', 'iPhone'])]

In [242]:
subsitite("Why do you need ?X".split(),pat_to_dict(pat_match_with_seg('I need ?*X'.split(),'I need an iPhone'.split())))

['Why', 'do', 'you', 'need', ['an', 'iPhone']]

In [243]:
def pat_to_dict(patterns):
    return {k: ' '.join(v) if isinstance(v, list) else v for k, v in patterns}

In [244]:
subsitite("Why do you need ?X".split(),pat_to_dict(pat_match_with_seg('I need ?*X'.split(),'I need an iPhone'.split())))

['Why', 'do', 'you', 'need', 'an iPhone']

In [245]:
("?*X hello ?*Y","Hi, how do you do")

('?*X hello ?*Y', 'Hi, how do you do')

In [246]:
subsitite("Hi, how do you do".split(),pat_to_dict(pat_match_with_seg('?*X  hello ?*Y'.split(),'I am mike, hello'.split())))

['Hi,', 'how', 'do', 'you', 'do']

In [285]:
rules = {
    "?*X hello ?*Y":["Hi, how do you do?"],
    "I was ?*X": ["Were you really ?X ?","I already knew you were ?X ."]
}

In [286]:
def get_response2(saying, response_rules): 

    pattern=[]
    for i in response_rules:
        k = pat_match_with_seg(i.split(), saying.split())     #==>i 怎么不变了。。
        if k:
            response = random.choice(response_rules[i])
            return ' '.join(subsitite(response.split(),pat_to_dict(k)))

In [288]:
get_response2('I was a baby ', rules)

[('?X', ['I', 'was', 'a', 'baby'])]
Hi, how do you do?


'Hi, how do you do?'

### 问题1

In [289]:
def get_response(saying): 
    
#     1.首先提取出saying对应define_patterns 的key值
#     2.随机选择该key值对应的value
#     3.将对应value值?X用subsitite()替换掉
    
    
    pattern=[]
    for i in defined_patterns:
        k = pat_match(i.split(), saying.split())
        if k:
            response = random.choice(defined_patterns[i])
            return ' '.join(subsitite(response.split(),pat_to_dict(k)))
            
#     遍历怎么找到对应的？==>不为[] 就返回
#     转换为字典

In [290]:
get_response('I need iPhone')

'Why do you need iPhone ?'

In [291]:
def get_response2(saying, response_rules): 

    pattern=[]
    for i in response_rules:
        k = pat_match_with_seg(i.split(), saying.split())     #==>i 怎么不变了。。
        if k:
            response = random.choice(response_rules[i])
            return ' '.join(subsitite(response.split(),pat_to_dict(k)))

In [292]:
get_response2('I was a baby ', rules) #为什么一直是'Hi, how do you do?'

'Hi, how do you do?'

### 问题2

### 问题3

### 问题4

In [None]:
1、这样的程序有什么优点/缺点？改进方法？
答： 优点：可以实现一些对话
        缺点：需要自定义很多句子

2、什么是数据驱动？数据驱动在这个程序中如何体现？
答：我认为指的原来用不同的数据，都需要编写不同的程序，但=data driving使用某种算法可以使不同的数据都可以运用

3、数据驱动与AI的关系
答：我们有了一定的数据，然后使用一种算法模型，再将得到的数据结果反馈，可以不断学习