# Pattern Match

In [1]:
import re
def is_variable(pat):
    """Tell whether ``pat`` is a single-word variable token such as ``?X``.

    The whole token must be a ``?`` followed by one or more ASCII letters.

    Returns:
        The ``re.Match`` object (truthy) on success, otherwise ``None``.
    """
    variable_token = re.compile(r'^\?[A-Za-z]+$')
    return variable_token.match(pat)


def pat_match(pattern, saying):
    """Match the token list ``saying`` against the token list ``pattern``.

    Tokens are compared position by position. A pattern token accepted by
    ``is_variable`` (e.g. ``?X``) binds the word at the same position; any
    other pattern token must be equal to that word.

    Args:
        pattern: list of pattern tokens.
        saying: list of input words.

    Returns:
        A list of ``(variable, word)`` tuples in pattern order. An empty list
        is returned when either input is empty, when a literal token differs,
        or when ``saying`` is shorter than ``pattern``. Words in ``saying``
        beyond the end of the pattern are ignored.
    """
    if not pattern or not saying:
        return []

    bindings = []
    for pat, word in zip(pattern, saying):
        if is_variable(pat):
            bindings.append((pat, word))
        elif pat != word:
            # A failed literal invalidates the whole match, so bindings
            # collected so far must not leak out as if the match succeeded.
            return []

    # The saying ended while pattern tokens were still unmatched.
    if len(saying) < len(pattern):
        return []
    return bindings
        

def pat_to_dict(patterns):
    """Turn an iterable of ``(variable, value)`` pairs into a dictionary.

    When a variable occurs more than once, the last value wins.
    """
    bindings = {}
    for name, value in patterns:
        bindings[name] = value
    return bindings

def subsitite(rule, parsed_rules):
    """Replace every token of ``rule`` that has a binding in ``parsed_rules``.

    Args:
        rule: sequence of tokens of a response template (may be empty or None).
        parsed_rules: mapping from variable token (e.g. ``?X``) to its value.

    Returns:
        A new list with the same length as ``rule``; tokens without a binding
        are kept unchanged. An empty list is returned for an empty ``rule``.
    """
    if not rule:
        return []

    # Iterating instead of recursing on rule[1:] avoids hitting the recursion
    # limit on long templates and the repeated list copies made by slicing.
    return [parsed_rules.get(token, token) for token in rule]

In [2]:
# Response rules: each key is a space-separated pattern in which a token such
# as ?X is a single-word variable; each value is the list of reply templates
# for that pattern, which may reuse the same variable token.
defined_patterns = {
    "I need ?X": ["Image you will get ?X soon", "Why do you need ?X ?"], 
    "My ?X told me something": ["Talk about more about your ?X", "How do you think about your ?X ?"]
}

In [3]:
import random
def get_response(saying, rules=defined_patterns):
    """Answer ``saying`` using the first rule whose pattern yields bindings.

    Args:
        saying: the input sentence; it is split on whitespace.
        rules: mapping from pattern string to a list of reply templates.

    Returns:
        One reply template of the matching rule, picked with
        ``random.choice``, with its variables replaced by the bound words and
        joined with single spaces; ``None`` when no rule produces bindings.
    """
    for pattern, templates in rules.items():
        bindings = pat_match(pattern.split(), saying.split())
        if not bindings:
            continue
        template = random.choice(templates)
        reply_tokens = subsitite(template.split(), pat_to_dict(bindings))
        return ' '.join(reply_tokens)
    return None

In [4]:
# Exercises the "I need ?X" rule; the reply is one of its templates chosen at random.
get_response('I need iPhone')

'Image you will get iPhone soon'

In [5]:
# Exercises the "My ?X told me something" rule; the reply is chosen at random.
get_response('My mother told me something')

'How do you think about your mother ?'

# Segment Match

In [6]:
def is_pattern_segment(pattern):
    """Tell whether ``pattern`` is a segment variable token such as ``?*X``.

    The whole token must be ``?*`` followed by one or more ASCII letters.

    Returns:
        The ``re.Match`` object (truthy) on success, otherwise ``None``.
    """
    segment_token = re.compile(r'^\?\*[A-Za-z]+$')
    return segment_token.match(pattern)

In [7]:
# A token starting with '?*' followed by letters is recognized as a segment variable.
is_pattern_segment('?*pppppppp')

<re.Match object; span=(0, 10), match='?*pppppppp'>

In [8]:
from collections import defaultdict

def pat_match_with_seg(pattern, saying):
    """Match ``saying`` against ``pattern`` with ``?X`` and ``?*X`` support.

    Args:
        pattern: list of pattern tokens. A token accepted by ``is_variable``
            binds exactly one word, a token accepted by
            ``is_pattern_segment`` is handed to ``segment_match``, and any
            other token must be equal to the current word.
        saying: list of input words.

    Returns:
        The list of bindings in pattern order: ``(variable, word)`` for
        single-word variables and the binding returned by ``segment_match``
        for segment variables. An empty list is returned when either input is
        empty or when the match fails. Words left in ``saying`` once the
        pattern is used up are ignored. If ``saying`` runs out first, the
        match only succeeds when every remaining pattern token is a segment
        variable; those trailing segment variables are left unbound.
    """
    if not pattern or not saying:
        return []

    bindings = []
    while pattern and saying:
        pat = pattern[0]

        if is_variable(pat):
            bindings.append((pat, saying[0]))
            pattern, saying = pattern[1:], saying[1:]
        elif is_pattern_segment(pat):
            match, index = segment_match(pattern, saying)
            if not match:
                return []
            bindings.append(match)
            # segment_match reports where the token after the segment starts.
            pattern, saying = pattern[1:], saying[index:]
        elif pat == saying[0]:
            pattern, saying = pattern[1:], saying[1:]
        else:
            # A failed literal invalidates the whole match; do not return the
            # bindings gathered before the mismatch.
            return []

    # The saying is exhausted: leftover literals or single-word variables
    # cannot be satisfied, so the match fails instead of returning partial
    # bindings.
    if any(not is_pattern_segment(token) for token in pattern):
        return []
    return bindings

In [9]:
def segment_match(pattern, saying):
    """Bind the segment variable at ``pattern[0]`` to a prefix of ``saying``.

    The variable name is reported with ``?*`` rewritten to ``?``. When the
    segment variable is the last pattern token it takes all of ``saying``.
    Otherwise the words are scanned from the left for the first position that
    equals the next pattern token and for which ``is_match`` accepts what
    follows.

    Returns:
        ``((name, captured_words), index)`` where ``index`` is the position in
        ``saying`` at which the rest of the pattern starts, or ``((), -1)``
        when no such position exists.
    """
    name = pattern[0].replace('?*', '?')
    following = pattern[1:]

    if not following:
        return (name, saying), len(saying)

    anchor = following[0]
    after_anchor = following[1:]
    position = 0
    while position < len(saying):
        if anchor == saying[position] and is_match(after_anchor, saying[(position + 1):]):
            return (name, saying[:position]), position
        position += 1
    return (), -1

def is_match(rest, saying):
    """Check whether the pattern tokens ``rest`` are compatible with ``saying``.

    Tokens are compared pairwise from the left. As soon as a pattern token
    containing a non-alphabetic character is reached (e.g. ``?X`` or ``?*X``)
    the check stops and succeeds, leaving the remainder to the caller.

    Args:
        rest: list of pattern tokens that follow the anchor word.
        saying: list of input words that follow the anchor word.

    Returns:
        ``True`` when every literal token before the first such token equals
        the word at the same position and, if no such token is present, both
        lists have the same length; ``False`` otherwise.
    """
    for position, token in enumerate(rest):
        # Any token with a non-letter character is treated as a variable.
        if not all(ch.isalpha() for ch in token):
            return True
        # Running out of words while a literal is still expected is a
        # mismatch, not an IndexError.
        if position >= len(saying) or token != saying[position]:
            return False
    # All literals matched; leftover words mean the pattern did not cover the
    # whole saying (this used to raise IndexError on rest[0]).
    return len(saying) == len(rest)

In [10]:
# The segment variable should capture every word in front of the literal 'is'.
segment_match('?*P is very good'.split(), "My dog and my cat is very good".split())

(('?P', ['My', 'dog', 'and', 'my', 'cat']), 5)

In [11]:
# Two segment variables in one pattern: one before 'is very good and', one after it.
pat_match_with_seg('?*P is very good and ?*X'.split(), "My dog is very good and my cat is very cute".split())

[('?P', ['My', 'dog']), ('?X', ['my', 'cat', 'is', 'very', 'cute'])]

In [12]:
def pat_to_dict(patterns):
    """Turn ``(variable, value)`` pairs into a dictionary of strings.

    A value that is a list (as produced for segment variables) is joined with
    single spaces; any other value is stored unchanged. When a variable occurs
    more than once, the last value wins.
    """
    bindings = {}
    for name, value in patterns:
        if isinstance(value, list):
            bindings[name] = ' '.join(value)
        else:
            bindings[name] = value
    return bindings

In [13]:
# End-to-end check: the words captured by ?*X are joined and substituted for ?X.
subsitite("Why do you neeed ?X".split(), pat_to_dict(pat_match_with_seg('I need ?*X'.split(), 
                  "I need an iPhone".split())))

['Why', 'do', 'you', 'neeed', 'an iPhone']

# 问题1

In [14]:
# Rules using segment variables: a ?*X token in the pattern is referenced as ?X
# in the reply templates.
defined_patterns = {
    "?*X hello ?*Y": ["Hi, how do you do?"],
    "I was ?*X": ["Were you really ?X ?", "I already knew you were ?X ."]
}
def get_response(saying, rules=defined_patterns):
    """Answer ``saying`` using the first rule that yields segment-aware bindings.

    Args:
        saying: the input sentence; it is split on whitespace.
        rules: mapping from pattern string to a list of reply templates.

    Returns:
        One reply template of the matching rule, picked with
        ``random.choice``, with its variables replaced and its tokens joined
        with single spaces; ``None`` when no rule produces bindings.
    """
    for pattern, templates in rules.items():
        bindings = pat_match_with_seg(pattern.split(), saying.split())
        if bindings:
            template_tokens = random.choice(templates).split()
            values = pat_to_dict(bindings)
            return ' '.join(subsitite(template_tokens, values))
    return None

In [15]:
# Exercises the "I was ?*X" rule; the reply is one of its templates chosen at random.
get_response('I was a ipone')

'I already knew you were a ipone .'

In [16]:
import jieba
def cut(sentence):
    """Segment ``sentence`` with ``jieba.cut`` and return the tokens as a list."""
    tokens = jieba.cut(sentence)
    return list(tokens)

In [17]:
def pattern_split(pattern):
    """Tokenize a pattern or reply template.

    Text containing Chinese characters (per ``is_chinese``) is delegated to
    ``chinese_split``; any other text is split on whitespace.

    Returns:
        The list of tokens.
    """
    if is_chinese(pattern):
        return chinese_split(pattern)
    # split() without an argument ignores repeated, leading and trailing
    # whitespace, so no empty tokens are produced.
    return pattern.split()
    
def sentence_split(sentence):
    """Tokenize an input sentence.

    Text containing Chinese characters (per ``is_chinese``) is segmented with
    ``cut``; any other text is split on whitespace.

    Returns:
        The list of tokens.
    """
    if is_chinese(sentence):
        return cut(sentence)
    # split() without an argument ignores repeated, leading and trailing
    # whitespace, so stray spaces in the input cannot create empty tokens
    # that would break matching.
    return sentence.split()

def is_chinese(sentence):
    """Return True when any character of ``sentence`` lies in U+4E00..U+9FA5."""
    return any('\u4e00' <= ch <= '\u9fa5' for ch in sentence)

def chinese_split(pattern):
    """Tokenize a Chinese pattern while keeping placeholders intact.

    Placeholders (``?name`` or ``?*name`` with ASCII letters) are kept as
    single tokens; the text between them is segmented with ``cut``.

    Args:
        pattern: pattern or reply template string.

    Returns:
        A flat list of tokens in their original order. A pattern without any
        placeholder is simply the result of ``cut(pattern)``.
    """
    chinese_words = []
    # With a capturing group, re.split keeps the separators: even positions
    # hold the text between placeholders, odd positions the placeholders.
    pieces = re.split(r'(\?[\*]?[A-Za-z]+)', pattern)
    for position, piece in enumerate(pieces):
        if not piece:
            continue
        if position % 2:
            chinese_words.append(piece)
        else:
            # extend, not append: the result must stay a flat token list.
            chinese_words.extend(cut(piece))
    return chinese_words

In [18]:
# Placeholders should survive as single tokens around the segmented Chinese text.
pattern_split('?*x你好?*y')

Building prefix dict from the default dictionary ...
Loading model from cache C:\Users\ZENGYU~1\AppData\Local\Temp\jieba.cache
Loading model cost 1.280 seconds.
Prefix dict has been built succesfully.


['?*x', '你好', '?*y']

In [19]:
# A Chinese sentence is segmented into words by jieba via cut().
sentence_split('我讨厌你')

['我', '讨厌', '你']

# 问题2

In [20]:
# Same English rules as before plus one Chinese rule; a ?*x token in a pattern
# is referenced as ?x in its reply templates.
defined_patterns = {
    "?*X hello ?*Y": ["Hi, how do you do?"],
    "I was ?*X": ["Were you really ?X ?", "I already knew you were ?X ."],
    '?*x讨厌?*y': ['?y怎么会那么讨厌呢?', '?x讨厌?y的哪里？', '?y有什么不好呢？', '你不想要?y吗？']
}
def get_response(saying, rules=defined_patterns):
    """Answer ``saying`` using language-aware tokenization.

    Patterns and reply templates are tokenized with ``pattern_split`` and the
    input with ``sentence_split``; the first rule that yields bindings wins.

    Args:
        saying: the input sentence.
        rules: mapping from pattern string to a list of reply templates.

    Returns:
        One reply template of the matching rule, picked with
        ``random.choice``, with its variables replaced and its tokens joined
        with single spaces; ``None`` when no rule produces bindings.
    """
    for pattern in rules.keys():
        bindings = pat_match_with_seg(pattern_split(pattern), sentence_split(saying))
        if not bindings:
            continue
        chosen_template = random.choice(rules[pattern])
        reply_tokens = subsitite(pattern_split(chosen_template), pat_to_dict(bindings))
        return ' '.join(reply_tokens)
    return None

In [21]:
# The English rule still works with the language-aware splitters; the reply is random.
get_response('I was a ipone')

'I already knew you were a ipone .'

In [22]:
# Exercises the Chinese rule; reply tokens are joined with spaces and the reply is random.
get_response('小红、小明和小东讨厌一群小狗')

'你 不 想要 一群 小狗 吗 ？'

# 问题4