In [1]:
def build_bad_char_table(pattern):
    """Build the bad-character table for ``pattern``.

    Returns a dict that maps every character occurring in ``pattern`` to the
    index of its last (rightmost) occurrence.
    """
    # Later occurrences overwrite earlier ones, so each key ends up holding
    # the rightmost index at which that character appears.
    return {symbol: position for position, symbol in enumerate(pattern)}


In [None]:
def build_good_suffix_table(pattern):
    """Build the good-suffix shift table for ``pattern``.

    Entry ``j`` is how far the pattern may be moved to the right when a
    right-to-left comparison fails at ``pattern[j]`` after ``pattern[j + 1:]``
    has already matched (strong good-suffix rule). Every entry is at least 1,
    and no shift skips an alignment that could still match. Entry ``0`` is
    also a safe shift after a complete match, because it only assumes that
    ``pattern[1:]`` matched.

    Returns:
        A list with ``len(pattern)`` entries, indexed by mismatch position
        (an empty list for an empty pattern).
    """
    length = len(pattern)

    # Working arrays are indexed by k in 0..length:
    #   shifts[k]       - shift to apply once pattern[k:] has matched
    #                     (k == length: nothing matched, k == 0: full match)
    #   border_start[k] - start index of the widest border of pattern[k:]
    shifts = [0] * (length + 1)
    border_start = [0] * (length + 1)

    # Case 1: the matched suffix reappears further left in the pattern,
    # preceded by a different character. Scan borders from right to left and
    # record the shift the first time a border cannot be extended.
    i = length
    j = length + 1
    border_start[i] = j
    while i > 0:
        while j <= length and pattern[i - 1] != pattern[j - 1]:
            if shifts[j] == 0:
                shifts[j] = j - i
            j = border_start[j]
        i -= 1
        j -= 1
        border_start[i] = j

    # Case 2: only a part of the matched suffix lines up with a prefix of the
    # pattern. Fill the remaining entries with the widest applicable border,
    # moving on to the next narrower border once it becomes too wide.
    j = border_start[0]
    for i in range(length + 1):
        if shifts[i] == 0:
            shifts[i] = j
        if i == j:
            j = border_start[j]

    # shifts[0] describes a full match; dropping it makes index j correspond
    # to a mismatch at pattern[j], which is how callers index the table.
    return shifts[1:]

def is_prefix(pattern, p):
    """Return True if ``pattern[p:]`` is also a prefix of ``pattern``.

    The tail starting at index ``p`` is compared character by character with
    the beginning of the pattern; the comparison stops at the first
    difference. An empty tail counts as a prefix.
    """
    tail_length = len(pattern) - p
    return all(
        pattern[p + offset] == pattern[offset] for offset in range(tail_length)
    )

def suffix_length(pattern, p):
    """Return the length of the longest substring ending at index ``p`` that
    is also a suffix of ``pattern``.

    Characters are compared right to left: ``pattern[p]`` against the last
    character, ``pattern[p - 1]`` against the second to last, and so on,
    until a mismatch occurs or index 0 has been consumed.
    """
    last = len(pattern) - 1
    matched = 0
    # At most p + 1 characters are available to the left of (and including) p.
    for offset in range(p + 1):
        if pattern[p - offset] != pattern[last - offset]:
            break
        matched += 1
    return matched


In [None]:
def boyer_moore(text, pattern):
    """Search ``text`` for ``pattern`` with Boyer-Moore style shifting.

    The pattern is aligned at offset ``s`` and compared right to left. On a
    mismatch at ``pattern[j]`` the alignment advances by the largest of 1,
    the bad-character shift and ``good_suffix[j]``; after a full match it
    advances by ``good_suffix[0]`` (at least 1).

    The reported positions are only complete if ``build_good_suffix_table``
    returns, for every mismatch index, a shift that never skips a valid
    alignment.

    Args:
        text: The sequence to search in.
        pattern: The sequence to search for.

    Returns:
        A list of start indices, in ascending order, at which ``pattern``
        was found. An empty pattern is reported at every index from 0 to
        ``len(text)`` inclusive.
    """
    m = len(pattern)
    n = len(text)

    # An empty pattern has no table entries to index and trivially matches
    # at every offset, including the position just past the end of the text.
    if m == 0:
        return list(range(n + 1))
    # No alignment fits, so there is nothing to search and no need for tables.
    if m > n:
        return []

    bad_char = build_bad_char_table(pattern)
    good_suffix = build_good_suffix_table(pattern)
    positions = []

    s = 0
    while s <= n - m:
        # Compare right to left; j ends at the mismatch index, or -1 when the
        # whole pattern matched.
        j = m - 1
        while j >= 0 and pattern[j] == text[s + j]:
            j -= 1

        if j < 0:
            positions.append(s)
            # Always move forward: a zero entry must not stall the search.
            s += max(1, good_suffix[0])
        else:
            # -1 for characters absent from the pattern moves the pattern
            # completely past the offending text character.
            last_seen = bad_char.get(text[s + j], -1)
            s += max(1, j - last_seen, good_suffix[j])

    return positions
