### Horspool's Algorithm for String Matching

In [9]:
# Horspool's Algorithm for String Matching
def ShiftTable(P):
    """Build the Horspool shift table for pattern ``P``.

    Args:
        P: The pattern to search for.

    Returns:
        dict: Maps each character of ``P`` to a shift distance. A character
        found among the first ``len(P) - 1`` positions maps to the distance
        from its rightmost such occurrence to the last position of the
        pattern; a character that only appears in the last position maps to
        ``len(P)``.
    """
    pattern_len = len(P)
    # Start every pattern character at the full pattern length.
    shifts = dict.fromkeys(set(P), pattern_len)
    # The final character is skipped on purpose; later occurrences overwrite
    # earlier ones, so the rightmost occurrence decides the shift.
    for idx, symbol in enumerate(P[:-1]):
        shifts[symbol] = pattern_len - 1 - idx
    return shifts

def HorspoolMatching(P, T):
    """Search text ``T`` for pattern ``P`` using Horspool's algorithm.

    Args:
        P: The pattern to look for.
        T: The text to search in.

    Returns:
        int: The 0-based index in ``T`` where the first match found by the
        left-to-right scan begins, or ``-1`` when no match is found.
    """
    shifts = ShiftTable(P)
    pattern_len = len(P)
    text_len = len(T)

    # `window_end` is the text index aligned with the pattern's last character.
    window_end = pattern_len - 1
    while window_end < text_len:
        # Compare right-to-left, counting how many characters agree.
        matched = 0
        while matched < pattern_len:
            if P[pattern_len - 1 - matched] != T[window_end - matched]:
                break
            matched += 1

        if matched == pattern_len:
            return window_end - pattern_len + 1

        # Shift by the table entry for the text character under the window's
        # last position; characters not in the pattern skip a full length.
        window_end += shifts.get(T[window_end], pattern_len)

    return -1

In [11]:
### Test Case 1 - Horspool's Algorithm ###
# The pattern occurs near the end of the text.
# Expected output: 16 (0-based index in Text where the match starts).
Pattern = "BAOBAB"
Text = "BESS_KNEW_ABOUT_BAOBABS"

print(HorspoolMatching(Pattern, Text))

16


In [12]:
### Test Case 2 - Horspool's Algorithm ###
# The text contains partial matches of the pattern before the full match.
# Expected output: 10 (0-based index in Text where the match starts).
Pattern = "ABABCABAB"
Text = "ABABDABACDABABCABAB"

print(HorspoolMatching(Pattern, Text))

10


---

### Boyer-Moore's Algorithm

ข้อสังเกตคือ Boyer-Moore มี Algorithm ที่เหมือนกับ Horspool's เพียงแค่เปลี่ยนระยะในการ Shift แต่ละรอบให้เป็นค่าที่มากกว่าระหว่าง Bad-symbol shift กับ Good-suffix shift

Reference : https://www.geeksforgeeks.org/boyer-moore-algorithm-for-pattern-searching/

In [4]:
# Boyer-Moore's Algorithm for String Matching

def HorspoolTable(P):
    """Build a Horspool-style shift table for pattern ``P``.

    Args:
        P: The pattern to search for.

    Returns:
        dict: Maps each character of ``P`` to a shift distance: the distance
        from its rightmost occurrence among the first ``len(P) - 1``
        positions to the pattern's last position, or ``len(P)`` when the
        character appears only in the last position.
    """
    m = len(P)
    # Default every pattern character to a full-length shift.
    table = {symbol: m for symbol in set(P)}
    # Walk the distances from largest to smallest (i.e. left to right through
    # the pattern, excluding its last character) so the smallest distance
    # -- the rightmost occurrence -- is the one that sticks.
    for distance in range(m - 1, 0, -1):
        table[P[m - 1 - distance]] = distance
    return table

def BadCharacterTable(P):
    """Map every character of the pattern to the index of its last occurrence.

    Args:
        P: The pattern to search for.

    Returns:
        dict: ``{character: rightmost 0-based index of that character in P}``.
        Characters that do not occur in ``P`` are absent from the table;
        callers should treat them as index ``-1``.
    """
    table = {}
    for j, char in enumerate(P):
        # Later occurrences overwrite earlier ones, leaving the rightmost index.
        table[char] = j
    return table


def GoodSuffixTable(P):
    """Build the (strong) good-suffix shift table for the pattern.

    Args:
        P: The pattern to search for.

    Returns:
        list[int]: A list of ``len(P) + 1`` positive shifts indexed by the
        number ``k`` of trailing pattern characters that matched before the
        mismatch. ``table[k]`` is a safe distance to slide the pattern after
        matching exactly ``k`` characters from the right; ``table[len(P)]``
        is the shift that applies after a complete match.
    """
    m = len(P)
    # shift[j]  : shift to use when the mismatch is at pattern index j - 1
    #             (shift[0] applies after a full match).
    # border[j] : start index of the widest border of the suffix P[j:].
    # These must be two separate arrays; sharing one array corrupts both.
    shift = [0] * (m + 1)
    border = [0] * (m + 1)

    # Pass 1: the matched suffix re-occurs elsewhere in the pattern, preceded
    # by a different character.
    i = m
    j = m + 1
    border[i] = j
    while i > 0:
        while j <= m and P[i - 1] != P[j - 1]:
            if shift[j] == 0:
                shift[j] = j - i
            j = border[j]
        i -= 1
        j -= 1
        border[i] = j

    # Pass 2: only a prefix of the pattern matches a suffix of the matched
    # part; fill every entry that pass 1 left empty.
    j = border[0]
    for i in range(m + 1):
        if shift[i] == 0:
            shift[i] = j
        if i == j:
            j = border[j]

    # A mismatch after k matched characters happens at pattern index
    # m - 1 - k, which uses shift[m - k]; reversing re-indexes the table by k.
    return shift[::-1]


def BoyerMooreMatching(P, T):
    """Search text ``T`` for pattern ``P`` using the Boyer-Moore algorithm.

    Each alignment is compared right-to-left. On a mismatch the pattern is
    moved by the larger of the bad-character shift and the good-suffix shift.

    Args:
        P: The pattern to look for.
        T: The text to search in.

    Returns:
        int: The 0-based index in ``T`` where the first occurrence of ``P``
        begins, or ``-1`` if ``P`` does not occur. An empty pattern matches
        at index ``0``.
    """
    m = len(P)
    n = len(T)
    if m == 0:
        return 0

    bad_character = BadCharacterTable(P)
    good_suffix = GoodSuffixTable(P)

    # `i` is the text index aligned with the pattern's last character.
    i = m - 1
    while i < n:
        # Count how many trailing characters agree.
        k = 0
        while k < m and P[m - 1 - k] == T[i - k]:
            k += 1
        if k == m:
            return i - m + 1

        # Bad-character rule: align the mismatching TEXT character T[i - k]
        # with its rightmost occurrence in the pattern (-1 if it is absent).
        mismatch_pos = m - 1 - k
        bad_char_skip = mismatch_pos - bad_character.get(T[i - k], -1)
        good_suffix_skip = good_suffix[k]

        # The bad-character value may be zero or negative; always advance.
        i += max(1, bad_char_skip, good_suffix_skip)

    return -1

In [7]:
### Test Case 1 - Boyer-Moore's Algorithm ###
# Expected output: 10 (0-based index in Text where the match starts).
Pattern = "ABOUT"
Text = "BESS_KNEW_ABOUT_BAOBABS"

print(BoyerMooreMatching(Pattern, Text))

10


In [None]:
### Test Case 2 - 