In [1]:
# Sample string used by the slicing demo in the next cell.
s = "imad"

In [2]:
# A slice whose start equals its stop, and one whose start exceeds its stop,
# both evaluate to the empty string rather than raising.
s[0:0], s[1:0]

('', '')

In [3]:
def find_brute_force(T, P):
    """Return the lowest index of T at which pattern P begins, or -1.

    Every candidate start position in T is tried from left to right, and
    the pattern is compared element by element against the text there.

    Args:
        T: The text (any indexable sequence with a length).
        P: The pattern to look for (same kind of sequence as T).

    Returns:
        0 when P is empty, the index of the first occurrence of P in T
        when there is one, and -1 otherwise.
    """
    text_len, pat_len = len(T), len(P)
    if pat_len == 0:
        return 0
    # The last start position at which P still fits inside T.
    last_start = text_len - pat_len
    for start in range(last_start + 1):
        # all() stops at the first mismatch, like a manual inner loop would.
        if all(T[start + offset] == P[offset] for offset in range(pat_len)):
            return start
    return -1

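The worst-case running time is $O(nm)$, where $n$ is the length of the text and $m$ is the length of the pattern. However, for most applications a mismatch is found after only a few comparisons, so the running time is roughly proportional to $n$.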

In [4]:
def find_brute_force_v1(T, P):
    """Return the lowest index of T at which pattern P begins, or -1.

    Single-loop variant of the brute-force search: one cursor walks the
    text while a counter tracks how much of the pattern has matched. On a
    mismatch the cursor is rewound to just after the start of the failed
    attempt.

    Args:
        T: The text (any indexable sequence with a length).
        P: The pattern to look for (same kind of sequence as T).

    Returns:
        0 when P is empty, the index of the first occurrence of P in T
        when there is one, and -1 otherwise.
    """
    text_len, pat_len = len(T), len(P)
    if pat_len == 0:
        return 0
    pos = matched = 0
    while pos < text_len and matched < pat_len:
        if T[pos] == P[matched]:
            matched += 1
            pos += 1
        else:
            # Go back to the start of this attempt, then move one step on.
            pos = pos - matched + 1
            matched = 0
    # The loop ends with a full match only when the counter reached len(P).
    return pos - pat_len if matched == pat_len else -1

In [5]:
# Shared fixtures for the demo cells: T is the text to search and P is the
# pattern to look for. P first occurs in T at index 10.
T = "abacaabaccabacabaabb"
P = "abacab"

In [6]:
# Nested-loop brute force on the shared fixtures.
find_brute_force(T, P)

10

In [7]:
# Single-loop brute force on the same fixtures; should agree with find_brute_force.
find_brute_force_v1(T, P)

10

In [8]:
def find_boyer_moore(T, P):
    """Return the lowest index of T at which pattern P begins, or -1.

    Simplified Boyer-Moore search: the pattern is compared against the
    text from its last element backwards. On a mismatch, the pattern is
    shifted using the rightmost position in P of the offending text
    element (-1 when that element does not appear in P).

    Args:
        T: The text (indexable sequence of hashable elements).
        P: The pattern to look for (same kind of sequence as T).

    Returns:
        0 when P is empty, the index of the first occurrence of P in T
        when there is one, and -1 otherwise.
    """
    text_len, pat_len = len(T), len(P)
    if pat_len == 0:
        return 0
    # Rightmost index of each element of P; later occurrences overwrite
    # earlier ones.
    last_seen = {}
    for idx, element in enumerate(P):
        last_seen[element] = idx
    tail = pat_len - 1
    t_idx = p_idx = tail
    while t_idx < text_len:
        if T[t_idx] == P[p_idx]:
            if p_idx == 0:
                # The whole pattern matched; t_idx is where it starts.
                return t_idx
            t_idx -= 1
            p_idx -= 1
            continue
        # Mismatch: line up the rightmost occurrence of T[t_idx] in P with
        # it, or step just past the current attempt when that would move
        # the pattern backwards.
        rightmost = last_seen.get(T[t_idx], -1)
        t_idx += pat_len - min(p_idx, rightmost + 1)
        p_idx = tail
    return -1

In [9]:
# Boyer-Moore on the shared fixtures; should agree with the brute-force results.
find_boyer_moore(T, P)

10

The Boyer-Moore algorithm still has an $O(nm)$ running time in the worst case; however, for most applications its running time is proportional to $n + m$, i.e. $O(n + m)$.

In [10]:
def compute_lps_table(P):
    """Build the longest-prefix-suffix (failure) table used by KMP.

    Entry ``j`` of the result is the length of the longest proper prefix
    of ``P[:j + 1]`` that is also a suffix of it.

    Args:
        P: The pattern (any indexable sequence with a length).

    Returns:
        A list of ``len(P)`` integers; entry 0 is always 0.
    """
    size = len(P)
    table = [0] * size
    prefix_len = 0  # length of the prefix currently being extended
    for pos in range(1, size):
        # Fall back to shorter and shorter borders until P[pos] extends
        # one of them or there is nothing left to fall back to.
        while prefix_len > 0 and P[pos] != P[prefix_len]:
            prefix_len = table[prefix_len - 1]
        if P[pos] == P[prefix_len]:
            prefix_len += 1
            table[pos] = prefix_len
        # Otherwise no prefix ends at pos and the entry stays 0.
    return table

In [13]:
def find_kmp(T, P):
    """Return the lowest index of T at which pattern P begins, or -1.

    Knuth-Morris-Pratt search: the text cursor never moves backwards. On
    a mismatch after a partial match, the table from compute_lps_table
    says how much of the pattern can still be considered matched.

    Args:
        T: The text (any indexable sequence with a length).
        P: The pattern to look for (same kind of sequence as T).

    Returns:
        0 when P is empty, the index of the first occurrence of P in T
        when there is one, and -1 otherwise.
    """
    text_len, pat_len = len(T), len(P)
    if pat_len == 0:
        return 0
    fallback = compute_lps_table(P)
    final = pat_len - 1  # index of the last pattern element
    t_idx = p_idx = 0
    while t_idx < text_len:
        if T[t_idx] == P[p_idx]:
            if p_idx == final:
                # Matched the last element; the match began `final` back.
                return t_idx - final
            t_idx += 1
            p_idx += 1
        elif p_idx > 0:
            # Reuse the longest border of what matched so far; the text
            # cursor stays where it is.
            p_idx = fallback[p_idx - 1]
        else:
            t_idx += 1
    return -1

In [14]:
# KMP on the shared fixtures; should agree with the other search functions.
find_kmp(T, P)

10

The running time is $O(n + m)$, which is asymptotically the best any algorithm can achieve: in the worst case, every character of the text and of the pattern has to be examined at least once.