In [1]:
from time import time
import re
import string
import random

algorytm naiwny

In [2]:
def naive_string_matching(text, pattern, printt = True):
    t = time()
    for s in range(0, len(text) - len(pattern) + 1):
        if pattern == text[s:s + len(pattern)] and printt:
            print(f"Przesunięcie {s} jest poprawne")
    return time() - t

algorytm automatu skończonego

In [3]:
def fa_string_matching(text, pattern, printt = True):
    tt = time()
    q = 0
    delta = transition_table(pattern)

    t = time()
    
    for s in range(0, len(text)):
        q = delta[q].get(text[s], 0) # jesli nie ma znaku w tablicy przejscia to domyslnie wrzuca nam 0

        if (q == len(delta) - 1) and printt:
            print(f"Przesunięcie {s + 1 - q} jest poprawne")
            # s + 1 - ponieważ przeczytaliśmy znak o indeksie s, więc przesunięcie jest po tym znaku
    return time() - tt, time() - t 

def transition_table(pattern):
    result = []

    alphabet = set(pattern)

    for q in range(0, len(pattern) + 1):
        result.append({})
        for a in alphabet:
            k = min(len(pattern) + 1, q + 2)
            while True:
                k = k - 1
                if re.search(f"{pattern[:k]}$", pattern[:q] + a):
                    break
            result[q][a] = k
    return result

algorytm kmp (Knutha-Morrisa-Pratta)

In [4]:
def kmp_string_matching(text, pattern, printt = True):
    tt = time()
    pi = prefix_function(pattern)
    t = time()
    q = 0
    for i in range(0, len(text)):
        while q != 0 and pattern[q] != text[i]:
            q = pi[q - 1]

        if pattern[q] == text[i]:
            q = q + 1
        if q == len(pattern):
            if printt:
                print(f"Przesunięcie {i + 1 - q} jest poprawne")
            q = pi[q - 1]
    return time() - tt, time() - t

def prefix_function(pattern):
    pi = [0] * len(pattern)
    k = 0
    for q in range(1, len(pattern)):
        while k != 0 and pattern[k] != pattern[q]:
            k = pi[k - 1]

        if pattern[k] == pattern[q]:
            k = k + 1
        pi[q] = k
    return pi

Porównanie algorytmów

In [5]:
def speed_tests(text, pattern):
    print("naive:",naive_string_matching(text, pattern, False))
    print("fa")
    total, matching = fa_string_matching(text, pattern, False)
    print("total", total)
    print("matching:", matching)
    
    print("kmp")
    total, matching = kmp_string_matching(text, pattern, False)
    print("total", total)
    print("matching:", matching)

In [6]:
text = open("ustawa", encoding="utf8").read()
pattern = "art"

In [7]:
speed_tests(text, pattern)

naive: 0.050272226333618164
fa
total 0.047126054763793945
matching: 0.047126054763793945
kmp
total 0.039957284927368164
matching: 0.039957284927368164


In [8]:
#text = open("wiki.txt", encoding="utf8").read()
#pattern = "kruszwil"

In [9]:
#speed_tests(text, pattern)

In [None]:
text = ''.join(random.choice("abc") for i in range(1000000))
pattern = ''.join(random.choice("abc") for i in range(10000))
# print(text, pattern)
speed_tests(text, pattern)

naive: 0.4456470012664795
fa
