Skip to content

Commit c416818

Browse files
first commit: added Knuth-Morris-Pratt and Z algorithms implementation
1 parent 6230a2b commit c416818

File tree

11 files changed

+414
-0
lines changed

11 files changed

+414
-0
lines changed
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
//: Playground - noun: a place where people can play
2+
3+
4+
/// Computes the Z-array of `ptnr` (Gusfield's Z-algorithm).
///
/// For every position `k >= 1` the result holds the length of the longest run of
/// characters starting at `k` that is also a prefix of `ptnr`. Entry 0 is left at 0.
///
/// - Parameter ptnr: the string to pre-process.
/// - Returns: the Z-array (one entry per `Character`), or `nil` when `ptnr` is empty.
func ZetaAlgorithm(ptnr: String) -> [Int]? {

    let chars = Array(ptnr.characters)
    let total = chars.count

    guard total > 0 else {
        return nil
    }

    var z = [Int](count: total, repeatedValue: 0)

    // [boxStart, boxEnd] is the prefix-matching interval that reaches furthest right so far.
    var boxStart = 0
    var boxEnd = 0

    for k in 1 ..< total {
        if k <= boxEnd {
            // k lies inside the current interval: reuse the value computed for the
            // corresponding position inside the prefix.
            let mirrored = z[k - boxStart]
            let remaining = boxEnd - k + 1

            if mirrored < remaining {
                z[k] = mirrored
                continue
            }

            // The known match reaches the interval's edge; keep comparing beyond it.
            var prefixCursor = remaining
            var scanCursor = boxEnd + 1

            while scanCursor < total && chars[prefixCursor] == chars[scanCursor] {
                prefixCursor += 1
                scanCursor += 1
            }

            z[k] = scanCursor - k
            boxStart = k
            boxEnd = scanCursor - 1
        } else {
            // k is outside every known interval: compare against the prefix from scratch.
            var matched = 0

            while k + matched < total && chars[k + matched] == chars[matched] {
                matched += 1
            }

            z[k] = matched

            if matched > 0 {
                boxStart = k
                boxEnd = k + matched - 1
            }
        }
    }
    return z
}
59+
60+
extension String {

    /// Finds every occurrence of `ptnr` in the receiver with the Knuth-Morris-Pratt scan.
    ///
    /// Offsets are counted in `Character`s from the start of the receiver.
    ///
    /// - Parameter ptnr: the pattern to look for.
    /// - Returns: the start offsets of all matches in increasing order, or `nil` when
    ///   the pattern is empty or does not occur.
    func indexesOf(ptnr: String) -> [Int]? {

        let haystack = Array(self.characters)
        let needle = Array(ptnr.characters)
        let haystackCount = haystack.count
        let needleCount = needle.count

        guard needleCount > 0 else {
            return nil
        }

        /* Pre-processing stage: build the shift table from the pattern's Z-array */
        // ZetaAlgorithm only returns nil for an empty pattern, which was rejected above.
        guard let zValues = ZetaAlgorithm(ptnr) else {
            return nil
        }

        var fallback = [Int](count: needleCount, repeatedValue: 0)

        // Walk positions right-to-left so that, for a given end index, the left-most
        // start position is written last and therefore wins.
        var start = needleCount - 1
        while start >= 1 {
            let length = zValues[start]
            fallback[start + length - 1] = length
            start -= 1
        }

        /* Search stage: scan the text, never moving the text cursor backwards */
        var matches = [Int]()
        var textCursor = 0
        var needleCursor = 0

        // Continue only while the unmatched rest of the pattern still fits in the text.
        while textCursor + (needleCount - needleCursor - 1) < haystackCount {

            while needleCursor < needleCount && haystack[textCursor] == needle[needleCursor] {
                textCursor += 1
                needleCursor += 1
            }

            if needleCursor == needleCount {
                matches.append(textCursor - needleCursor)
            }

            if needleCursor > 0 {
                // Resume with the prefix length recorded for the last matched position.
                needleCursor = fallback[needleCursor - 1]
            } else {
                textCursor += 1
            }
        }

        return matches.isEmpty ? nil : matches
    }
}
115+
116+
/* Examples: the trailing comment on each call is the result recorded by the author */
117+
118+
/* Long single-line sample text searched for a four-character motif */
let dna = "ACCCGGTTTTAAAGAACCACCATAAGATATAGACAGATATAGGACAGATATAGAGACAAAACCCCATACCCCAATATTTTTTTGGGGAGAAAAACACCACAGATAGATACACAGACTACACGAGATACGACATACAGCAGCATAACGACAACAGCAGATAGACGATCATAACAGCAATCAGACCGAGCGCAGCAGCTTTTAAGCACCAGCCCCACAAAAAACGACAATFATCATCATATACAGACGACGACACGACATATCACACGACAGCATA"
119+
dna.indexesOf("CATA") // [20, 64, 130, 140, 166, 234, 255, 270]
120+
121+
/* Emoji sample: the recorded offset counts each emoji as one position */
let concert = "🎼🎹🎹🎸🎸🎻🎻🎷🎺🎤👏👏👏"
122+
concert.indexesOf("🎻🎷") // [6]
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
2+
<playground version='5.0' target-platform='osx'>
3+
<timeline fileName='timeline.xctimeline'/>
4+
</playground>

Knuth-Morris-Pratt/KnuthMorrisPratt.playground/playground.xcworkspace/contents.xcworkspacedata

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<Timeline
3+
version = "3.0">
4+
<TimelineItems>
5+
</TimelineItems>
6+
</Timeline>
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/* Knuth-Morris-Pratt algorithm for pattern/string matching
2+
3+
The code is based on the book:
4+
"Algorithms on Strings, Trees, and Sequences: Computer Science and Computational Biology"
5+
by Dan Gusfield
6+
Cambridge University Press, 1997
7+
*/
8+
9+
import Foundation
10+
11+
extension String {

    /// Finds every occurrence of `ptnr` in the receiver with the Knuth-Morris-Pratt scan.
    ///
    /// Offsets are counted in `Character`s from the start of the receiver.
    ///
    /// - Parameter ptnr: the pattern to look for.
    /// - Returns: the start offsets of all matches in increasing order, or `nil` when
    ///   the pattern is empty or does not occur.
    func indexesOf(ptnr: String) -> [Int]? {

        let haystack = Array(self.characters)
        let needle = Array(ptnr.characters)
        let haystackCount = haystack.count
        let needleCount = needle.count

        guard needleCount > 0 else {
            return nil
        }

        /* Pre-processing stage: build the shift table from the pattern's Z-array */
        // ZetaAlgorithm only returns nil for an empty pattern, which was rejected above.
        guard let zValues = ZetaAlgorithm(ptnr) else {
            return nil
        }

        var fallback = [Int](count: needleCount, repeatedValue: 0)

        // Walk positions right-to-left so that, for a given end index, the left-most
        // start position is written last and therefore wins.
        var start = needleCount - 1
        while start >= 1 {
            let length = zValues[start]
            fallback[start + length - 1] = length
            start -= 1
        }

        /* Search stage: scan the text, never moving the text cursor backwards */
        var matches = [Int]()
        var textCursor = 0
        var needleCursor = 0

        // Continue only while the unmatched rest of the pattern still fits in the text.
        while textCursor + (needleCount - needleCursor - 1) < haystackCount {

            while needleCursor < needleCount && haystack[textCursor] == needle[needleCursor] {
                textCursor += 1
                needleCursor += 1
            }

            if needleCursor == needleCount {
                matches.append(textCursor - needleCursor)
            }

            if needleCursor > 0 {
                // Resume with the prefix length recorded for the last matched position.
                needleCursor = fallback[needleCursor - 1]
            } else {
                textCursor += 1
            }
        }

        return matches.isEmpty ? nil : matches
    }
}

Z-Algorithm/ZAlgorithm.swift

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/* Z-Algorithm for pattern/string pre-processing
2+
3+
The code is based on the book:
4+
"Algorithms on Strings, Trees, and Sequences: Computer Science and Computational Biology"
5+
by Dan Gusfield
6+
Cambridge University Press, 1997
7+
*/
8+
9+
import Foundation
10+
11+
/// Computes the Z-array of `ptrn` (Gusfield's Z-algorithm).
///
/// For every position `k >= 1` the result holds the length of the longest run of
/// characters starting at `k` that is also a prefix of `ptrn`. Entry 0 is left at 0.
///
/// - Parameter ptrn: the string to pre-process.
/// - Returns: the Z-array (one entry per `Character`), or `nil` when `ptrn` is empty.
func ZetaAlgorithm(ptrn: String) -> [Int]? {

    let chars = Array(ptrn.characters)
    let total = chars.count

    guard total > 0 else {
        return nil
    }

    var z = [Int](count: total, repeatedValue: 0)

    // [boxStart, boxEnd] is the prefix-matching interval that reaches furthest right so far.
    var boxStart = 0
    var boxEnd = 0

    for k in 1 ..< total {
        if k <= boxEnd {
            // k lies inside the current interval: reuse the value computed for the
            // corresponding position inside the prefix.
            let mirrored = z[k - boxStart]
            let remaining = boxEnd - k + 1

            if mirrored < remaining {
                z[k] = mirrored
                continue
            }

            // The known match reaches the interval's edge; keep comparing beyond it.
            var prefixCursor = remaining
            var scanCursor = boxEnd + 1

            while scanCursor < total && chars[prefixCursor] == chars[scanCursor] {
                prefixCursor += 1
                scanCursor += 1
            }

            z[k] = scanCursor - k
            boxStart = k
            boxEnd = scanCursor - 1
        } else {
            // k is outside every known interval: compare against the prefix from scratch.
            var matched = 0

            while k + matched < total && chars[k + matched] == chars[matched] {
                matched += 1
            }

            z[k] = matched

            if matched > 0 {
                boxStart = k
                boxEnd = k + matched - 1
            }
        }
    }
    return z
}
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
//: Playground - noun: a place where people can play
2+
3+
4+
/// Computes the Z-array of `ptrn` (Gusfield's Z-algorithm).
///
/// For every position `k >= 1` the result holds the length of the longest run of
/// characters starting at `k` that is also a prefix of `ptrn`. Entry 0 is left at 0.
///
/// - Parameter ptrn: the string to pre-process.
/// - Returns: the Z-array (one entry per `Character`), or `nil` when `ptrn` is empty.
func ZetaAlgorithm(ptrn: String) -> [Int]? {

    let chars = Array(ptrn.characters)
    let total = chars.count

    guard total > 0 else {
        return nil
    }

    var z = [Int](count: total, repeatedValue: 0)

    // [boxStart, boxEnd] is the prefix-matching interval that reaches furthest right so far.
    var boxStart = 0
    var boxEnd = 0

    for k in 1 ..< total {
        if k <= boxEnd {
            // k lies inside the current interval: reuse the value computed for the
            // corresponding position inside the prefix.
            let mirrored = z[k - boxStart]
            let remaining = boxEnd - k + 1

            if mirrored < remaining {
                z[k] = mirrored
                continue
            }

            // The known match reaches the interval's edge; keep comparing beyond it.
            var prefixCursor = remaining
            var scanCursor = boxEnd + 1

            while scanCursor < total && chars[prefixCursor] == chars[scanCursor] {
                prefixCursor += 1
                scanCursor += 1
            }

            z[k] = scanCursor - k
            boxStart = k
            boxEnd = scanCursor - 1
        } else {
            // k is outside every known interval: compare against the prefix from scratch.
            var matched = 0

            while k + matched < total && chars[k + matched] == chars[matched] {
                matched += 1
            }

            z[k] = matched

            if matched > 0 {
                boxStart = k
                boxEnd = k + matched - 1
            }
        }
    }
    return z
}
60+
61+
62+
extension String {

    /// Finds every occurrence of `pattern` in the receiver by running the Z-algorithm
    /// over `pattern + "💲" + self`.
    ///
    /// Offsets are counted in `Character`s from the start of the receiver.
    ///
    /// - Parameter pattern: the pattern to look for.
    /// - Returns: the start offsets of all matches in increasing order, or `nil` when
    ///   the pattern is empty or does not occur.
    func indexesOf(pattern: String) -> [Int]? {
        let patternLength: Int = pattern.characters.count

        // With an empty pattern every Z-value of 0 would count as a "match" and
        // produce meaningless offsets (starting at -1), so reject it up front.
        guard patternLength > 0 else {
            return nil
        }

        guard let zeta = ZetaAlgorithm(pattern + "💲" + self) else {
            return nil
        }

        // The text begins right after the pattern and the one-character separator.
        // NOTE(review): this assumes the separator does not fuse with a neighbouring
        // character into a single grapheme cluster (e.g. a text that starts with a
        // combining mark) — confirm if such inputs matter.
        let textStart = patternLength + 1

        var indexes: [Int] = [Int]()

        /* Scan the zeta array to find matched patterns */
        // Only positions inside the text are candidates; positions inside the pattern or
        // on the separator would map to negative offsets. `>=` rather than `==` keeps
        // matches whose Z-value runs past the pattern, which happens when the text (or
        // pattern) itself contains the separator character.
        for i in zeta.indices where i >= textStart && zeta[i] >= patternLength {
            indexes.append(i - textStart)
        }

        guard !indexes.isEmpty else {
            return nil
        }

        return indexes
    }
}
88+
89+
/* Examples: the trailing comment on each call is the result recorded by the author */
90+
91+
/* Plain-text sample with a single occurrence of the pattern */
let str = "Hello, playground!"
92+
str.indexesOf("ground") // [11]
93+
94+
/* Emoji sample: the recorded offsets count each emoji as one position */
let traffic = "🚗🚙🚌🚕🚑🚐🚗🚒🚚🚎🚛🚐🏎🚜🚗🏍🚒🚲🚕🚓🚌🚑"
95+
traffic.indexesOf("🚑") // [4, 21]
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
2+
<playground version='5.0' target-platform='osx'>
3+
<timeline fileName='timeline.xctimeline'/>
4+
</playground>

Z-Algorithm/ZetaAlgorithm.playground/playground.xcworkspace/contents.xcworkspacedata

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<Timeline
3+
version = "3.0">
4+
<TimelineItems>
5+
</TimelineItems>
6+
</Timeline>

0 commit comments

Comments
 (0)