-
Notifications
You must be signed in to change notification settings - Fork 3
/
Algorithms.py
157 lines (128 loc) · 4.46 KB
/
Algorithms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import time
import pickle
"""
This is the python file that contains all the important algorithms for our project
The brute_force_search, knuth_morris_pratt and rabin_karp search functions can be found below
All of these functions keep track of the time elapsed and the number of times the query appears in the genome
sequence. It also keeps track of the number of character comparisons made by these 3 algorithms and stores
the length of the genome sequence, length of the query and the number of character comparisons in a binary file
"""
# Implementation of the brute force algorithm
def brute_force_search(sequence, pattern):
comparisons = 0
positions = []
count = 0
start = time.time()
for i in range(len(sequence) - len(pattern) + 1):
t = 0
for j in range(len(pattern)):
comparisons += 1
if sequence[i + j] != pattern[j]:
break
else:
t = t + 1
if t == len(pattern):
count += 1
positions.append(i + 1)
end = time.time()
time_elapsed = end - start
file = "brute_force_observations.pkl"
dump_observations(file, sequence, pattern, comparisons)
return positions, count, time_elapsed, comparisons
# Implementation of the Knuth Morris Pratt algorithm
def knuth_morris_pratt(sequence, pattern):
comparisons = 0
positions = []
i = 0
j = 0
count = 0
start = time.time()
pi_table = kmp_preprocess(pattern)
while i < len(sequence):
comparisons += 1
if sequence[i] == pattern[j]:
i += 1
j += 1
if j == len(pattern):
positions.append((i - j + 1))
j = pi_table[j - 1]
count += 1
elif i < len(sequence) and sequence[i] != pattern[j]:
if j == 0:
i += 1
else:
j = pi_table[j - 1]
end = time.time()
time_elapsed = end - start
file = "knuth_morris_pratt_observations.pkl"
dump_observations(file, sequence, pattern, comparisons)
return positions, count, time_elapsed, comparisons
# This function calculates the pi table which is used in the KMP Algorithm
def kmp_preprocess(pattern):
i = 1
j = 0
pi_table = [0] * len(pattern)
while i < len(pattern):
if pattern[i] == pattern[j]:
j += 1
pi_table[i] = j
i += 1
else:
if j == 0:
pi_table[i] = 0
i += 1
else:
j = pi_table[j - 1]
return pi_table
# Implementation of the Rabin Karp algorithm
def rabin_karp(sequence, pattern):
comparisons = 0
n = len(sequence)
m = len(pattern)
q = 101
base = 5
x = 1
index = []
hash_q = 0
hash_g = 0
start = time.time()
for i in range(0, m - 1):
x = (x * base) % q
for i in range(0, m):
hash_g = (base * hash_g + ord(sequence[i])) % q
hash_q = (base * hash_q + ord(pattern[i])) % q
for i in range(0, (n - m + 1)):
comparisons += 1
if hash_q == hash_g:
for j in range(0, m):
comparisons += 1
if pattern[j] != sequence[i + j]:
break
if j == m - 1:
index.append(i + 1)
if i < n - m:
hash_g = (base * (hash_g - ord(sequence[i]) * x) + ord(sequence[i + m])) % q
if hash_g < 0:
hash_g = hash_g + q
end = time.time()
time_elapsed = end - start
file = "rabin_karp_observations.pkl"
dump_observations(file, sequence, pattern, comparisons)
return index, len(index), time_elapsed, comparisons
# This function loads all our previous observations into the program
def load_observations(filename):
with open(filename, "rb") as f:
observations = pickle.load(f)
f.close()
return observations
# This function appends the records with the new record
def dump_observations(file, sequence, pattern, comparisons):
try:
observations = load_observations(file)
observations.append((len(sequence), len(pattern), comparisons))
except Exception as e:
print(e)
observations = [(len(sequence), len(pattern), comparisons)]
with open(file, 'wb') as f:
pickle.dump(list(observations), f)
f.close()