Skip to content

Commit

Permalink
Split the occurrence table into a separate data structure so that it …
Browse files Browse the repository at this point in the history
…can be shared between multiple StreamBMH contexts.
  • Loading branch information
FooBarWidget committed Apr 25, 2011
1 parent 45e3ac3 commit 8a268a9
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 13 deletions.
22 changes: 13 additions & 9 deletions StreamBoyerMooreHorspool.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,10 @@ typedef unsigned short sbmh_size_t;

typedef void (*sbmh_data_cb)(const struct StreamBMH *ctx, const unsigned char *data, size_t len);

/*
 * Occurrence table for a needle, kept in its own structure (rather than
 * inside struct StreamBMH) so that one table can be shared between
 * multiple StreamBMH contexts. It is filled in by sbmh_init() and only
 * read by sbmh_feed().
 */
struct StreamBMH_Occ {
/* One skip distance per possible byte value. sbmh_init() sets every entry
 * to needle_len, then for each needle byte except the last stores
 * needle_len - 1 - i, where i is that byte's index (later indexes win).
 * sbmh_feed() adds occ[ch] to the search position on a mismatch.
 */
sbmh_size_t occ[256];
};

struct StreamBMH {
/***** Public but read-only fields *****/
bool found;
Expand All @@ -195,7 +199,6 @@ struct StreamBMH {

/***** Internal fields, do not access. *****/
sbmh_size_t lookbehind_size;
sbmh_size_t occ[256];
// Algorithm uses at most needle_len - 1 bytes of space in lookbehind buffer.
unsigned char lookbehind[];
};
Expand Down Expand Up @@ -229,8 +232,8 @@ sbmh_reset(struct StreamBMH *restrict ctx) {
}

inline void
sbmh_init(struct StreamBMH *restrict ctx, const unsigned char *restrict needle,
sbmh_size_t needle_len)
sbmh_init(struct StreamBMH *restrict ctx, struct StreamBMH_Occ *restrict occ,
const unsigned char *restrict needle, sbmh_size_t needle_len)
{
sbmh_size_t i;
unsigned int j;
Expand All @@ -242,15 +245,15 @@ sbmh_init(struct StreamBMH *restrict ctx, const unsigned char *restrict needle,

/* Initialize occurrence table. */
for (j = 0; j < 256; j++) {
ctx->occ[j] = needle_len;
occ->occ[j] = needle_len;
}

/* Populate occurrence table with analysis of the needle,
* ignoring last letter.
*/
if (needle_len >= 1) {
for (i = 0; i < needle_len - 1; i++) {
ctx->occ[needle[i]] = needle_len - 1 - i;
occ->occ[needle[i]] = needle_len - 1 - i;
}
}
}
Expand All @@ -268,7 +271,8 @@ sbmh_lookup_char(const struct StreamBMH *restrict ctx,

inline bool
sbmh_memcmp(const struct StreamBMH *restrict ctx,
const unsigned char *restrict needle, const unsigned char *restrict data,
const unsigned char *restrict needle,
const unsigned char *restrict data,
ssize_t pos, sbmh_size_t len)
{
ssize_t i = 0;
Expand All @@ -287,7 +291,7 @@ sbmh_memcmp(const struct StreamBMH *restrict ctx,
}

inline size_t
sbmh_feed(struct StreamBMH *restrict ctx,
sbmh_feed(struct StreamBMH *restrict ctx, const struct StreamBMH_Occ *restrict occtable,
const unsigned char *restrict needle, sbmh_size_t needle_len,
const unsigned char *restrict data, size_t len)
{
Expand All @@ -304,7 +308,7 @@ sbmh_feed(struct StreamBMH *restrict ctx,
*/
ssize_t pos = -ctx->lookbehind_size;
unsigned char last_needle_char = needle[needle_len - 1];
const sbmh_size_t *occ = ctx->occ;
const sbmh_size_t *occ = occtable->occ;

if (pos < 0) {
SBMH_DEBUG2("[sbmh] considering lookbehind: (%s)(%s)\n",
Expand Down Expand Up @@ -340,7 +344,7 @@ sbmh_feed(struct StreamBMH *restrict ctx,
int(pos + needle_len));
return pos + needle_len;
} else {
pos += ctx->occ[ch];
pos += occ[ch];
}
}

Expand Down
10 changes: 6 additions & 4 deletions StreamTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,16 @@ namespace tut {

int find(const string &needle, const string &haystack) {
StreamBMH *ctx = (StreamBMH *) alloca(SBMH_SIZE(needle.size()));
StreamBMH_Occ occ;

unmatched_data.clear();
lookbehind.clear();

sbmh_init(ctx, (const unsigned char *) needle.c_str(), needle.size());
sbmh_init(ctx, &occ, (const unsigned char *) needle.c_str(), needle.size());
ctx->callback = append_unmatched_data;
ctx->user_data = this;

size_t analyzed = sbmh_feed(ctx,
size_t analyzed = sbmh_feed(ctx, &occ,
(const unsigned char *) needle.c_str(), needle.size(),
(const unsigned char *) haystack.c_str(), haystack.size());
lookbehind.assign((const char *) ctx->lookbehind, ctx->lookbehind_size);
Expand All @@ -42,17 +43,18 @@ namespace tut {

int feed_in_chunks_and_find(const string &needle, const string &haystack, int chunkSize = 1) {
StreamBMH *ctx = (StreamBMH *) alloca(SBMH_SIZE(needle.size()));
StreamBMH_Occ occ;

unmatched_data.clear();
lookbehind.clear();

sbmh_init(ctx, (const unsigned char *) needle.c_str(), needle.size());
sbmh_init(ctx, &occ, (const unsigned char *) needle.c_str(), needle.size());
ctx->callback = append_unmatched_data;
ctx->user_data = this;

size_t analyzed = 0;
for (string::size_type i = 0; i < haystack.size(); i += chunkSize) {
analyzed += sbmh_feed(ctx,
analyzed += sbmh_feed(ctx, &occ,
(const unsigned char *) needle.c_str(), needle.size(),
(const unsigned char *) haystack.c_str() + i,
std::min((int) chunkSize, (int) (haystack.size() - i)));
Expand Down

0 comments on commit 8a268a9

Please sign in to comment.