Skip to content

Commit

Permalink
fixed union filtering with field masks
Browse files Browse the repository at this point in the history
  • Loading branch information
dvirsky committed Dec 11, 2016
1 parent 2bf78eb commit bc6a18d
Show file tree
Hide file tree
Showing 10 changed files with 89 additions and 65 deletions.
93 changes: 54 additions & 39 deletions src/index.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,15 +94,17 @@ int IR_Read(void *ctx, IndexHit *e) {

// add tf-idf score of the entry to the hit
if (rc == INDEXREAD_OK) {
// LG_DEBUG("docId %d Flags 0x%x, field mask 0x%x, intersection: %x",
// e->docId, e->flags,
// ir->fieldMask, e->flags & ir->fieldMask);
// printf("docId %d Flags 0x%x, field mask 0x%x, intersection: %x\n",
// e->docId, e->flags,
// ir->fieldMask, e->flags & ir->fieldMask);
if (!(e->flags & ir->fieldMask)) {
// LG_DEBUG("Skipping %d", e->docId);
// printf("Skipping %d\n", e->docId);
return INDEXREAD_NOTFOUND;
}

e->totalFreq = tfidf(freq, ir->header.numDocs);
++ir->len;
// printf("hit %d\n", ir->len);
}
e->type = H_RAW;

Expand Down Expand Up @@ -187,9 +189,17 @@ int IR_SkipTo(void *ctx, u_int32_t docId, IndexHit *hit) {
return INDEXREAD_EOF;
}

size_t IR_NumDocs(void *ir) {
//printf("num docs: %d\n", ((IndexReader *)ir)->header.numDocs);
return (size_t)((IndexReader *)ir)->header.numDocs;
/*
 * Report the number of documents for an index reader.
 *
 * ctx is the IndexReader passed as an opaque pointer.
 *
 * When the reader is not in single-word mode, the value returned is the
 * reader's own `len` counter. In single-word optimized mode the counter is
 * not used and the record size recorded in the header (`header.numDocs`) is
 * returned instead.
 */
size_t IR_NumDocs(void *ctx) {
  IndexReader *reader = ctx;

  // regular mode: rely on the counter maintained by the reader
  if (!reader->singleWordMode) {
    return reader->len;
  }

  // single-word optimized mode: the header is the only source for the size
  return reader->header.numDocs;
}

IndexReader *NewIndexReader(void *data, size_t datalen, SkipIndex *si,
Expand All @@ -210,6 +220,7 @@ IndexReader *NewIndexReaderBuf(Buffer *buf, SkipIndex *si, DocTable *dt,
ret->skipIdxPos = 0;
ret->skipIdx = NULL;
ret->docTable = dt;
ret->len = 0;
ret->singleWordMode = singleWordMode;
// only use score index on single words, no field filter and large entries
ret->useScoreIndex = sci != NULL && singleWordMode && fieldMask == 0xff &&
Expand Down Expand Up @@ -416,45 +427,54 @@ int UI_Read(void *ctx, IndexHit *hit) {
return 0;
}

int minIdx = -1;

int numActive = 0;
do {
// find the minimal iterator
t_docId minDocId = __UINT32_MAX__;
minIdx = -1;
int minIdx = -1;
numActive = 0;
int rc = INDEXREAD_EOF;
for (int i = 0; i < ui->num; i++) {
IndexIterator *it = ui->its[i];

if (it == NULL)
continue;

// if (it->HasNext(it->ctx)) {
// if this hit is behind the min id - read the next entry
if (ui->currentHits[i].docId <= ui->minDocId || ui->minDocId == 0) {
if (it->Read(it->ctx, &ui->currentHits[i]) != INDEXREAD_OK) {
continue;
rc = INDEXREAD_OK;
//if (it->HasNext(it->ctx)) {
// if this hit is behind the min id - read the next entry
if (ui->currentHits[i].docId <= ui->minDocId || ui->minDocId == 0) {
rc = INDEXREAD_NOTFOUND;
// read while we're not at the end and perhaps the flags do not match
while (rc == INDEXREAD_NOTFOUND) {
rc = it->Read(it->ctx, &ui->currentHits[i]);
}
}
}
if (ui->currentHits[i].docId < minDocId) {
minDocId = ui->currentHits[i].docId;
minIdx = i;
}
//}

if (rc != INDEXREAD_EOF) {
numActive++;
}

if (rc == INDEXREAD_OK && ui->currentHits[i].docId < minDocId) {
minDocId = ui->currentHits[i].docId;
minIdx = i;
}
// }

}

// take the minimum entry and yield it
if (minIdx != -1) {

// not found a new minimal docId
if (minIdx == -1) {
return INDEXREAD_EOF;
*hit = ui->currentHits[minIdx];
hit->type = H_UNION;
ui->minDocId = ui->currentHits[minIdx].docId;
ui->len++;
return INDEXREAD_OK;
}

*hit = ui->currentHits[minIdx];
hit->type = H_UNION;
ui->minDocId = ui->currentHits[minIdx].docId;

ui->len++;

return INDEXREAD_OK;

} while (minIdx >= 0);
} while (numActive > 0);

return INDEXREAD_EOF;
}
Expand Down Expand Up @@ -540,9 +560,7 @@ void UnionIterator_Free(IndexIterator *it) {
free(it);
}

size_t UI_Len(void *ctx) {
return ((UnionContext *)ctx)->len;
}
size_t UI_Len(void *ctx) { return ((UnionContext *)ctx)->len; }

void ReadIterator_Free(IndexIterator *it) {
if (it == NULL) {
Expand Down Expand Up @@ -752,7 +770,4 @@ int II_HasNext(void *ctx) {

t_docId II_LastDocId(void *ctx) { return ((IntersectContext *)ctx)->lastDocId; }


size_t II_Len(void *ctx) {
return ((IntersectContext *)ctx)->len;
}
size_t II_Len(void *ctx) { return ((IntersectContext *)ctx)->len; }
2 changes: 2 additions & 0 deletions src/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ typedef struct indexReader {
ScoreIndex *scoreIndex;
int useScoreIndex;
u_char fieldMask;

size_t len;
} IndexReader;

/* An IndexWriter writes forward index entries to an index buffer */
Expand Down
4 changes: 2 additions & 2 deletions src/numeric_index.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ int numericFilter_Match(NumericFilter *f, double score) {
#define NUMERIC_INDEX_KEY_FMT "num:%s/%s"

RedisModuleString *fmtNumericIndexKey(RedisSearchCtx *ctx, const char *field) {
return RMUtil_CreateFormattedString(ctx->redisCtx, NUMERIC_INDEX_KEY_FMT, ctx->spec->name,
return RedisModule_CreateStringPrintf(ctx->redisCtx, NUMERIC_INDEX_KEY_FMT, ctx->spec->name,
field);
}

Expand All @@ -46,7 +46,7 @@ int NumerIndex_Add(NumericIndex *idx, t_docId docId, double score) {
if (idx->key == NULL) return REDISMODULE_ERR;

return RedisModule_ZsetAdd(idx->key, score,
RMUtil_CreateFormattedString(idx->ctx->redisCtx, "%u", docId), NULL);
RedisModule_CreateStringPrintf(idx->ctx->redisCtx, "%u", docId), NULL);
}

int NumericFilter_Read(void *ctx, IndexHit *e) {
Expand Down
3 changes: 2 additions & 1 deletion src/query.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,11 @@ IndexIterator *query_EvalLoadStage(Query *q, QueryStage *stage) {
// if there's only one word in the query and no special field filtering,
// and we are not paging beyond MAX_SCOREINDEX_SIZE
// we can just use the optimized score index

int isSingleWord = q->numTokens == 1 && q->root->nchildren == 1 &&
q->fieldMask == 0xff &&
q->offset + q->limit <= MAX_SCOREINDEX_SIZE;
//printf("singleword? %d, numTokens: %d, fields %x\n", isSingleWord, q->numTokens, q->fieldMask);

IndexReader *ir = Redis_OpenReader(q->ctx, stage->value, strlen(stage->value), q->docTable,
isSingleWord, q->fieldMask);
Expand Down
8 changes: 4 additions & 4 deletions src/redis_index.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,19 @@
*/
RedisModuleString *fmtRedisTermKey(RedisSearchCtx *ctx, const char *term,
size_t len) {
return RMUtil_CreateFormattedString(ctx->redisCtx, TERM_KEY_FORMAT,
return RedisModule_CreateStringPrintf(ctx->redisCtx, TERM_KEY_FORMAT,
ctx->spec->name, len, term);
}

RedisModuleString *fmtRedisSkipIndexKey(RedisSearchCtx *ctx, const char *term,
size_t len) {
return RMUtil_CreateFormattedString(ctx->redisCtx, SKIPINDEX_KEY_FORMAT,
return RedisModule_CreateStringPrintf(ctx->redisCtx, SKIPINDEX_KEY_FORMAT,
ctx->spec->name, len, term);
}

RedisModuleString *fmtRedisScoreIndexKey(RedisSearchCtx *ctx, const char *term,
size_t len) {
return RMUtil_CreateFormattedString(ctx->redisCtx, SCOREINDEX_KEY_FORMAT,
return RedisModule_CreateStringPrintf(ctx->redisCtx, SCOREINDEX_KEY_FORMAT,
ctx->spec->name, len, term);
}
/**
Expand Down Expand Up @@ -470,7 +470,7 @@ int Redis_DropIndex(RedisSearchCtx *ctx, int deleteDocuments) {
RedisModule_CloseKey(k);
}

RedisModuleString *dmd = RMUtil_CreateFormattedString(
RedisModuleString *dmd = RedisModule_CreateStringPrintf(
ctx->redisCtx, DOCTABLE_KEY_FMT, ctx->spec->name);
RedisModule_Call(ctx->redisCtx, "DEL", "cccs", REDISINDEX_DOCKEY_MAP,
REDISINDEX_DOCIDS_MAP, REDISINDEX_DOCIDCOUNTER, dmd);
Expand Down
29 changes: 15 additions & 14 deletions src/rmutil/strings.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,18 @@

#include "sds.h"

RedisModuleString *RMUtil_CreateFormattedString(RedisModuleCtx *ctx, const char *fmt, ...) {
sds s = sdsempty();
// RedisModuleString *RMUtil_CreateFormattedString(RedisModuleCtx *ctx, const char *fmt, ...) {
// sds s = sdsempty();

va_list ap;
va_start(ap, fmt);
s = sdscatvprintf(s, fmt, ap);
va_end(ap);
// va_list ap;
// va_start(ap, fmt);
// s = sdscatvprintf(s, fmt, ap);
// va_end(ap);

RedisModuleString *ret = RedisModule_CreateString(ctx, (const char *)s, sdslen(s));
sdsfree(s);
return ret;
}
// RedisModuleString *ret = RedisModule_CreateString(ctx, (const char *)s, sdslen(s));
// sdsfree(s);
// return ret;
// }

int RMUtil_StringEquals(RedisModuleString *s1, RedisModuleString *s2) {

Expand All @@ -26,8 +26,9 @@ int RMUtil_StringEquals(RedisModuleString *s1, RedisModuleString *s2) {
size_t l1, l2;
c1 = RedisModule_StringPtrLen(s1, &l1);
c2 = RedisModule_StringPtrLen(s2, &l2);

return strncasecmp(c1, c2, MIN(l1,l2)) == 0;
if (l1 != l2) return 0;

return strncmp(c1, c2, l1) == 0;
}

int RMUtil_StringEqualsC(RedisModuleString *s1, const char *s2) {
Expand All @@ -36,9 +37,9 @@ int RMUtil_StringEqualsC(RedisModuleString *s1, const char *s2) {
const char *c1;
size_t l1, l2 = strlen(s2);
c1 = RedisModule_StringPtrLen(s1, &l1);
if (l1 != l2) return 0;


return strncasecmp(c1, s2, MIN(l1,l2)) == 0;
return strncmp(c1, s2, l1) == 0;
}

void RMUtil_StringToLower(RedisModuleString *s) {
Expand Down
5 changes: 4 additions & 1 deletion src/rmutil/strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
* Create a new RedisModuleString object from a printf-style format and arguments.
* Note that RedisModuleString objects CANNOT be used as formatting arguments.
*/
RedisModuleString *RMUtil_CreateFormattedString(RedisModuleCtx *ctx, const char *fmt, ...);
// DEPRECATED: the RedisModule API now provides RedisModule_CreateStringPrintf, so this is replaced with the macro below
//RedisModuleString *RMUtil_CreateFormattedString(RedisModuleCtx *ctx, const char *fmt, ...);
#define RMUtil_CreateFormattedString RedisModule_CreateStringPrintf


/* Return 1 if the two strings are equal. Case *sensitive* */
int RMUtil_StringEquals(RedisModuleString *s1, RedisModuleString *s2);
Expand Down
6 changes: 3 additions & 3 deletions src/spec.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ void IndexSpec_Free(IndexSpec *spec) {
/* Saves the spec as a LIST, containing basically the arguments needed to recreate the spec */
int IndexSpec_Save(RedisModuleCtx *ctx, IndexSpec *sp) {
RedisModuleKey *k =
RedisModule_OpenKey(ctx, RMUtil_CreateFormattedString(ctx, "idx:%s", sp->name),
RedisModule_OpenKey(ctx, RedisModule_CreateStringPrintf(ctx, "idx:%s", sp->name),
REDISMODULE_READ | REDISMODULE_WRITE);
if (k == NULL) {
return REDISMODULE_ERR;
Expand All @@ -107,7 +107,7 @@ int IndexSpec_Save(RedisModuleCtx *ctx, IndexSpec *sp) {
RedisModule_CreateString(ctx, sp->fields[i].name, strlen(sp->fields[i].name)));
if (sp->fields[i].type == F_FULLTEXT) {
RedisModule_ListPush(k, REDISMODULE_LIST_TAIL,
RMUtil_CreateFormattedString(ctx, "%f", sp->fields[i].weight));
RedisModule_CreateStringPrintf(ctx, "%f", sp->fields[i].weight));
} else {
RedisModule_ListPush(k, REDISMODULE_LIST_TAIL,
RedisModule_CreateString(ctx, NUMERIC_STR, strlen(NUMERIC_STR)));
Expand All @@ -123,7 +123,7 @@ int IndexSpec_Load(RedisModuleCtx *ctx, IndexSpec *sp, const char *name) {
sp->name = name;

RedisModuleCallReply *resp = RedisModule_Call(
ctx, "LRANGE", "scc", RMUtil_CreateFormattedString(ctx, "idx:%s", sp->name), "0", "-1");
ctx, "LRANGE", "scc", RedisModule_CreateStringPrintf(ctx, "idx:%s", sp->name), "0", "-1");
if (resp == NULL || RedisModule_CallReplyType(resp) != REDISMODULE_REPLY_ARRAY) {
return REDISMODULE_ERR;
}
Expand Down
3 changes: 2 additions & 1 deletion src/tests/test_index.c
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,9 @@ int testUnion() {
15, 16, 18, 20, 21, 24, 27, 30};
int i = 0;
while (ui->Read(ui->ctx, &h) != INDEXREAD_EOF) {
printf("%d <=> %d\n", h.docId, expected[i]);
ASSERT(h.docId == expected[i++]);
// printf("%d, ", h.docId);
//printf("%d, ", h.docId);
}
IW_Free(w);
IW_Free(w2);
Expand Down
1 change: 1 addition & 0 deletions src/tokenize.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ int _tokenize(TokenizerCtx *ctx) {
t.s = strndup(stem, sl);
t.type = DT_STEM;
t.len = sl;
t.fieldId = ctx->fieldId;
t.stringFreeable = 1;
if (ctx->tokenFunc(ctx->tokenFuncCtx, t) != 0) {
break;
Expand Down

0 comments on commit bc6a18d

Please sign in to comment.