Skip to content

Commit

Permalink
Implement multi-type indexing
Browse files Browse the repository at this point in the history
This allows the same field to be indexed in different ways depending on
user input. This is still WIP because it's missing a proper interface.

This commit also significantly overhauls the way fields are handled
because a single field can carry multiple "types".

Currently, multitype fields cannot be sortable because the sorting
preference is ambiguous (e.g. should `1` be sorted lexically or numerically?)
  • Loading branch information
mnunberg committed Mar 18, 2019
1 parent f085680 commit 77ec9c0
Show file tree
Hide file tree
Showing 27 changed files with 683 additions and 615 deletions.
26 changes: 13 additions & 13 deletions src/cpptests/t_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -733,47 +733,47 @@ TEST_F(IndexTest, testIndexSpec) {

const FieldSpec *f = IndexSpec_GetField(s, body, strlen(body));
ASSERT_TRUE(f != NULL);
ASSERT_TRUE(f->type == FIELD_FULLTEXT);
ASSERT_TRUE(strcmp(f->name, body) == 0);
ASSERT_TRUE(f->textOpts.weight == 2.0);
ASSERT_TRUE(FIELD_IS(f, INDEXFLD_T_FULLTEXT));
ASSERT_STREQ(f->name, body);
ASSERT_EQ(f->ftWeight, 2.0);
ASSERT_EQ(FIELD_BIT(f), 2);
ASSERT_TRUE(f->options == 0);
ASSERT_TRUE(f->sortIdx == -1);
ASSERT_EQ(f->options, 0);
ASSERT_EQ(f->sortIdx, -1);

f = IndexSpec_GetField(s, title, strlen(title));
ASSERT_TRUE(f != NULL);
ASSERT_TRUE(f->type == FIELD_FULLTEXT);
ASSERT_TRUE(FIELD_IS(f, INDEXFLD_T_FULLTEXT));
ASSERT_TRUE(strcmp(f->name, title) == 0);
ASSERT_TRUE(f->textOpts.weight == 0.1);
ASSERT_TRUE(f->ftWeight == 0.1);
ASSERT_TRUE(FIELD_BIT(f) == 1);
ASSERT_TRUE(f->options == 0);
ASSERT_TRUE(f->sortIdx == -1);

f = IndexSpec_GetField(s, foo, strlen(foo));
ASSERT_TRUE(f != NULL);
ASSERT_TRUE(f->type == FIELD_FULLTEXT);
ASSERT_TRUE(FIELD_IS(f, INDEXFLD_T_FULLTEXT));
ASSERT_TRUE(strcmp(f->name, foo) == 0);
ASSERT_TRUE(f->textOpts.weight == 1);
ASSERT_TRUE(f->ftWeight == 1);
ASSERT_TRUE(FIELD_BIT(f) == 4);
ASSERT_TRUE(f->options == FieldSpec_Sortable);
ASSERT_TRUE(f->sortIdx == 0);

f = IndexSpec_GetField(s, bar, strlen(bar));
ASSERT_TRUE(f != NULL);
ASSERT_TRUE(f->type == FIELD_NUMERIC);
ASSERT_TRUE(FIELD_IS(f, INDEXFLD_T_NUMERIC));

ASSERT_TRUE(strcmp(f->name, bar) == 0);
ASSERT_TRUE(f->textOpts.weight == 0);
ASSERT_TRUE(f->ftWeight == 0);
ASSERT_TRUE(FIELD_BIT(f) == 1);
ASSERT_TRUE(f->options == FieldSpec_Sortable);
ASSERT_TRUE(f->sortIdx == 1);
ASSERT_TRUE(IndexSpec_GetField(s, "fooz", 4) == NULL);

f = IndexSpec_GetField(s, name, strlen(name));
ASSERT_TRUE(f != NULL);
ASSERT_TRUE(f->type == FIELD_FULLTEXT);
ASSERT_TRUE(FIELD_IS(f, INDEXFLD_T_FULLTEXT));
ASSERT_TRUE(strcmp(f->name, name) == 0);
ASSERT_TRUE(f->textOpts.weight == 1);
ASSERT_TRUE(f->ftWeight == 1);
ASSERT_TRUE(FIELD_BIT(f) == 8);
ASSERT_TRUE(f->options == FieldSpec_NoStemming);
ASSERT_TRUE(f->sortIdx == -1);
Expand Down
107 changes: 76 additions & 31 deletions src/cpptests/t_llapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,46 @@ TEST_F(LLApiTest, testGetVersion) {
ASSERT_EQ(RediSearch_GetCApiVersion(), REDISEARCH_CAPI_VERSION);
}

/**
 * Runs the query node `qn` against `index` and gathers every entry the
 * results iterator yields, each copied into a std::string using the
 * length reported by the iterator.
 *
 * @param index        index passed to the iterator calls
 * @param qn           query node handed to RediSearch_GetResultsIterator
 * @param expectEmpty  when true, a non-fatal expectation checks that no
 *                     iterator was created; when false, that one was
 * @return the collected entries, in iteration order; empty when no
 *         iterator could be obtained
 */
static std::vector<std::string> getResults(RSIndex* index, RSQueryNode* qn,
                                           bool expectEmpty = false) {
  std::vector<std::string> collected;
  auto it = RediSearch_GetResultsIterator(qn, index);
  if (expectEmpty) {
    EXPECT_TRUE(it == NULL);
  } else {
    EXPECT_FALSE(it == NULL);
  }

  // Nothing to walk (and nothing to free) without an iterator.
  if (!it) {
    return collected;
  }

  for (;;) {
    size_t len = 0;
    auto entry = RediSearch_ResultsIteratorNext(it, index, &len);
    if (entry == NULL) {
      break;  // iterator exhausted
    }
    collected.emplace_back((const char*)entry, len);
  }

  RediSearch_ResultsIteratorFree(it);
  return collected;
}

TEST_F(LLApiTest, testAddDocumentTextField) {
// creating the index
RSIndex* index = RediSearch_CreateIndex("index", NULL, NULL);

// adding text field to the index
RediSearch_CreateTextField(index, FIELD_NAME_1);
RediSearch_CreateField(index, FIELD_NAME_1, RSFLDTYPE_FULLTEXT, RSFLDOPT_NONE);

// adding document to the index
RSDoc* d = RediSearch_CreateDocument(DOCID1, strlen(DOCID1), 1.0, NULL);
RediSearch_DocumentAddTextFieldC(d, FIELD_NAME_1, "some test to index");
RediSearch_DocumentAddFieldCString(d, FIELD_NAME_1, "some test to index", RSFLDTYPE_DEFAULT);
RediSearch_SpecAddDocument(index, d);

// searching on the index
Expand Down Expand Up @@ -68,12 +98,14 @@ TEST_F(LLApiTest, testAddDocumentTextField) {
ASSERT_FALSE(iter);

// adding another text field
RediSearch_CreateTextField(index, FIELD_NAME_2);
RediSearch_CreateField(index, FIELD_NAME_2, RSFLDTYPE_FULLTEXT, RSFLDOPT_NONE);

// adding document to the index with both fields
d = RediSearch_CreateDocument(DOCID2, strlen(DOCID2), 1.0, NULL);
RediSearch_DocumentAddTextFieldC(d, FIELD_NAME_1, "another indexing testing");
RediSearch_DocumentAddTextFieldC(d, FIELD_NAME_2, "another indexing testing");
RediSearch_DocumentAddFieldCString(d, FIELD_NAME_1, "another indexing testing",
RSFLDTYPE_DEFAULT);
RediSearch_DocumentAddFieldCString(d, FIELD_NAME_2, "another indexing testing",
RSFLDTYPE_DEFAULT);
RediSearch_SpecAddDocument(index, d);

// test prefix search, should return both documents now
Expand Down Expand Up @@ -115,7 +147,7 @@ TEST_F(LLApiTest, testAddDocumentTextField) {
RediSearch_DropIndex(index);
}

TEST_F(LLApiTest, testAddDocumetNumericField) {
TEST_F(LLApiTest, testAddDocumentNumericField) {
// creating the index
RSIndex* index = RediSearch_CreateIndex("index", NULL, NULL);

Expand All @@ -124,12 +156,13 @@ TEST_F(LLApiTest, testAddDocumetNumericField) {

// adding document to the index
RSDoc* d = RediSearch_CreateDocument(DOCID1, strlen(DOCID1), 1.0, NULL);
RediSearch_DocumentAddNumericField(d, NUMERIC_FIELD_NAME, 20);
RediSearch_DocumentAddFieldNumber(d, NUMERIC_FIELD_NAME, 20, RSFLDTYPE_DEFAULT);
RediSearch_SpecAddDocument(index, d);

// searching on the index
RSQNode* qn = RediSearch_CreateNumericNode(index, NUMERIC_FIELD_NAME, 30, 10, 0, 0);
RSResultsIterator* iter = RediSearch_GetResultsIterator(qn, index);
ASSERT_TRUE(iter != NULL);

size_t len;
const char* id = (const char*)RediSearch_ResultsIteratorNext(iter, index, &len);
Expand All @@ -151,7 +184,7 @@ TEST_F(LLApiTest, testAddDocumetTagField) {
// adding document to the index
#define TAG_VALUE "tag_value"
RSDoc* d = RediSearch_CreateDocument(DOCID1, strlen(DOCID1), 1.0, NULL);
RediSearch_DocumentAddTextFieldC(d, TAG_FIELD_NAME1, TAG_VALUE);
RediSearch_DocumentAddFieldCString(d, TAG_FIELD_NAME1, TAG_VALUE, RSFLDTYPE_DEFAULT);
RediSearch_SpecAddDocument(index, d);

// searching on the index
Expand Down Expand Up @@ -186,33 +219,24 @@ TEST_F(LLApiTest, testAddDocumetTagField) {
TEST_F(LLApiTest, testPhoneticSearch) {
// creating the index
RSIndex* index = RediSearch_CreateIndex("index", NULL, NULL);
RSField* f = RediSearch_CreateTextField(index, FIELD_NAME_1);
RediSearch_TextFieldPhonetic(f, index);

// creating none phonetic field
RediSearch_CreateTextField(index, FIELD_NAME_2);
RediSearch_CreateField(index, FIELD_NAME_1, RSFLDTYPE_FULLTEXT, RSFLDOPT_TXTPHONETIC);
RediSearch_CreateField(index, FIELD_NAME_2, RSFLDTYPE_FULLTEXT, RSFLDOPT_NONE);

RSDoc* d = RediSearch_CreateDocument(DOCID1, strlen(DOCID1), 1.0, NULL);
RediSearch_DocumentAddTextFieldC(d, FIELD_NAME_1, "felix");
RediSearch_DocumentAddTextFieldC(d, FIELD_NAME_2, "felix");
RediSearch_DocumentAddFieldCString(d, FIELD_NAME_1, "felix", RSFLDTYPE_DEFAULT);
RediSearch_DocumentAddFieldCString(d, FIELD_NAME_2, "felix", RSFLDTYPE_DEFAULT);
RediSearch_SpecAddDocument(index, d);

// make sure phonetic search works on field1
RSQNode* qn = RediSearch_CreateTokenNode(index, FIELD_NAME_1, "phelix");
RSResultsIterator* iter = RediSearch_GetResultsIterator(qn, index);

size_t len;
const char* id = (const char*)RediSearch_ResultsIteratorNext(iter, index, &len);
ASSERT_STREQ(id, DOCID1);
id = (const char*)RediSearch_ResultsIteratorNext(iter, index, &len);
ASSERT_STREQ(id, NULL);

RediSearch_ResultsIteratorFree(iter);
auto res = getResults(index, qn);
ASSERT_EQ(1, res.size());
ASSERT_EQ(DOCID1, res[0]);

// make sure phonetic search on field2 does not return results
qn = RediSearch_CreateTokenNode(index, FIELD_NAME_2, "phelix");
iter = RediSearch_GetResultsIterator(qn, index);
ASSERT_FALSE(iter);
res = getResults(index, qn, true);
ASSERT_EQ(0, res.size());
RediSearch_DropIndex(index);
}

Expand All @@ -227,7 +251,7 @@ TEST_F(LLApiTest, testMassivePrefix) {
sprintf(buff, "doc%d", i);
RSDoc* d = RediSearch_CreateDocument(buff, strlen(buff), 1.0, NULL);
sprintf(buff, "tag-%d", i);
RediSearch_DocumentAddTextFieldC(d, TAG_FIELD_NAME1, buff);
RediSearch_DocumentAddFieldCString(d, TAG_FIELD_NAME1, buff, RSFLDTYPE_DEFAULT);
RediSearch_SpecAddDocument(index, d);
}

Expand Down Expand Up @@ -257,7 +281,7 @@ TEST_F(LLApiTest, testRanges) {
char did[64];
sprintf(did, "doc%c", c);
RSDoc* d = RediSearch_CreateDocument(did, strlen(did), 0, NULL);
RediSearch_DocumentAddTextFieldC(d, FIELD_NAME_1, buf);
RediSearch_DocumentAddFieldCString(d, FIELD_NAME_1, buf, RSFLDTYPE_DEFAULT);
RediSearch_SpecAddDocument(index, d);
}

Expand Down Expand Up @@ -311,7 +335,7 @@ TEST_F(LLApiTest, testMassivePrefixWithUnsortedSupport) {
sprintf(buff, "doc%d", i);
RSDoc* d = RediSearch_CreateDocument(buff, strlen(buff), 1.0, NULL);
sprintf(buff, "tag-%d", i);
RediSearch_DocumentAddTextFieldC(d, TAG_FIELD_NAME1, buff);
RediSearch_DocumentAddFieldCString(d, TAG_FIELD_NAME1, buff, RSFLDTYPE_DEFAULT);
RediSearch_SpecAddDocument(index, d);
}

Expand Down Expand Up @@ -343,9 +367,9 @@ TEST_F(LLApiTest, testPrefixIntersection) {
sprintf(buff, "doc%d", i);
RSDoc* d = RediSearch_CreateDocument(buff, strlen(buff), 1.0, NULL);
sprintf(buff, "tag1-%d", i);
RediSearch_DocumentAddTextFieldC(d, TAG_FIELD_NAME1, buff);
RediSearch_DocumentAddFieldCString(d, TAG_FIELD_NAME1, buff, RSFLDTYPE_DEFAULT);
sprintf(buff, "tag2-%d", i);
RediSearch_DocumentAddTextFieldC(d, TAG_FIELD_NAME2, buff);
RediSearch_DocumentAddFieldCString(d, TAG_FIELD_NAME2, buff, RSFLDTYPE_DEFAULT);
RediSearch_SpecAddDocument(index, d);
}

Expand All @@ -371,3 +395,24 @@ TEST_F(LLApiTest, testPrefixIntersection) {
RediSearch_ResultsIteratorFree(iter);
RediSearch_DropIndex(index);
}

// Exercises multi-type field creation: "f1" is a plain full-text field while
// "f2" is declared as full-text, tag and numeric at once. A document is added
// with both fields and then looked up through a token query on "f1".
TEST_F(LLApiTest, testMultitype) {
  RSIndex* index = RediSearch_CreateIndex("index", NULL, NULL);
  auto* f = RediSearch_CreateField(index, "f1", RSFLDTYPE_FULLTEXT, RSFLDOPT_NONE);
  ASSERT_TRUE(f != NULL);
  f = RediSearch_CreateField(index, "f2", RSFLDTYPE_FULLTEXT | RSFLDTYPE_TAG | RSFLDTYPE_NUMERIC,
                             RSFLDOPT_NONE);
  // The multi-type field is the point of this test; verify it was created too.
  ASSERT_TRUE(f != NULL);

  // Add document...
  RSDoc* d = RediSearch_CreateDocumentSimple("doc1");
  RediSearch_DocumentAddFieldCString(d, "f1", "hello", RSFLDTYPE_FULLTEXT);
  RediSearch_DocumentAddFieldCString(d, "f2", "world", RSFLDTYPE_FULLTEXT | RSFLDTYPE_TAG);
  RediSearch_SpecAddDocument(index, d);

  // Done
  // Now search for them...
  auto qn = RediSearch_CreateTokenNode(index, "f1", "hello");
  auto results = getResults(index, qn);
  ASSERT_EQ(1, results.size());
  ASSERT_EQ("doc1", results[0]);

  // Release the index, as the other tests in this file do.
  RediSearch_DropIndex(index);
}
13 changes: 7 additions & 6 deletions src/debug_commads.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,14 @@ static void ReplyReaderResults(IndexReader *reader, RedisModuleCtx *ctx) {
ReadIterator_Free(iter);
}

static RedisModuleString *getFieldKeyName(IndexSpec *spec, RedisModuleString *fieldNameRS) {
static RedisModuleString *getFieldKeyName(IndexSpec *spec, RedisModuleString *fieldNameRS,
FieldType t) {
const char *fieldName = RedisModule_StringPtrLen(fieldNameRS, NULL);
const FieldSpec *fieldSpec = IndexSpec_GetField(spec, fieldName, strlen(fieldName));
if (!fieldSpec) {
return NULL;
}
return IndexSpec_GetFormattedKey(spec, fieldSpec);
return IndexSpec_GetFormattedKey(spec, fieldSpec, t);
}

DEBUG_COMMAND(DumpTerms) {
Expand Down Expand Up @@ -154,7 +155,7 @@ DEBUG_COMMAND(NumericIndexSummary) {
}
GET_SEARCH_CTX(argv[0])
RedisModuleKey *keyp = NULL;
RedisModuleString *keyName = getFieldKeyName(sctx->spec, argv[1]);
RedisModuleString *keyName = getFieldKeyName(sctx->spec, argv[1], INDEXFLD_T_NUMERIC);
if (!keyName) {
RedisModule_ReplyWithError(sctx->redisCtx, "Could not find given field in index spec");
goto end;
Expand Down Expand Up @@ -189,7 +190,7 @@ DEBUG_COMMAND(DumpNumericIndex) {
}
GET_SEARCH_CTX(argv[0])
RedisModuleKey *keyp = NULL;
RedisModuleString *keyName = getFieldKeyName(sctx->spec, argv[1]);
RedisModuleString *keyName = getFieldKeyName(sctx->spec, argv[1], INDEXFLD_T_NUMERIC);
if (!keyName) {
RedisModule_ReplyWithError(sctx->redisCtx, "Could not find given field in index spec");
goto end;
Expand Down Expand Up @@ -226,7 +227,7 @@ DEBUG_COMMAND(DumpTagIndex) {
}
GET_SEARCH_CTX(argv[0])
RedisModuleKey *keyp = NULL;
RedisModuleString *keyName = getFieldKeyName(sctx->spec, argv[1]);
RedisModuleString *keyName = getFieldKeyName(sctx->spec, argv[1], INDEXFLD_T_TAG);
if (!keyName) {
RedisModule_ReplyWithError(sctx->redisCtx, "Could not find given field in index spec");
goto end;
Expand Down Expand Up @@ -395,7 +396,7 @@ DEBUG_COMMAND(InfoTagIndex) {
goto end;
}

RedisModuleString *keyName = getFieldKeyName(sctx->spec, argv[1]);
RedisModuleString *keyName = getFieldKeyName(sctx->spec, argv[1], INDEXFLD_T_TAG);
if (!keyName) {
RedisModule_ReplyWithError(sctx->redisCtx, "Could not find given field in index spec");
goto end;
Expand Down
12 changes: 7 additions & 5 deletions src/default_gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ static RedisModuleString *getRandomFieldByType(IndexSpec *spec, FieldType type)
// choose random tag field
int randomIndex = rand() % array_len(tagFields);

RedisModuleString *ret = IndexSpec_GetFormattedKey(spec, tagFields[randomIndex]);
RedisModuleString *ret = IndexSpec_GetFormattedKey(spec, tagFields[randomIndex], type);
array_free(tagFields);
return ret;
}
Expand All @@ -224,7 +224,7 @@ size_t gc_TagIndex(RedisModuleCtx *ctx, GarbageCollectorCtx *gc, int *status) {
}
IndexSpec *spec = sctx->spec;

RedisModuleString *keyName = getRandomFieldByType(spec, FIELD_TAG);
RedisModuleString *keyName = getRandomFieldByType(spec, INDEXFLD_T_TAG);
if (!keyName) {
goto end;
}
Expand Down Expand Up @@ -300,7 +300,7 @@ size_t gc_NumericIndex(RedisModuleCtx *ctx, GarbageCollectorCtx *gc, int *status
}
IndexSpec *spec = sctx->spec;
// find all the numeric fields
numericFields = getFieldsByType(spec, FIELD_NUMERIC);
numericFields = getFieldsByType(spec, INDEXFLD_T_NUMERIC);

if (array_len(numericFields) == 0) {
goto end;
Expand All @@ -312,7 +312,8 @@ size_t gc_NumericIndex(RedisModuleCtx *ctx, GarbageCollectorCtx *gc, int *status
array_len(gc->numericGCCtx)); // it is not possible to remove fields
gc_FreeNumericGcCtxArray(gc);
for (int i = 0; i < array_len(numericFields); ++i) {
RedisModuleString *keyName = IndexSpec_GetFormattedKey(spec, numericFields[i]);
RedisModuleString *keyName =
IndexSpec_GetFormattedKey(spec, numericFields[i], INDEXFLD_T_NUMERIC);
NumericRangeTree *rt = OpenNumericIndex(sctx, keyName, &idxKey);
// if we could not open the numeric field we probably have a
// corruption in our data, better to know it now.
Expand All @@ -327,7 +328,8 @@ size_t gc_NumericIndex(RedisModuleCtx *ctx, GarbageCollectorCtx *gc, int *status
NumericFieldGCCtx *numericGcCtx = gc->numericGCCtx[randomIndex];

// open the relevant numeric index to check that our pointer is valid
RedisModuleString *keyName = IndexSpec_GetFormattedKey(spec, numericFields[randomIndex]);
RedisModuleString *keyName =
IndexSpec_GetFormattedKey(spec, numericFields[randomIndex], INDEXFLD_T_NUMERIC);
NumericRangeTree *rt = OpenNumericIndex(sctx, keyName, &idxKey);
if (idxKey) RedisModule_CloseKey(idxKey);

Expand Down
Loading

0 comments on commit 77ec9c0

Please sign in to comment.