Skip to content

Commit

Permalink
fixed unicode prefix error
Browse files Browse the repository at this point in the history
  • Loading branch information
dvirsky committed Dec 1, 2016
1 parent fa22f25 commit 80989a2
Show file tree
Hide file tree
Showing 8 changed files with 42 additions and 14 deletions.
3 changes: 3 additions & 0 deletions src/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@ void IR_Free(IndexReader *ir);
/* Read an entry from an inverted index */
int IR_GenericRead(IndexReader *ir, t_docId *docId, float *freq, u_char *flags,
VarintVector *offsets);

int IR_TryRead(IndexReader *ir, t_docId *docId, t_docId expectedDocId);

/* Read an entry from an inverted index into IndexHit */
int IR_Read(void *ctx, IndexHit *e);

Expand Down
7 changes: 7 additions & 0 deletions src/rmutil/vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,10 @@ void Vector_Free(Vector *v) {
free(v->data);
free(v);
}


/* Return the used size of the vector (v->top), regardless of capacity.
 *
 * Defined as an ordinary external function: the header declares it without
 * `inline`, and a bare `inline` definition in a .c file only provides an
 * external symbol when such a non-inline declaration happens to be visible. */
int Vector_Size(Vector *v) {
  return v->top;
}

/* Return the actual capacity of the vector (v->cap).
 *
 * Defined as an ordinary external function: the header declares it without
 * `inline`, and a bare `inline` definition in a .c file only provides an
 * external symbol when such a non-inline declaration happens to be visible. */
int Vector_Cap(Vector *v) {
  return v->cap;
}
4 changes: 2 additions & 2 deletions src/rmutil/vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,10 @@ int __vector_PushPtr(Vector *v, void *elem);
int Vector_Resize(Vector *v, size_t newcap);

/* return the used size of the vector, regardless of capacity */
inline int Vector_Size(Vector *v) { return v->top; }
int Vector_Size(Vector *v);

/* return the actual capacity */
inline int Vector_Cap(Vector *v) { return v->cap; }
int Vector_Cap(Vector *v);

/* free the vector and the underlying data. Does not release its elements if
* they are pointers*/
Expand Down
19 changes: 18 additions & 1 deletion src/tests/test_trie.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,22 @@ int testTrie() {
return 0;
}

/* Check that a key containing multi-byte UTF-8 sequences can be added to a
 * trie node and looked up again. The key is the UTF-8 encoding of "Čajić"
 * (U+010C and U+0107 each take two bytes).
 *
 * Returns 0 when every assertion holds. */
int testUnicode() {
  /* The literal is split around "aji" on purpose: a \x escape swallows every
   * hex digit that follows it, so "\x8caji" would be parsed as the
   * out-of-range escape \x8ca followed by "ji" rather than the byte 0x8c
   * followed by "aji". Adjacent literals are concatenated after escapes are
   * processed, which ends the escape where intended. */
  char *str = "\xc4\x8c" "aji" "\xc4\x87";

  TrieNode *root = __newTrieNode("", 0, 0, 0, 1, 0);
  ASSERT(root != NULL);

  /* The first insertion is expected to report 1 ... */
  int rc = __trie_add(root, str, 1, ADD_REPLACE);
  ASSERT_EQUAL_INT(1, rc);
  /* ... and repeating it with ADD_REPLACE is expected to report 0
   * (presumably "replaced, nothing new added" - confirm against __trie_add). */
  rc = __trie_add(root, str, 1, ADD_REPLACE);
  ASSERT_EQUAL_INT(0, rc);
  /* Looking the key up must yield the value 1 passed to __trie_add above. */
  float sc = TrieNode_Find(root, str, strlen(str));
  ASSERT(sc == 1);
  return 0;
}

int testDFAFilter() {
FILE *fp = fopen("./titles.csv", "r");
assert(fp != NULL);
Expand Down Expand Up @@ -185,5 +201,6 @@ int testDFAFilter() {

/* Test entry point: hands each enabled test function to the TESTFUNC macro
 * (defined outside this view). argc and argv are not used.
 *
 * NOTE(review): this view shows both an active `TESTFUNC(testTrie);` and a
 * commented-out copy of it; confirm whether testTrie is meant to stay enabled
 * alongside testUnicode. */
int main(int argc, char **argv) {
/* testDFAFilter is disabled here; it opens ./titles.csv from the working directory. */
//TESTFUNC(testDFAFilter);
TESTFUNC(testTrie);
//TESTFUNC(testTrie);
TESTFUNC(testUnicode);
}
10 changes: 5 additions & 5 deletions src/trie/levenshtein.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

// NewSparseAutomaton creates a new automaton for the string s, with a given max
// edit distance check
SparseAutomaton NewSparseAutomaton(const char *s, size_t len, int maxEdits) {
SparseAutomaton NewSparseAutomaton(const unsigned char *s, size_t len, int maxEdits) {
return (SparseAutomaton){s, len, maxEdits};
}

Expand All @@ -23,7 +23,7 @@ sparseVector *SparseAutomaton_Start(SparseAutomaton *a) {

// Step returns the next state of the automaton given a previous state and a
// character to check
sparseVector *SparseAutomaton_Step(SparseAutomaton *a, sparseVector *state, char c) {
sparseVector *SparseAutomaton_Step(SparseAutomaton *a, sparseVector *state, unsigned char c) {
sparseVector *newVec = newSparseVectorCap(state->len);

if (state->len) {
Expand Down Expand Up @@ -87,7 +87,7 @@ void __dfaNode_free(dfaNode *d) {
free(d);
}

inline int __sv_equals(sparseVector *sv1, sparseVector *sv2) {
int __sv_equals(sparseVector *sv1, sparseVector *sv2) {
if (sv1->len != sv2->len) return 0;

for (int i = 0; i < sv1->len; i++) {
Expand All @@ -113,10 +113,10 @@ dfaNode *__dfn_getCache(Vector *cache, sparseVector *v) {
return NULL;
}


/* Store a DFA node in the cache by pushing it onto the cache vector. */
void __dfn_putCache(Vector *cache, dfaNode *dfn) {
  Vector_Push(cache, dfn);
}

void dfa_build(dfaNode *parent, SparseAutomaton *a, Vector *cache) {
// printf("building dfa node dist %d\n", parent->distance);
parent->match = SparseAutomaton_IsMatch(a, parent->v);

for (int i = 0; i < parent->v->len; i++) {
Expand Down Expand Up @@ -165,7 +165,7 @@ void dfa_build(dfaNode *parent, SparseAutomaton *a, Vector *cache) {
//}
}

DFAFilter NewDFAFilter(char *str, size_t len, int maxDist, int prefixMode) {
DFAFilter NewDFAFilter(unsigned char *str, size_t len, int maxDist, int prefixMode) {
Vector *cache = NewVector(dfaNode *, 8);

SparseAutomaton a = NewSparseAutomaton(str, len, maxDist);
Expand Down
8 changes: 4 additions & 4 deletions src/trie/levenshtein.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* This DFA is used while traversing a Trie to decide where to stop.
*/
typedef struct {
const char *string;
const unsigned char *string;
size_t len;
int max;
} SparseAutomaton;
Expand All @@ -38,13 +38,13 @@ void dfa_build(dfaNode *parent, SparseAutomaton *a, Vector *cache);

/* Create a new Sparse Levenshtein Automaton for string s and length len, with a maximal edit
* distance of maxEdits */
SparseAutomaton NewSparseAutomaton(const char *s, size_t len, int maxEdits);
SparseAutomaton NewSparseAutomaton(const unsigned char *s, size_t len, int maxEdits);

/* Create the initial state vector of the root automaton node */
sparseVector *SparseAutomaton_Start(SparseAutomaton *a);

/* Step from a given state of the automaton to the next step given a specific character */
sparseVector *SparseAutomaton_Step(SparseAutomaton *a, sparseVector *state, char c);
sparseVector *SparseAutomaton_Step(SparseAutomaton *a, sparseVector *state, unsigned char c);

/* Is the current state of the automaton a match for the query? */
int SparseAutomaton_IsMatch(SparseAutomaton *a, sparseVector *v);
Expand All @@ -69,7 +69,7 @@ typedef struct {
/* Create a new DFA filter using a Levenshtein automaton, for the given string and maximum
* distance. If prefixMode is 1, we match prefixes within the given distance, and then continue
* onwards to all suffixes. */
DFAFilter NewDFAFilter(char *str, size_t len, int maxDist, int prefixMode);
DFAFilter NewDFAFilter(unsigned char *str, size_t len, int maxDist, int prefixMode);

/* A callback function for the DFA Filter, passed to the Trie iterator */
FilterCode FilterFunc(unsigned char b, void *ctx, int *matched, void *matchCtx);
Expand Down
2 changes: 1 addition & 1 deletion src/trie/trie.c
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ void __trieNode_sortChildren(TrieNode *n);
* 2. If a child has a single child - merge them
* 3. recalculate the max child score
*/
inline void __trieNode_optimizeChildren(TrieNode *n) {
void __trieNode_optimizeChildren(TrieNode *n) {

int i = 0;
TrieNode **nodes = __trieNode_children(n);
Expand Down
3 changes: 2 additions & 1 deletion src/trie/trie_type.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "../util/heap.h"
#include "trie_type.h"


Trie *NewTrie() {
Trie *tree = RedisModule_Alloc(sizeof(Trie));
tree->root = __newTrieNode("", 0, 0, 0, 0, 0);
Expand Down Expand Up @@ -53,7 +54,7 @@ Vector *Trie_Search(Trie *tree, char *s, size_t len, size_t num, int maxDist,
heap_t *pq = malloc(heap_sizeof(num));
heap_init(pq, cmpEntries, NULL, num);

DFAFilter fc = NewDFAFilter(s, len, maxDist, prefixMode);
DFAFilter fc = NewDFAFilter((unsigned char *)s, len, maxDist, prefixMode);

TrieIterator *it = TrieNode_Iterate(tree->root, FilterFunc, StackPop, &fc);
char *str;
Expand Down

0 comments on commit 80989a2

Please sign in to comment.