Skip to content

Commit

Permalink
Fix use of wrong UNICHARSET
Browse files Browse the repository at this point in the history
Signed-off-by: Stefan Weil <sw@weilnetz.de>
  • Loading branch information
stweil committed Oct 6, 2018
1 parent 0e43ae5 commit 8dc9e9f
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 12 deletions.
1 change: 1 addition & 0 deletions src/api/baseapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ class Trie;
class Wordrec;

typedef int (Dict::*DictFunc)(void* void_dawg_args,
const UNICHARSET& unicharset,
UNICHAR_ID unichar_id, bool word_end) const;
typedef double (Dict::*ProbabilityInContextFunc)(const char* lang,
const char* context,
Expand Down
11 changes: 8 additions & 3 deletions src/dict/dict.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -361,10 +361,13 @@ void Dict::End() {
// according to at least one of the dawgs in the dawgs_ vector.
// See more extensive comments in dict.h where this function is declared.
int Dict::def_letter_is_okay(void* void_dawg_args,
const UNICHARSET& unicharset,
UNICHAR_ID unichar_id,
bool word_end) const {
DawgArgs *dawg_args = static_cast<DawgArgs *>(void_dawg_args);

ASSERT_HOST(unicharset.contains_unichar_id(unichar_id));

if (dawg_debug_level >= 3) {
tprintf("def_letter_is_okay: current unichar=%s word_end=%d"
" num active dawgs=%d\n",
Expand Down Expand Up @@ -410,7 +413,7 @@ int Dict::def_letter_is_okay(void* void_dawg_args,
for (int s = 0; s < slist.length(); ++s) {
int sdawg_index = slist[s];
const Dawg *sdawg = dawgs_[sdawg_index];
UNICHAR_ID ch = char_for_dawg(unichar_id, sdawg);
UNICHAR_ID ch = char_for_dawg(unicharset, unichar_id, sdawg);
EDGE_REF dawg_edge = sdawg->edge_char_of(0, ch, word_end);
if (dawg_edge != NO_EDGE) {
if (dawg_debug_level >=3) {
Expand Down Expand Up @@ -477,7 +480,8 @@ int Dict::def_letter_is_okay(void* void_dawg_args,
// Find the edge out of the node for the unichar_id.
NODE_REF node = GetStartingNode(dawg, pos.dawg_ref);
EDGE_REF edge = (node == NO_EDGE) ? NO_EDGE
: dawg->edge_char_of(node, char_for_dawg(unichar_id, dawg), word_end);
: dawg->edge_char_of(node, char_for_dawg(unicharset, unichar_id, dawg),
word_end);

if (dawg_debug_level >= 3) {
tprintf("Active dawg: [%d, " REFFORMAT "] edge=" REFFORMAT "\n",
Expand Down Expand Up @@ -759,7 +763,8 @@ int Dict::valid_word(const WERD_CHOICE &word, bool numbers_ok) const {
int last_index = word_ptr->length() - 1;
// Call letter_is_okay for each letter in the word.
for (int i = hyphen_base_size(); i <= last_index; ++i) {
if (!((this->*letter_is_okay_)(&dawg_args, word_ptr->unichar_id(i),
if (!((this->*letter_is_okay_)(&dawg_args, *word_ptr->unicharset(),
word_ptr->unichar_id(i),
i == last_index))) break;
// Swap active_dawgs, constraints with the corresponding updated vector.
if (dawg_args.updated_dawgs == &(active_dawgs[1])) {
Expand Down
13 changes: 8 additions & 5 deletions src/dict/dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -351,15 +351,17 @@ class Dict {
*/

//
int def_letter_is_okay(void* void_dawg_args,
int def_letter_is_okay(void* void_dawg_args, const UNICHARSET& unicharset,
UNICHAR_ID unichar_id, bool word_end) const;

int (Dict::*letter_is_okay_)(void* void_dawg_args,
const UNICHARSET& unicharset,
UNICHAR_ID unichar_id, bool word_end) const;
/// Calls letter_is_okay_ member function.
int LetterIsOkay(void* void_dawg_args,
int LetterIsOkay(void* void_dawg_args, const UNICHARSET& unicharset,
UNICHAR_ID unichar_id, bool word_end) const {
return (this->*letter_is_okay_)(void_dawg_args, unichar_id, word_end);
return (this->*letter_is_okay_)(void_dawg_args,
unicharset, unichar_id, word_end);
}


Expand Down Expand Up @@ -428,11 +430,12 @@ class Dict {
// Given a unichar from a string and a given dawg, return the unichar
// we should use to match in that dawg type. (For example, in the number
// dawg, every digit is transformed to Dawg::kPatternUnicharID.)
inline UNICHAR_ID char_for_dawg(UNICHAR_ID ch, const Dawg *dawg) const {
UNICHAR_ID char_for_dawg(const UNICHARSET& unicharset, UNICHAR_ID ch,
const Dawg *dawg) const {
if (!dawg) return ch;
switch (dawg->type()) {
case DAWG_TYPE_NUMBER:
return getUnicharset().get_isdigit(ch) ? Dawg::kPatternUnicharID : ch;
return unicharset.get_isdigit(ch) ? Dawg::kPatternUnicharID : ch;
default:
return ch;
}
Expand Down
5 changes: 3 additions & 2 deletions src/dict/permdawg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ void Dict::go_deeper_dawg_fxn(
++num_unigrams;
word->append_unichar_id(uch_id, 1, 0.0, 0.0);
unigrams_ok = (this->*letter_is_okay_)(
&unigram_dawg_args,
&unigram_dawg_args, *word->unicharset(),
word->unichar_id(word_index+num_unigrams-1),
word_ending && i == encoding.size() - 1);
(*unigram_dawg_args.active_dawgs) = *(unigram_dawg_args.updated_dawgs);
Expand All @@ -111,7 +111,8 @@ void Dict::go_deeper_dawg_fxn(
// Check which dawgs from the dawgs_ vector contain the word
// up to and including the current unichar.
if (checked_unigrams || (this->*letter_is_okay_)(
more_args, word->unichar_id(word_index), word_ending)) {
more_args, *word->unicharset(), word->unichar_id(word_index),
word_ending)) {
// Add a new word choice
if (word_ending) {
if (dawg_debug_level) {
Expand Down
3 changes: 2 additions & 1 deletion src/lstm/recodebeam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -771,7 +771,8 @@ void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert,
return; // Can't continue if not a dict word.
}
PermuterType permuter = static_cast<PermuterType>(
dict_->def_letter_is_okay(&dawg_args, unichar_id, false));
dict_->def_letter_is_okay(&dawg_args,
dict_->getUnicharset(), unichar_id, false));
if (permuter != NO_PERM) {
PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false,
word_start, dawg_args.valid_end, false, cert, prev,
Expand Down
2 changes: 1 addition & 1 deletion src/wordrec/language_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -853,7 +853,7 @@ LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo(
if (language_model_debug_level > 2)
tprintf("Test Letter OK for unichar %d, normed %d\n",
b.unichar_id(), normed_ids[i]);
dict_->LetterIsOkay(&dawg_args_, normed_ids[i],
dict_->LetterIsOkay(&dawg_args_, dict_->getUnicharset(), normed_ids[i],
word_end && i == normed_ids.size() - 1);
if (dawg_args_.permuter == NO_PERM) {
break;
Expand Down

0 comments on commit 8dc9e9f

Please sign in to comment.