Skip to content

Commit

Permalink
Fix Heap-buffer-overflow in GenericVector<int>::size (issue #2298)
Browse files Browse the repository at this point in the history
Credit to OSS-Fuzz:
This fixes a security issue which was reported by OSS-Fuzz, see details at
https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=13590.

Also add some assertions to catch similar bugs.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
  • Loading branch information
stweil committed Mar 10, 2019
1 parent b7279f6 commit 71d4990
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 7 deletions.
17 changes: 12 additions & 5 deletions src/dict/dict.h
Expand Up @@ -107,17 +107,21 @@ class Dict {

// Returns true if unichar_id is a word compounding character like - or /.
// The id is asserted (ASSERT_HOST) to be contained in the unicharset obtained
// from getUnicharset(). The result is true only when the id has exactly one
// normalized id and that id equals hyphen_unichar_id_ or slash_unichar_id_.
// NOTE(review): this text is a commit diff whose +/- markers were lost, so the
// removed and the added version of the normed_ids lookup both appear below --
// confirm against the committed file before reusing it as code.
inline bool compound_marker(UNICHAR_ID unichar_id) {
const UNICHARSET& unicharset = getUnicharset();
// Assert the id is known to the unicharset before normed_ids() is consulted.
ASSERT_HOST(unicharset.contains_unichar_id(unichar_id));
const GenericVector<UNICHAR_ID>& normed_ids =
// Lookup through a second getUnicharset() call (presumably the removed line).
getUnicharset().normed_ids(unichar_id);
// Lookup through the local reference (presumably the added line).
unicharset.normed_ids(unichar_id);
// Exactly one normalized id is required, so normed_ids[0] is only read when
// the vector is known to hold an element.
return normed_ids.size() == 1 &&
(normed_ids[0] == hyphen_unichar_id_ ||
normed_ids[0] == slash_unichar_id_);
}
// Returns true if unichar_id is an apostrophe-like character that may
// separate prefix/suffix words from a main body word.
// The id is asserted (ASSERT_HOST) to be contained in the unicharset obtained
// from getUnicharset(). The result is true only when the id has exactly one
// normalized id and that id equals apostrophe_unichar_id_.
// NOTE(review): this text is a commit diff whose +/- markers were lost, so the
// removed and the added version of the normed_ids lookup both appear below --
// confirm against the committed file before reusing it as code.
inline bool is_apostrophe(UNICHAR_ID unichar_id) {
const UNICHARSET& unicharset = getUnicharset();
// Assert the id is known to the unicharset before normed_ids() is consulted.
ASSERT_HOST(unicharset.contains_unichar_id(unichar_id));
const GenericVector<UNICHAR_ID>& normed_ids =
// Lookup through a second getUnicharset() call (presumably the removed line).
getUnicharset().normed_ids(unichar_id);
// Lookup through the local reference (presumably the added line).
unicharset.normed_ids(unichar_id);
// The size check comes first so normed_ids[0] is never read on an empty vector.
return normed_ids.size() == 1 && normed_ids[0] == apostrophe_unichar_id_;
}

Expand All @@ -141,17 +145,20 @@ class Dict {
}
}
/// Check whether the word has a hyphen at the end.
/// Returns false immediately when last_word_on_line_ is not set or when
/// first_pos is true. Otherwise returns true only if unichar_id has exactly
/// one normalized id and that id equals hyphen_unichar_id_.
/// NOTE(review): this text is a commit diff whose +/- markers were lost. Two
/// signatures appear below: the two-argument one (presumably removed) and the
/// three-argument one taking the unicharset explicitly (presumably added).
/// The two normed_ids lookups are duplicated in the same way -- confirm
/// against the committed file before reusing this text as code.
inline bool has_hyphen_end(UNICHAR_ID unichar_id, bool first_pos) const {
inline bool has_hyphen_end(const UNICHARSET* unicharset,
UNICHAR_ID unichar_id, bool first_pos) const {
// The early return runs before the assertion and before any lookup.
if (!last_word_on_line_ || first_pos)
return false;
// Assert the id belongs to the unicharset passed in by the caller.
ASSERT_HOST(unicharset->contains_unichar_id(unichar_id));
const GenericVector<UNICHAR_ID>& normed_ids =
// Lookup in the Dict's own unicharset (presumably the removed line).
getUnicharset().normed_ids(unichar_id);
// Lookup in the caller-supplied unicharset (presumably the added line).
unicharset->normed_ids(unichar_id);
// The size check comes first so normed_ids[0] is never read on an empty vector.
return normed_ids.size() == 1 && normed_ids[0] == hyphen_unichar_id_;
}
/// Same as above, but check the unichar at the end of the word.
/// The last unichar id of the word is forwarded to the overload above, and
/// first_pos is true when that last position is index 0.
/// NOTE(review): this text is a commit diff whose +/- markers were lost, so
/// two return statements appear below: the two-argument call (presumably
/// removed) and the call that also passes word.unicharset() (presumably added).
/// NOTE(review): for an empty word, word_index would be -1; whether callers
/// guarantee a non-empty word is not visible here -- confirm.
inline bool has_hyphen_end(const WERD_CHOICE &word) const {
// Index of the last unichar in the word.
int word_index = word.length() - 1;
return has_hyphen_end(word.unichar_id(word_index), word_index == 0);
// This form uses the word's own unicharset for the lookup.
return has_hyphen_end(word.unicharset(), word.unichar_id(word_index),
word_index == 0);
}
/// Unless the previous word was the last one on the line, and the current
/// one is not (thus it is the first one on the line), erase hyphen_word_,
Expand Down
4 changes: 2 additions & 2 deletions src/wordrec/language_model.cpp
Expand Up @@ -3,7 +3,6 @@
// Description: Functions that utilize the knowledge about the properties,
// structure and statistics of the language to help recognition.
// Author: Daria Antonova
// Created: Mon Nov 11 11:26:43 PST 2009
//
// (C) Copyright 2009, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -803,7 +802,8 @@ LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo(
}

// Deal with hyphenated words.
if (word_end && dict_->has_hyphen_end(b.unichar_id(), curr_col == 0)) {
if (word_end && dict_->has_hyphen_end(&dict_->getUnicharset(),
b.unichar_id(), curr_col == 0)) {
if (language_model_debug_level > 0) tprintf("Hyphenated word found\n");
return new LanguageModelDawgInfo(dawg_args_.active_dawgs, COMPOUND_PERM);
}
Expand Down

0 comments on commit 71d4990

Please sign in to comment.