Skip to content

Commit

Permalink
Fix "Owned by a self-identified native" criterion
Browse files Browse the repository at this point in the history
Closes #2166.

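Work around Manticore's limit on the number of values per filter.
When the number of natives is greater than 4096, Manticore
throws an error. To avoid this, we filter by excluding
non-natives instead. This is possible because filters are
combined with a boolean AND operation, so we can create
multiple exclusion filters with up to 4096 values each.
(The inclusion filter cannot be split the same way: AND-ing
several inclusion filters over disjoint sets of user IDs
would match nothing.)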

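At the moment, on Tatoeba, there are 4128 English natives
and 5129 non-natives, so both lists exceed the 4096-value limit
and the exclusion list has to be split into several filters.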
  • Loading branch information
jiru committed Mar 1, 2020
1 parent a88c336 commit c26b5db
Showing 1 changed file with 19 additions and 1 deletion.
20 changes: 19 additions & 1 deletion src/Controller/SentencesController.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
use App\Lib\LanguagesLib;
use App\Lib\SphinxClient;
use Cake\Core\Configure;
use Cake\Database\Expression\QueryExpression;
use Cake\Event\Event;
use Cake\Utility\Hash;
use Cake\View\ViewBuilder;
Expand Down Expand Up @@ -728,7 +729,24 @@ public function search()
);
$native = '';
} else {
$sphinx['filter'][] = array('user_id', $natives);
$maxAttrValues = 4096; // Manticore limitation
if (count($natives) <= $maxAttrValues) {
$sphinx['filter'][] = array('user_id', $natives);
} else {
$nonNatives = $this->UsersLanguages->find()
->where(function (QueryExpression $exp) use ($from) {
$isNonNative = $exp->or(['level is' => null])->notEq('level', 5);
return $exp->add($isNonNative)
->eq('language_code', $from);
})
->select(['of_user_id'])
->toList();
$nonNatives = Hash::extract($nonNatives, '{n}.of_user_id');
while (count($nonNatives)) {
$excludedIds = array_splice($nonNatives, 0, $maxAttrValues);
$sphinx['filter'][] = array('user_id', $excludedIds, true);
}
}
}
}
}
Expand Down

0 comments on commit c26b5db

Please sign in to comment.