Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add "native" full-text search support (SQLite), refs #1481 #1801

Merged
merged 1 commit into from Aug 18, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 8 additions & 1 deletion DefaultSettings.php
Expand Up @@ -1004,6 +1004,7 @@
# DB back-end to use special fulltext index operations.
#
# - Tested with MySQL/MariaDB
# - Tested with SQLite
#
# @since 2.5
##
Expand Down Expand Up @@ -1041,13 +1042,19 @@
# from MariaDB 10.0.5 with InnoDB tables and from MariaDB 10.0.15
# with Mroonga tables (according to sources)
#
# - SQLite FTS3 has been available since version 3.5, FTS4 was added with
# version 3.7.4, and FTS5 is available with version 3.9.0 (according to
# sources). The setting allows specifying extra arguments after the module
# engine, such as array( 'FTS4', 'tokenize=porter' ).
#
# It is possible to extend the option description (MySQL 5.7+) with
# 'mysql' => array( 'ENGINE=MyISAM, DEFAULT CHARSET=utf8', 'WITH PARSER ngram' )
#
# @since 2.5
##
$GLOBALS['smwgFulltextSearchTableOptions'] = array(
'mysql' => array( 'ENGINE=MyISAM, DEFAULT CHARSET=utf8' )
'mysql' => array( 'ENGINE=MyISAM, DEFAULT CHARSET=utf8' ),
'sqlite' => array( 'FTS4' )
);

##
Expand Down
30 changes: 24 additions & 6 deletions src/DeferredRequestDispatchManager.php
Expand Up @@ -44,7 +44,12 @@ class DeferredRequestDispatchManager {
*
* @var boolean
*/
private $enabledHttpDeferredJobRequestState = true;
private $enabledHttpDeferredRequest = true;

/**
* @var boolean
*/
private $preferredWithJobQueue = false;

/**
* @since 2.3
Expand All @@ -60,16 +65,29 @@ public function __construct( HttpRequest $httpRequest ) {
*/
public function reset() {
self::$canConnectToUrl = null;
$this->enabledHttpDeferredJobRequestState = true;
$this->enabledHttpDeferredRequest = true;
}

/**
* @since 2.3
*
* @param boolean $enabledHttpDeferredJobRequestState
* @param boolean $enabledHttpDeferredRequest
*/
public function setEnabledHttpDeferredRequest( $enabledHttpDeferredRequest ) {
$this->enabledHttpDeferredRequest = (bool)$enabledHttpDeferredRequest;
}

/**
* Certain types of jobs or tasks may prefer to be executed using the job
* queue; this setting therefore indicates whether the dispatcher should try
* opening an HTTP request or not.
*
* @since 2.5
*
* @param boolean $preferredWithJobQueue
*/
public function setEnabledHttpDeferredJobRequestState( $enabledHttpDeferredJobRequestState ) {
$this->enabledHttpDeferredJobRequestState = (bool)$enabledHttpDeferredJobRequestState;
public function setPreferredWithJobQueue( $preferredWithJobQueue ) {
$this->preferredWithJobQueue = (bool)$preferredWithJobQueue;
}

/**
Expand Down Expand Up @@ -116,7 +134,7 @@ public function dispatchJobRequestFor( $type, Title $title, $parameters = array(
$parameters['timestamp'] = time();
$parameters['requestToken'] = SpecialDeferredRequestDispatcher::getRequestToken( $parameters['timestamp'] );

if ( $this->enabledHttpDeferredJobRequestState && $this->canConnectToUrl() ) {
if ( !$this->preferredWithJobQueue && $this->enabledHttpDeferredRequest && $this->canConnectToUrl() ) {
return $this->doPostJobWith( $type, $title, $parameters, $dispatchableCallbackJob );
}

Expand Down
9 changes: 8 additions & 1 deletion src/MediaWiki/Hooks/HookRegistry.php
Expand Up @@ -106,10 +106,17 @@ private function addCallbackHandlers( $basePath, $globalVars ) {
$httpRequestFactory->newSocketRequest()
);

$deferredRequestDispatchManager->setEnabledHttpDeferredJobRequestState(
$deferredRequestDispatchManager->setEnabledHttpDeferredRequest(
$applicationFactory->getSettings()->get( 'smwgEnabledHttpDeferredJobRequest' )
);

// SQLite has no lock manager, making table lock contention very common;
// hence use the JobQueue to enqueue any change request and avoid
// a rollback due to canceled DB transactions
$deferredRequestDispatchManager->setPreferredWithJobQueue(
$GLOBALS['wgDBtype'] === 'sqlite'
);

$permissionPthValidator = new PermissionPthValidator();

/**
Expand Down
148 changes: 148 additions & 0 deletions src/SQLStore/QueryEngine/Fulltext/SQLiteValueMatchConditionBuilder.php
@@ -0,0 +1,148 @@
<?php

namespace SMW\SQLStore\QueryEngine\Fulltext;

use SMW\Query\Language\ValueDescription;

/**
* @license GNU GPL v2+
* @since 2.5
*
* @author mwjames
*/
class SQLiteValueMatchConditionBuilder extends ValueMatchConditionBuilder {

	/**
	 * Search table that provides the settings, field names and quoting
	 * helpers used to assemble the conditions.
	 *
	 * @var SearchTable
	 */
	private $searchTable;

	/**
	 * @since 2.5
	 *
	 * @param SearchTable $searchTable
	 */
	public function __construct( SearchTable $searchTable ) {
		$this->searchTable = $searchTable;
	}

	/**
	 * Reports the enabled state of the underlying search table.
	 *
	 * @since 2.5
	 *
	 * @return boolean
	 */
	public function isEnabled() {
		return $this->searchTable->isEnabled();
	}

	/**
	 * Returns the name of the underlying search table.
	 *
	 * @since 2.5
	 *
	 * @return string
	 */
	public function getTableName() {
		return $this->searchTable->getTableName();
	}

	/**
	 * Checks whether the value has at least as many characters as the
	 * minimum token size reported by the search table.
	 *
	 * @since 2.5
	 *
	 * @param string $value
	 *
	 * @return boolean
	 */
	public function hasMinTokenLength( $value ) {
		$minTokenSize = $this->searchTable->getMinTokenSize();

		// Character count (not byte count) is compared against the limit
		return mb_strlen( $value ) >= $minTokenSize;
	}

	/**
	 * Returns the sort field, qualified with the temporary table name when
	 * one is given.
	 *
	 * @since 2.5
	 *
	 * @param string $temporaryTable
	 *
	 * @return string
	 */
	public function getSortIndexField( $temporaryTable = '' ) {
		return $this->asTablePrefix( $temporaryTable ) . $this->searchTable->getSortField();
	}

	/**
	 * Decides whether a description can be answered by a full-text MATCH
	 * condition: the search has to be enabled, the description needs a
	 * non-exempted property, a non-empty text, a LIKE/NLKE comparator, and
	 * the text (ignoring `*` wildcards) has to reach the minimum token length.
	 *
	 * @since 2.5
	 *
	 * @param ValueDescription $description
	 *
	 * @return boolean
	 */
	public function canApplyFulltextSearchMatchCondition( ValueDescription $description ) {

		if ( !$this->isEnabled() ) {
			return false;
		}

		if ( $description->getProperty() === null ) {
			return false;
		}

		if ( $this->searchTable->isExemptedProperty( $description->getProperty() ) ) {
			return false;
		}

		$text = $this->getMatchableTextFromDescription( $description );
		$comparator = $description->getComparator();

		if ( !$text ) {
			return false;
		}

		if ( !in_array( $comparator, array( SMW_CMP_LIKE, SMW_CMP_NLKE ), true ) ) {
			return false;
		}

		// Wildcards do not count towards the token length
		return $this->hasMinTokenLength( str_replace( '*', '', $text ) );
	}

	/**
	 * Builds the `<field> MATCH <value>` condition for the description and,
	 * when the description carries a property, restricts it to the matching
	 * `p_id`.
	 *
	 * @since 2.5
	 *
	 * @param ValueDescription $description
	 * @param string $temporaryTable
	 *
	 * @return string
	 */
	public function getWhereCondition( ValueDescription $description, $temporaryTable = '' ) {

		$text = $this->getMatchableTextFromDescription( $description );

		$sanitizer = $this->searchTable->getTextSanitizer();
		$value = $sanitizer->sanitize( $text, true );

		// A leading minus sign marks the expression as something that must
		// not be present in any of the returned rows.
		//
		// Be aware that something like [[Has text::!~database]] will cause a
		// "malformed MATCH expression" due to "An FTS query may not consist
		// entirely of terms or term-prefix queries with unary "-" operators
		// attached to them." and doing "NOT database" will result in an empty
		// result set
		if ( $description->getComparator() === SMW_CMP_NLKE ) {
			$value = '-' . $value;
		}

		$prefix = $this->asTablePrefix( $temporaryTable );
		$column = $prefix . $this->searchTable->getIndexField();

		$pidCondition = '';
		$property = $description->getProperty();

		// Full text is collected in a single table, so narrow the match
		// down by adding the PID as an additional condition
		if ( $property !== null ) {
			$pid = $this->searchTable->getPropertyID( $property );
			$pidCondition = ' AND ' . $prefix . 'p_id=' . $this->searchTable->addQuotes( $pid );
		}

		return $column . ' MATCH ' . $this->searchTable->addQuotes( $value ) . $pidCondition;
	}

	/**
	 * Turns a table name into a `name.` field qualifier; an empty name
	 * yields an empty qualifier.
	 *
	 * @param string $temporaryTable
	 *
	 * @return string
	 */
	private function asTablePrefix( $temporaryTable ) {

		if ( $temporaryTable === '' ) {
			return '';
		}

		return $temporaryTable . '.';
	}

}
15 changes: 13 additions & 2 deletions src/SQLStore/QueryEngine/Fulltext/TextByChangeUpdater.php
Expand Up @@ -123,9 +123,13 @@ public function pushUpdatesFromJobParameters( array $parameters ) {
return;
}

$start = microtime( true );

foreach ( $parameters['diff'] as $tableName => $changeOp ) {
$this->doUpdateFromTableChangeOp( new TableChangeOp( $tableName, $changeOp ) );
}

wfDebugLog( 'smw', __METHOD__ . ' procTime (sec): '. round( ( microtime( true ) - $start ), 5 ) );
}

/**
Expand All @@ -145,7 +149,6 @@ public function pushUpdatesFromPropertyTableDiff( CompositePropertyTableDiffIter
$this->doUpdateFromTableChangeOp( $tableChangeOp );
}


wfDebugLog( 'smw', __METHOD__ . ' procTime (sec): '. round( ( microtime( true ) - $start ), 5 ) );
}

Expand Down Expand Up @@ -221,8 +224,13 @@ private function doAggregateFromFieldChangeOp( $type, $fieldChangeOp, &$aggregat
}

private function doUpdateOnAggregatedValues( $inserts, $deletes ) {

// Remove any "deletes" first
$this->doUpdateOnDeletes( $deletes );
$this->doUpdateOnInserts( $inserts );
}

private function doUpdateOnDeletes( $deletes ) {

foreach ( $deletes as $key => $values ) {
list( $sid, $pid ) = explode( ':', $key, 2 );

Expand All @@ -243,6 +251,9 @@ private function doUpdateOnAggregatedValues( $inserts, $deletes ) {

$this->searchTableUpdater->update( $sid, $pid, $text );
}
}

private function doUpdateOnInserts( $inserts ) {

foreach ( $inserts as $key => $value ) {
list( $sid, $pid ) = explode( ':', $key, 2 );
Expand Down
6 changes: 6 additions & 0 deletions src/SQLStore/QueryEngine/FulltextSearchTableFactory.php
Expand Up @@ -6,6 +6,7 @@
use SMW\ApplicationFactory;
use SMW\SQLStore\QueryEngine\Fulltext\ValueMatchConditionBuilder;
use SMW\SQLStore\QueryEngine\Fulltext\MySQLValueMatchConditionBuilder;
use SMW\SQLStore\QueryEngine\Fulltext\SQLiteValueMatchConditionBuilder;
use SMW\SQLStore\QueryEngine\Fulltext\TextByChangeUpdater;
use SMW\SQLStore\QueryEngine\Fulltext\TextSanitizer;
use SMW\SQLStore\QueryEngine\Fulltext\SearchTable;
Expand Down Expand Up @@ -50,6 +51,11 @@ public function newValueMatchConditionBuilderByType( SQLStore $store ) {
$this->newSearchTable( $store )
);
break;
case 'sqlite':
return new SQLiteValueMatchConditionBuilder(
$this->newSearchTable( $store )
);
break;
}

return new ValueMatchConditionBuilder();
Expand Down
Expand Up @@ -266,8 +266,13 @@ private function tryToProcessQueryTestCase( $jsonTestCaseFileHandler ) {
$jsonTestCaseFileHandler->getDebugMode()
);

foreach ( $jsonTestCaseFileHandler->findTestCasesFor( 'query-testcases' ) as $queryCase ) {
$this->queryTestCaseProcessor->processQueryCase( new QueryTestCaseInterpreter( $queryCase ) );
foreach ( $jsonTestCaseFileHandler->findTestCasesFor( 'query-testcases' ) as $case ) {

if ( $jsonTestCaseFileHandler->requiredToSkipFor( $case, $this->connectorId ) ) {
continue;
}

$this->queryTestCaseProcessor->processQueryCase( new QueryTestCaseInterpreter( $case ) );
}

foreach ( $jsonTestCaseFileHandler->findTestCasesFor( 'concept-testcases' ) as $conceptCase ) {
Expand Down
23 changes: 20 additions & 3 deletions tests/phpunit/Integration/ByJsonScript/Fixtures/q-0104.json
@@ -1,5 +1,5 @@
{
"description": "Test `_txt`/`~` with enabled Fulltext search support (only enabled for MySQL)",
"description": "Test `_txt`/`~` with enabled full-text search support (only enabled for MySQL, SQLite)",
"properties": [
{
"name": "Has text",
Expand Down Expand Up @@ -151,6 +151,9 @@
},
{
"about": "#8 free search (wide proximity)",
"skip-on": {
"sqlite": "works differently in comparison to MySQL, see #9"
},
"condition": "[[~~with a category]]",
"printouts" : [],
"parameters" : {
Expand All @@ -166,6 +169,21 @@
},
{
"about": "#9 free search (wide proximity)",
"condition": "[[~~with* a category]] [[~Example/Q0104/*]]",
"printouts" : [],
"parameters" : {
"limit" : "10"
},
"queryresult": {
"count": 2,
"results": [
"Example/Q0104/4#0##_5a524a435267f6e6d2d45d64a419c1da",
"Example/Q0104/4#0##_d4fe48d7241e6530c628f32168815beb"
]
}
},
{
"about": "#10 free search (wide proximity)",
"condition": "[[~~with a category]] [[Category:Q0104]]",
"printouts" : [],
"parameters" : {
Expand All @@ -179,7 +197,7 @@
}
},
{
"about": "#10 retain spaces on +/- operators",
"about": "#11 retain spaces on +/- operators",
"condition": "[[Has text::~+*maria* -postgres*]]",
"printouts" : [],
"parameters" : {
Expand All @@ -202,7 +220,6 @@
"meta": {
"skip-on": {
"postgres": "Not supported by PostgreSQL.",
"sqlite": "Not supported by SQLite.",
"sesame": "Not supported by SPARQLStore (Sesame).",
"virtuoso": "Not supported by SPARQLStore (Virtuoso).",
"fuseki": "Not supported by SPARQLStore (Fuseki).",
Expand Down