Skip to content

Commit

Permalink
Add entity specific collation support (smw_sort), refs 2065 (#2429)
Browse files Browse the repository at this point in the history
  • Loading branch information
mwjames committed May 7, 2017
1 parent 6880122 commit 13c0689
Show file tree
Hide file tree
Showing 50 changed files with 1,896 additions and 332 deletions.
33 changes: 33 additions & 0 deletions DefaultSettings.php
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,17 @@
# - SMW_SPARQL_QF_SUBP to resolve subproperties
# - SMW_SPARQL_QF_SUBC to resolve subcategories
#
# - SMW_SPARQL_QF_COLLATION adds support for the sorting collation as
# maintained in $smwgEntityCollation. It is not enabled by default because
# a `uca-*` collation generates a UTF-8 string that contains unrecognized
# UTF codepoints that may not be understood by the back-end. The Collator
# therefore armors those unrecognized characters by replacing them with
# a ? to avoid a cURL communication failure. This means that not all
# elements of the sort string can be transferred to the back-end, which
# can cause a sorting distortion for close matches, for example
# "Ennis, Ennis Hill, Ennis Jones, Ennis-Hill, Ennis-London".
#
# Please check with your repository provider whether SPARQL 1.1 is fully
# supported or not, and if not SMW_SPARQL_QF_NONE should be set.
#
Expand Down Expand Up @@ -1462,4 +1473,26 @@
'smwgPropertyInvalidCharacterList' => array( '[', ']' , '|' , '<' , '>', '{', '}', '+', '%' ),
##

##
# Entity specific collation
#
# This should correspond to the $wgCategoryCollation setting (also with regard
# to selected argument values), yet it is kept separate to allow better
# control over changes to the collation, sorting, and display of
# values.
#
# This setting is "global" and applies to any entity that is maintained for
# a wiki. Being global means that it cannot be selective (using one collation
# for one query and another collation for a different query) because the
# field (smw_sort) contains a computed representation of the sort value.
#
# ANY change to this setting requires running the `updateEntityCollation.php`
# maintenance script.
#
# @since 3.0
# @default identity (as legacy setting)
##
'smwgEntityCollation' => 'identity',
##

);
1 change: 1 addition & 0 deletions includes/Settings.php
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ public static function newFromGlobals() {
'smwgEditProtectionRight' => $GLOBALS['smwgEditProtectionRight'],
'smwgSimilarityLookupExemptionProperty' => $GLOBALS['smwgSimilarityLookupExemptionProperty'],
'smwgPropertyInvalidCharacterList' => $GLOBALS['smwgPropertyInvalidCharacterList'],
'smwgEntityCollation' => $GLOBALS['smwgEntityCollation'],
);

self::initLegacyMapping( $configuration );
Expand Down
5 changes: 2 additions & 3 deletions includes/articlepages/ConceptPage.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
namespace SMW;

use Html;
use SMW\MediaWiki\ByLanguageCollationMapper;
use SMW\Utils\Collator;
use SMW\Query\Language\ConceptDescription;
use SMWDataItem as DataItem;
use SMWPageLister;
Expand Down Expand Up @@ -142,10 +142,9 @@ private function getFirstLetterForCategory( DataItem $dataItem ) {

if ( $dataItem->getDIType() == DataItem::TYPE_WIKIPAGE ) {
$sortKey = ApplicationFactory::getInstance()->getStore()->getWikiPageSortKey( $dataItem );

}

return ByLanguageCollationMapper::getInstance()->findFirstLetterForCategory( $sortKey );
return Collator::singleton()->getFirstLetter( $sortKey );
}

}
5 changes: 3 additions & 2 deletions includes/articlepages/SMW_PropertyPage.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
use SMW\DIProperty;
use SMW\Content\PropertyPageMessageHtmlBuilder;
use SMW\PropertySpecificationReqExaminer;
use SMW\Utils\Collator;

/**
* Implementation of MediaWiki's Article that shows additional information on
Expand Down Expand Up @@ -311,8 +312,8 @@ protected function subjectObjectList( array $diWikiPages ) {
$diWikiPage = $diWikiPages[$index];
$dvWikiPage = DataValueFactory::getInstance()->newDataValueByItem( $diWikiPage, null );

$sortkey = $this->store->getWikiPageSortKey( $diWikiPage );
$start_char = $wgContLang->convert( $wgContLang->firstChar( $sortkey ) );
$sortKey = $this->store->getWikiPageSortKey( $diWikiPage );
$start_char = Collator::singleton()->getFirstLetter( $sortKey );

// Header for index letters
if ( $start_char != $prev_start_char ) {
Expand Down
5 changes: 3 additions & 2 deletions includes/export/SMW_Exporter.php
Original file line number Diff line number Diff line change
Expand Up @@ -289,8 +289,9 @@ static public function makeExportDataForSubject( SMWDIWikiPage $diWikiPage, $add
if ( $addStubData ) {
// Add a default sort key; for pages that exist in the wiki,
// this is set during parsing
$defaultSortkey = new ExpLiteral( $diWikiPage->getSortKey() );
$result->addPropertyObjectValue( self::getSpecialPropertyResource( '_SKEY' ), $defaultSortkey );
$property = new DIProperty( '_SKEY' );
$resourceBuilder = self::$dispatchingResourceBuilder->findResourceBuilder( $property );
$resourceBuilder->addResourceValue( $result, $property, $diWikiPage );
}

if ( $diWikiPage->getPageLanguage() ) {
Expand Down
5 changes: 2 additions & 3 deletions includes/queryprinters/CategoryResultPrinter.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

namespace SMW;

use SMW\MediaWiki\ByLanguageCollationMapper;
use SMW\Utils\Collator;
use SMWDataItem;
use SMWQueryResult;

Expand Down Expand Up @@ -227,10 +227,9 @@ private function getFirstLetterForCategory( SMWQueryResult $res, SMWDataItem $da

if ( $dataItem->getDIType() == SMWDataItem::TYPE_WIKIPAGE ) {
$sortKey = $res->getStore()->getWikiPageSortKey( $dataItem );

}

return ByLanguageCollationMapper::getInstance()->findFirstLetterForCategory( $sortKey );
return Collator::singleton()->getFirstLetter( $sortKey );
}

private function addRowFieldsToTemplate( $res, $row, &$first_col, $templateRenderer ) {
Expand Down
32 changes: 22 additions & 10 deletions includes/storage/SQLStore/SMW_SQLStore3_Readers.php
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ private function initSemanticDataCache( $subjectId, DIWikiPage $subject ) {
*
* @return SMWSemanticData
*/
private function getSemanticDataFromTable( $sid, DIWikiPage $subject, TableDefinition $proptable ) {
private function getSemanticDataFromTable( $sid, DIWikiPage $subject, TableDefinition $proptable, SMWRequestOptions $requestOptions = null ) {
// Do not clear the cache when called recursively.
self::$in_getSemanticData++;

Expand All @@ -180,7 +180,7 @@ private function getSemanticDataFromTable( $sid, DIWikiPage $subject, TableDefin
}

// *** Read the data ***//
$data = $this->fetchSemanticData( $sid, $subject, $proptable );
$data = $this->fetchSemanticData( $sid, $subject, $proptable, true, $requestOptions );
foreach ( $data as $d ) {
$this->store->m_semdata[$sid]->addPropertyStubValue( reset( $d ), end( $d ) );
}
Expand Down Expand Up @@ -476,10 +476,10 @@ public function getPropertySubjects( SMWDIProperty $property, SMWDataItem $value

if ( $proptable->usesIdSubject() ) { // join with ID table to get title data
$from = $db->tableName( SMWSql3SmwIds::TABLE_NAME ) . " INNER JOIN " . $db->tableName( $proptable->getName() ) . " AS t1 ON t1.s_id=smw_id";
$select = 'smw_title, smw_namespace, smw_iw, smw_sortkey, smw_subobject';
$select = 'smw_title, smw_namespace, smw_iw, smw_subobject, smw_sortkey, smw_sort';
} else { // no join needed, title+namespace as given in proptable
$from = $db->tableName( $proptable->getName() ) . " AS t1";
$select = 's_title AS smw_title, s_namespace AS smw_namespace, \'\' AS smw_iw, s_title AS smw_sortkey, \'\' AS smw_subobject';
$select = 's_title AS smw_title, s_namespace AS smw_namespace, \'\' AS smw_iw, \'\' AS smw_subobject, s_title AS smw_sortkey, s_title AS smw_sort';
}

if ( !$proptable->isFixedPropertyTable() ) {
Expand All @@ -499,16 +499,28 @@ public function getPropertySubjects( SMWDIProperty $property, SMWDataItem $value
}
}

$res = $db->select( $from, 'DISTINCT ' . $select,
$where . $this->store->getSQLConditions( $requestOptions, 'smw_sortkey', 'smw_sortkey', $where !== '' ),
__METHOD__, $this->store->getSQLOptions( $requestOptions, 'smw_sortkey' ) );
$res = $db->select(
$from,
'DISTINCT ' . $select,
$where . $this->store->getSQLConditions( $requestOptions, 'smw_sortkey', 'smw_sortkey', $where !== '' ),
__METHOD__,
$this->store->getSQLOptions( $requestOptions, 'smw_sort' )
);

$diHandler = $this->store->getDataItemHandlerForDIType( SMWDataItem::TYPE_WIKIPAGE );

foreach ( $res as $row ) {
try {
if ( $row->smw_iw === '' || $row->smw_iw{0} != ':' ) { // filter special objects
$result[] = $diHandler->dataItemFromDBKeys( array_values( (array)$row ) );
$dbkeys = array(
$row->smw_title,
$row->smw_namespace,
$row->smw_iw,
$row->smw_sort,
$row->smw_subobject

);
$result[] = $diHandler->dataItemFromDBKeys( $dbkeys );
}
} catch ( DataItemHandlerException $e ) {
// silently drop data, should be extremely rare and will usually fix itself at next edit
Expand Down Expand Up @@ -714,10 +726,10 @@ public function getInProperties( SMWDataItem $value, SMWRequestOptions $requestO

$where .= " AND smw_iw!=" . $db->addQuotes( SMW_SQL3_SMWIW_OUTDATED ) . " AND smw_iw!=" . $db->addQuotes( SMW_SQL3_SMWDELETEIW );

$res = $db->select( $from, 'DISTINCT smw_title,smw_sortkey,smw_iw',
$res = $db->select( $from, 'DISTINCT smw_title,smw_sortkey,smw_sort,smw_iw',
// select sortkey since it might be used in ordering (needed by Postgres)
$where . $this->store->getSQLConditions( $subOptions, 'smw_sortkey', 'smw_sortkey', $where !== '' ),
__METHOD__, $this->store->getSQLOptions( $subOptions, 'smw_sortkey' ) );
__METHOD__, $this->store->getSQLOptions( $subOptions, 'smw_sort' ) );

foreach ( $res as $row ) {
try {
Expand Down
40 changes: 22 additions & 18 deletions includes/storage/SQLStore/SMW_Sql3SmwIds.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
use SMW\SQLStore\IdToDataItemMatchFinder;
use SMW\SQLStore\PropertyStatisticsTable;
use SMW\SQLStore\RedirectInfoStore;
use SMW\SQLStore\TableFieldUpdater;
use SMW\Utils\Collator;

/**
* @ingroup SMWStore
Expand Down Expand Up @@ -140,6 +142,11 @@ class SMWSql3SmwIds {
*/
private $redirectInfoStore;

/**
* @var TableFieldUpdater
*/
private $tableFieldUpdater;

/**
* Cache for property sortkeys.
*
Expand Down Expand Up @@ -234,6 +241,7 @@ public function __construct( SMWSQLStore3 $store, IdToDataItemMatchFinder $idToD
$this->store->getConnection( 'mw.db' )
);

$this->tableFieldUpdater = new TableFieldUpdater( $store );
$this->intermediaryIdCache = ApplicationFactory::getInstance()->getInMemoryPoolCache()->getPoolCacheById( self::POOLCACHE_ID );
}

Expand Down Expand Up @@ -374,9 +382,9 @@ protected function getDatabaseIdAndSort( $title, $namespace, $iw, $subobjectName
if ( $id != 0 ) {

if ( $fetchHashes ) {
$select = array( 'smw_sortkey', 'smw_proptable_hash' );
$select = array( 'smw_sortkey', 'smw_sort', 'smw_proptable_hash' );
} else {
$select = array( 'smw_sortkey' );
$select = array( 'smw_sortkey', 'smw_sort' );
}

$row = $db->selectRow(
Expand All @@ -401,9 +409,9 @@ protected function getDatabaseIdAndSort( $title, $namespace, $iw, $subobjectName
} else {

if ( $fetchHashes ) {
$select = array( 'smw_id', 'smw_sortkey', 'smw_proptable_hash' );
$select = array( 'smw_id', 'smw_sortkey', 'smw_sort', 'smw_proptable_hash' );
} else {
$select = array( 'smw_id', 'smw_sortkey' );
$select = array( 'smw_id', 'smw_sortkey', 'smw_sort' );
}

$row = $db->selectRow(
Expand Down Expand Up @@ -663,7 +671,7 @@ protected function makeDatabaseId( $title, $namespace, $iw, $subobjectName, $can
$sequenceValue = $db->nextSequenceValue( $this->getIdTable() . '_smw_id_seq' ); // Bug 42659

// #2089 (MySQL 5.7 complained with "Data too long for column")
$sortkey = substr( $sortkey, 0, 254 );
$sortkey = mb_substr( $sortkey, 0, 254 );

$db->insert(
self::TABLE_NAME,
Expand All @@ -673,7 +681,8 @@ protected function makeDatabaseId( $title, $namespace, $iw, $subobjectName, $can
'smw_namespace' => $namespace,
'smw_iw' => $iw,
'smw_subobject' => $subobjectName,
'smw_sortkey' => $sortkey
'smw_sortkey' => $sortkey,
'smw_sort' => Collator::singleton()->getSortKey( $sortkey )
),
__METHOD__
);
Expand All @@ -699,17 +708,10 @@ protected function makeDatabaseId( $title, $namespace, $iw, $subobjectName, $can

} elseif ( $sortkey !== '' && $sortkey != $oldsort ) {

// #2089 (MySQL 5.7 complained with "Data too long for column")
$sortkey = substr( $sortkey, 0, 254 );

$db->update(
self::TABLE_NAME,
array( 'smw_sortkey' => $sortkey ),
array( 'smw_id' => $id ),
__METHOD__
);

$this->tableFieldUpdater->updateCollationField( $id, $sortkey );
$this->setCache( $title, $namespace, $iw, $subobjectName, $id, $sortkey );
} elseif ( $sortkey !== '' && !$this->tableFieldUpdater->isEqualByCollation( $oldsort, $sortkey ) ) {
$this->tableFieldUpdater->updateCollationField( $id, $sortkey );
}

$db->endAtomicTransaction( __METHOD__ );
Expand Down Expand Up @@ -899,7 +901,8 @@ public function moveSMWPageID( $curid, $targetid = 0 ) {
'smw_namespace' => $row->smw_namespace,
'smw_iw' => $row->smw_iw,
'smw_subobject' => $row->smw_subobject,
'smw_sortkey' => $row->smw_sortkey
'smw_sortkey' => $row->smw_sortkey,
'smw_sort' => $row->smw_sort
),
__METHOD__
);
Expand All @@ -913,7 +916,8 @@ public function moveSMWPageID( $curid, $targetid = 0 ) {
'smw_namespace' => $row->smw_namespace,
'smw_iw' => $row->smw_iw,
'smw_subobject' => $row->smw_subobject,
'smw_sortkey' => $row->smw_sortkey
'smw_sortkey' => $row->smw_sortkey,
'smw_sort' => $row->smw_sort
),
__METHOD__
);
Expand Down

0 comments on commit 13c0689

Please sign in to comment.