diff --git a/includes/storage/SQLStore/SMW_Sql3SmwIds.php b/includes/storage/SQLStore/SMW_Sql3SmwIds.php index 4e9b785579..ca947aa24b 100644 --- a/includes/storage/SQLStore/SMW_Sql3SmwIds.php +++ b/includes/storage/SQLStore/SMW_Sql3SmwIds.php @@ -501,7 +501,7 @@ protected function getDatabaseIdAndSort( $title, $namespace, $iw, $subobjectName * * @return [] */ - public function findDuplicateEntries() { + public function findDuplicateEntityRecords() { $connection = $this->store->getConnection( 'mw.db' ); diff --git a/maintenance/removeDuplicateEntities.php b/maintenance/removeDuplicateEntities.php new file mode 100644 index 0000000000..c1927d506d --- /dev/null +++ b/maintenance/removeDuplicateEntities.php @@ -0,0 +1,84 @@ +mDescription = 'Remove duplicates entities without active references.'; + $this->addOption( 's', 'ID starting point', false, true ); + + parent::__construct(); + } + + /** + * @see Maintenance::addDefaultParams + * + * @since 3.0 + */ + protected function addDefaultParams() { + parent::addDefaultParams(); + } + + /** + * @see Maintenance::execute + */ + public function execute() { + + if ( !defined( 'SMW_VERSION' ) ) { + $this->output( "You need to have SMW enabled in order to use this maintenance script!\n\n" ); + exit; + } + + $this->reportMessage( + "\nThe script will only dispose of those duplicate entities that have no active\n" . + "references. The log section 'untouched' contains IDs that have not been\n" . + "removed and the user is asked to verify the content and manually remove\n". + "those listed entities.\n\n" + ); + + $applicationFactory = ApplicationFactory::getInstance(); + $maintenanceFactory = $applicationFactory->newMaintenanceFactory(); + + $duplicateEntitiesDisposer = $maintenanceFactory->newDuplicateEntitiesDisposer( + $applicationFactory->getStore( 'SMW\SQLStore\SQLStore' ), + array( $this, 'reportMessage' ) + ); + + $duplicateEntityRecords = $duplicateEntitiesDisposer->findDuplicateEntityRecords(); + $duplicateEntitiesDisposer->verifyAndDispose( $duplicateEntityRecords ); + + return true; + } + + /** + * @see Maintenance::reportMessage + * + * @since 1.9 + * + * @param string $message + */ + public function reportMessage( $message ) { + $this->output( $message ); + } + +} + +$maintClass = 'SMW\Maintenance\RemoveDuplicateEntities'; +require_once( RUN_MAINTENANCE_IF_MAIN ); diff --git a/src/Maintenance/DuplicateEntitiesDisposer.php b/src/Maintenance/DuplicateEntitiesDisposer.php new file mode 100644 index 0000000000..b36a17cda7 --- /dev/null +++ b/src/Maintenance/DuplicateEntitiesDisposer.php @@ -0,0 +1,111 @@ +store = $store; + } + + /** + * @since 3.0 + */ + public function findDuplicateEntityRecords() { + return $this->store->getObjectIds()->findDuplicateEntityRecords(); + } + + /** + * @since 3.0 + * + * @param array $duplicateEntityRecords + */ + public function verifyAndDispose( array $duplicateEntityRecords ) { + + $count = count( $duplicateEntityRecords ); + $this->messageReporter->reportMessage( "Found: $count duplicates\n" ); + + if ( $count > 0 ) { + $this->doDispose( $duplicateEntityRecords ); + } + } + + private function doDispose( array $duplicateEntityRecords ) { + + $propertyTableIdReferenceDisposer = new PropertyTableIdReferenceDisposer( + $this->store + ); + + $propertyTableIdReferenceDisposer->setRedirectRemoval( true ); + $connection = $this->store->getConnection( 'mw.db' ); + + $log = [ + 'disposed' => [], + 'untouched' => [] + ]; + + $i = 0; + foreach ( $duplicateEntityRecords as $entityRecord ) { + unset( $entityRecord['count'] ); + + if ( ( $i ) % 60 === 0 ) { + $this->messageReporter->reportMessage( "\n" ); + } + + $this->messageReporter->reportMessage( '.' ); + + $res = $connection->select( + SQLStore::ID_TABLE, + [ + 'smw_id', + ], + [ + 'smw_title'=> $entityRecord['smw_title'], + 'smw_namespace'=> $entityRecord['smw_namespace'], + 'smw_iw'=> $entityRecord['smw_iw'], + 'smw_subobject'=> $entityRecord['smw_subobject'] + ], + __METHOD__ + ); + + foreach ( $res as $row ) { + if ( $propertyTableIdReferenceDisposer->isDisposable( $row->smw_id ) ) { + $propertyTableIdReferenceDisposer->cleanUpTableEntriesById( $row->smw_id ); + $log['disposed'][$row->smw_id] = $entityRecord; + } else { + $log['untouched'][$row->smw_id] = $entityRecord; + } + } + + $i++; + } + + $this->messageReporter->reportMessage( + "\n\nLog\n\n" . json_encode( $log, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE ) . "\n" + ); + } + +} diff --git a/src/Maintenance/MaintenanceFactory.php b/src/Maintenance/MaintenanceFactory.php index 2415d7fbff..7683c8b370 100644 --- a/src/Maintenance/MaintenanceFactory.php +++ b/src/Maintenance/MaintenanceFactory.php @@ -6,6 +6,7 @@ use SMW\ApplicationFactory; use SMW\MediaWiki\ManualEntryLogger; use SMW\SQLStore\PropertyStatisticsStore; +use SMW\Maintenance\DuplicateEntitiesDisposer; use SMW\SQLStore\SQLStore; use SMW\Store; @@ -108,6 +109,27 @@ public function newRebuildPropertyStatistics() { return new RebuildPropertyStatistics(); } + /** + * @since 3.0 + * + * @return DuplicateEntitiesDisposer + */ + public function newDuplicateEntitiesDisposer( Store $store, $reporterCallback = null ) { + + $messageReporter = MessageReporterFactory::getInstance()->newObservableMessageReporter(); + $messageReporter->registerReporterCallback( $reporterCallback ); + + $duplicateEntitiesDisposer = new DuplicateEntitiesDisposer( + $store + ); + + $duplicateEntitiesDisposer->setMessageReporter( + $messageReporter + ); + + return $duplicateEntitiesDisposer; + } + /** * @since 2.4 * diff --git a/src/MediaWiki/Api/Task.php b/src/MediaWiki/Api/Task.php index 4f91e25fda..4fe25d6ffa 100644 --- a/src/MediaWiki/Api/Task.php +++ b/src/MediaWiki/Api/Task.php @@ -75,7 +75,7 @@ private function callDupLookupTask( $parameters ) { return $result + ['isFromCache' => true ]; } - $rows = $applicationFactory->getStore()->getObjectIds()->findDuplicateEntries(); + $rows = $applicationFactory->getStore()->getObjectIds()->findDuplicateEntityRecords(); $result = [ 'list' => $rows, diff --git a/src/SQLStore/PropertyTableIdReferenceDisposer.php b/src/SQLStore/PropertyTableIdReferenceDisposer.php index dd1880d3d9..d04d77e81e 100644 --- a/src/SQLStore/PropertyTableIdReferenceDisposer.php +++ b/src/SQLStore/PropertyTableIdReferenceDisposer.php @@ -36,6 +36,11 @@ class PropertyTableIdReferenceDisposer { */ private $onTransactionIdle = false; + /** + * @var boolean + */ + private $redirectRemoval = false; + /** * @since 2.4 * @@ -46,6 +51,15 @@ public function __construct( SQLStore $store ) { $this->connection = $this->store->getConnection( 'mw.db' ); } + /** + * @since 3.0 + * + * @param boolean $redirectRemoval + */ + public function setRedirectRemoval( $redirectRemoval ) { + $this->redirectRemoval = $redirectRemoval; + } + /** * @note MW 1.29+ showed transaction collisions when executed using the * JobQueue in connection with purging the BagOStuff cache, use @@ -57,6 +71,17 @@ public function waitOnTransactionIdle() { $this->onTransactionIdle = true; } + /** + * @since 3.0 + * + * @param integer $id + * + * @return boolean + */ + public function isDisposable( $id ) { + return $this->store->getPropertyTableIdReferenceFinder()->hasResidualReferenceForId( $id ) === false; + } + /** * Use case: After a property changed its type (_wpg -> _txt), object values in the * ID table are not removed at the time of the conversion process. @@ -176,7 +201,7 @@ public function cleanUpTableEntriesById( $id ) { private function doRemoveEntityReferencesById( $id, $isRedirect ) { // When marked as redirect, don't remove the reference - if ( $isRedirect === false ) { + if ( $isRedirect === false || ( $isRedirect && $this->redirectRemoval ) ) { $this->connection->delete( SQLStore::ID_TABLE, array( 'smw_id' => $id ), diff --git a/tests/phpunit/Unit/Maintenance/DuplicateEntitiesDisposerTest.php b/tests/phpunit/Unit/Maintenance/DuplicateEntitiesDisposerTest.php new file mode 100644 index 0000000000..2d901b3cb8 --- /dev/null +++ b/tests/phpunit/Unit/Maintenance/DuplicateEntitiesDisposerTest.php @@ -0,0 +1,146 @@ +store = $this->getMockBuilder( '\SMW\SQLStore\SQLStore' ) + ->disableOriginalConstructor() + ->getMock(); + + $this->propertyTableIdReferenceFinder = $this->getMockBuilder( '\SMW\SQLStore\PropertyTableIdReferenceFinder' ) + ->disableOriginalConstructor() + ->getMock(); + + $this->store->expects( $this->any() ) + ->method( 'getPropertyTableIdReferenceFinder' ) + ->will( $this->returnValue( $this->propertyTableIdReferenceFinder ) ); + + $this->messageReporter = $this->getMockBuilder( '\Onoi\MessageReporter\MessageReporter' ) + ->disableOriginalConstructor() + ->getMock(); + + $this->connection = $this->getMockBuilder( '\SMW\MediaWiki\Database' ) + ->disableOriginalConstructor() + ->getMock(); + } + + public function testCanConstruct() { + + $this->assertInstanceOf( + DuplicateEntitiesDisposer::class, + new DuplicateEntitiesDisposer( $this->store ) + ); + } + + public function testFindDuplicateEntityRecords() { + + $idTable = $this->getMockBuilder( '\stdClss' ) + ->disableOriginalConstructor() + ->setMethods( [ 'findDuplicateEntityRecords' ] ) + ->getMock(); + + $this->store->expects( $this->atLeastOnce() ) + ->method( 'getObjectIds' ) + ->will( $this->returnValue( $idTable ) ); + + $instance = new DuplicateEntitiesDisposer( + $this->store + ); + + $instance->findDuplicateEntityRecords(); + } + + public function testVerifyAndDispose_NoDuplicates() { + + $this->store->expects( $this->never() ) + ->method( 'getConnection' ); + + $instance = new DuplicateEntitiesDisposer( + $this->store + ); + + $instance->setMessageReporter( + $this->messageReporter + ); + + $instance->verifyAndDispose( [] ); + } + + public function testVerifyAndDispose_WithDuplicateRecord() { + + $record = [ + 'smw_title' => 'Foo', + 'smw_namespace' => 0, + 'smw_iw' => '', + 'smw_subobject' => '' + ]; + + $row = new \stdClass; + $row->smw_id = 42; + + $this->connection->expects( $this->atLeastOnce() ) + ->method( 'select' ) + ->with( + $this->anything(), + $this->anything(), + $this->equalTo( $record ), + $this->anything() ) + ->will( $this->returnValue( [ $row ] ) ); + + $this->store->expects( $this->atLeastOnce() ) + ->method( 'getConnection' ) + ->will( $this->returnValue( $this->connection ) ); + + $idTable = $this->getMockBuilder( '\stdClss' ) + ->disableOriginalConstructor() + ->setMethods( [ 'getDataItemById' ] ) + ->getMock(); + + $this->store->expects( $this->atLeastOnce() ) + ->method( 'getObjectIds' ) + ->will( $this->returnValue( $idTable ) ); + + $this->store->expects( $this->any() ) + ->method( 'getPropertyTables' ) + ->will( $this->returnValue( [] ) ); + + $this->propertyTableIdReferenceFinder->expects( $this->atLeastOnce() ) + ->method( 'hasResidualReferenceForId' ) + ->will( $this->returnValue( false ) ); + + $instance = new DuplicateEntitiesDisposer( + $this->store + ); + + $instance->setMessageReporter( + $this->messageReporter + ); + + $duplicates = [ + $record + ]; + + $instance->verifyAndDispose( $duplicates ); + } + +} diff --git a/tests/phpunit/Unit/Maintenance/MaintenanceFactoryTest.php b/tests/phpunit/Unit/Maintenance/MaintenanceFactoryTest.php index 070eba8921..936a9c653c 100644 --- a/tests/phpunit/Unit/Maintenance/MaintenanceFactoryTest.php +++ b/tests/phpunit/Unit/Maintenance/MaintenanceFactoryTest.php @@ -83,6 +83,16 @@ public function testCanConstructRebuildPropertyStatistics() { ); } + public function testCanConstructDuplicateEntitiesDisposer() { + + $instance = new MaintenanceFactory(); + + $this->assertInstanceOf( + '\SMW\Maintenance\DuplicateEntitiesDisposer', + $instance->newDuplicateEntitiesDisposer( $this->store ) + ); + } + public function testCanConstructMaintenanceLogger() { $instance = new MaintenanceFactory(); diff --git a/tests/phpunit/Unit/MediaWiki/Api/TaskTest.php b/tests/phpunit/Unit/MediaWiki/Api/TaskTest.php index de87642caf..11cdc4ccca 100644 --- a/tests/phpunit/Unit/MediaWiki/Api/TaskTest.php +++ b/tests/phpunit/Unit/MediaWiki/Api/TaskTest.php @@ -95,7 +95,7 @@ public function testDupLookupTask() { ->getMock(); $entityTable->expects( $this->atLeastOnce() ) - ->method( 'findDuplicateEntries' ) + ->method( 'findDuplicateEntityRecords' ) ->will( $this->returnValue( [] ) ); $store = $this->getMockBuilder( '\SMW\SQLStore\SQLStore' ) diff --git a/tests/phpunit/Unit/SQLStore/PropertyTableIdReferenceDisposerTest.php b/tests/phpunit/Unit/SQLStore/PropertyTableIdReferenceDisposerTest.php index c32c60fcad..1df026dad3 100644 --- a/tests/phpunit/Unit/SQLStore/PropertyTableIdReferenceDisposerTest.php +++ b/tests/phpunit/Unit/SQLStore/PropertyTableIdReferenceDisposerTest.php @@ -60,11 +60,35 @@ protected function tearDown() { public function testCanConstruct() { $this->assertInstanceOf( - '\SMW\SQLStore\PropertyTableIdReferenceDisposer', + PropertyTableIdReferenceDisposer::class, new PropertyTableIdReferenceDisposer( $this->store ) ); } + public function testIsDisposable() { + + $propertyTableIdReferenceFinder = $connection = $this->getMockBuilder( '\SMW\SQLStore\PropertyTableIdReferenceFinder' ) + ->disableOriginalConstructor() + ->getMock(); + + $propertyTableIdReferenceFinder->expects( $this->any() ) + ->method( 'hasResidualReferenceForId' ) + ->with( $this->equalTo( 42 ) ) + ->will( $this->returnValue( false ) ); + + $this->store->expects( $this->any() ) + ->method( 'getPropertyTableIdReferenceFinder' ) + ->will( $this->returnValue( $propertyTableIdReferenceFinder ) ); + + $instance = new PropertyTableIdReferenceDisposer( + $this->store + ); + + $this->assertTrue( + $instance->isDisposable( 42 ) + ); + } + public function testTryToRemoveOutdatedEntryFromIDTable() { $tableDefinition = $connection = $this->getMockBuilder( '\SMW\SQLStore\TableDefinition' ) diff --git a/tests/phpunit/includes/storage/sqlstore/SQLStoreSmwIdsTest.php b/tests/phpunit/includes/storage/sqlstore/SQLStoreSmwIdsTest.php index 741f126094..1ab20b318b 100644 --- a/tests/phpunit/includes/storage/sqlstore/SQLStoreSmwIdsTest.php +++ b/tests/phpunit/includes/storage/sqlstore/SQLStoreSmwIdsTest.php @@ -355,7 +355,7 @@ public function testFindDuplicateEntries() { $this->assertEquals( [ $expected ], - $instance->findDuplicateEntries() + $instance->findDuplicateEntityRecords() ); }