Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds script to remove duplicates, refs 2883 #2932

Merged
merged 1 commit into from Jan 6, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion includes/storage/SQLStore/SMW_Sql3SmwIds.php
Expand Up @@ -501,7 +501,7 @@ protected function getDatabaseIdAndSort( $title, $namespace, $iw, $subobjectName
*
* @return []
*/
public function findDuplicateEntries() {
public function findDuplicateEntityRecords() {

$connection = $this->store->getConnection( 'mw.db' );

Expand Down
84 changes: 84 additions & 0 deletions maintenance/removeDuplicateEntities.php
@@ -0,0 +1,84 @@
<?php

namespace SMW\Maintenance;

use SMW\ApplicationFactory;

// Use the MediaWiki root configured through MW_INSTALL_PATH when it is set;
// otherwise fall back to the directory three levels above this script.
$basePath = getenv( 'MW_INSTALL_PATH' );

if ( $basePath === false ) {
	$basePath = __DIR__ . '/../../..';
}

require_once $basePath . '/maintenance/Maintenance.php';

/**
* @license GNU GPL v2+
* @since 3.0
*
* @author mwjames
*/
class RemoveDuplicateEntities extends \Maintenance {

	/**
	 * Registers the script description and its command line options.
	 *
	 * @since 3.0
	 */
	public function __construct() {
		$this->mDescription = 'Remove duplicate entities without active references.';

		// NOTE(review): the `s` option is declared here but execute() never
		// reads it, so passing it currently has no effect. It is kept to avoid
		// breaking existing invocations.
		$this->addOption( 's', 'ID starting point', false, true );

		parent::__construct();
	}

	/**
	 * @see Maintenance::addDefaultParams
	 *
	 * @since 3.0
	 */
	protected function addDefaultParams() {
		parent::addDefaultParams();
	}

	/**
	 * Looks up duplicate entity records and hands them to the disposer, which
	 * reports its progress through reportMessage().
	 *
	 * @see Maintenance::execute
	 *
	 * @return boolean true once the disposer has finished
	 */
	public function execute() {

		if ( !defined( 'SMW_VERSION' ) ) {
			$this->output( "You need to have SMW enabled in order to use this maintenance script!\n\n" );

			// Leave with a non-zero status so that calling shells and scripts
			// can tell the run did not happen.
			exit( 1 );
		}

		$this->reportMessage(
			"\nThe script will only dispose of those duplicate entities that have no active\n" .
			"references. The log section 'untouched' contains IDs that have not been\n" .
			"removed and the user is asked to verify the content and manually remove\n" .
			"those listed entities.\n\n"
		);

		$applicationFactory = ApplicationFactory::getInstance();
		$maintenanceFactory = $applicationFactory->newMaintenanceFactory();

		// The disposer is given this script's reportMessage() as its reporter
		// callback.
		$duplicateEntitiesDisposer = $maintenanceFactory->newDuplicateEntitiesDisposer(
			$applicationFactory->getStore( 'SMW\SQLStore\SQLStore' ),
			[ $this, 'reportMessage' ]
		);

		$duplicateEntityRecords = $duplicateEntitiesDisposer->findDuplicateEntityRecords();
		$duplicateEntitiesDisposer->verifyAndDispose( $duplicateEntityRecords );

		return true;
	}

	/**
	 * Reporter callback passed to the disposer in execute(); forwards the
	 * message unchanged to Maintenance::output.
	 *
	 * @since 3.0
	 *
	 * @param string $message
	 */
	public function reportMessage( $message ) {
		$this->output( $message );
	}

}

// Name the class to run, then hand over to the file referenced by
// RUN_MAINTENANCE_IF_MAIN.
$maintClass = 'SMW\Maintenance\RemoveDuplicateEntities';
require_once RUN_MAINTENANCE_IF_MAIN;
111 changes: 111 additions & 0 deletions src/Maintenance/DuplicateEntitiesDisposer.php
@@ -0,0 +1,111 @@
<?php

namespace SMW\Maintenance;

use Onoi\MessageReporter\MessageReporterAwareTrait;
use SMW\Store;
use SMW\SQLStore\SQLStore;
use SMW\SQLStore\PropertyTableIdReferenceDisposer;

/**
* @license GNU GPL v2+
* @since 3.0
*
* @author mwjames
*/
class DuplicateEntitiesDisposer {

	use MessageReporterAwareTrait;

	/**
	 * @var Store
	 */
	private $store = null;

	/**
	 * @since 3.0
	 *
	 * @param Store $store
	 */
	public function __construct( Store $store ) {
		$this->store = $store;
	}

	/**
	 * Returns the duplicate entity records reported by the store's object ID
	 * handler.
	 *
	 * @since 3.0
	 *
	 * @return array presumably one record per duplicated entity; verify
	 *  against findDuplicateEntityRecords() of the ID handler
	 */
	public function findDuplicateEntityRecords() {
		return $this->store->getObjectIds()->findDuplicateEntityRecords();
	}

	/**
	 * Reports the number of duplicate records and, when there are any, removes
	 * those IDs that are found to be disposable. A JSON log listing the
	 * 'disposed' and 'untouched' IDs is reported at the end.
	 *
	 * @since 3.0
	 *
	 * @param array $duplicateEntityRecords records carrying the keys
	 *  smw_title, smw_namespace, smw_iw and smw_subobject
	 */
	public function verifyAndDispose( array $duplicateEntityRecords ) {

		$count = count( $duplicateEntityRecords );
		$this->messageReporter->reportMessage( "Found: $count duplicates\n" );

		if ( $count === 0 ) {
			return;
		}

		// The reference disposer used for the removal only accepts a SQLStore;
		// stop with a message instead of failing on its constructor type hint.
		if ( !$this->store instanceof SQLStore ) {
			$this->messageReporter->reportMessage(
				"\nRemoval skipped, it requires a SQLStore instance.\n"
			);

			return;
		}

		$this->doDispose( $duplicateEntityRecords );
	}

	/**
	 * Walks over all records, disposes of every matching ID that is reported
	 * as disposable and collects the outcome per ID for the final log.
	 *
	 * @param array $duplicateEntityRecords
	 */
	private function doDispose( array $duplicateEntityRecords ) {

		$propertyTableIdReferenceDisposer = new PropertyTableIdReferenceDisposer(
			$this->store
		);

		// Enable the disposer's redirect removal flag for this run.
		$propertyTableIdReferenceDisposer->setRedirectRemoval( true );
		$connection = $this->store->getConnection( 'mw.db' );

		$log = [
			'disposed' => [],
			'untouched' => []
		];

		$i = 0;

		foreach ( $duplicateEntityRecords as $entityRecord ) {
			// The count is not part of the lookup condition nor of the log
			unset( $entityRecord['count'] );

			// Progress indicator: one dot per record, 60 dots per line
			if ( $i % 60 === 0 ) {
				$this->messageReporter->reportMessage( "\n" );
			}

			$this->messageReporter->reportMessage( '.' );

			foreach ( $this->findMatchingRows( $connection, $entityRecord ) as $row ) {
				if ( $propertyTableIdReferenceDisposer->isDisposable( $row->smw_id ) ) {
					$propertyTableIdReferenceDisposer->cleanUpTableEntriesById( $row->smw_id );
					$log['disposed'][$row->smw_id] = $entityRecord;
				} else {
					$log['untouched'][$row->smw_id] = $entityRecord;
				}
			}

			$i++;
		}

		$this->messageReporter->reportMessage(
			"\n\nLog\n\n" . json_encode( $log, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE ) . "\n"
		);
	}

	/**
	 * Selects the smw_id of every ID table row that shares the title,
	 * namespace, interwiki and subobject values of the given record.
	 *
	 * @param mixed $connection the 'mw.db' connection of the store
	 * @param array $entityRecord
	 *
	 * @return mixed iterable result of the select, rows expose smw_id
	 */
	private function findMatchingRows( $connection, array $entityRecord ) {
		return $connection->select(
			SQLStore::ID_TABLE,
			[
				'smw_id',
			],
			[
				'smw_title'=> $entityRecord['smw_title'],
				'smw_namespace'=> $entityRecord['smw_namespace'],
				'smw_iw'=> $entityRecord['smw_iw'],
				'smw_subobject'=> $entityRecord['smw_subobject']
			],
			__METHOD__
		);
	}

}
22 changes: 22 additions & 0 deletions src/Maintenance/MaintenanceFactory.php
Expand Up @@ -6,6 +6,7 @@
use SMW\ApplicationFactory;
use SMW\MediaWiki\ManualEntryLogger;
use SMW\SQLStore\PropertyStatisticsStore;
use SMW\Maintenance\DuplicateEntitiesDisposer;
use SMW\SQLStore\SQLStore;
use SMW\Store;

Expand Down Expand Up @@ -108,6 +109,27 @@ public function newRebuildPropertyStatistics() {
return new RebuildPropertyStatistics();
}

/**
* @since 3.0
*
* @return DuplicateEntitiesDisposer
*/
public function newDuplicateEntitiesDisposer( Store $store, $reporterCallback = null ) {

	// Observable reporter that receives the caller supplied callback
	$observableReporter = MessageReporterFactory::getInstance()->newObservableMessageReporter();
	$observableReporter->registerReporterCallback( $reporterCallback );

	// Disposer bound to the given store, reporting through the reporter above
	$disposer = new DuplicateEntitiesDisposer( $store );
	$disposer->setMessageReporter( $observableReporter );

	return $disposer;
}

/**
* @since 2.4
*
Expand Down
2 changes: 1 addition & 1 deletion src/MediaWiki/Api/Task.php
Expand Up @@ -75,7 +75,7 @@ private function callDupLookupTask( $parameters ) {
return $result + ['isFromCache' => true ];
}

$rows = $applicationFactory->getStore()->getObjectIds()->findDuplicateEntries();
$rows = $applicationFactory->getStore()->getObjectIds()->findDuplicateEntityRecords();

$result = [
'list' => $rows,
Expand Down
27 changes: 26 additions & 1 deletion src/SQLStore/PropertyTableIdReferenceDisposer.php
Expand Up @@ -36,6 +36,11 @@ class PropertyTableIdReferenceDisposer {
*/
private $onTransactionIdle = false;

/**
* @var boolean
*/
private $redirectRemoval = false;

/**
* @since 2.4
*
Expand All @@ -46,6 +51,15 @@ public function __construct( SQLStore $store ) {
$this->connection = $this->store->getConnection( 'mw.db' );
}

/**
* @since 3.0
*
* @param boolean $redirectRemoval
*/
public function setRedirectRemoval( $redirectRemoval ) {
// Stored as given (no cast). doRemoveEntityReferencesById() consults the flag
// to decide whether an ID marked as redirect is deleted from the ID table too.
$this->redirectRemoval = $redirectRemoval;
}

/**
* @note MW 1.29+ showed transaction collisions when executed using the
* JobQueue in connection with purging the BagOStuff cache, use
Expand All @@ -57,6 +71,17 @@ public function waitOnTransactionIdle() {
$this->onTransactionIdle = true;
}

/**
* @since 3.0
*
* @param integer $id
*
* @return boolean
*/
public function isDisposable( $id ) {
	$referenceFinder = $this->store->getPropertyTableIdReferenceFinder();

	// Only a strict `false` from the finder marks the ID as disposable
	return $referenceFinder->hasResidualReferenceForId( $id ) === false;
}

/**
* Use case: After a property changed its type (_wpg -> _txt), object values in the
* ID table are not removed at the time of the conversion process.
Expand Down Expand Up @@ -176,7 +201,7 @@ public function cleanUpTableEntriesById( $id ) {
private function doRemoveEntityReferencesById( $id, $isRedirect ) {

// When marked as redirect, don't remove the reference
if ( $isRedirect === false ) {
if ( $isRedirect === false || ( $isRedirect && $this->redirectRemoval ) ) {
$this->connection->delete(
SQLStore::ID_TABLE,
array( 'smw_id' => $id ),
Expand Down