Skip to content

Commit

Permalink
Use ReferenceLookup to determine if a resource is in use (#4093)
Browse files Browse the repository at this point in the history
  • Loading branch information
jastraat committed Jan 29, 2024
1 parent 5a99938 commit 30d823e
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 41 deletions.
1 change: 1 addition & 0 deletions modules/metastore/metastore.services.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ services:
- '@logger.factory'
- '@dkan.metastore.service'
- '@dkan.metastore.resource_mapper'
- '@dkan.metastore.reference_lookup'
tags:
- { name: event_subscriber }

Expand Down
41 changes: 25 additions & 16 deletions modules/metastore/src/EventSubscriber/MetastoreSubscriber.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

use Drupal\common\DataResource;
use Drupal\common\Events\Event;
use Drupal\common\UrlHostTokenResolver;
use Drupal\Core\Logger\LoggerChannelFactory;
use Drupal\metastore\MetastoreService;
use Drupal\metastore\Plugin\QueueWorker\OrphanReferenceProcessor;
use Drupal\metastore\ReferenceLookupInterface;
use Drupal\metastore\ResourceMapper;
use Symfony\Component\DependencyInjection\ContainerInterface;
use Symfony\Component\EventDispatcher\EventSubscriberInterface;
Expand Down Expand Up @@ -38,6 +38,13 @@ class MetastoreSubscriber implements EventSubscriberInterface {
*/
protected ResourceMapper $resourceMapper;

/**
* The dkan.metastore.reference_lookup service.
*
* @var \Drupal\metastore\ReferenceLookupInterface
*/
private $referenceLookup;

/**
* Inherited.
*
Expand All @@ -47,7 +54,8 @@ public static function create(ContainerInterface $container) {
return new static(
$container->get('logger.factory'),
$container->get('dkan.metastore.service'),
$container->get('dkan.metastore.resource_mapper')
$container->get('dkan.metastore.resource_mapper'),
$container->get('dkan.metastore.reference_lookup')
);
}

Expand All @@ -60,11 +68,14 @@ public static function create(ContainerInterface $container) {
* The dkan.metastore.service service.
* @param \Drupal\metastore\ResourceMapper $resourceMapper
* The dkan.metastore.resource_mapper.
* @param \Drupal\metastore\ReferenceLookupInterface $referenceLookup
* The dkan.metastore.reference_lookup service.
*/
public function __construct(LoggerChannelFactory $logger_factory, MetastoreService $service, ResourceMapper $resourceMapper) {
public function __construct(LoggerChannelFactory $logger_factory, MetastoreService $service, ResourceMapper $resourceMapper, ReferenceLookupInterface $referenceLookup) {
$this->loggerFactory = $logger_factory;
$this->service = $service;
$this->resourceMapper = $resourceMapper;
$this->referenceLookup = $referenceLookup;
}

/**
Expand Down Expand Up @@ -100,10 +111,12 @@ public function cleanResourceMapperTable(Event $event) {
$resource_id = $resourceParams['identifier'] ?? NULL;
$perspective = $resourceParams['perspective'] ?? NULL;
$version = $resourceParams['version'] ?? NULL;
$resource_id_wo_perspective = $resource_id . '__' . $version;
$resource = $this->resourceMapper->get($resource_id, $perspective, $version);

// Ensure a valid ID, perspective, and version were found for the given
// distribution.
if ($resource instanceof DataResource && !$this->resourceInUseElsewhere($distribution_id, $resource->getFilePath())) {
if ($resource instanceof DataResource && !$this->resourceInUseElsewhere($distribution_id, $resource_id_wo_perspective)) {
// Remove resource entry for metadata resource mapper.
$this->resourceMapper->remove($resource);
}
Expand All @@ -115,24 +128,20 @@ public function cleanResourceMapperTable(Event $event) {
*
* @param string $dist_id
* The uuid of the distribution where this resource is know to be in use.
* @param string $file_path
* The file path of the resource being checked.
* @param string $resource_id
* The identifier of the resource.
*
* @return bool
* Whether the resource is in use elsewhere.
*
* @todo Abstract out "distribution" and field_data_type.
*/
private function resourceInUseElsewhere(string $dist_id, string $file_path): bool {
// Iterate over the metadata for all dataset distributions.
foreach ($this->service->getAll('distribution') as $metadata) {
// Attempt to determine the filepath for this distribution's resource.
$dist_file_path = UrlHostTokenResolver::hostify($metadata->{'$.data.downloadURL'} ?? '');
// If the current distribution does is not the excluded distribution, and
// it's resource file path matches the supplied file path...
if ($metadata->{'$.identifier'} !== $dist_id && !empty($dist_file_path) && $dist_file_path === $file_path) {
// Another distribution with the same resource was found, meaning the
// resource is still in use.
private function resourceInUseElsewhere(string $dist_id, string $resource_id): bool {
$distributions = $this->referenceLookup->getReferencers('distribution', $resource_id, 'downloadURL');

// Check if any other distributions reference it.
foreach ($distributions as $distribution) {
if ($distribution != $dist_id) {
return TRUE;
}
}
Expand Down
5 changes: 5 additions & 0 deletions modules/metastore/tests/src/Unit/MetastoreSubscriberTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
use Drupal\common\Storage\JobStore;
use Drupal\metastore\EventSubscriber\MetastoreSubscriber;
use Drupal\metastore\MetastoreService;
use Drupal\metastore\ReferenceLookupInterface;
use Drupal\metastore\ResourceMapper;
use Drupal\Tests\metastore\Unit\MetastoreServiceTest;

Expand Down Expand Up @@ -125,6 +126,7 @@ public function testCleanupWithResourceNotInUseElsewhere(): void {
->add('logger.factory', LoggerChannelFactory::class)
->add('dkan.metastore.service', MetastoreService::class)
->add('dkan.metastore.resource_mapper', ResourceMapper::class)
->add('dkan.metastore.reference_lookup', ReferenceLookupInterface::class)
->add('database', Connection::class)
->index(0);
$chain = (new Chain($this))
Expand All @@ -133,6 +135,7 @@ public function testCleanupWithResourceNotInUseElsewhere(): void {
->add(MetastoreService::class, 'getAll', [$distribution])
->add(ResourceMapper::class, 'get', $resource)
->add(ResourceMapper::class, 'remove', new \Exception($removal_message))
->add(ReferenceLookupInterface::class, 'getReferencers', [$distribution->{'$.identifier'}])
->add(LoggerChannelFactory::class, 'get', LoggerChannelInterface::class)
->add(LoggerChannelInterface::class, 'error', NULL, 'errors');

Expand Down Expand Up @@ -173,6 +176,7 @@ public function testCleanupWithResourceInUseElsewhere(): void {
->add('logger.factory', LoggerChannelFactory::class)
->add('dkan.metastore.service', MetastoreService::class)
->add('dkan.metastore.resource_mapper', ResourceMapper::class)
->add('dkan.metastore.reference_lookup', ReferenceLookupInterface::class)
->add('database', Connection::class)
->index(0);

Expand All @@ -182,6 +186,7 @@ public function testCleanupWithResourceInUseElsewhere(): void {
->add(MetastoreService::class, 'getAll', [$distribution_1, $distribution_2])
->add(ResourceMapper::class, 'get', $resource)
->add(ResourceMapper::class, 'remove', new \LogicException('Erroneous attempt to remove resource which is in use elsewhere'))
->add(ReferenceLookupInterface::class, 'getReferencers', [$distribution_1->{'$.identifier'}, $distribution_2->{'$.identifier'}])
->add(LoggerChannelFactory::class, 'get', LoggerChannelInterface::class)
->add(LoggerChannelInterface::class, 'error', NULL, 'errors');

Expand Down
70 changes: 45 additions & 25 deletions tests/src/Functional/DatasetBTBTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -259,9 +259,7 @@ public function testDraftWorkflowUpdateDistributionTitleSourcePerspective() {

/**
* Test draft moderation workflow with distribution title update and local_url resource perspective.
* Current fails due to faulty logic in MetastoreSubscriber::resourceInUseElsewhere.
* Should use ReferenceLookup similar to ResourcePurger::resourceNotShared.
*/
public function testDraftWorkflowUpdateDistributionTitleLocalPerspective() {
// Set resource perspective to local_url.
$this->config('metastore.settings')
Expand All @@ -270,7 +268,7 @@ public function testDraftWorkflowUpdateDistributionTitleLocalPerspective() {

$this->runDraftWorkflowUpdateDistributionTitle();
}
*/


/**
* Test cleanup of orphaned draft distributions.
Expand Down Expand Up @@ -313,6 +311,7 @@ public function testOrphanDraftDistributionCleanup() {

// Simulate datastore_import and cleanup queues post update.
$this->runQueues([
'localize_import',
'datastore_import',
'orphan_reference_processor',
'orphan_resource_remover',
Expand Down Expand Up @@ -675,6 +674,7 @@ private function createInitialDraftDatasetAndPublish(string $identifier): void {
// Simulate all possible queues post publish.
// Should only include post_import (not included earlier) and resource_purger.
$this->runQueues([
'localize_import',
'datastore_import',
'resource_purger',
'orphan_reference_processor',
Expand Down Expand Up @@ -733,7 +733,7 @@ private function confirmNewDatastoreImportDraftWorkflow(string $identifier): voi
$distributionTablePublishedUpdated = $metadata['published_revision']['distributions'][0]['table_name'] ?? '';

// Load previous distribution node and its moderation state.
$entityManager = \Drupal::entityTypeManager()->getStorage('node');
$entityManager = $this->getNodeStorage();
$distributionNodeOld = $entityManager->loadByProperties(['uuid' => $distributionUuidOld]);
$distributionNodeOld = reset($distributionNodeOld);
$distributionStateOld = $distributionNodeOld->get('moderation_state')->getString();
Expand Down Expand Up @@ -796,56 +796,76 @@ private function runDraftWorkflowUpdateDistributionTitle(): void {
$distribution->format = 'csv';
$distribution->mediaType = 'text/csv';

// Run distribution title update with cron run between update and publish events.
$this->runDistributionTitleUpdate($id_1, $distribution);

// Run distribution title update with cron run only after publish.
$distribution->title = 'Second Update to Distribution #0 for ' . $id_1;
$this->runDistributionTitleUpdate($id_1, $distribution, TRUE);
}

/**
* Separate distribution title update to allow for multiple runs.
*/
private function runDistributionTitleUpdate(string $identifier, \stdClass $distribution, bool $skip_cron = FALSE) {
// Create a new draft with the new distribution title.
$this->getMetastore()->patch('dataset', $id_1, json_encode(
$this->getMetastore()->patch('dataset', $identifier, json_encode(
['distribution' => [$distribution]]
));

// Simulate all possible queues post update.
// Should NOT include datastore_import.
$this->runQueues([
'datastore_import',
'resource_purger',
'orphan_reference_processor',
'orphan_resource_remover',
'post_import',
]);
$datasetInfoService = $this->container->get('dkan.common.dataset_info');
$databaseSchema = $this->container->get('database')->schema();
$entityManager = $this->getNodeStorage();

if (!$skip_cron) {
// Simulate cron by running all possible queues post update.
// Should NOT include datastore_import.
$this->runQueues([
'localize_import',
'datastore_import',
'resource_purger',
'orphan_reference_processor',
'orphan_resource_remover',
'post_import',
]);
}

// Get dataset info.
$datasetInfoService = $this->container->get('dkan.common.dataset_info');
$metadata = $datasetInfoService->gather($id_1);
$distributionTableLatest = $metadata['latest_revision']['distributions'][0]['table_name'];
$distributionTablePublished = $metadata['published_revision']['distributions'][0]['table_name'] ?? '';
$metadata = $datasetInfoService->gather($identifier);
// Store old distribution UUID for later comparison.
$distributionUuidOld = $metadata['published_revision']['distributions'][0]['distribution_uuid'] ?? '';

// Make sure there are both latest and published versions that share the same table.
// Make sure we aren't creating a new datastore table with this update.
$distributionTableLatest = $metadata['latest_revision']['distributions'][0]['table_name'];
$distributionTablePublished = $metadata['published_revision']['distributions'][0]['table_name'] ?? '';
$this->assertNotEmpty($distributionTablePublished, 'Draft revision exists.');
$this->assertEquals($distributionTableLatest, $distributionTablePublished, 'Same distribution used for latest and published revisions.');

// Confirm latest/published distribution table exists.
$databaseSchema = $this->container->get('database')->schema();
$distributionTableLatestExists = $databaseSchema->tableExists($distributionTableLatest);
$this->assertTrue($distributionTableLatestExists, $distributionTableLatest . ' exists.');

// Publish the draft dataset revision.
$this->getMetastore()->publish('dataset', $id_1);
$this->getMetastore()->publish('dataset', $identifier);

// Simulate all possible queues post update.
// Simulate cron by running all possible queues post publish.
// Should NOT include datastore_import.
$this->runQueues([
'localize_import',
'datastore_import',
'resource_purger',
'orphan_reference_processor',
'orphan_resource_remover',
'post_import',
]);

$metadata = $datasetInfoService->gather($id_1);
// Get dataset info again.
$metadata = $datasetInfoService->gather($identifier);
$distributionTableLatestNew = $metadata['latest_revision']['distributions'][0]['table_name'];
$distributionUuidLatestNew = $metadata['latest_revision']['distributions'][0]['distribution_uuid'];
$distributionTablePublishedUpdated = $metadata['published_revision']['distributions'][0]['table_name'] ?? '';

// Load previous distribution node and its moderation state.
$entityManager = \Drupal::entityTypeManager()->getStorage('node');
$distributionNodeOld = $entityManager->loadByProperties(['uuid' => $distributionUuidOld]);
$distributionNodeOld = reset($distributionNodeOld);
$distributionStateOld = $distributionNodeOld->get('moderation_state')->getString();
Expand Down

0 comments on commit 30d823e

Please sign in to comment.