Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#4075: Catch Guzzle Exception to avoid breaking harvest/dataset entry #4084

Merged
merged 5 commits into from
Dec 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions modules/metastore/metastore.services.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ services:
- '@config.factory'
- '@dkan.metastore.storage'
- '@dkan.metastore.url_generator'
- '@http_client'
calls:
- [setLoggerFactory, ['@logger.factory']]

Expand Down
29 changes: 22 additions & 7 deletions modules/metastore/src/Reference/Referencer.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
use Drupal\metastore\MetastoreService;
use Drupal\metastore\ResourceMapper;

use GuzzleHttp\Client as GuzzleClient;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\GuzzleException;

/**
* Metastore referencer service.
Expand All @@ -26,7 +27,7 @@ class Referencer {
*
* @var string
*/
protected const DEFAULT_MIME_TYPE = 'text/plain';
public const DEFAULT_MIME_TYPE = 'text/plain';

/**
* Storage factory interface service.
Expand All @@ -42,18 +43,27 @@ class Referencer {
*/
public MetastoreUrlGenerator $metastoreUrlGenerator;

/**
* Guzzle HTTP client.
*
* @var \GuzzleHttp\Client
*/
private Client $httpClient;

/**
* Constructor.
*/
public function __construct(
ConfigFactoryInterface $configService,
FactoryInterface $storageFactory,
MetastoreUrlGenerator $metastoreUrlGenerator
MetastoreUrlGenerator $metastoreUrlGenerator,
Client $httpClient
) {
$this->setConfigService($configService);
$this->storageFactory = $storageFactory;
$this->setLoggerFactory(\Drupal::service('logger.factory'));
$this->metastoreUrlGenerator = $metastoreUrlGenerator;
$this->httpClient = $httpClient;
}

/**
Expand Down Expand Up @@ -342,8 +352,12 @@ private function getRemoteMimeType(string $downloadUrl): ?string {

// Perform HTTP Head request against the supplied URL in order to determine
// the content type of the remote resource.
$client = new GuzzleClient();
$response = $client->head($downloadUrl);
try {
$response = $this->httpClient->head($downloadUrl);
}
catch (GuzzleException $exception) {
return $mime_type;
}
// Extract the full value of the content type header.
$content_type = $response->getHeader('Content-Type');
// Attempt to extract the mime type from the content type header.
Expand All @@ -366,7 +380,7 @@ private function getRemoteMimeType(string $downloadUrl): ?string {
* @todo Update the UI to set mediaType when a format is selected.
*/
private function getMimeType($distribution): string {
$mimeType = "text/plain";
$mimeType = self::DEFAULT_MIME_TYPE;

// If we have a mediaType set, use that.
if (isset($distribution->mediaType)) {
Expand All @@ -384,7 +398,8 @@ private function getMimeType($distribution): string {
elseif (isset($distribution->downloadURL)) {
// Determine whether the supplied distribution has a local or remote
// resource.
$is_local = $distribution->downloadURL !== UrlHostTokenResolver::hostify($distribution->downloadURL);
$is_local = $distribution->downloadURL !==
UrlHostTokenResolver::hostify($distribution->downloadURL);
$mimeType = $is_local ?
$this->getLocalMimeType($distribution->downloadURL) :
$this->getRemoteMimeType($distribution->downloadURL);
Expand Down
43 changes: 37 additions & 6 deletions modules/metastore/tests/src/Unit/Reference/ReferencerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
use Drupal\node\Entity\Node;
use Drupal\node\NodeStorage;

use GuzzleHttp\Client;
use GuzzleHttp\Exception\ConnectException;
use MockChain\Chain;
use MockChain\Options;
Expand All @@ -35,6 +36,13 @@
use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\HttpFoundation\RequestStack;

/**
* @covers \Drupal\metastore\Reference\Referencer
* @coversDefaultClass \Drupal\metastore\Reference\Referencer
*
* @group dkan
* @group metastore
*/
class ReferencerTest extends TestCase {

/**
Expand Down Expand Up @@ -110,8 +118,12 @@ private function mockReferencer($existing = TRUE) {
->add(MetastoreUrlGenerator::class, 'uriFromUrl', 'dkan://metastore/schemas/data-dictionary/items/111')
->getMock();

$referencer = new Referencer($configService, $storageFactory, $urlGenerator);
return $referencer;
return new Referencer(
$configService,
$storageFactory,
$urlGenerator,
new Client()
);
}

private function getContainer() {
Expand Down Expand Up @@ -375,6 +387,10 @@ private function getData(string $downloadUrl, string $mediaType = NULL): object

/**
* Test the remote/local file mime type detection logic.
*
* @covers ::getLocalMimeType
* @covers ::getMimeType
* @covers ::getRemoteMimeType
*/
public function testMimeTypeDetection(): void {
// Initialize mock node class.
Expand Down Expand Up @@ -430,7 +446,12 @@ public function getMimeType() { return ReferencerTest::MIME_TYPE; }
->add(MetastoreUrlGenerator::class, 'uriFromUrl', '')
->getMock();

$referencer = new Referencer($configService, $storageFactory, $urlGenerator);
$referencer = new Referencer(
$configService,
$storageFactory,
$urlGenerator,
new Client()
);

// Test Mime Type detection using the resource `mediaType` property.
$data = $this->getData(self::HOST . '/' . self::FILE_PATH, self::MIME_TYPE);
Expand All @@ -444,10 +465,11 @@ public function getMimeType() { return ReferencerTest::MIME_TYPE; }
$data = $this->getData('https://dkan-default-content-files.s3.amazonaws.com/phpunit/district_centerpoints_small.csv');
$referencer->reference($data);
$this->assertEquals(self::MIME_TYPE, $container_chain->getStoredInput('resource')[0]->getMimeType(), 'Unable to fetch MIME type for remote file');
// Test Mime Type detection on a invalid remote file path.
// Test Mime Type detection on a invalid remote file path. Defaults to
// text/plain.
$data = $this->getData('http://invalid');
$this->expectException(ConnectException::class);
$referencer->reference($data);
$this->assertEquals(Referencer::DEFAULT_MIME_TYPE, $container_chain->getStoredInput('resource')[0]->getMimeType(), 'Did not use default MIME type for inaccessible remote file.');
}

/**
Expand Down Expand Up @@ -480,7 +502,16 @@ public function testDistributionHandlingDataDict($distribution, $describedBy) {
)
->getMock();

$referencer = new Referencer($configService, $storageFactory, $urlGenerator);
$http_client = $this->getMockBuilder(Client::class)
->disableOriginalConstructor()
->getMock();

$referencer = new Referencer(
$configService,
$storageFactory,
$urlGenerator,
$http_client
);

if ($describedBy instanceof \Exception) {
$this->expectException(get_class($describedBy));
Expand Down