Skip to content

Commit

Permalink
[BUGFIX] Better handling of requests in ExternalLinktype
Browse files Browse the repository at this point in the history
- Always check for existing response
- If HEAD request fails, a GET request should always be triggered
- Restructured, moved some local variables to class variables
- In case of redirect loop, output exception message instead of
  location and status code

Resolves: #83611
Resolves: #85067
Releases: master, 8.7
Change-Id: I1cf6ef4e3dbaa5fbc683affc7cf96a0dbeea75cd
Reviewed-on: https://review.typo3.org/57356
Tested-by: TYPO3com <no-reply@typo3.com>
Reviewed-by: Stefan Neufeind <typo3.neufeind@speedpartner.de>
Tested-by: Stefan Neufeind <typo3.neufeind@speedpartner.de>
  • Loading branch information
sypets authored and neufeind committed Jun 24, 2018
1 parent c78e9b4 commit 8833bf9
Showing 1 changed file with 94 additions and 54 deletions.
148 changes: 94 additions & 54 deletions typo3/sysext/linkvalidator/Classes/Linktype/ExternalLinktype.php
Expand Up @@ -15,7 +15,6 @@
*/

use GuzzleHttp\Cookie\CookieJar;
use GuzzleHttp\Exception\TooManyRedirectsException;
use Mso\IdnaConvert\IdnaConvert;
use TYPO3\CMS\Core\Http\RequestFactory;
use TYPO3\CMS\Core\Utility\GeneralUtility;
Expand Down Expand Up @@ -46,70 +45,99 @@ class ExternalLinktype extends AbstractLinktype
*/
protected $additionalHeaders = [];

/**
* @var RequestFactory
*/
protected $requestFactory;

/**
* @var array $this->errorParams
*/
protected $errorParams = [];

public function __construct(RequestFactory $requestFactory = null)
{
$this->requestFactory = $requestFactory ?: GeneralUtility::makeInstance(RequestFactory::class);
}

/**
* Checks a given URL for validity
*
* @param string $url The URL to check
* @param string $origUrl The URL to check
* @param array $softRefEntry The soft reference entry which builds the context of that URL
* @param \TYPO3\CMS\Linkvalidator\LinkAnalyzer $reference Parent instance
* @return bool TRUE on success or FALSE on error
* @throws \InvalidArgumentException
*/
public function checkLink($url, $softRefEntry, $reference)
public function checkLink($origUrl, $softRefEntry, $reference)
{
$errorParams = [];
$isValidUrl = true;
if (isset($this->urlReports[$url])) {
if (!$this->urlReports[$url]) {
if (is_array($this->urlErrorParams[$url])) {
$this->setErrorParams($this->urlErrorParams[$url]);
}
}
return $this->urlReports[$url];
// use URL from cache, if available
if (isset($this->urlReports[$origUrl])) {
$this->setErrorParams($this->urlErrorParams[$origUrl]);
return $this->urlReports[$origUrl];
}
$options = [
'cookies' => GeneralUtility::makeInstance(CookieJar::class),
'allow_redirects' => ['strict' => true]
];

$requestFactory = GeneralUtility::makeInstance(RequestFactory::class);
try {
$url = $this->preprocessUrl($url);
$response = $requestFactory->request($url, 'HEAD', $options);
// HEAD was not allowed or threw an error, now trying GET
if ($response->getStatusCode() >= 400) {
$url = $this->preprocessUrl($origUrl);
if (!empty($url)) {
$isValidUrl = $this->requestUrl($url, 'HEAD', $options);
if (!$isValidUrl) {
// HEAD was not allowed or threw an error, now trying GET
$options['headers']['Range'] = 'bytes = 0 - 4048';
$response = $requestFactory->request($url, 'GET', $options);
$isValidUrl = $this->requestUrl($url, 'GET', $options);
}
if ($response->getStatusCode() >= 300) {
$isValidUrl = false;
$errorParams['errorType'] = $response->getStatusCode();
$errorParams['message'] = $this->getErrorMessage($errorParams);
}
$this->urlReports[$origUrl] = $isValidUrl;
$this->urlErrorParams[$origUrl] = $this->errorParams;
return $isValidUrl;
}

/**
* Check URL using the specified request methods
*
* @param string $url
* @param string $method
* @param array $options
* @return bool
*/
protected function requestUrl(string $url, string $method, array $options): bool
{
$this->errorParams = [];
$isValidUrl = false;
try {
$response = $this->requestFactory->request($url, $method, $options);
if ($response->getStatusCode() < 300) {
$isValidUrl = true;
} else {
$this->errorParams['errorType'] = $response->getStatusCode();
$this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
}
} catch (TooManyRedirectsException $e) {
$lastRequest = $e->getRequest();
$response = $e->getResponse();
$errorParams['errorType'] = 'loop';
$errorParams['location'] = (string)$lastRequest->getUri();
$errorParams['errorCode'] = $response->getStatusCode();
$isValidUrl = true;
} catch (\GuzzleHttp\Exception\TooManyRedirectsException $e) {
// redirect loop or too many redirects
// todo: change errorType to 'redirect' (breaking change)
$this->errorParams['errorType'] = 'loop';
$this->errorParams['exception'] = $e->getMessage();
$this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
} catch (\GuzzleHttp\Exception\ClientException $e) {
$isValidUrl = false;
$errorParams['errorType'] = $e->getResponse()->getStatusCode();
$errorParams['message'] = $this->getErrorMessage($errorParams);
if ($e->hasResponse()) {
$this->errorParams['errorType'] = $e->getResponse()->getStatusCode();
} else {
$this->errorParams['errorType'] = 'unknown';
}
$this->errorParams['exception'] = $e->getMessage();
$this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
} catch (\GuzzleHttp\Exception\RequestException $e) {
$isValidUrl = false;
$errorParams['errorType'] = 'network';
$errorParams['message'] = $this->getErrorMessage($errorParams);
$this->errorParams['errorType'] = 'network';
$this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
} catch (\Exception $e) {
// Generic catch for anything else that may go wrong
$isValidUrl = false;
$errorParams['errorType'] = 'exception';
$errorParams['message'] = $e->getMessage();
}
if (!$isValidUrl) {
$this->setErrorParams($errorParams);
$this->errorParams['errorType'] = 'exception';
$this->errorParams['exception'] = $e->getMessage();
$this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
}
$this->urlReports[$url] = $isValidUrl;
$this->urlErrorParams[$url] = $errorParams;
return $isValidUrl;
}

Expand All @@ -125,30 +153,34 @@ public function getErrorMessage($errorParams)
$errorType = $errorParams['errorType'];
switch ($errorType) {
case 300:
$response = sprintf($lang->getLL('list.report.externalerror'), $errorType);
$message = sprintf($lang->getLL('list.report.externalerror'), $errorType);
break;
case 403:
$response = $lang->getLL('list.report.pageforbidden403');
$message = $lang->getLL('list.report.pageforbidden403');
break;
case 404:
$response = $lang->getLL('list.report.pagenotfound404');
$message = $lang->getLL('list.report.pagenotfound404');
break;
case 500:
$response = $lang->getLL('list.report.internalerror500');
$message = $lang->getLL('list.report.internalerror500');
break;
case 'loop':
$response = sprintf($lang->getLL('list.report.redirectloop'), $errorParams['errorCode'], $errorParams['location']);
$message = sprintf(
$lang->getLL('list.report.redirectloop'),
$errorParams['exception'],
''
);
break;
case 'exception':
$response = sprintf($lang->getLL('list.report.httpexception'), $errorParams['message']);
$message = sprintf($lang->getLL('list.report.httpexception'), $errorParams['exception']);
break;
case 'network':
$response = $lang->getLL('list.report.networkexception');
$message = $lang->getLL('list.report.networkexception');
break;
default:
$response = sprintf($lang->getLL('list.report.otherhttpcode'), $errorType, $errorParams['message']);
$message = sprintf($lang->getLL('list.report.otherhttpcode'), $errorType, $errorParams['exception']);
}
return $response;
return $message;
}

/**
Expand Down Expand Up @@ -176,6 +208,14 @@ public function fetchType($value, $type, $key)
*/
protected function preprocessUrl(string $url): string
{
return (new IdnaConvert())->encode($url);
try {
return (new IdnaConvert())->encode($url);
} catch (\Exception $e) {
// in case of any error, return empty url.
$this->errorParams['errorType'] = 'exception';
$this->errorParams['exception'] = $e->getMessage();
$this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
return '';
}
}
}

0 comments on commit 8833bf9

Please sign in to comment.