From 3304e848467b1d35e0214a5c5e22aebeeafa27d2 Mon Sep 17 00:00:00 2001 From: Sybille Peters Date: Tue, 24 Nov 2020 19:05:01 +0100 Subject: [PATCH] [BUGFIX] Handle large number of pages in linkvalidator This patch handles problems which occured with a large number of pages and also for a large number of records to be checked for broken links. The problems previously resulted in "Prepared statement contains too many placeholders" exceptions and consumption of too much RAM and (possibly) out of memory errors. Technical details about the implementation follows because this is handled slightly differently than in other areas of the core. LinkValidator used named placeholders to build database queries, and the EditableRestriction class also used a number of placeholder. Technically 'doctrine/dbal' explodes named placeholders for array values with quote type 'PARAM_INT_ARRAY' or 'PARAM_STR_ARRAY' to single unnamed placeholders for each value, using one placeholder for each array value. If multiple array value placeholder are used with a large number of values this could quickly emit the "Prepared statement contains too many placeholders" error as the limit is reached. The limit differs beetween the various database systems. One way to work around this issue is to do database operations in chunks, but it could become quite difficult to calculate the chunk size beforehand. Array values are mainly used for 'in()' or 'notIn()' query expressions, which naturally do not not have direct limits for the value count in it through all supported dbms systems. This opens the possibility to use direct value lists for these expressions. There are other dbms limitations depending on the used dbms (for example on mariadb/mysql 'max_allowed_packets', max query size setting for SQLite), which may be adjusted in some cases. However we could not safely expect that these values are monitored and adjusted, so we still need to do database operations in chunks, but make it a little easier for 'chunksize' calculations. During testing on an instance with a large number of pages, it was found that LinkValidator used up a lot of memory, reaching the configured memory limit of 2G on that system, which itself was very high already (and far beyond the recommended 256MB limit). Instead of collecting unlimited results in an array before checking links against this array, doing this for each record directly avoids excessive memory consumption during link checking. It was decided to do this in this patch directly instead of a separate patch, as this patch otherwise would not be testable in real instances. This patch does the following: * introduce two method to implode value arrays in QueryHelper * replace placeholder in EditableRestriction with quoted values * replace placeholder in BrokenLinksRepository with quoted values * introduce chunk database operations in BrokenLinkRepository * introduce chunk database operations in LinkAnalyzer * move $results array and call checkLinks() on it to the inner loop to process this for each record (instead of accumulating the results in a huge array. In the future we need to investigate and find a more generic way to handle such scenarios for larger instances. This may take some time and investigation to do this properly and in a reusable way. To not stall the bugfix further, we are doing the workarounds in this patch to solve these issues in the meantime. Resolves: #92493 Releases: master, 11.5 Change-Id: I9a596df6749d07afee9c5ac37266b876ddc6e297 Reviewed-on: https://review.typo3.org/c/Packages/TYPO3.CMS/+/72357 Tested-by: core-ci Tested-by: Christian Kuhn Reviewed-by: Christian Kuhn --- .../Classes/Database/Query/QueryHelper.php | 62 ++++ .../linkvalidator/Classes/LinkAnalyzer.php | 55 ++-- .../QueryRestrictions/EditableRestriction.php | 22 +- .../Repository/BrokenLinkRepository.php | 277 ++++++++++-------- .../Tests/Functional/LinkAnalyzerTest.php | 40 +++ 5 files changed, 304 insertions(+), 152 deletions(-) diff --git a/typo3/sysext/core/Classes/Database/Query/QueryHelper.php b/typo3/sysext/core/Classes/Database/Query/QueryHelper.php index 2a5fe0d715ab..d6df3c1c3611 100644 --- a/typo3/sysext/core/Classes/Database/Query/QueryHelper.php +++ b/typo3/sysext/core/Classes/Database/Query/QueryHelper.php @@ -236,4 +236,66 @@ static function (array $matches) use ($connection) { return $sql; } + + /** + * Implode array to comma separated list with database int-quoted values to be used as direct + * value list for database 'in(...)' or 'notIn(...') expressions. Empty array will return 'NULL' + * as string to avoid database query failure, as 'in()' is invalid, but 'in(null)' will be executed. + * + * This function should be used with care, as it should be preferred to use placeholders, although + * there are use cases in some (system) areas which reaches placeholder limit really fast. + * + * Return value should only be used as value list for database query 'IN()' or 'NOTIN()' . + * + * @param array $values + * @param Connection $connection + * @return string + */ + public static function implodeToIntQuotedValueList(array $values, Connection $connection): string + { + if (empty($values)) { + return 'NULL'; + } + + // Ensure values are all integer + $values = GeneralUtility::intExplode(',', implode(',', $values)); + + // Ensure all values are quoted as int for used dbms + array_walk($values, static function (&$value) use ($connection) { + $value = $connection->quote($value, Connection::PARAM_INT); + }); + + return implode(',', $values); + } + + /** + * Implode array to comma separated list with database string-quoted values to be used as direct + * value list for database 'in(...)' or 'notIn(...') expressions. Empty array will return 'NULL' + * as string to avoid database query failure, as 'in()' is invalid, but 'in(null)' will be executed. + * + * This function should be used with care, as it should be preferred to use placeholders, although + * there are use cases in some (system) areas which reaches placeholder limit really fast. + * + * Return value should only be used as value list for database query 'IN()' or 'NOTIN()' . + * + * @param array $values + * @param Connection $connection + * @return string + */ + public static function implodeToStringQuotedValueList(array $values, Connection $connection): string + { + if (empty($values)) { + return 'NULL'; + } + + // Ensure values are all strings + $values = GeneralUtility::trimExplode(',', implode(',', $values)); + + // Ensure all values are quoted as string values for used dbmns + array_walk($values, static function (&$value) use ($connection) { + $value = $connection->quote($value, Connection::PARAM_STR); + }); + + return implode(',', $values); + } } diff --git a/typo3/sysext/linkvalidator/Classes/LinkAnalyzer.php b/typo3/sysext/linkvalidator/Classes/LinkAnalyzer.php index 9963044edbeb..230f60c444e8 100644 --- a/typo3/sysext/linkvalidator/Classes/LinkAnalyzer.php +++ b/typo3/sysext/linkvalidator/Classes/LinkAnalyzer.php @@ -19,6 +19,8 @@ use TYPO3\CMS\Backend\Utility\BackendUtility; use TYPO3\CMS\Core\Database\Connection; use TYPO3\CMS\Core\Database\ConnectionPool; +use TYPO3\CMS\Core\Database\Platform\PlatformInformation; +use TYPO3\CMS\Core\Database\Query\QueryHelper; use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction; use TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction; use TYPO3\CMS\Core\DataHandling\SoftReference\SoftReferenceParserFactory; @@ -123,7 +125,6 @@ public function init(array $searchFields, array $pidList, $tsConfig) */ public function getLinkStatistics(array $linkTypes = [], $considerHidden = false) { - $results = []; if (empty($linkTypes) || empty($this->pids)) { return; } @@ -140,14 +141,6 @@ public function getLinkStatistics(array $linkTypes = [], $considerHidden = false if (!is_array($GLOBALS['TCA'][$table] ?? null)) { continue; } - $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class) - ->getQueryBuilderForTable($table); - - if ($considerHidden) { - $queryBuilder->getRestrictions() - ->removeAll() - ->add(GeneralUtility::makeInstance(DeletedRestriction::class)); - } // Re-init selectFields for table $selectFields = array_merge(['uid', 'pid', $GLOBALS['TCA'][$table]['ctrl']['label']], $fields); @@ -160,22 +153,42 @@ public function getLinkStatistics(array $linkTypes = [], $considerHidden = false } } - $result = $queryBuilder->select(...$selectFields) - ->from($table) - ->where( - $queryBuilder->expr()->in( + $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class) + ->getQueryBuilderForTable($table); + + if ($considerHidden) { + $queryBuilder->getRestrictions() + ->removeAll() + ->add(GeneralUtility::makeInstance(DeletedRestriction::class)); + } + $queryBuilder->select(...$selectFields)->from($table); + + // We need to do the work in chunks, as it may be quite huge and would hit the one + // or other limit depending on the used dbms - and we also avoid placeholder usage + // as they are hard to calculate beforehand because of some magic handling of dbal. + $maxChunk = PlatformInformation::getMaxBindParameters( + GeneralUtility::makeInstance(ConnectionPool::class) + ->getConnectionForTable($table) + ->getDatabasePlatform() + ); + foreach (array_chunk($this->pids, (int)floor($maxChunk / 2)) as $pageIdsChunk) { + $statement = clone $queryBuilder; + $statement->where( + $statement->expr()->in( ($table === 'pages' ? 'uid' : 'pid'), - $queryBuilder->createNamedParameter($this->pids, Connection::PARAM_INT_ARRAY) + QueryHelper::implodeToIntQuotedValueList($pageIdsChunk, $statement->getConnection()) ) - ) - ->execute(); - - // @todo #64091: only select rows that have content in at least one of the relevant fields (via OR) - while ($row = $result->fetchAssociative()) { - $this->analyzeRecord($results, $table, $fields, $row); + ); + $result = $statement->execute(); + + // @todo #64091: only select rows that have content in at least one of the relevant fields (via OR) + while ($row = $result->fetchAssociative()) { + $results = []; + $this->analyzeRecord($results, $table, $fields, $row); + $this->checkLinks($results, $linkTypes); + } } } - $this->checkLinks($results, $linkTypes); } /** diff --git a/typo3/sysext/linkvalidator/Classes/QueryRestrictions/EditableRestriction.php b/typo3/sysext/linkvalidator/Classes/QueryRestrictions/EditableRestriction.php index 45233f5478fc..c225ab0085e6 100644 --- a/typo3/sysext/linkvalidator/Classes/QueryRestrictions/EditableRestriction.php +++ b/typo3/sysext/linkvalidator/Classes/QueryRestrictions/EditableRestriction.php @@ -17,7 +17,6 @@ namespace TYPO3\CMS\Linkvalidator\QueryRestrictions; -use TYPO3\CMS\Core\Database\Connection; use TYPO3\CMS\Core\Database\Query\Expression\CompositeExpression; use TYPO3\CMS\Core\Database\Query\Expression\ExpressionBuilder; use TYPO3\CMS\Core\Database\Query\QueryBuilder; @@ -175,7 +174,7 @@ public function buildExpression(array $queriedTables, ExpressionBuilder $express $expressionBuilder->andX( $expressionBuilder->eq( 'tx_linkvalidator_link.table_name', - $this->queryBuilder->createNamedParameter('pages') + $this->queryBuilder->quote('pages') ), QueryHelper::stripLogicalOperatorPrefix($GLOBALS['BE_USER']->getPagePermsClause(Permission::PAGE_EDIT)) ), @@ -183,7 +182,7 @@ public function buildExpression(array $queriedTables, ExpressionBuilder $express $expressionBuilder->andX( $expressionBuilder->neq( 'tx_linkvalidator_link.table_name', - $this->queryBuilder->createNamedParameter('pages') + $this->queryBuilder->quote('pages') ), QueryHelper::stripLogicalOperatorPrefix($GLOBALS['BE_USER']->getPagePermsClause(Permission::CONTENT_EDIT)) ) @@ -197,11 +196,11 @@ public function buildExpression(array $queriedTables, ExpressionBuilder $express $additionalWhere[] = $expressionBuilder->andX( $expressionBuilder->eq( 'tx_linkvalidator_link.table_name', - $this->queryBuilder->createNamedParameter($table) + $this->queryBuilder->quote($table) ), $expressionBuilder->eq( 'tx_linkvalidator_link.field', - $this->queryBuilder->createNamedParameter($field) + $this->queryBuilder->quote($field) ) ); } @@ -219,19 +218,16 @@ public function buildExpression(array $queriedTables, ExpressionBuilder $express $additionalWhere[] = $expressionBuilder->andX( $expressionBuilder->eq( 'tx_linkvalidator_link.table_name', - $this->queryBuilder->createNamedParameter($table) + $this->queryBuilder->quote($table) ), $expressionBuilder->in( 'tx_linkvalidator_link.element_type', - $this->queryBuilder->createNamedParameter( - array_unique(current($field)), - Connection::PARAM_STR_ARRAY - ) + QueryHelper::implodeToStringQuotedValueList(array_unique(current($field) ?: []), $this->queryBuilder->getConnection()) ) ); $additionalWhere[] = $expressionBuilder->neq( 'tx_linkvalidator_link.table_name', - $this->queryBuilder->createNamedParameter($table) + $this->queryBuilder->quote($table) ); if ($additionalWhere) { $constraints[] = $expressionBuilder->orX(...$additionalWhere); @@ -244,11 +240,11 @@ public function buildExpression(array $queriedTables, ExpressionBuilder $express $additionalWhere[] = $expressionBuilder->orX( $expressionBuilder->eq( 'tx_linkvalidator_link.language', - $this->queryBuilder->createNamedParameter($langId, \PDO::PARAM_INT) + $this->queryBuilder->quote($langId, \PDO::PARAM_INT) ), $expressionBuilder->eq( 'tx_linkvalidator_link.language', - $this->queryBuilder->createNamedParameter(-1, \PDO::PARAM_INT) + $this->queryBuilder->quote(-1, \PDO::PARAM_INT) ) ); } diff --git a/typo3/sysext/linkvalidator/Classes/Repository/BrokenLinkRepository.php b/typo3/sysext/linkvalidator/Classes/Repository/BrokenLinkRepository.php index e32c6c59f490..f0b6d581cdce 100644 --- a/typo3/sysext/linkvalidator/Classes/Repository/BrokenLinkRepository.php +++ b/typo3/sysext/linkvalidator/Classes/Repository/BrokenLinkRepository.php @@ -18,8 +18,9 @@ namespace TYPO3\CMS\Linkvalidator\Repository; use Doctrine\DBAL\Exception\TableNotFoundException; -use TYPO3\CMS\Core\Database\Connection; use TYPO3\CMS\Core\Database\ConnectionPool; +use TYPO3\CMS\Core\Database\Platform\PlatformInformation; +use TYPO3\CMS\Core\Database\Query\QueryHelper; use TYPO3\CMS\Core\Utility\GeneralUtility; use TYPO3\CMS\Linkvalidator\QueryRestrictions\EditableRestriction; @@ -66,49 +67,63 @@ public function isLinkTargetBrokenLink(string $linkTarget): bool */ public function getNumberOfBrokenLinksForRecordsOnPages(array $pageIds, array $searchFields): array { - $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class) - ->getQueryBuilderForTable(static::TABLE); - $queryBuilder->getRestrictions()->removeAll(); - if (!$GLOBALS['BE_USER']->isAdmin()) { - $queryBuilder->getRestrictions() - ->add(GeneralUtility::makeInstance(EditableRestriction::class, $searchFields, $queryBuilder)); - } - $statement = $queryBuilder->select('link_type') - ->addSelectLiteral($queryBuilder->expr()->count(static::TABLE . '.uid', 'amount')) - ->from(static::TABLE) - ->join( - static::TABLE, - 'pages', - 'pages', - $queryBuilder->expr()->eq('record_pid', $queryBuilder->quoteIdentifier('pages.uid')) - ) - ->where( - $queryBuilder->expr()->orX( - $queryBuilder->expr()->andX( - $queryBuilder->expr()->in( - 'record_uid', - $queryBuilder->createNamedParameter($pageIds, Connection::PARAM_INT_ARRAY) - ), - $queryBuilder->expr()->eq('table_name', $queryBuilder->createNamedParameter('pages')) - ), - $queryBuilder->expr()->andX( - $queryBuilder->expr()->in( - 'record_pid', - $queryBuilder->createNamedParameter($pageIds, Connection::PARAM_INT_ARRAY) + $result = [ + 'total' => 0, + ]; + + // We need to do the work in chunks, as it may be quite huge and would hit the one + // or other limit depending on the used dbms - and we also avoid placeholder usage + // as they are hard to calculate beforehand because of some magic handling of dbal. + $maxChunk = PlatformInformation::getMaxBindParameters( + GeneralUtility::makeInstance(ConnectionPool::class) + ->getConnectionForTable(static::TABLE) + ->getDatabasePlatform() + ); + foreach (array_chunk($pageIds, (int)floor($maxChunk / 3)) as $pageIdsChunk) { + $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class) + ->getQueryBuilderForTable(static::TABLE); + $queryBuilder->getRestrictions()->removeAll(); + if (!$GLOBALS['BE_USER']->isAdmin()) { + $queryBuilder->getRestrictions() + ->add(GeneralUtility::makeInstance(EditableRestriction::class, $searchFields, $queryBuilder)); + } + $statement = $queryBuilder->select('link_type') + ->addSelectLiteral($queryBuilder->expr()->count(static::TABLE . '.uid', 'amount')) + ->from(static::TABLE) + ->join( + static::TABLE, + 'pages', + 'pages', + $queryBuilder->expr()->eq('record_pid', $queryBuilder->quoteIdentifier('pages.uid')) + ) + ->where( + $queryBuilder->expr()->orX( + $queryBuilder->expr()->andX( + $queryBuilder->expr()->in( + 'record_uid', + QueryHelper::implodeToIntQuotedValueList($pageIdsChunk, $queryBuilder->getConnection()) + ), + $queryBuilder->expr()->eq('table_name', $queryBuilder->quote('pages')) ), - $queryBuilder->expr()->neq('table_name', $queryBuilder->createNamedParameter('pages')) + $queryBuilder->expr()->andX( + $queryBuilder->expr()->in( + 'record_pid', + QueryHelper::implodeToIntQuotedValueList($pageIdsChunk, $queryBuilder->getConnection()) + ), + $queryBuilder->expr()->neq('table_name', $queryBuilder->quote('pages')) + ) ) ) - ) - ->groupBy('link_type') - ->execute(); + ->groupBy('link_type') + ->execute(); - $result = [ - 'total' => 0, - ]; - while ($row = $statement->fetchAssociative()) { - $result[$row['link_type']] = $row['amount']; - $result['total']+= $row['amount']; + while ($row = $statement->fetchAssociative()) { + if (!isset($result[$row['link_type']])) { + $result[$row['link_type']] = 0; + } + $result[$row['link_type']] += $row['amount']; + $result['total'] += $row['amount']; + } } return $result; } @@ -158,36 +173,46 @@ public function removeBrokenLinksForRecord(string $tableName, int $recordUid): v */ public function removeAllBrokenLinksOfRecordsOnPageIds(array $pageIds, array $linkTypes): void { - $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class) - ->getQueryBuilderForTable(static::TABLE); + // We need to do the work in chunks, as it may be quite huge and would hit the one + // or other limit depending on the used dbms - and we also avoid placeholder usage + // as they are hard to calculate beforehand because of some magic handling of dbal. + $maxChunk = PlatformInformation::getMaxBindParameters( + GeneralUtility::makeInstance(ConnectionPool::class) + ->getConnectionForTable(static::TABLE) + ->getDatabasePlatform() + ); + foreach (array_chunk($pageIds, (int)floor($maxChunk / 3)) as $pageIdsChunk) { + $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class) + ->getQueryBuilderForTable(static::TABLE); - $queryBuilder->delete(static::TABLE) - ->where( - $queryBuilder->expr()->orX( - $queryBuilder->expr()->andX( - $queryBuilder->expr()->in( - 'record_uid', - $queryBuilder->createNamedParameter($pageIds, Connection::PARAM_INT_ARRAY) - ), - $queryBuilder->expr()->eq('table_name', $queryBuilder->createNamedParameter('pages')) - ), - $queryBuilder->expr()->andX( - $queryBuilder->expr()->in( - 'record_pid', - $queryBuilder->createNamedParameter($pageIds, Connection::PARAM_INT_ARRAY) + $queryBuilder->delete(static::TABLE) + ->where( + $queryBuilder->expr()->orX( + $queryBuilder->expr()->andX( + $queryBuilder->expr()->in( + 'record_uid', + QueryHelper::implodeToIntQuotedValueList($pageIdsChunk, $queryBuilder->getConnection()) + ), + $queryBuilder->expr()->eq('table_name', $queryBuilder->quote('pages')) ), - $queryBuilder->expr()->neq( - 'table_name', - $queryBuilder->createNamedParameter('pages') + $queryBuilder->expr()->andX( + $queryBuilder->expr()->in( + 'record_pid', + QueryHelper::implodeToIntQuotedValueList($pageIdsChunk, $queryBuilder->getConnection()) + ), + $queryBuilder->expr()->neq( + 'table_name', + $queryBuilder->quote('pages') + ) ) + ), + $queryBuilder->expr()->in( + 'link_type', + QueryHelper::implodeToStringQuotedValueList($linkTypes, $queryBuilder->getConnection()) ) - ), - $queryBuilder->expr()->in( - 'link_type', - $queryBuilder->createNamedParameter($linkTypes, Connection::PARAM_STR_ARRAY) ) - ) - ->execute(); + ->execute(); + } } /** @@ -208,66 +233,82 @@ public function getAllBrokenLinksForPages( array $searchFields = [], array $languages = [] ): array { - $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class) - ->getQueryBuilderForTable(self::TABLE); - if (!$GLOBALS['BE_USER']->isAdmin()) { - $queryBuilder->getRestrictions() - ->add(GeneralUtility::makeInstance(EditableRestriction::class, $searchFields, $queryBuilder)); - } + $results = []; - $constraints = [ - $queryBuilder->expr()->orX( - $queryBuilder->expr()->andX( - $queryBuilder->expr()->in( - 'record_uid', - $queryBuilder->createNamedParameter($pageIds, Connection::PARAM_INT_ARRAY) + // We need to do the work in chunks, as it may be quite huge and would hit the one + // or other limit depending on the used dbms - and we also avoid placeholder usage + // as they are hard to calculate beforehand because of some magic handling of dbal. + $maxChunk = PlatformInformation::getMaxBindParameters( + GeneralUtility::makeInstance(ConnectionPool::class) + ->getConnectionForTable(static::TABLE) + ->getDatabasePlatform() + ); + foreach (array_chunk($pageIds, (int)floor($maxChunk / 2)) as $pageIdsChunk) { + $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class) + ->getQueryBuilderForTable(self::TABLE); + if (!$GLOBALS['BE_USER']->isAdmin()) { + $queryBuilder->getRestrictions() + ->add(GeneralUtility::makeInstance(EditableRestriction::class, $searchFields, $queryBuilder)); + } + + $constraints = [ + $queryBuilder->expr()->orX( + $queryBuilder->expr()->andX( + $queryBuilder->expr()->in( + 'record_uid', + QueryHelper::implodeToIntQuotedValueList($pageIdsChunk, $queryBuilder->getConnection()) + ), + $queryBuilder->expr()->eq('table_name', $queryBuilder->quote('pages')) ), - $queryBuilder->expr()->eq('table_name', $queryBuilder->createNamedParameter('pages')) + $queryBuilder->expr()->andX( + $queryBuilder->expr()->in( + 'record_pid', + QueryHelper::implodeToIntQuotedValueList($pageIdsChunk, $queryBuilder->getConnection()) + ), + $queryBuilder->expr()->neq('table_name', $queryBuilder->quote('pages')) + ) ), - $queryBuilder->expr()->andX( - $queryBuilder->expr()->in( - 'record_pid', - $queryBuilder->createNamedParameter($pageIds, Connection::PARAM_INT_ARRAY) - ), - $queryBuilder->expr()->neq('table_name', $queryBuilder->createNamedParameter('pages')) - ) - ), - $queryBuilder->expr()->in( - 'link_type', - $queryBuilder->createNamedParameter($linkTypes, Connection::PARAM_STR_ARRAY) - ), - ]; + $queryBuilder->expr()->in( + 'link_type', + QueryHelper::implodeToStringQuotedValueList($linkTypes, $queryBuilder->getConnection()) + ), + ]; - if ($languages !== []) { - $constraints[] = $queryBuilder->expr()->in( - 'language', - $queryBuilder->createNamedParameter($languages, Connection::PARAM_INT_ARRAY) - ); - } + if ($languages !== []) { + $constraints[] = $queryBuilder->expr()->in( + 'language', + QueryHelper::implodeToIntQuotedValueList($languages, $queryBuilder->getConnection()) + ); + } - $records = $queryBuilder - ->select(self::TABLE . '.*') - ->from(self::TABLE) - ->join( - 'tx_linkvalidator_link', - 'pages', - 'pages', - $queryBuilder->expr()->eq('tx_linkvalidator_link.record_pid', $queryBuilder->quoteIdentifier('pages.uid')) - ) - ->where(...$constraints) - ->orderBy('tx_linkvalidator_link.record_uid') - ->addOrderBy('tx_linkvalidator_link.uid') - ->execute() - ->fetchAllAssociative(); - foreach ($records as &$record) { - $response = json_decode($record['url_response'], true); - // Fallback mechanism to still support the old serialized data, could be removed in TYPO3 v12 or later - if ($response === null) { - $response = unserialize($record['url_response'], ['allowed_classes' => false]); + $records = $queryBuilder + ->select(self::TABLE . '.*') + ->from(self::TABLE) + ->join( + 'tx_linkvalidator_link', + 'pages', + 'pages', + $queryBuilder->expr()->eq( + 'tx_linkvalidator_link.record_pid', + $queryBuilder->quoteIdentifier('pages.uid') + ) + ) + ->where(...$constraints) + ->orderBy('tx_linkvalidator_link.record_uid') + ->addOrderBy('tx_linkvalidator_link.uid') + ->execute() + ->fetchAllAssociative(); + foreach ($records as &$record) { + $response = json_decode($record['url_response'], true); + // Fallback mechanism to still support the old serialized data, could be removed in TYPO3 v12 or later + if ($response === null) { + $response = unserialize($record['url_response'], ['allowed_classes' => false]); + } + $record['url_response'] = $response; + $results[] = $record; } - $record['url_response'] = $response; } - return $records; + return $results; } public function addBrokenLink($record, bool $isValid, array $errorParams = null): void diff --git a/typo3/sysext/linkvalidator/Tests/Functional/LinkAnalyzerTest.php b/typo3/sysext/linkvalidator/Tests/Functional/LinkAnalyzerTest.php index b54375be48bf..0fc08a388626 100644 --- a/typo3/sysext/linkvalidator/Tests/Functional/LinkAnalyzerTest.php +++ b/typo3/sysext/linkvalidator/Tests/Functional/LinkAnalyzerTest.php @@ -261,4 +261,44 @@ public function getLinkStatisticsFindOnlyExternalBrokenLinksInBodytext(string $i $this->assertCSVDataSet($expectedOutputFile); } + + public function getLinkStatisticsFindOnlyExternalBrokenLinksInBodytextWithHugeListOfPageIdsDataProvider(): array + { + $lagePageUidList = range(1, 200000, 1); + return [ + // Tests with one broken link + 'Test with one broken external link' => + [ + __DIR__ . '/Fixtures/input_content_with_broken_link_external.xml', + $lagePageUidList, + 'EXT:linkvalidator/Tests/Functional/Fixtures/expected_output_content_with_broken_link_external.csv', + ], + ]; + } + + /** + * @test + * @dataProvider getLinkStatisticsFindOnlyExternalBrokenLinksInBodytextWithHugeListOfPageIdsDataProvider + */ + public function getLinkStatisticsFindOnlyExternalBrokenLinksInBodytextWithHugeListOfPageIds(string $inputFile, array $pidList, string $expectedOutputFile): void + { + $tsConfig = [ + 'searchFields' => [ + 'tt_content' => ['bodytext'], + ], + 'linktypes' => 'external', + 'checkhidden' => '0', + ]; + $linkTypes = explode(',', $tsConfig['linktypes']); + + $searchFields = $tsConfig['searchFields']; + + $this->importDataSet($inputFile); + + $linkAnalyzer = $this->getContainer()->get(LinkAnalyzer::class); + $linkAnalyzer->init($searchFields, $pidList, $tsConfig); + $linkAnalyzer->getLinkStatistics($linkTypes); + + $this->assertCSVDataSet($expectedOutputFile); + } }