Skip to content

Commit

Permalink
[TASK] Refactor Typo3PageIndexer
Browse files Browse the repository at this point in the history
* Add integration tests
* Simplified code
* Added documentation for possible points to extend the indexing
  • Loading branch information
timohund committed Dec 20, 2016
1 parent 72d15ad commit 00deeba
Show file tree
Hide file tree
Showing 8 changed files with 278 additions and 78 deletions.
2 changes: 1 addition & 1 deletion .scrutinizer.yml
Expand Up @@ -39,4 +39,4 @@ tools:

external_code_coverage:
runs: 2
timeout: 1200
timeout: 1800
191 changes: 115 additions & 76 deletions Classes/Typo3PageIndexer.php
Expand Up @@ -26,6 +26,7 @@

use ApacheSolrForTypo3\Solr\Access\Rootline;
use ApacheSolrForTypo3\Solr\ConnectionManager;
use ApacheSolrForTypo3\Solr\FieldProcessor\Service;
use ApacheSolrForTypo3\Solr\IndexQueue\FrontendHelper\PageFieldMappingIndexer;
use ApacheSolrForTypo3\Solr\IndexQueue\Item;
use ApacheSolrForTypo3\Solr\IndexQueue\Queue;
Expand Down Expand Up @@ -96,7 +97,7 @@ class Typo3PageIndexer
*
* @var array
*/
protected $documentsSentToSolr = array();
protected $documentsSentToSolr = [];

/**
* @var TypoScriptConfiguration
Expand Down Expand Up @@ -126,10 +127,7 @@ public function __construct(TypoScriptFrontendController $page)

// TODO extract to a class "ExceptionLogger"
if ($this->configuration->getLoggingExceptions()) {
GeneralUtility::devLog('Exception while trying to index a page',
'solr', 3, array(
$e->__toString()
));
GeneralUtility::devLog('Exception while trying to index a page', 'solr', 3, [$e->__toString()]);
}
}

Expand Down Expand Up @@ -174,14 +172,14 @@ protected function initializeSolrConnection()
* @param array $data Additional data to log
* @return void
*/
protected function log($message, $errorNum = 0, array $data = array())
protected function log($message, $errorNum = 0, array $data = [])
{
if (is_object($GLOBALS['TT'])) {
$GLOBALS['TT']->setTSlogMessage('tx_solr: ' . $message, $errorNum);
}

if ($this->configuration->getLoggingIndexing()) {
$logData = array();
$logData = [];
if (!empty($data)) {
foreach ($data as $value) {
$logData[] = (array)$value;
Expand Down Expand Up @@ -240,7 +238,7 @@ public function setSolrConnection(SolrService $solrConnection)
public function indexPage()
{
$pageIndexed = false;
$documents = array(); // this will become useful as soon as when starting to index individual records instead of whole pages
$documents = []; // this will become useful as soon as when starting to index individual records instead of whole pages

if (is_null($this->solrConnection)) {
// intended early return as it doesn't make sense to continue
Expand All @@ -253,20 +251,7 @@ public function indexPage()
$pageDocument = $this->getPageDocument();
$pageDocument = $this->substitutePageDocument($pageDocument);

if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'])) {
foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'] as $classReference) {
$postProcessor = GeneralUtility::getUserObj($classReference);

if ($postProcessor instanceof PageDocumentPostProcessor) {
$postProcessor->postProcessPageDocument($pageDocument, $this->page);
} else {
throw new \UnexpectedValueException(
get_class($pageDocument) . ' must implement interface ApacheSolrForTypo3\Solr\PageDocumentPostProcessor',
1397739154
);
}
}
}
$this->applyIndexPagePostProcessors($pageDocument);

self::$pageSolrDocument = $pageDocument;
$documents[] = $pageDocument;
Expand All @@ -279,6 +264,27 @@ public function indexPage()
return $pageIndexed;
}

/**
 * Applies the configured post processors (indexPagePostProcessPageDocument).
 *
 * Each class reference registered in
 * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument']
 * is resolved through GeneralUtility::getUserObj() and its
 * postProcessPageDocument() method is called with the page document and
 * $this->page. Nothing happens when that registration is not an array.
 *
 * @param \Apache_Solr_Document $pageDocument The page document handed to each post processor
 * @throws \UnexpectedValueException if a registered class does not implement PageDocumentPostProcessor
 * @return void
 */
protected function applyIndexPagePostProcessors($pageDocument)
{
    if (!is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'])) {
        return;
    }

    foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'] as $classReference) {
        $postProcessor = GeneralUtility::getUserObj($classReference);
        if (!$postProcessor instanceof PageDocumentPostProcessor) {
            // Report the class of the offending post processor, not the class
            // of the document that was about to be processed.
            $message = get_class($postProcessor) . ' must implement interface ApacheSolrForTypo3\Solr\PageDocumentPostProcessor';
            throw new \UnexpectedValueException($message, 1397739154);
        }

        $postProcessor->postProcessPageDocument($pageDocument, $this->page);
    }
}

/**
* Builds the Solr document for the current page.
*
Expand Down Expand Up @@ -315,21 +321,12 @@ protected function getPageDocument()
$document->setField('created', $pageRecord['crdate']);
$document->setField('changed', $pageRecord['SYS_LASTCHANGED']);

$rootline = $this->page->id;
$mountPointParameter = $this->getMountPointParameter();
if ($mountPointParameter !== '') {
$rootline .= ',' . $mountPointParameter;
}
$rootline = $this->getRootLineFieldValue();
$document->setField('rootline', $rootline);

// access
$access = (string)$this->pageAccessRootline;
if (trim($access) !== '') {
$document->setField('access', $access);
}
if ($this->page->page['endtime']) {
$document->setField('endtime', $pageRecord['endtime']);
}
$this->addAccessField($document);
$this->addEndtimeField($document, $pageRecord);

// content
$document->setField('title', $this->contentExtractor->getPageTitle());
Expand All @@ -338,27 +335,79 @@ protected function getPageDocument()
$document->setField('author', $pageRecord['author']);
$document->setField('description', $pageRecord['description']);
$document->setField('abstract', $pageRecord['abstract']);
$document->setField('content',
$this->contentExtractor->getIndexableContent());
$document->setField('content', $this->contentExtractor->getIndexableContent());
$document->setField('url', $this->pageUrl);

// keywords, multi valued
$keywords = array_unique(GeneralUtility::trimExplode(
',',
$pageRecord['keywords'],
true
));
$this->addKeywordsField($document, $pageRecord);
$this->addTagContentFields($document);

return $document;
}

/**
 * Sets the "access" field on the document from the page access rootline.
 *
 * The field is left untouched when the string representation of the
 * access rootline is empty or consists of whitespace only.
 *
 * @param \Apache_Solr_Document $document The document to add the field to
 */
protected function addAccessField(\Apache_Solr_Document $document)
{
    $accessRootline = (string)$this->pageAccessRootline;
    if (trim($accessRootline) === '') {
        return;
    }

    $document->setField('access', $accessRootline);
}

/**
 * Sets the "endtime" field on the document when the page has an endtime.
 *
 * The decision is made on the endtime of $this->page->page, the value
 * written to the document is taken from the passed page record.
 *
 * @param \Apache_Solr_Document $document The document to add the field to
 * @param array $pageRecord The page record providing the endtime value
 */
protected function addEndtimeField(\Apache_Solr_Document $document, $pageRecord)
{
    if (!$this->page->page['endtime']) {
        return;
    }

    $document->setField('endtime', $pageRecord['endtime']);
}

/**
 * Adds the keywords of the page record to the multi valued "keywords" field.
 *
 * The comma separated keywords string is split up, duplicates are dropped
 * and every remaining keyword is added as a separate field value.
 *
 * @param \Apache_Solr_Document $document The document to add the keywords to
 * @param array $pageRecord The page record containing the "keywords" string
 */
protected function addKeywordsField(\Apache_Solr_Document $document, $pageRecord)
{
    $allKeywords = GeneralUtility::trimExplode(',', $pageRecord['keywords'], true);
    $uniqueKeywords = array_unique($allKeywords);

    foreach ($uniqueKeywords as $uniqueKeyword) {
        $document->addField('keywords', $uniqueKeyword);
    }
}

// content from several tags like headers, anchors, ...
/**
 * Copies the tag content delivered by the content extractor (content from
 * several tags like headers, anchors, ...) into the document.
 *
 * Every entry of the tag content is set as a field, using the entry's key
 * as field name and the entry's value as field value.
 *
 * @param \Apache_Solr_Document $document The document to add the fields to
 */
protected function addTagContentFields(\Apache_Solr_Document $document)
{
    foreach ($this->contentExtractor->getTagContent() as $tagFieldName => $tagFieldValue) {
        $document->setField($tagFieldName, $tagFieldValue);
    }
}

return $document;
/**
 * Builds the value for the "rootline" field.
 *
 * The value is the id of the page; when the mount point parameter is not
 * an empty string it is appended, separated by a comma.
 *
 * @return string
 */
protected function getRootLineFieldValue()
{
    $pageId = $this->page->id;
    $mountPoint = $this->getMountPointParameter();

    return $mountPoint === '' ? $pageId : $pageId . ',' . $mountPoint;
}

/**
Expand Down Expand Up @@ -458,33 +507,28 @@ protected function getIndexConfigurationNameForCurrentPage()
* should be indexed for the current page.
*
* @param \Apache_Solr_Document $pageDocument The main document representing this page.
* @param array $existingDocuments An array of documents already created for this page.
* @param \Apache_Solr_Document[] $existingDocuments An array of documents already created for this page.
* @return array An array of additional \Apache_Solr_Document objects to index
*/
protected function getAdditionalDocuments(
\Apache_Solr_Document $pageDocument,
array $existingDocuments
) {
protected function getAdditionalDocuments(\Apache_Solr_Document $pageDocument, array $existingDocuments)
{
$documents = $existingDocuments;

if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments'])) {
foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments'] as $classReference) {
$additionalIndexer = GeneralUtility::getUserObj($classReference);

if ($additionalIndexer instanceof AdditionalPageIndexer) {
$additionalDocuments = $additionalIndexer->getAdditionalPageDocuments($pageDocument,
$documents);

if (is_array($additionalDocuments)) {
$documents = array_merge($documents,
$additionalDocuments);
}
} else {
throw new \UnexpectedValueException(
get_class($additionalIndexer) . ' must implement interface ApacheSolrForTypo3\Solr\AdditionalPageIndexer',
1310491024
);
}
if (!is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments'])) {
return $documents;
}

foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments'] as $classReference) {
$additionalIndexer = GeneralUtility::getUserObj($classReference);

if (!$additionalIndexer instanceof AdditionalPageIndexer) {
$message = get_class($additionalIndexer) . ' must implement interface ApacheSolrForTypo3\Solr\AdditionalPageIndexer';
throw new \UnexpectedValueException($message, 1310491024);
}

$additionalDocuments = $additionalIndexer->getAdditionalPageDocuments($pageDocument, $documents);
if (is_array($additionalDocuments)) {
$documents = array_merge($documents, $additionalDocuments);
}
}

Expand All @@ -501,7 +545,7 @@ protected function processDocuments(array $documents)
{
$processingInstructions = $this->configuration->getIndexFieldProcessingInstructionsConfiguration();
if (count($processingInstructions) > 0) {
$service = GeneralUtility::makeInstance('ApacheSolrForTypo3\\Solr\\FieldProcessor\\Service');
$service = GeneralUtility::makeInstance(Service::class);
$service->processDocuments($documents, $processingInstructions);
}
}
Expand All @@ -521,8 +565,7 @@ protected function addDocumentsToSolrIndex(array $documents)
}

try {
$this->log('Adding ' . count($documents) . ' documents.', 0,
$documents);
$this->log('Adding ' . count($documents) . ' documents.', 0, $documents);

// chunk adds by 20
$documentChunks = array_chunk($documents, 20);
Expand All @@ -531,8 +574,7 @@ protected function addDocumentsToSolrIndex(array $documents)

if ($response->getHttpStatus() != 200) {
$transportException = new \Apache_Solr_HttpTransportException($response);
throw new \RuntimeException('Solr Request failed.',
1331834983, $transportException);
throw new \RuntimeException('Solr Request failed.', 1331834983, $transportException);
}
}

Expand All @@ -541,10 +583,7 @@ protected function addDocumentsToSolrIndex(array $documents)
$this->log($e->getMessage() . ' Error code: ' . $e->getCode(), 2);

if ($this->configuration->getLoggingExceptions()) {
GeneralUtility::devLog('Exception while adding documents',
'solr', 3, array(
$e->__toString()
));
GeneralUtility::devLog('Exception while adding documents', 'solr', 3, [$e->__toString()]);
}
}

Expand Down
29 changes: 29 additions & 0 deletions Documentation/Development/Index.rst
@@ -0,0 +1,29 @@
.. ==================================================
.. FOR YOUR INFORMATION
.. --------------------------------------------------
.. -*- coding: utf-8 -*- with BOM.
.. include:: ../Includes.txt


.. _conf-backend:


.. raw:: latex

\newpage

.. raw:: pdf
PageBreak
Development
===========

There are many ways to extend and hook into EXT:solr to customize it for your needs.

.. toctree::
:maxdepth: 5
:glob:

Indexing
42 changes: 42 additions & 0 deletions Documentation/Development/Indexing.rst
@@ -0,0 +1,42 @@
.. This file will be replaced from solrfluid later
========
Indexing
========

This section describes the possibilities for extending page indexing in EXT:solr with custom code.

Page Indexing
=============

There are several extension points in the Typo3PageIndexer class where you can register your own classes to be used during indexing.

indexPageAddDocuments
---------------------

Registered classes can be used to add additional documents to Solr when a page gets indexed.

Registration with: $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments']
Required Interface: AdditionalPageIndexer


indexPageSubstitutePageDocument
-------------------------------

Registered classes can be used to replace/substitute a Solr document of a page.


Registration with: $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageSubstitutePageDocument']
Required Interface: SubstitutePageIndexer

indexPagePostProcessPageDocument
--------------------------------

Registered classes can be used to post process a Solr document of a page.

Registration with: $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument']
Required Interface: PageDocumentPostProcessor




0 comments on commit 00deeba

Please sign in to comment.