Skip to content

Commit

Permalink
Merge pull request #4688 from BookStackApp/include-parser
Browse files Browse the repository at this point in the history
New include tag parser
  • Loading branch information
ssddanbrown committed Nov 27, 2023
2 parents 22a9cf1 + 652d541 commit 1011d61
Show file tree
Hide file tree
Showing 10 changed files with 669 additions and 134 deletions.
142 changes: 54 additions & 88 deletions app/Entities/Tools/PageContent.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
use BookStack\Uploads\ImageService;
use BookStack\Util\HtmlContentFilter;
use BookStack\Util\HtmlDocument;
use Closure;
use DOMElement;
use DOMNode;
use DOMNodeList;
Expand Down Expand Up @@ -275,21 +276,65 @@ protected function toPlainText(): string
*/
public function render(bool $blankIncludes = false): string
{
// NOTE(review): this span comes from a rendered diff, so removed and added
// lines appear side by side (for example the two assignments directly below).
// The comments that follow describe the $html / $doc based lines.
$content = $this->page->html ?? '';
// Treat a page without stored HTML as an empty string.
$html = $this->page->html ?? '';

// Nothing to parse or filter for empty content; hand it back unchanged.
if (empty($html)) {
return $html;
}

// Wrap the HTML in a document and build the include parser around it. The
// closure is what the parser receives to obtain content for include tags;
// $blankIncludes is forwarded to it.
$doc = new HtmlDocument($html);
$contentProvider = $this->getContentProviderClosure($blankIncludes);
$parser = new PageIncludeParser($doc, $contentProvider);

// Run at most three parse passes, stopping early as soon as parse() returns 0.
// $nodesAdded starts at 1 so that the first pass always runs.
$nodesAdded = 1;
for ($includeDepth = 0; $includeDepth < 3 && $nodesAdded !== 0; $includeDepth++) {
$nodesAdded = $parser->parse();
}

// After the loop $includeDepth equals the number of passes run. When more
// than one ran, walk the document body with updateIdsRecursively().
// NOTE(review): presumably this de-duplicates ids brought in by included
// content - confirm against updateIdsRecursively().
if ($includeDepth > 1) {
$idMap = [];
$changeMap = [];
$this->updateIdsRecursively($doc->getBody(), 0, $idMap, $changeMap);
}

// Strip scripts unless the 'app.allow_content_scripts' config value is truthy.
if (!config('app.allow_content_scripts')) {
$content = HtmlContentFilter::removeScripts($content);
HtmlContentFilter::removeScriptsFromDocument($doc);
}

if ($blankIncludes) {
$content = $this->blankPageIncludes($content);
} else {
for ($includeDepth = 0; $includeDepth < 3; $includeDepth++) {
$content = $this->parsePageIncludes($content);
// Return the inner HTML of the processed document body.
return $doc->getBodyInnerHtml();
}

/**
* Get the closure used to fetch content for page includes.
*/
protected function getContentProviderClosure(bool $blankIncludes): Closure
{
// Captured by the closure so a copy of this page can be handed to theme listeners.
$contextPage = $this->page;

// The returned closure maps one include tag to the content that should stand in for it.
return function (PageIncludeTag $tag) use ($blankIncludes, $contextPage): PageIncludeContent {
// When blanking includes, every tag resolves to content built from an empty HTML string.
if ($blankIncludes) {
return PageIncludeContent::fromHtmlAndTag('', $tag);
}

// Look up the referenced page through the visible() scope. The result may be
// null (see the null check further down), in which case the '' fallback is used.
// NOTE(review): visible() presumably limits results to pages the current
// user may view - confirm on the Page model.
$matchedPage = Page::visible()->find($tag->getPageId());
$content = PageIncludeContent::fromHtmlAndTag($matchedPage->html ?? '', $tag);

// Only serialise the content and dispatch the event when a listener is registered.
if (Theme::hasListeners(ThemeEvents::PAGE_INCLUDE_PARSE)) {
// Listeners receive clones of both pages rather than the original instances.
$themeReplacement = Theme::dispatch(
ThemeEvents::PAGE_INCLUDE_PARSE,
$tag->tagContent,
$content->toHtml(),
clone $contextPage,
$matchedPage ? (clone $matchedPage) : null,
);

// A non-null listener result replaces the include content; it is cast
// to a string and loaded as inline HTML.
if ($themeReplacement !== null) {
$content = PageIncludeContent::fromInlineHtml(strval($themeReplacement));
}
}
}

return $content;
return $content;
};
}

/**
Expand Down Expand Up @@ -337,83 +382,4 @@ protected function headerNodesToLevelList(DOMNodeList $nodeList): array

return $tree->toArray();
}

/**
* Remove any page include tags within the given HTML.
*/
protected function blankPageIncludes(string $html): string
{
    // Matches "{{@", one optional whitespace character, a digit, then the
    // shortest run of characters up to the next "}}".
    $includeTagPattern = "/{{@\s?([0-9].*?)}}/";
    $withoutIncludes = preg_replace($includeTagPattern, '', $html);

    return $withoutIncludes;
}

/**
* Parse any include tags "{{@<page_id>#section}}" to be part of the page.
*/
protected function parsePageIncludes(string $html): string
{
    // $matches[0] holds each full tag, $matches[1] the reference inside it
    // ("<page_id>" or "<page_id>#<section>").
    $matches = [];
    preg_match_all("/{{@\s?([0-9].*?)}}/", $html, $matches);

    foreach ($matches[1] as $index => $includeId) {
        $fullMatch = $matches[0][$index];

        // Split into at most two parts: the page id and an optional section id.
        $splitInclude = explode('#', $includeId, 2);
        $hasSection = count($splitInclude) > 1;

        // The pattern only captures references starting with a digit, and
        // intval() always returns an int. The former is_nan() guard on that
        // int could never be true, so it has been dropped.
        $pageId = intval($splitInclude[0]);

        // Find the page to use; the replacement stays an empty string for non-matches.
        /** @var ?Page $matchedPage */
        $matchedPage = Page::visible()->find($pageId);
        $replacement = '';

        if ($matchedPage && !$hasSection) {
            // Only a page id was given: insert all of the page's HTML. Fall back
            // to '' for a null html value, as done for $this->page->html in render().
            $replacement = $matchedPage->html ?? '';
        } elseif ($matchedPage) {
            // A section was requested: load just that part of the page.
            $innerContent = $this->fetchSectionOfPage($matchedPage, $splitInclude[1]);
            $replacement = trim($innerContent);
        }

        // Give theme listeners the chance to override the replacement. They
        // receive clones of the pages rather than the original instances.
        $themeReplacement = Theme::dispatch(
            ThemeEvents::PAGE_INCLUDE_PARSE,
            $includeId,
            $replacement,
            clone $this->page,
            $matchedPage ? (clone $matchedPage) : null,
        );

        // Swap every occurrence of this exact tag for the resolved content;
        // a non-null theme result takes priority.
        $html = str_replace($fullMatch, $themeReplacement ?? $replacement, $html);
    }

    return $html;
}

/**
* Fetch the content from a specific section of the given page.
*/
protected function fetchSectionOfPage(Page $page, string $sectionId): string
{
    // Elements of these types are returned including their own wrapping tag.
    $topLevelTags = ['table', 'ul', 'ol', 'pre'];

    // Fall back to '' for a null html value, matching how render() reads page HTML.
    $doc = new HtmlDocument($page->html ?? '');

    // Look in the page content for the given id; yield nothing when absent.
    $matchingElem = $doc->getElementById($sectionId);
    if ($matchingElem === null) {
        return '';
    }

    // Strict comparison: both sides are strings, so no type juggling is wanted.
    $isTopLevel = in_array(strtolower($matchingElem->nodeName), $topLevelTags, true);

    // Top-level element types keep their wrapper; anything else gives only its inner HTML.
    return $isTopLevel
        ? $doc->getNodeOuterHtml($matchingElem)
        : $doc->getNodeInnerHtml($matchingElem);
}
}
85 changes: 85 additions & 0 deletions app/Entities/Tools/PageIncludeContent.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
<?php

namespace BookStack\Entities\Tools;

use BookStack\Util\HtmlDocument;
use DOMNode;

/**
 * Holds the content resolved for a page include tag as a list of DOM nodes,
 * together with a flag stating whether that content is considered inline.
 */
class PageIncludeContent
{
    /**
     * Element names which, when targeted by a section id, are used whole
     * (the element itself) rather than via their child nodes.
     *
     * @var string[]
     */
    protected static array $topLevelTags = ['table', 'ul', 'ol', 'pre'];

    /**
     * @param DOMNode[] $contents
     */
    public function __construct(
        protected array $contents,
        protected bool $isInline,
    ) {
    }

    /**
     * Build include content from a page's HTML for the given include tag.
     *
     * - Empty HTML, or a section id with no matching element: empty, inline.
     * - No section id on the tag: all body children, not inline.
     * - Section element is one of the top-level tags: that element, not inline.
     * - Any other section element: its child nodes, inline.
     */
    public static function fromHtmlAndTag(string $html, PageIncludeTag $tag): self
    {
        // Compare against '' rather than using empty(), since empty() would
        // also discard the valid content string "0".
        if ($html === '') {
            return new self([], true);
        }

        $doc = new HtmlDocument($html);

        $sectionId = $tag->getSectionId();
        if (!$sectionId) {
            return new self([...$doc->getBodyChildren()], false);
        }

        $section = $doc->getElementById($sectionId);
        if (!$section) {
            return new self([], true);
        }

        // Strict comparison: both sides are strings, so no type juggling is wanted.
        $isTopLevel = in_array(strtolower($section->nodeName), static::$topLevelTags, true);
        $contents = $isTopLevel ? [$section] : [...$section->childNodes];

        return new self($contents, !$isTopLevel);
    }

    /**
     * Build include content from an HTML string, always flagged as inline.
     * An empty string gives empty content.
     */
    public static function fromInlineHtml(string $html): self
    {
        // See fromHtmlAndTag(): "0" is content and must not be treated as empty.
        if ($html === '') {
            return new self([], true);
        }

        $doc = new HtmlDocument($html);

        return new self([...$doc->getBodyChildren()], true);
    }

    /**
     * Whether this content was flagged as inline when created.
     */
    public function isInline(): bool
    {
        return $this->isInline;
    }

    /**
     * Whether this content holds no nodes at all.
     */
    public function isEmpty(): bool
    {
        return empty($this->contents);
    }

    /**
     * Get the content as the DOM nodes it was created with.
     *
     * @return DOMNode[]
     */
    public function toDomNodes(): array
    {
        return $this->contents;
    }

    /**
     * Serialise the content to an HTML string by saving each node through
     * its owning document, in order.
     */
    public function toHtml(): string
    {
        $html = '';

        foreach ($this->contents as $content) {
            $html .= $content->ownerDocument->saveHTML($content);
        }

        return $html;
    }
}
Loading

0 comments on commit 1011d61

Please sign in to comment.