Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New include tag parser #4688

Merged
merged 7 commits into from
Nov 27, 2023
142 changes: 54 additions & 88 deletions app/Entities/Tools/PageContent.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
use BookStack\Uploads\ImageService;
use BookStack\Util\HtmlContentFilter;
use BookStack\Util\HtmlDocument;
use Closure;
use DOMElement;
use DOMNode;
use DOMNodeList;
Expand Down Expand Up @@ -275,21 +276,65 @@ protected function toPlainText(): string
*/
public function render(bool $blankIncludes = false): string
{
$content = $this->page->html ?? '';
$html = $this->page->html ?? '';

// Nothing to parse or filter when there is no content; return it as-is.
if (empty($html)) {
return $html;
}

// The parser works on a document built from the page HTML and is handed
// the closure from getContentProviderClosure() along with that document.
$doc = new HtmlDocument($html);
$contentProvider = $this->getContentProviderClosure($blankIncludes);
$parser = new PageIncludeParser($doc, $contentProvider);

// Run at most three parse passes, stopping early as soon as a pass
// reports that it added no nodes.
$nodesAdded = 1;
for ($includeDepth = 0; $includeDepth < 3 && $nodesAdded !== 0; $includeDepth++) {
$nodesAdded = $parser->parse();
}

// A depth above one means a second pass was started, which only happens
// when the first pass added nodes; only then are IDs reprocessed.
if ($includeDepth > 1) {
$idMap = [];
$changeMap = [];
$this->updateIdsRecursively($doc->getBody(), 0, $idMap, $changeMap);
}

// Scripts are stripped unless the "app.allow_content_scripts" config
// option is enabled.
if (!config('app.allow_content_scripts')) {
$content = HtmlContentFilter::removeScripts($content);
HtmlContentFilter::removeScriptsFromDocument($doc);
}

if ($blankIncludes) {
$content = $this->blankPageIncludes($content);
} else {
for ($includeDepth = 0; $includeDepth < 3; $includeDepth++) {
$content = $this->parsePageIncludes($content);
return $doc->getBodyInnerHtml();
}

/**
* Get the closure used to fetch content for page includes.
*
* The returned closure accepts a PageIncludeTag and returns the
* PageIncludeContent to use in place of that tag.
*
* @param bool $blankIncludes When true, the closure builds its content from
*                            an empty HTML string for every tag, without
*                            looking up the referenced page.
*/
protected function getContentProviderClosure(bool $blankIncludes): Closure
{
// Held in a local so it can be captured by the closure's "use" list.
$contextPage = $this->page;

return function (PageIncludeTag $tag) use ($blankIncludes, $contextPage): PageIncludeContent {
// Blanking mode: no page lookup and no theme event dispatch.
if ($blankIncludes) {
return PageIncludeContent::fromHtmlAndTag('', $tag);
}

// Look up the referenced page via Page::visible(); when no page is
// found the content is built from an empty HTML string instead.
$matchedPage = Page::visible()->find($tag->getPageId());
$content = PageIncludeContent::fromHtmlAndTag($matchedPage->html ?? '', $tag);

// The theme event is only dispatched when something listens for it.
// Clones of the pages are passed along (presumably so listeners cannot
// alter the instances used here - confirm against the theme system).
if (Theme::hasListeners(ThemeEvents::PAGE_INCLUDE_PARSE)) {
$themeReplacement = Theme::dispatch(
ThemeEvents::PAGE_INCLUDE_PARSE,
$tag->tagContent,
$content->toHtml(),
clone $contextPage,
$matchedPage ? (clone $matchedPage) : null,
);

// A non-null dispatch result replaces the content found above, and is
// handled as inline HTML after being cast to a string.
if ($themeReplacement !== null) {
$content = PageIncludeContent::fromInlineHtml(strval($themeReplacement));
}
}
}

return $content;
return $content;
};
}

/**
Expand Down Expand Up @@ -337,83 +382,4 @@ protected function headerNodesToLevelList(DOMNodeList $nodeList): array

return $tree->toArray();
}

/**
 * Strip every page include tag ("{{@<page_id>...}}") out of the given HTML,
 * leaving the rest of the markup untouched.
 */
protected function blankPageIncludes(string $html): string
{
    $includeTagPattern = "/{{@\s?([0-9].*?)}}/";
    $withoutIncludes = preg_replace($includeTagPattern, '', $html);

    return $withoutIncludes;
}

/**
 * Parse any include tags "{{@<page_id>#section}}" to be part of the page.
 *
 * Every tag found in the given HTML is replaced with the HTML of the
 * referenced page, or with a single section of it when "#section" is given.
 * Tags referencing a page that cannot be found are replaced with an empty
 * string. The PAGE_INCLUDE_PARSE theme event is dispatched for each tag, and
 * a non-null result from it takes priority over the default replacement.
 */
protected function parsePageIncludes(string $html): string
{
    $matches = [];
    preg_match_all("/{{@\s?([0-9].*?)}}/", $html, $matches);

    foreach ($matches[1] as $index => $includeId) {
        $fullMatch = $matches[0][$index];
        $splitInclude = explode('#', $includeId, 2);

        // Get page id from reference.
        // The pattern only captures references starting with a digit and
        // intval() always yields an integer, so no NaN check is required.
        $pageId = intval($splitInclude[0]);

        // Find page to use, and default replacement to empty string for non-matches.
        /** @var ?Page $matchedPage */
        $matchedPage = Page::visible()->find($pageId);
        $replacement = '';

        if ($matchedPage && count($splitInclude) === 1) {
            // If we only have page id, just insert all page html and continue.
            // The html may be unset on a page, so fall back to an empty string
            // to keep the replacement a string for the theme event & str_replace.
            $replacement = $matchedPage->html ?? '';
        } elseif ($matchedPage && count($splitInclude) > 1) {
            // Otherwise, if our include tag defines a section, load that specific content
            $innerContent = $this->fetchSectionOfPage($matchedPage, $splitInclude[1]);
            $replacement = trim($innerContent);
        }

        // Clones are passed to the event rather than the working instances.
        $themeReplacement = Theme::dispatch(
            ThemeEvents::PAGE_INCLUDE_PARSE,
            $includeId,
            $replacement,
            clone $this->page,
            $matchedPage ? (clone $matchedPage) : null,
        );

        // Perform the content replacement
        $html = str_replace($fullMatch, $themeReplacement ?? $replacement, $html);
    }

    return $html;
}

/**
 * Extract the HTML for the element with the given ID from the given page.
 *
 * Returns an empty string when the page has no element with that ID.
 * For table, list and preformatted elements the element itself is part of
 * the result; for any other element only its inner HTML is returned.
 */
protected function fetchSectionOfPage(Page $page, string $sectionId): string
{
    $document = new HtmlDocument($page->html);
    $target = $document->getElementById($sectionId);

    // No element carries the requested ID, so there is nothing to include.
    if ($target === null) {
        return '';
    }

    // These elements are kept together with their own wrapping tag.
    $wrapperTags = ['table', 'ul', 'ol', 'pre'];
    $tagName = strtolower($target->nodeName);

    return in_array($tagName, $wrapperTags)
        ? $document->getNodeOuterHtml($target)
        : $document->getNodeInnerHtml($target);
}
}
85 changes: 85 additions & 0 deletions app/Entities/Tools/PageIncludeContent.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
<?php

namespace BookStack\Entities\Tools;

use BookStack\Util\HtmlDocument;
use DOMNode;

/**
 * The content to be used in place of a single page include tag.
 *
 * Holds the DOM nodes making up the content, along with a flag stating
 * whether that content is to be handled as inline content.
 */
class PageIncludeContent
{
    /**
     * Lower-case names of the elements which, when targeted by a section ID,
     * are used whole (wrapping element included) instead of only their children.
     *
     * @var string[]
     */
    protected static array $topLevelTags = ['table', 'ul', 'ol', 'pre'];

    /**
     * @param DOMNode[] $contents
     * @param bool $isInline
     */
    public function __construct(
        protected array $contents,
        protected bool $isInline,
    ) {
    }

    /**
     * Build the content for the given tag out of the given page HTML.
     *
     * - Empty HTML, or a section ID not found in the HTML, gives empty inline content.
     * - A tag without a section ID gives all body children, flagged as not inline.
     * - A section ID matching a top-level tag gives that element itself, flagged as not inline.
     * - Any other matching section gives the element's child nodes, flagged as inline.
     */
    public static function fromHtmlAndTag(string $html, PageIncludeTag $tag): self
    {
        // Compared strictly since empty() would also discard the valid content "0".
        if ($html === '') {
            return new self([], true);
        }

        $doc = new HtmlDocument($html);

        // Only a genuinely absent section ID selects the whole page;
        // a plain truthiness check would also ignore an ID of "0".
        $sectionId = $tag->getSectionId();
        if ((string) $sectionId === '') {
            $contents = [...$doc->getBodyChildren()];
            return new self($contents, false);
        }

        $section = $doc->getElementById($sectionId);
        if (!$section) {
            return new self([], true);
        }

        $isTopLevel = in_array(strtolower($section->nodeName), static::$topLevelTags, true);
        $contents = $isTopLevel ? [$section] : [...$section->childNodes];
        return new self($contents, !$isTopLevel);
    }

    /**
     * Build inline content from the given HTML string, using all of the
     * body children of the parsed HTML. An empty string gives empty content.
     */
    public static function fromInlineHtml(string $html): self
    {
        // Compared strictly so that a replacement of "0" is not thrown away.
        if ($html === '') {
            return new self([], true);
        }

        $doc = new HtmlDocument($html);

        return new self([...$doc->getBodyChildren()], true);
    }

    /**
     * Check whether this content is flagged as inline.
     */
    public function isInline(): bool
    {
        return $this->isInline;
    }

    /**
     * Check whether this content holds no nodes at all.
     */
    public function isEmpty(): bool
    {
        return empty($this->contents);
    }

    /**
     * @return DOMNode[]
     */
    public function toDomNodes(): array
    {
        return $this->contents;
    }

    /**
     * Serialise the held nodes, in order, to a single HTML string.
     * Each node is rendered via the document which owns it.
     */
    public function toHtml(): string
    {
        $html = '';

        foreach ($this->contents as $content) {
            $html .= $content->ownerDocument->saveHTML($content);
        }

        return $html;
    }
}
Loading