Skip to content

Commit

Permalink
New feature: support for {include} syntax. Fixes getpelican#1902.
Browse files Browse the repository at this point in the history
The new {include} syntax makes it possible to include
frequently used text snippets into your content.
  • Loading branch information
atodorov authored and Lucas-C committed Oct 2, 2019
1 parent 047d884 commit 2b6f00d
Show file tree
Hide file tree
Showing 26 changed files with 377 additions and 32 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Expand Up @@ -2,6 +2,7 @@
.*.swp
.*.swo
*.pyc
.cache/
.DS_Store
docs/_build
docs/fr/_build
Expand All @@ -16,3 +17,4 @@ six-*.egg/
venv
samples/output
*.pem
pip-wheel-metadata/
3 changes: 3 additions & 0 deletions RELEASE.md
@@ -0,0 +1,3 @@
Release type: minor

Add support for the ``{include}`` syntax
39 changes: 39 additions & 0 deletions docs/content.rst
Expand Up @@ -369,6 +369,45 @@ Linking to authors, categories, index and tags
You can link to authors, categories, index and tags using the ``{author}name``,
``{category}foobar``, ``{index}`` and ``{tag}tagname`` syntax.

Including common text into your content
---------------------------------------

From Pelican 4.2 onward, you can include common text snippets in your content using
the ``{include}file.ext`` syntax. You can specify semi-absolute paths starting
from the ``PATH`` directory, e.g. ``{include}/pages/disclaimer.html``, or use
relative paths, e.g. ``{include}notice.html``. Relative paths are resolved
against the directory of the file containing the ``{include}``.
For example, when you have the following content layout::

    content
    ├── notice2.html
    └── pages
        ├── page1.html
        └── notice1.html

Then the includes may look like::

<html>
<head>
<title>PAGE 1</title>
</head>
<body>
This is the content of page 1

{include}../notice2.html
</body>
</html>


``notice2.html`` looks like::

{include}pages/notice1.html
This is the second warning about relative paths

When using ``{include}`` it is best to blacklist the included files using the
``IGNORE_FILES`` setting. Otherwise Pelican will try to render them as regular
content and will most likely fail!

Deprecated internal link syntax
-------------------------------

Expand Down
131 changes: 107 additions & 24 deletions pelican/contents.py
Expand Up @@ -11,7 +11,7 @@
import pytz

import six
from six.moves.urllib.parse import urljoin, urlparse, urlunparse
from six.moves.urllib.parse import unquote, urljoin, urlparse, urlunparse

from pelican import signals
from pelican.settings import DEFAULT_CONFIG
Expand Down Expand Up @@ -359,14 +359,7 @@ def get_static_links(self):
path = value.path
if what not in {'static', 'attach'}:
continue
if path.startswith('/'):
path = path[1:]
else:
# relative to the source path of this content
path = self.get_relative_source_path(
os.path.join(self.relative_dir, path)
)
path = path.replace('%20', ' ')
path = relativize_path(self.settings['PATH'], self.relative_dir, path)
static_links.add(path)
return static_links

Expand Down Expand Up @@ -449,24 +442,11 @@ def get_relative_source_path(self, source_path=None):
"""
if not source_path:
source_path = self.source_path
if source_path is None:
return None

return posixize_path(
os.path.relpath(
os.path.abspath(os.path.join(
self.settings['PATH'],
source_path)),
os.path.abspath(self.settings['PATH'])
))
return get_relative_source_path(self.settings['PATH'], source_path)

@property
def relative_dir(self):
return posixize_path(
os.path.dirname(
os.path.relpath(
os.path.abspath(self.source_path),
os.path.abspath(self.settings['PATH']))))
return relative_dir(self.settings['PATH'], self.source_path)

def refresh_metadata_intersite_links(self):
for key in self.settings['FORMATTED_FIELDS']:
Expand Down Expand Up @@ -613,3 +593,106 @@ def _log_reason(reason):

self.override_save_as = new_save_as
self.override_url = new_url


def get_relative_source_path(content_path, source_path):
    """
    Express `source_path` relative to the `content_path` directory.

    `source_path` is first joined onto `content_path` (so an absolute
    `source_path` is used as-is), made absolute, and then rewritten relative
    to the absolute `content_path`. The result is passed through
    `posixize_path` before being returned.

    Returns None when `source_path` is None.
    """
    if source_path is None:
        return None

    absolute_source = os.path.abspath(os.path.join(content_path, source_path))
    content_root = os.path.abspath(content_path)
    return posixize_path(os.path.relpath(absolute_source, content_root))


def relativize_path(content_path, relative_dir, path):
    """
    Turn a link/include `path` into a path relative to `content_path`.

    A `path` starting with '/' is treated as rooted at the content
    directory, so only the leading slash is removed. Any other `path` is
    taken to be relative to `relative_dir` and is resolved through
    `get_relative_source_path`. In both cases every '%20' is then replaced
    by a literal space.
    """
    if not path.startswith('/'):
        joined = os.path.join(relative_dir, path)
        resolved = get_relative_source_path(content_path, joined)
    else:
        resolved = path[1:]

    return resolved.replace('%20', ' ')


def relative_dir(content_path, path):
    """
    Return the directory that contains `path`, relative to `content_path`.

    Both arguments are made absolute before the relative path is computed;
    the directory part of that relative path is passed through
    `posixize_path` and returned.
    """
    absolute_path = os.path.abspath(path)
    content_root = os.path.abspath(content_path)
    path_from_root = os.path.relpath(absolute_path, content_root)
    return posixize_path(os.path.dirname(path_from_root))


def insert_included_content(content, source_path, content_path, exclude_exts=()):
    """
    Replace every ``{include}some.file`` marker in `content` with the text
    of the referenced file, recursively expanding markers found in the
    included text.

    :param content: the text in which to expand include markers.
    :param source_path: path of the file `content` was read from; relative
        include paths are resolved against its directory.
    :param content_path: the content root; include paths starting with '/'
        are resolved against it.
    :param exclude_exts: file extensions (without the leading dot) whose
        includes must be left unexpanded.
    :return: `content` with the includes expanded. A marker whose target
        does not exist (a warning is logged) or has an excluded extension
        is rewritten as ``{include}<path>``.
    :raises RuntimeError: if a file ends up including itself, directly or
        through other included files.
    """
    regex = r"""(?P<indent>[ \t]+)?[{|]include[|}](?P<path>[\w./]+)"""
    hrefs = re.compile(regex, re.X)
    # Stack of the files currently being expanded. A file is pushed before
    # recursing into it and popped afterwards, so only a genuine cycle is
    # reported: including the same snippet twice in one document is legal.
    include_stack = []
    # In Python 3.x we could use `nonlocal` in `replacer()` to rebind the
    # current directory. Python 2.x has no such declaration, so the
    # directory is kept in a one-element mutable container instead.
    source_dir = [relative_dir(content_path, source_path)]

    def replacer(m):
        raw_path, indent = m.group('path'), m.group('indent')
        # Text emitted when the include is skipped (same as before: the
        # marker is normalised to `{include}` and the indent is not kept).
        unexpanded = ''.join(('{include}', raw_path))

        path = relativize_path(content_path, source_dir[0], raw_path)
        path = posixize_path(
            os.path.abspath(os.path.join(content_path, path))
        )

        if not os.path.isfile(path):
            logger.warning("Unable to find `%s`, skipping include.", path)
            return unexpanded

        # extension without its leading dot
        ext = os.path.splitext(path)[1][1:]
        if ext in exclude_exts:
            return unexpanded

        # recursion stop: the file is already being expanded further up
        if path in include_stack:
            raise RuntimeError("Circular inclusion detected for '%s'" % path)

        with open(path) as content_file:
            text = content_file.read()

        if indent:
            # Positional `True` (keepends) works on both Python 2 and 3,
            # whereas the keyword form is rejected by Python 2.
            text = ''.join(indent + line for line in text.splitlines(True))

        # While recursing, relative paths must be resolved against the
        # directory of the included file. Remember the directory of the
        # including file *before* overwriting it so that it can really be
        # restored afterwards.
        previous_dir = source_dir[0]
        source_dir[0] = posixize_path(os.path.dirname(path))
        include_stack.append(path)
        try:
            # recursively replace other includes
            text = hrefs.sub(replacer, text)
        finally:
            include_stack.pop()
            source_dir[0] = previous_dir
        return text

    return hrefs.sub(replacer, content)

36 changes: 31 additions & 5 deletions pelican/readers.py
Expand Up @@ -5,6 +5,7 @@
import os
import re
from collections import OrderedDict
from tempfile import NamedTemporaryFile

import docutils
import docutils.core
Expand All @@ -19,7 +20,8 @@
from pelican import rstdirectives # NOQA
from pelican import signals
from pelican.cache import FileStampDataCacher
from pelican.contents import Author, Category, Page, Tag
from pelican.contents import Author, Category, Page, Tag, \
insert_included_content
from pelican.utils import SafeDatetime, escape_html, get_date, pelican_open, \
posixize_path

Expand Down Expand Up @@ -286,9 +288,23 @@ def _get_publisher(self, source_path):

def read(self, source_path):
"""Parses restructured text"""
pub = self._get_publisher(source_path)
parts = pub.writer.parts
content = parts.get('body')
with open(source_path) as content_file:
content = content_file.read()
exclude_exts = set(Readers(self.settings).extensions) - set(self.file_extensions)
content = insert_included_content(content, source_path, self.settings['PATH'], exclude_exts)
# We have pre-processed the file content, but docutils requires a file as input,
# so we use a temporary one:
with NamedTemporaryFile('w+') as tmp_file:
tmp_file.write(content)
tmp_file.seek(0)
try:
pub = self._get_publisher(tmp_file.name)
parts = pub.writer.parts
content = parts.get('body')
except docutils.ApplicationError as err:
# We fix any potential error message to reference the original file:
err.args = (err.args[0].replace(tmp_file.name, source_path),)
raise err

metadata = self._parse_metadata(pub.document, source_path)
metadata.setdefault('title', parts.get('title'))
Expand Down Expand Up @@ -349,6 +365,8 @@ def read(self, source_path):
self._source_path = source_path
self._md = Markdown(**self.settings['MARKDOWN'])
with pelican_open(source_path) as text:
exclude_exts = set(Readers(self.settings).extensions) - set(self.file_extensions)
text = insert_included_content(text, source_path, self.settings['PATH'], exclude_exts)
content = self._md.convert(text)

if hasattr(self._md, 'Meta'):
Expand Down Expand Up @@ -500,7 +518,12 @@ def read(self, filename):
metadata = {}
for k in parser.metadata:
metadata[k] = self.process_metadata(k, parser.metadata[k])
return parser.body, metadata

if parser.body:
return parser.body, metadata
else:
# in case we're parsing HTML includes
return content, metadata


class Readers(FileStampDataCacher):
Expand Down Expand Up @@ -596,6 +619,9 @@ def read_file(self, base_path, path, content_class=Page, fmt=None,
metadata.update(_filter_discardable_metadata(reader_metadata))

if content:
# Exclude file extensions already processed by their dedicated readers:
exclude_exts = MarkdownReader.file_extensions + RstReader.file_extensions
content = insert_included_content(content, path, self.settings['PATH'], exclude_exts)
# find images with empty alt
find_empty_alt(content, path)

Expand Down
5 changes: 5 additions & 0 deletions pelican/tests/content/include/html_from_subdir_includer.md
@@ -0,0 +1,5 @@
_includes HTML_:

{include}subdir/include_other.html

^Included content above^
5 changes: 5 additions & 0 deletions pelican/tests/content/include/html_includer.md
@@ -0,0 +1,5 @@
_includes HTML_:

{include}included.html

^Included content above^
6 changes: 6 additions & 0 deletions pelican/tests/content/include/html_includer.rst
@@ -0,0 +1,6 @@
Article including some HTML file
################################

{include}included.html

^Included content above^
5 changes: 5 additions & 0 deletions pelican/tests/content/include/html_includer_with_full_path.md
@@ -0,0 +1,5 @@
_includes HTML_:

{include}/pelican/tests/content/include/included.html

^Included content above^
1 change: 1 addition & 0 deletions pelican/tests/content/include/include_other.html
@@ -0,0 +1 @@
{include}include_sibling.html
1 change: 1 addition & 0 deletions pelican/tests/content/include/include_sibling.html
@@ -0,0 +1 @@
{include}include_other.html
1 change: 1 addition & 0 deletions pelican/tests/content/include/included.html
@@ -0,0 +1 @@
<span>this content has been included</span>
2 changes: 2 additions & 0 deletions pelican/tests/content/include/included.md
@@ -0,0 +1,2 @@
**this is Markdown**
Here is a [link](https://docs.getpelican.com).
3 changes: 3 additions & 0 deletions pelican/tests/content/include/included.py
@@ -0,0 +1,3 @@
from __future__ import braces
import antigravity
import this
2 changes: 2 additions & 0 deletions pelican/tests/content/include/included.rst
@@ -0,0 +1,2 @@
**this is reStructuredText**
Here is a `link <https://docs.getpelican.com>`_.
5 changes: 5 additions & 0 deletions pelican/tests/content/include/includer_of_md_includer.md
@@ -0,0 +1,5 @@
START

{include}md_includer.md

END
5 changes: 5 additions & 0 deletions pelican/tests/content/include/inexisting_file_includer.md
@@ -0,0 +1,5 @@
_includes HTML_:

{include}inexisting_file.html

^Included content above^
2 changes: 2 additions & 0 deletions pelican/tests/content/include/md_includer.html
@@ -0,0 +1,2 @@
<em>includes Markdown</em>: {include}included.md
^Included content above^
2 changes: 2 additions & 0 deletions pelican/tests/content/include/md_includer.md
@@ -0,0 +1,2 @@
_inline includes Markdown_: {include}included.md
^Included content above^
3 changes: 3 additions & 0 deletions pelican/tests/content/include/py_includer.md
@@ -0,0 +1,3 @@
```
{include}included.py
```
6 changes: 6 additions & 0 deletions pelican/tests/content/include/py_includer.rst
@@ -0,0 +1,6 @@
Article with an indented code block
###################################

.. code-block:: python
{include}included.py
5 changes: 5 additions & 0 deletions pelican/tests/content/include/rst_includer.rst
@@ -0,0 +1,5 @@
Article with an inline included reStructuredText file
#####################################################

*inline includes reStructuredText*: {include}included.rst
^Included content above^
2 changes: 2 additions & 0 deletions pelican/tests/content/include/subdir/include_other.html
@@ -0,0 +1,2 @@
this file includes another via absolute path
{include}/pelican/tests/content/include/subdir/include_parent.html
2 changes: 2 additions & 0 deletions pelican/tests/content/include/subdir/include_parent.html
@@ -0,0 +1,2 @@
this file includes another in a parent directory
{include}../included.html
5 changes: 4 additions & 1 deletion pelican/tests/test_cache.py
Expand Up @@ -162,8 +162,11 @@ def test_article_object_caching(self):
- 2012-11-30_md_w_filename_meta#foo-bar.md
- empty.md
- empty_with_bom.md
There are 5 more include* files which are HTML or Markdown snippets
and also not valid.
"""
self.assertEqual(generator.readers.read_file.call_count, 6)
self.assertEqual(generator.readers.read_file.call_count, 11)

@unittest.skipUnless(MagicMock, 'Needs Mock module')
def test_article_reader_content_caching(self):
Expand Down

0 comments on commit 2b6f00d

Please sign in to comment.