Skip to content

Commit

Permalink
New feature: support for {include} syntax. Fixes getpelican#1902.
Browse files Browse the repository at this point in the history
The new {include} syntax makes it possible to include
frequently used text snippets into your content.
  • Loading branch information
atodorov authored and Lucas-C committed Oct 1, 2019
1 parent 047d884 commit c7bfb5c
Show file tree
Hide file tree
Showing 15 changed files with 366 additions and 8 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Expand Up @@ -2,6 +2,7 @@
.*.swp
.*.swo
*.pyc
.cache/
.DS_Store
docs/_build
docs/fr/_build
Expand All @@ -16,3 +17,4 @@ six-*.egg/
venv
samples/output
*.pem
pip-wheel-metadata/
3 changes: 3 additions & 0 deletions RELEASE.md
@@ -0,0 +1,3 @@
Release type: minor

Add support for the ``{include}`` syntax
39 changes: 39 additions & 0 deletions docs/content.rst
Expand Up @@ -369,6 +369,45 @@ Linking to authors, categories, index and tags
You can link to authors, categories, index and tags using the ``{author}name``,
``{category}foobar``, ``{index}`` and ``{tag}tagname`` syntax.

Including common text into your content
---------------------------------------

From Pelican 4.2 onward, you can include common text snippets in your content using
the ``{include}file.ext`` syntax. You can specify semi-absolute paths starting
from the ``PATH`` directory, e.g. ``{include}/pages/disclaimer.html``, or use
relative paths, e.g. ``{include}notice.html``. Relative paths are resolved
against the directory of the file that contains the ``{include}``.
For example, when you have the following content layout::

    content
    ├── notice2.html
    └── pages
        ├── page1.html
        └── notice1.html

Then the includes may look like::

<html>
<head>
<title>PAGE 1</title>
</head>
<body>
This is the content of page 1

{include}../notice2.html
</body>
</html>


``notice2.html`` looks like::

{include}pages/notice1.html
This is the second warning about relative paths

When using ``{include}``, it is best to exclude the included files from
processing via the ``IGNORE_FILES`` setting. Otherwise Pelican will try to
render them as regular content and will most likely fail.

Deprecated internal link syntax
-------------------------------

Expand Down
101 changes: 99 additions & 2 deletions pelican/contents.py
Expand Up @@ -11,7 +11,7 @@
import pytz

import six
from six.moves.urllib.parse import urljoin, urlparse, urlunparse
from six.moves.urllib.parse import unquote, urljoin, urlparse, urlunparse

from pelican import signals
from pelican.settings import DEFAULT_CONFIG
Expand All @@ -36,14 +36,15 @@ class Content(object):
:param settings: the settings dictionary (optional).
:param source_path: The location of the source of this content (if any).
:param context: The shared context between generators.
:param readers: readers.Readers() instance used for rendering includes.
"""
# Deprecated accessor kept for backward compatibility: the decorator
# arguments mark ``filename`` as superseded by ``source_path`` since 3.2.0.
# NOTE(review): the body only returns None, so presumably the decorator
# supplies the real behaviour -- confirm against its definition.
@deprecated_attribute(old='filename', new='source_path', since=(3, 2, 0))
def filename():
return None

def __init__(self, content, metadata=None, settings=None,
source_path=None, context=None):
source_path=None, context=None, readers=None):
if metadata is None:
metadata = {}
if settings is None:
Expand Down Expand Up @@ -148,8 +149,15 @@ def __init__(self, content, metadata=None, settings=None,
if 'summary' in metadata:
self._summary = metadata['summary']

# used for rendering {includes}
self._readers = readers

signals.content_object_init.send(self)

@property
def readers(self):
    """Readers object stored on this content as ``_readers``.

    It is used for rendering ``{include}`` markers.
    """
    return self._readers

def __str__(self):
    """Return the source path when there is one, else the ``repr``."""
    if self.source_path:
        return self.source_path
    return repr(self)

Expand Down Expand Up @@ -257,6 +265,8 @@ def _link_replacer(self, siteurl, m):
siteurl += '/'

# XXX Put this in a different location.
if what in {'filename', 'static', 'attach'}:
if path.startswith('/'):
path = path[1:]
Expand Down Expand Up @@ -334,6 +344,30 @@ def _get_intrasite_link_regex(self):
\2""".format(intrasite_link_regex)
return re.compile(regex, re.X)

def _path_replacer(self, path, relative_dir=None):
    """Resolve ``path`` to a content-relative source path.

    A path starting with ``/`` only has that leading slash removed. Any
    other path is joined onto ``relative_dir`` (``self.relative_dir``
    when that argument is falsy) and handed to
    ``self.get_relative_source_path``.

    If the result is not listed in ``self._context['filenames']`` but
    its URL-unquoted form is, the unquoted form is used. Any ``%20``
    still present afterwards is turned into a space.
    """
    base_dir = relative_dir or self.relative_dir

    if path.startswith('/'):
        resolved = path[1:]
    else:
        # relative to the directory of the including content
        joined = os.path.join(base_dir, path)
        resolved = self.get_relative_source_path(joined)

    known_files = self._context['filenames']
    if resolved not in known_files:
        decoded = unquote(resolved)
        if decoded in known_files:
            resolved = decoded

    return resolved.replace('%20', ' ')

def _update_content(self, content, siteurl):
"""Update the content attribute.
Expand All @@ -348,6 +382,7 @@ def _update_content(self, content, siteurl):
return content

hrefs = self._get_intrasite_link_regex()
import pdb; pdb.set_trace()
return hrefs.sub(lambda m: self._link_replacer(siteurl, m), content)

def get_static_links(self):
Expand All @@ -367,12 +402,74 @@ def get_static_links(self):
os.path.join(self.relative_dir, path)
)
path = path.replace('%20', ' ')
# path = self._path_replacer(path)
static_links.add(path)
return static_links

def get_siteurl(self):
    """Return ``localsiteurl`` from the context, or ``''`` if unset."""
    context = self._context
    return context.get('localsiteurl', '')

def _update_includes(self, content, source_path=None):
    """Replace every ``{include}some.file`` marker with that file's text.

    Included files are read with the reader registered for their
    extension and are themselves scanned for further markers, so
    includes may nest.

    :param content: text to scan for ``{include}`` markers.
    :param source_path: directory against which relative include paths
        in ``content`` are resolved. When falsy, ``_path_replacer``
        falls back to ``self.relative_dir``.
    :return: ``content`` with all resolvable includes expanded. A marker
        whose target file is missing, or whose extension has no reader,
        is left in the text as ``{include}path`` and a warning is
        logged.
    :raises RuntimeError: if a file (directly or indirectly) includes
        itself.
    """
    hrefs = re.compile(r"""[{|]include[|}](?P<path>[\w./]+)""", re.X)

    # Python 2 has no `nonlocal`, so `replacer()` cannot rebind a variable
    # of this function. A one-element list is used as a mutable cell that
    # holds the directory relative includes are currently resolved against.
    source_dir = [source_path]
    # Files whose expansion is in progress, outermost first. Only files
    # on this stack can form a cycle; a file that was fully expanded
    # earlier may legitimately be included again.
    include_stack = []

    def replacer(m):
        raw_path = m.group('path')
        unresolved = ''.join(('{include}', raw_path))

        path = self._path_replacer(raw_path, source_dir[0])
        path = posixize_path(
            os.path.abspath(os.path.join(self.settings['PATH'], path))
        )

        if not os.path.isfile(path):
            logger.warning("Unable to find `%s`, skipping include.", path)
            return unresolved

        # extension without the leading dot
        ext = os.path.splitext(path)[1][1:]

        # `readers` is optional on the constructor; without it no file
        # type can be rendered, so treat every include as unreadable.
        readers = self.readers
        reader_classes = {} if readers is None else readers.reader_classes
        if ext not in reader_classes:
            logger.warning("Unable to read `%s`, skipping include.", path)
            return unresolved

        # recursion stop
        if path in include_stack:
            raise RuntimeError(
                "Circular inclusion detected for '%s'" % path)

        reader = reader_classes[ext](self.settings)
        text, _meta = reader.read(path)

        # Nested markers are relative to the included file, so switch
        # the working directory for the recursive pass. The previous
        # value is saved *before* it is overwritten, and restored
        # afterwards so sibling includes in the parent resolve correctly.
        previous_dir = source_dir[0]
        source_dir[0] = posixize_path(os.path.dirname(path))
        include_stack.append(path)
        try:
            return hrefs.sub(replacer, text)
        finally:
            include_stack.pop()
            source_dir[0] = previous_dir

    return hrefs.sub(replacer, content)

@memoized
def get_content(self, siteurl):
if hasattr(self, '_get_content'):
Expand Down
9 changes: 7 additions & 2 deletions pelican/readers.py
Expand Up @@ -500,7 +500,12 @@ def read(self, filename):
metadata = {}
for k in parser.metadata:
metadata[k] = self.process_metadata(k, parser.metadata[k])
return parser.body, metadata

if parser.body:
return parser.body, metadata
else:
# in case we're parsing HTML includes
return content, metadata


class Readers(FileStampDataCacher):
Expand Down Expand Up @@ -637,7 +642,7 @@ def typogrify_wrapper(text):

return content_class(content=content, metadata=metadata,
settings=self.settings, source_path=path,
context=context)
context=context, readers=self)


def find_empty_alt(content, path):
Expand Down
2 changes: 2 additions & 0 deletions pelican/tests/content/include.md
@@ -0,0 +1,2 @@
**this is Markdown**
Here is a [link](https://docs.getpelican.com).
2 changes: 2 additions & 0 deletions pelican/tests/content/include.unknown
@@ -0,0 +1,2 @@
**this is Markdown**
Here is a [link](https://docs.getpelican.com).
2 changes: 2 additions & 0 deletions pelican/tests/content/include/include3.html
@@ -0,0 +1,2 @@
this file includes another in a different directory
{include}../include1.html
2 changes: 2 additions & 0 deletions pelican/tests/content/include/include4.html
@@ -0,0 +1,2 @@
this file includes another via absolute path
{include}/include1.html
1 change: 1 addition & 0 deletions pelican/tests/content/include1.html
@@ -0,0 +1 @@
<span>this content has been included</span>
2 changes: 2 additions & 0 deletions pelican/tests/content/include2.html
@@ -0,0 +1,2 @@
this file includes another
{include}include1.html
1 change: 1 addition & 0 deletions pelican/tests/content/include5.html
@@ -0,0 +1 @@
{include}include6.html
1 change: 1 addition & 0 deletions pelican/tests/content/include6.html
@@ -0,0 +1 @@
{include}include5.html
5 changes: 4 additions & 1 deletion pelican/tests/test_cache.py
Expand Up @@ -162,8 +162,11 @@ def test_article_object_caching(self):
- 2012-11-30_md_w_filename_meta#foo-bar.md
- empty.md
- empty_with_bom.md
There are 5 more include* files which are HTML or Markdown snippets
and also not valid.
"""
self.assertEqual(generator.readers.read_file.call_count, 6)
self.assertEqual(generator.readers.read_file.call_count, 11)

@unittest.skipUnless(MagicMock, 'Needs Mock module')
def test_article_reader_content_caching(self):
Expand Down

0 comments on commit c7bfb5c

Please sign in to comment.