diff --git a/.gitignore b/.gitignore
index 459469464c..07c9956d08 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
.*.swp
.*.swo
*.pyc
+.cache/
.DS_Store
docs/_build
docs/fr/_build
@@ -16,3 +17,4 @@ six-*.egg/
venv
samples/output
*.pem
+pip-wheel-metadata/
diff --git a/RELEASE.md b/RELEASE.md
new file mode 100644
index 0000000000..4aafe7421c
--- /dev/null
+++ b/RELEASE.md
@@ -0,0 +1,3 @@
+Release type: minor
+
+Add support for the ``{include}`` syntax
diff --git a/docs/content.rst b/docs/content.rst
index f0022c35e1..be4ab0ce34 100644
--- a/docs/content.rst
+++ b/docs/content.rst
@@ -369,6 +369,45 @@ Linking to authors, categories, index and tags
You can link to authors, categories, index and tags using the ``{author}name``,
``{category}foobar``, ``{index}`` and ``{tag}tagname`` syntax.
+Including common text into your content
+---------------------------------------
+
+From Pelican 4.2 onward, you can include common text snippets in your content
+using the ``{include}file.ext`` syntax. Paths starting with ``/`` are resolved
+from the ``PATH`` directory, e.g. ``{include}/pages/disclaimer.html``; all other
+paths are relative, e.g. ``{include}notice.html``, and are resolved against the
+directory of the file that contains the ``{include}``.
+For example, given the following content layout::
+
+    content
+    ├── notice2.html
+    └── pages
+        ├── page1.html
+        └── notice1.html
+
+Then ``page1.html`` may use an include like this::
+
+    <html>
+    <head>
+        <title>PAGE 1</title>
+    </head>
+    <body>
+        This is the content of page 1
+
+        {include}../notice2.html
+
+    </body>
+    </html>
+
+``notice2.html`` looks like::
+
+ {include}pages/notice1.html
+ This is the second warning about relative paths
+
+When using ``{include}``, it is best to exclude the included files via the
+``IGNORE_FILES`` setting. Otherwise Pelican will also try to render them as
+regular content and will most likely fail.
+
Deprecated internal link syntax
-------------------------------
diff --git a/pelican/contents.py b/pelican/contents.py
index a862db2d66..906d8a668c 100644
--- a/pelican/contents.py
+++ b/pelican/contents.py
@@ -11,7 +11,7 @@
import pytz
import six
-from six.moves.urllib.parse import urljoin, urlparse, urlunparse
+from six.moves.urllib.parse import unquote, urljoin, urlparse, urlunparse
from pelican import signals
from pelican.settings import DEFAULT_CONFIG
@@ -36,6 +36,7 @@ class Content(object):
:param settings: the settings dictionary (optional).
:param source_path: The location of the source of this content (if any).
:param context: The shared context between generators.
+ :param readers: readers.Readers() instance used for rendering includes.
"""
@deprecated_attribute(old='filename', new='source_path', since=(3, 2, 0))
@@ -43,7 +44,7 @@ def filename():
return None
def __init__(self, content, metadata=None, settings=None,
- source_path=None, context=None):
+ source_path=None, context=None, readers=None):
if metadata is None:
metadata = {}
if settings is None:
@@ -148,8 +149,15 @@ def __init__(self, content, metadata=None, settings=None,
if 'summary' in metadata:
self._summary = metadata['summary']
+ # used for rendering {includes}
+ self._readers = readers
+
signals.content_object_init.send(self)
+ @property
+ def readers(self):
+ return self._readers  # the ``readers`` argument given to __init__ (None by default)
+
def __str__(self):
return self.source_path or repr(self)
@@ -257,6 +265,8 @@ def _link_replacer(self, siteurl, m):
siteurl += '/'
# XXX Put this in a different location.
+        # NOTE(review): leftover pdb breakpoint for 'include' removed here;
+        # TODO confirm that 'include' needs no branch of its own below.
if what in {'filename', 'static', 'attach'}:
if path.startswith('/'):
path = path[1:]
@@ -334,6 +344,30 @@ def _get_intrasite_link_regex(self):
\2""".format(intrasite_link_regex)
return re.compile(regex, re.X)
+    def _path_replacer(self, path, relative_dir=None):
+        """Normalise the target path of an ``{include}`` marker.
+
+        A leading ``/`` is stripped; any other path is joined onto
+        ``relative_dir`` (``self.relative_dir`` when empty) and passed to
+        ``get_relative_source_path()``. A URL-quoted result is unquoted when
+        only its unquoted form is a known filename, and any ``%20`` left in
+        the returned path is turned into a space.
+        """
+        base_dir = relative_dir or self.relative_dir
+
+        if path.startswith('/'):
+            path = path[1:]
+        else:
+            path = self.get_relative_source_path(os.path.join(base_dir, path))
+
+        # The context may carry no 'filenames' entry; treat that as "no
+        # known files" instead of failing with a KeyError.
+        known = self._context.get('filenames', ())
+        if path not in known and unquote(path) in known:
+            path = unquote(path)
+
+        return path.replace('%20', ' ')
+
def _update_content(self, content, siteurl):
"""Update the content attribute.
@@ -348,6 +382,7 @@ def _update_content(self, content, siteurl):
return content
hrefs = self._get_intrasite_link_regex()
+        # rewrite every intrasite link match through _link_replacer()
return hrefs.sub(lambda m: self._link_replacer(siteurl, m), content)
def get_static_links(self):
@@ -367,12 +402,74 @@ def get_static_links(self):
os.path.join(self.relative_dir, path)
)
path = path.replace('%20', ' ')
+            # TODO: reuse self._path_replacer(path) for this normalisation
static_links.add(path)
return static_links
def get_siteurl(self):
return self._context.get('localsiteurl', '')
+    def _update_includes(self, content, source_path=None):
+        """Expand every ``{include}file.ext`` marker found in ``content``.
+
+        Each marker is replaced by the text that the reader registered for
+        the file's extension returns, and that text is expanded in turn, so
+        included files may contain further includes.
+
+        :param content: text that may contain ``{include}`` markers.
+        :param source_path: directory that relative includes found directly
+            in ``content`` are resolved against (see ``_path_replacer``).
+        :return: ``content`` with all resolvable markers expanded. A marker
+            whose file is missing, or whose extension has no reader, is
+            left untouched and a warning is logged.
+        :raises RuntimeError: if a file includes itself, directly or
+            through other included files.
+        """
+        include_re = re.compile(r"[{|]include[|}](?P<path>[\w./]+)")
+        # ``readers`` defaults to None; without it no file is readable.
+        reader_classes = getattr(self.readers, 'reader_classes', {})
+
+        def expand(text, base_dir, stack):
+            # ``base_dir`` and ``stack`` (the chain of files currently being
+            # expanded) are passed down explicitly instead of being kept in
+            # shared mutable state: every nesting level then resolves
+            # relative paths against its own directory, and a file included
+            # twice side by side is not mistaken for a cycle.
+            def replacer(m):
+                path = self._path_replacer(m.group('path'), base_dir)
+                path = posixize_path(
+                    os.path.abspath(
+                        os.path.join(self.settings['PATH'], path)
+                    )
+                )
+
+                if not os.path.isfile(path):
+                    logger.warning(
+                        "Unable to find `%s`, skipping include.", path)
+                    return m.group(0)
+
+                # extension without the leading dot selects the reader
+                ext = os.path.splitext(path)[1][1:]
+                if ext not in reader_classes:
+                    logger.warning(
+                        "Unable to read `%s`, skipping include.", path)
+                    return m.group(0)
+
+                if path in stack:
+                    raise RuntimeError(
+                        "Circular inclusion detected for '%s'" % path)
+
+                reader = reader_classes[ext](self.settings)
+                included, _ = reader.read(path)
+                # nested includes are relative to the included file itself
+                return expand(included,
+                              posixize_path(os.path.dirname(path)),
+                              stack + (path,))
+
+            return include_re.sub(replacer, text)
+
+        return expand(content, source_path, ())
+
@memoized
def get_content(self, siteurl):
if hasattr(self, '_get_content'):
diff --git a/pelican/readers.py b/pelican/readers.py
index 0edfed0eca..5c615bf7d4 100644
--- a/pelican/readers.py
+++ b/pelican/readers.py
@@ -500,7 +500,12 @@ def read(self, filename):
metadata = {}
for k in parser.metadata:
metadata[k] = self.process_metadata(k, parser.metadata[k])
- return parser.body, metadata
+
+ if parser.body:
+ return parser.body, metadata
+ else:
+ # in case we're parsing HTML includes
+ return content, metadata
class Readers(FileStampDataCacher):
@@ -637,7 +642,7 @@ def typogrify_wrapper(text):
return content_class(content=content, metadata=metadata,
settings=self.settings, source_path=path,
- context=context)
+ context=context, readers=self)
def find_empty_alt(content, path):
diff --git a/pelican/tests/content/include.md b/pelican/tests/content/include.md
new file mode 100644
index 0000000000..08f4b65992
--- /dev/null
+++ b/pelican/tests/content/include.md
@@ -0,0 +1,2 @@
+**this is Markdown**
+Here is a [link](https://docs.getpelican.com).
diff --git a/pelican/tests/content/include.unknown b/pelican/tests/content/include.unknown
new file mode 100644
index 0000000000..08f4b65992
--- /dev/null
+++ b/pelican/tests/content/include.unknown
@@ -0,0 +1,2 @@
+**this is Markdown**
+Here is a [link](https://docs.getpelican.com).
diff --git a/pelican/tests/content/include/include3.html b/pelican/tests/content/include/include3.html
new file mode 100644
index 0000000000..d65793c17e
--- /dev/null
+++ b/pelican/tests/content/include/include3.html
@@ -0,0 +1,2 @@
+this file includes another in a different directory
+{include}../include1.html
diff --git a/pelican/tests/content/include/include4.html b/pelican/tests/content/include/include4.html
new file mode 100644
index 0000000000..5aed913bad
--- /dev/null
+++ b/pelican/tests/content/include/include4.html
@@ -0,0 +1,2 @@
+this file includes another via absolute path
+{include}/include1.html
diff --git a/pelican/tests/content/include1.html b/pelican/tests/content/include1.html
new file mode 100644
index 0000000000..5d27544c65
--- /dev/null
+++ b/pelican/tests/content/include1.html
@@ -0,0 +1 @@
+this content has been included
diff --git a/pelican/tests/content/include2.html b/pelican/tests/content/include2.html
new file mode 100644
index 0000000000..69232fe48b
--- /dev/null
+++ b/pelican/tests/content/include2.html
@@ -0,0 +1,2 @@
+this file includes another
+{include}include1.html
diff --git a/pelican/tests/content/include5.html b/pelican/tests/content/include5.html
new file mode 100644
index 0000000000..b5018e3682
--- /dev/null
+++ b/pelican/tests/content/include5.html
@@ -0,0 +1 @@
+{include}include6.html
diff --git a/pelican/tests/content/include6.html b/pelican/tests/content/include6.html
new file mode 100644
index 0000000000..e70cf4e653
--- /dev/null
+++ b/pelican/tests/content/include6.html
@@ -0,0 +1 @@
+{include}include5.html
diff --git a/pelican/tests/test_cache.py b/pelican/tests/test_cache.py
index ceba649e30..08988e282c 100644
--- a/pelican/tests/test_cache.py
+++ b/pelican/tests/test_cache.py
@@ -162,8 +162,11 @@ def test_article_object_caching(self):
- 2012-11-30_md_w_filename_meta#foo-bar.md
- empty.md
- empty_with_bom.md
+
+ There are 5 more include* files which are HTML or Markdown snippets
+ and also not valid.
"""
- self.assertEqual(generator.readers.read_file.call_count, 6)
+ self.assertEqual(generator.readers.read_file.call_count, 11)
@unittest.skipUnless(MagicMock, 'Needs Mock module')
def test_article_reader_content_caching(self):
diff --git a/pelican/tests/test_contents.py b/pelican/tests/test_contents.py
index 104bc88902..d1b5752cb9 100644
--- a/pelican/tests/test_contents.py
+++ b/pelican/tests/test_contents.py
@@ -11,7 +11,8 @@
import six
-from pelican.contents import Article, Author, Category, Page, Static
+from pelican.contents import Article, Author, Category, Page, Static, Tag
+from pelican.readers import Readers
from pelican.settings import DEFAULT_CONFIG
from pelican.signals import content_object_init
from pelican.tests.support import LoggedTestCase, get_context, get_settings,\
@@ -23,6 +24,8 @@
TEST_CONTENT = str(generate_lorem_ipsum(n=1))
TEST_SUMMARY = generate_lorem_ipsum(n=1, html=False)
+CONTENT_PATH = os.path.join(os.path.dirname(__file__), 'content')
+
class TestPage(LoggedTestCase):
@@ -534,6 +537,199 @@ def test_intrasite_link_source_and_generated(self):
'source'
)
+ def test_include_markdown_from_html(self):
+ args = self.page_kwargs.copy()
+ args['source_path'] = 'fakepage.html'
+ args['content'] = (
+ 'HTML includes Markdown '
+ '{include}include.md\n'
+ 'Included content is above'
+ )
+ content = Page(**args).get_content('')
+ self.assertEqual(
+ content,
+ 'HTML includes Markdown '
+ 'this is Markdown\n'
+ 'Here is a link.
\n'
+ 'Included content is above'
+ )
+
+ def test_include_markdown_from_markdown(self):
+ args = self.page_kwargs.copy()
+ args['source_path'] = 'fakepage.md'
+ args['content'] = (
+ '_HTML_ includes Markdown '
+ '{include}include.md\n'
+ 'Included content is above'
+ )
+ content = Page(**args).get_content('')
+ self.assertEqual(
+ content,
+ 'HTML includes Markdown '
+ 'this is Markdown\n'
+ 'Here is a link.
\n'
+ 'Included content is above'
+ )
+
+    def test_include_unknown_type(self):
+        """A file type without a reader is not included; a warning is logged."""
+        settings = get_settings()
+        settings['PATH'] = CONTENT_PATH
+        args = self.page_kwargs.copy()
+        args['source_path'] = CONTENT_PATH
+        args['context']['filenames'] = {}
+        args['settings'] = settings
+        args['readers'] = Readers(settings)
+        args['content'] = (
+            'HTML includes Unknown '
+            '{include}include.unknown'
+        )
+        content = Page(**args).get_content('')
+        # we have a warning in this case
+        self.assertLogCountEqual(
+            count=1,
+            msg=r"Unable to read `.*`, skipping include\.",
+            level=logging.WARNING)
+        self.assertEqual(
+            content,
+            'HTML includes Unknown '
+            '{include}include.unknown'
+        )
+
+    def test_include_html_with_relative_path(self):
+        """A relative include is replaced by the content of the file."""
+        settings = get_settings()
+        settings['PATH'] = CONTENT_PATH
+        args = self.page_kwargs.copy()
+        args['source_path'] = CONTENT_PATH
+        args['context']['filenames'] = {}
+        args['settings'] = settings
+        args['readers'] = Readers(settings)
+        args['content'] = (
+            'There is a simple include here '
+            '{include}include1.html\n'
+            'Included content is above'
+        )
+        content = Page(**args).get_content('')
+        self.assertEqual(
+            content,
+            'There is a simple include here '
+            'this content has been included\n\n'
+            'Included content is above'
+        )
+
+    def test_include_nested_html(self):
+        """Includes found inside an included file are expanded as well."""
+        settings = get_settings()
+        settings['PATH'] = CONTENT_PATH
+        args = self.page_kwargs.copy()
+        args['source_path'] = CONTENT_PATH
+        args['context']['filenames'] = {}
+        args['settings'] = settings
+        args['readers'] = Readers(settings)
+        args['content'] = (
+            'There is a simple include here '
+            '{include}include2.html\n'
+            'Included content is above'
+        )
+        content = Page(**args).get_content('')
+        self.assertEqual(
+            content,
+            'There is a simple include here '
+            'this file includes another\n'
+            'this content has been included\n\n\n'
+            'Included content is above'
+        )
+
+    def test_include_html_with_full_path(self):
+        """An include starting with ``/`` is resolved from ``PATH``."""
+        settings = get_settings()
+        settings['PATH'] = CONTENT_PATH
+        args = self.page_kwargs.copy()
+        args['source_path'] = CONTENT_PATH
+        args['context']['filenames'] = {}
+        args['settings'] = settings
+        args['readers'] = Readers(settings)
+        args['content'] = (
+            'There is a simple include here '
+            '{include}/include1.html'
+            ' Included content is above'
+        )
+        content = Page(**args).get_content('')
+        self.assertEqual(
+            content,
+            'There is a simple include here '
+            'this content has been included\n'
+            ' Included content is above'
+        )
+
+    def test_include_html_in_other_directory(self):
+        """A nested include is resolved relative to the including file."""
+        settings = get_settings()
+        settings['PATH'] = CONTENT_PATH
+        args = self.page_kwargs.copy()
+        args['source_path'] = CONTENT_PATH
+        args['context']['filenames'] = {}
+        args['settings'] = settings
+        args['readers'] = Readers(settings)
+        args['content'] = (
+            'There is a simple include here '
+            '{include}include/include3.html'
+            ' Included content is above'
+        )
+        content = Page(**args).get_content('')
+        self.assertEqual(
+            content,
+            'There is a simple include here '
+            'this file includes another in a different directory\n'
+            'this content has been included\n\n'
+            ' Included content is above'
+        )
+
+    def test_include_non_existing_file(self):
+        """A missing file leaves the marker in place and logs a warning."""
+        settings = get_settings()
+        settings['PATH'] = CONTENT_PATH
+        args = self.page_kwargs.copy()
+        args['source_path'] = CONTENT_PATH
+        args['context']['filenames'] = {}
+        args['settings'] = settings
+        args['readers'] = Readers(settings)
+        args['content'] = (
+            'There is a simple include here '
+            '{include}missing.html'
+            ' Included content is above'
+        )
+        content = Page(**args).get_content('')
+        # we have a warning in this case
+        self.assertLogCountEqual(
+            count=1,
+            msg=r"Unable to find `.*`, skipping include\.",
+            level=logging.WARNING)
+        self.assertEqual(
+            content,
+            'There is a simple include here '
+            '{include}missing.html'
+            ' Included content is above'
+        )
+
+    def test_include_with_recursion_loop(self):
+        """Files including each other raise instead of recursing forever."""
+        settings = get_settings()
+        settings['PATH'] = CONTENT_PATH
+        args = self.page_kwargs.copy()
+        args['source_path'] = CONTENT_PATH
+        args['context']['filenames'] = {}
+        args['settings'] = settings
+        args['readers'] = Readers(settings)
+        args['content'] = ('There is a simple include here '
+                           '{include}include5.html')
+
+        with six.assertRaisesRegex(
+                self, RuntimeError, 'Circular inclusion detected'):
+            Page(**args).get_content('')
+
+
def test_intrasite_link_to_static_content_with_filename(self):
"""Test linking to a static resource with deprecated {filename}
"""
@@ -550,11 +746,11 @@ def test_intrasite_link_to_static_content_with_filename(self):
'A simple test, with a link to a'
'poster'
)
- content = Page(**args).get_content('http://notmyidea.org')
+ content = Page(**args).get_content('')
self.assertEqual(
content,
'A simple test, with a link to a'
- 'poster'
+ 'poster'
)
def test_multiple_authors(self):