From 79101186ddfb7d9b6491e4e790fbec857035f97e Mon Sep 17 00:00:00 2001 From: "Mr. Senko" Date: Thu, 26 May 2016 17:36:48 +0300 Subject: [PATCH] New feature: support for {include} syntax. Fixes #1902. The new {include} syntax makes it possible to include frequently used text snippets into your content. --- docs/changelog.rst | 1 + docs/content.rst | 39 ++++++ pelican/contents.py | 93 +++++++++++-- pelican/readers.py | 7 +- pelican/tests/content/include.markdown | 2 + pelican/tests/content/include.unknown | 2 + pelican/tests/content/include/include3.html | 2 + pelican/tests/content/include/include4.html | 2 + pelican/tests/content/include1.html | 1 + pelican/tests/content/include2.html | 2 + pelican/tests/test_cache.py | 5 +- pelican/tests/test_contents.py | 138 ++++++++++++++++++++ 12 files changed, 279 insertions(+), 15 deletions(-) create mode 100644 pelican/tests/content/include.markdown create mode 100644 pelican/tests/content/include.unknown create mode 100644 pelican/tests/content/include/include3.html create mode 100644 pelican/tests/content/include/include4.html create mode 100644 pelican/tests/content/include1.html create mode 100644 pelican/tests/content/include2.html diff --git a/docs/changelog.rst b/docs/changelog.rst index 6a4d65a4ec..8fd46f28a0 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -15,6 +15,7 @@ Next release * Author slugs can be controlled with greater precision using the ``AUTHOR_SUBSTITUTIONS`` setting. Keeping non-alphanum characters is supported as well but discouraged. +* Add support for the ``{include}`` syntax 3.6.3 (2015-08-14) ================== diff --git a/docs/content.rst b/docs/content.rst index 0fa8992108..ec68b30ade 100644 --- a/docs/content.rst +++ b/docs/content.rst @@ -335,6 +335,45 @@ Linking to authors, categories, index and tags You can link to authors, categories, index and tags using the ``{author}name``, ``{category}foobar``, ``{index}`` and ``{tag}tagname`` syntax. 
+Including common text into your content +--------------------------------------- + +Starting with Pelican 3.6.4, you can include common text snippets into your content using +the ``{include}file.ext`` syntax. You can specify semi-absolute paths starting +from the ``PATH`` directory, e.g. ``{include}/pages/disclaimer.html``, or use +relative paths, e.g. ``{include}notice.html``. Relative paths are +resolved against the directory of the file containing the ``{include}``. +For example, when you have the following content layout:: + + content + ├── notice2.html + └── pages + ├── page1.html + └── notice1.html + +Then ``pages/page1.html`` may look like:: + + <html> + <head><title>PAGE 1</title> + </head> + <body> + This is the content of page 1 + + {include}../notice2.html + + </body> + </html> + +``notice2.html`` looks like:: + + {include}pages/notice1.html + This is the second warning about relative paths + +When using ``{include}`` it is best to exclude the included files using the +``IGNORE_FILES`` setting. Otherwise Pelican will try to render them as regular +content and will most likely fail! 
+ Deprecated internal link syntax ------------------------------- diff --git a/pelican/contents.py b/pelican/contents.py index 9b6aa971d0..8d04885483 100644 --- a/pelican/contents.py +++ b/pelican/contents.py @@ -150,8 +150,20 @@ def __init__(self, content, metadata=None, settings=None, if 'summary' in metadata: self._summary = metadata['summary'] + # used for rendering {includes} + self._readers = None + signals.content_object_init.send(self) + @property + def readers(self): + if self._readers is None: + # import here due to circular imports + from pelican.readers import Readers + self._readers = Readers(self.settings) + + return self._readers + def __str__(self): return self.source_path or repr(self) @@ -187,6 +199,30 @@ def get_url_setting(self, key): key = key if self.in_default_lang else 'lang_%s' % key return self._expand_settings(key) + def _path_replacer(self, path, relative_dir=None): + """ + Update path depending on whether this is an absolute + or relative value. + """ + if not relative_dir: + relative_dir = self.relative_dir + + if path.startswith('/'): + path = path[1:] + else: + # relative to the source path of this content + path = self.get_relative_source_path( + os.path.join(relative_dir, path) + ) + + if path not in self._context['filenames']: + unquoted_path = path.replace('%20', ' ') + + if unquoted_path in self._context['filenames']: + path = unquoted_path + + return path + def _update_content(self, content, siteurl): """Update the content attribute. @@ -218,19 +254,7 @@ def replacer(m): # XXX Put this in a different location. 
if what in {'filename', 'attach'}: - if path.startswith('/'): - path = path[1:] - else: - # relative to the source path of this content - path = self.get_relative_source_path( - os.path.join(self.relative_dir, path) - ) - - if path not in self._context['filenames']: - unquoted_path = path.replace('%20', ' ') - - if unquoted_path in self._context['filenames']: - path = unquoted_path + path = self._path_replacer(path) linked_content = self._context['filenames'].get(path) if linked_content: @@ -277,12 +301,55 @@ def replacer(m): def get_siteurl(self): return self._context.get('localsiteurl', '') + def _update_includes(self, content, source_path=None): + """ + Replace {include}some.file with the + contents of this file. + """ + regex = r"""[{|]include[|}](?P[\w./]+)""" + hrefs = re.compile(regex, re.X) + + def replacer(m): + path = m.group('path') + path = self._path_replacer(path, source_path) + path = posixize_path( + os.path.abspath( + os.path.join(self.settings['PATH'], path) + ) + ) + + if not os.path.isfile(path): + logger.warning("Unable to find `%s`, skipping include.", path) + return ''.join(('{include}', m.group('path'))) + + _, ext = os.path.splitext(path) + # remove leading dot + ext = ext[1:] + + if ext not in self.readers.reader_classes.keys(): + logger.warning("Unable to read `%s`, skipping include.", path) + return ''.join(('{include}', m.group('path'))) + + reader = self.readers.reader_classes[ext](self.settings) + text, meta = reader.read(path) + + # if we recurse into another file to perform more includes + # self._path_replacer needs to know in which directory + # it operates otherwise it produces wrong paths + source_dir = posixize_path(os.path.dirname(path)) + + text = self._update_includes(text, source_dir) + return text + + return hrefs.sub(replacer, content) + @memoized def get_content(self, siteurl): if hasattr(self, '_get_content'): content = self._get_content() else: content = self._content + content = self._update_includes(content) return 
self._update_content(content, siteurl) @property diff --git a/pelican/readers.py b/pelican/readers.py index 585a6e7969..b0ec66d728 100644 --- a/pelican/readers.py +++ b/pelican/readers.py @@ -424,7 +424,12 @@ def read(self, filename): metadata = {} for k in parser.metadata: metadata[k] = self.process_metadata(k, parser.metadata[k]) - return parser.body, metadata + + if parser.body: + return parser.body, metadata + else: + # in case we're parsing HTML includes + return content, metadata class Readers(FileStampDataCacher): diff --git a/pelican/tests/content/include.markdown b/pelican/tests/content/include.markdown new file mode 100644 index 0000000000..9055424a1b --- /dev/null +++ b/pelican/tests/content/include.markdown @@ -0,0 +1,2 @@ +**this is Markdown** +Here is a [link](http://MrSenko.com). diff --git a/pelican/tests/content/include.unknown b/pelican/tests/content/include.unknown new file mode 100644 index 0000000000..9055424a1b --- /dev/null +++ b/pelican/tests/content/include.unknown @@ -0,0 +1,2 @@ +**this is Markdown** +Here is a [link](http://MrSenko.com). 
diff --git a/pelican/tests/content/include/include3.html b/pelican/tests/content/include/include3.html new file mode 100644 index 0000000000..6933bccea8 --- /dev/null +++ b/pelican/tests/content/include/include3.html @@ -0,0 +1,2 @@ +this file includes another in a different directory +{include}../include1.html \ No newline at end of file diff --git a/pelican/tests/content/include/include4.html b/pelican/tests/content/include/include4.html new file mode 100644 index 0000000000..aa2181bdc6 --- /dev/null +++ b/pelican/tests/content/include/include4.html @@ -0,0 +1,2 @@ +this file includes another via absolute path +{include}/include1.html \ No newline at end of file diff --git a/pelican/tests/content/include1.html b/pelican/tests/content/include1.html new file mode 100644 index 0000000000..b307a825fc --- /dev/null +++ b/pelican/tests/content/include1.html @@ -0,0 +1 @@ +this content has been included \ No newline at end of file diff --git a/pelican/tests/content/include2.html b/pelican/tests/content/include2.html new file mode 100644 index 0000000000..b8c46657fb --- /dev/null +++ b/pelican/tests/content/include2.html @@ -0,0 +1,2 @@ +this file includes another +{include}include1.html \ No newline at end of file diff --git a/pelican/tests/test_cache.py b/pelican/tests/test_cache.py index 3da3f7897f..8fb085435e 100644 --- a/pelican/tests/test_cache.py +++ b/pelican/tests/test_cache.py @@ -60,8 +60,11 @@ def test_article_object_caching(self): - article_with_comments.html - article_with_null_attributes.html - 2012-11-30_md_w_filename_meta#foo-bar.md + + There are 5 more include* files which are HTML or Markdown snippets + and also not valid. 
""" - self.assertEqual(generator.readers.read_file.call_count, 4) + self.assertEqual(generator.readers.read_file.call_count, 9) @unittest.skipUnless(MagicMock, 'Needs Mock module') def test_article_reader_content_caching(self): diff --git a/pelican/tests/test_contents.py b/pelican/tests/test_contents.py index 2f774a6e0e..25e1d2283b 100644 --- a/pelican/tests/test_contents.py +++ b/pelican/tests/test_contents.py @@ -22,6 +22,8 @@ TEST_CONTENT = str(generate_lorem_ipsum(n=1)) TEST_SUMMARY = generate_lorem_ipsum(n=1, html=False) +CONTENT_PATH = os.path.join(os.path.dirname(__file__), 'content') + class TestPage(LoggedTestCase): @@ -418,6 +420,142 @@ def test_intrasite_link_markdown_spaces(self): 'link' ) + def test_includes(self): + args = self.page_kwargs.copy() + args['settings'] = get_settings() + args['source_path'] = CONTENT_PATH + args['context']['filenames'] = {} + settings = get_settings() + settings['PATH'] = CONTENT_PATH + args['settings'] = settings + + # test inclusion b/w files of different types + # HTML includes Markdown + args['content'] = ( + 'HTML includes Markdown ' + '{include}include.markdown' + ' Included content is above' + ) + content = Page(**args).get_content('http://notmyidea.org') + self.assertEqual( + content, + 'HTML includes Markdown ' + '

this is Markdown\n' + 'Here is a link.

' + ' Included content is above' + ) + + # test inclusion b/w files of different types + # where we don't know how to render the included type + args['content'] = ( + 'HTML includes Unknown ' + '{include}include.unknown' + ) + content = Page(**args).get_content('http://notmyidea.org') + # we have a warning in this case + self.assertLogCountEqual( + count=1, + msg="Unable to read `.*`, skipping include\.", + level=logging.WARNING) + self.assertEqual( + content, + 'HTML includes Unknown ' + '{include}include.unknown' + ) + + # one include via relative path + args['content'] = ( + 'There is a simple include here ' + '{include}include1.html' + ' Included content is above' + ) + content = Page(**args).get_content('http://notmyidea.org') + self.assertEqual( + content, + 'There is a simple include here ' + 'this content has been included' + ' Included content is above' + ) + + # two nested includes via relative paths + args['content'] = ( + 'There is a simple include here ' + '{include}include2.html' + ' Included content is above' + ) + content = Page(**args).get_content('http://notmyidea.org') + self.assertEqual( + content, + 'There is a simple include here ' + 'this file includes another\n' + 'this content has been included' + ' Included content is above' + ) + + # include via full path + args['content'] = ( + 'There is a simple include here ' + '{include}/include1.html' + ' Included content is above' + ) + content = Page(**args).get_content('http://notmyidea.org') + self.assertEqual( + content, + 'There is a simple include here ' + 'this content has been included' + ' Included content is above' + ) + + # 2nd include is in different directory + # include paths are relative to the caller directory + args['content'] = ( + 'There is a simple include here ' + '{include}include/include3.html' + ' Included content is above' + ) + content = Page(**args).get_content('http://notmyidea.org') + self.assertEqual( + content, + 'There is a simple include here ' + 'this file includes 
another in a different directory\n' + 'this content has been included' + ' Included content is above' + ) + + # 2nd include using absolute path in the included file + args['content'] = ( + 'There is a simple include here ' + '{include}include/include4.html' + ' Included content is above' + ) + content = Page(**args).get_content('http://notmyidea.org') + self.assertEqual( + content, + 'There is a simple include here ' + 'this file includes another via absolute path\n' + 'this content has been included' + ' Included content is above' + ) + + # include non-existing file => inclusion is skipped + args['content'] = ( + 'There is a simple include here ' + '{include}missing.html' + ' Included content is above' + ) + content = Page(**args).get_content('http://notmyidea.org') + # we have a warning in this case + self.assertLogCountEqual( + count=1, + msg="Unable to find `.*`, skipping include\.", + level=logging.WARNING) + self.assertEqual( + content, + 'There is a simple include here ' + '{include}missing.html' + ' Included content is above' + ) + def test_multiple_authors(self): """Test article with multiple authors.""" args = self.page_kwargs.copy()