diff --git a/.gitignore b/.gitignore index 459469464c..07c9956d08 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ .*.swp .*.swo *.pyc +.cache/ .DS_Store docs/_build docs/fr/_build @@ -16,3 +17,4 @@ six-*.egg/ venv samples/output *.pem +pip-wheel-metadata/ diff --git a/RELEASE.md b/RELEASE.md new file mode 100644 index 0000000000..4aafe7421c --- /dev/null +++ b/RELEASE.md @@ -0,0 +1,3 @@ +Release type: minor + +Add support for the ``{include}`` syntax diff --git a/docs/content.rst b/docs/content.rst index f0022c35e1..be4ab0ce34 100644 --- a/docs/content.rst +++ b/docs/content.rst @@ -369,6 +369,45 @@ Linking to authors, categories, index and tags You can link to authors, categories, index and tags using the ``{author}name``, ``{category}foobar``, ``{index}`` and ``{tag}tagname`` syntax. +Including common text into your content +--------------------------------------- + +From Pelican 4.2 onward, you can include common text snippets into your content using +the ``{include}file.ext`` syntax. You can specify semi-absolute paths starting +from the ``PATH`` directory, e.g. ``{include}/pages/disclaimer.html`` or use +relative paths, e.g. ``{include}notice.html``. Relative paths are +resolved against the directory of the file containing the ``{include}``. +For example, given the following content layout:: + + content + ├── notice2.html + └── pages + ├── page1.html + └── notice1.html + +Then the includes may look like:: + + + + PAGE 1 + + + This is the content of page 1 + + {include}../notice2.html + + + + +``notice2.html`` looks like:: + + {include}pages/notice1.html + This is the second warning about relative paths + +When using ``{include}`` it is best to blacklist the included files using the +``IGNORE_FILES`` setting. Otherwise Pelican will try to render them as regular +content and will most likely fail! 
+
 Deprecated internal link syntax
 -------------------------------
 
diff --git a/pelican/contents.py b/pelican/contents.py
index a862db2d66..906d8a668c 100644
--- a/pelican/contents.py
+++ b/pelican/contents.py
@@ -11,7 +11,7 @@
 import pytz
 
 import six
-from six.moves.urllib.parse import urljoin, urlparse, urlunparse
+from six.moves.urllib.parse import unquote, urljoin, urlparse, urlunparse
 
 from pelican import signals
 from pelican.settings import DEFAULT_CONFIG
@@ -36,6 +36,7 @@ class Content(object):
     :param settings: the settings dictionary (optional).
     :param source_path: The location of the source of this content (if any).
     :param context: The shared context between generators.
+    :param readers: readers.Readers() instance used for rendering includes.
 
     """
     @deprecated_attribute(old='filename', new='source_path', since=(3, 2, 0))
@@ -43,7 +44,7 @@ def filename():
         return None
 
     def __init__(self, content, metadata=None, settings=None,
-                 source_path=None, context=None):
+                 source_path=None, context=None, readers=None):
         if metadata is None:
             metadata = {}
         if settings is None:
@@ -148,8 +149,15 @@ def __init__(self, content, metadata=None, settings=None,
         if 'summary' in metadata:
             self._summary = metadata['summary']
 
+        # used for rendering {includes}
+        self._readers = readers
+
         signals.content_object_init.send(self)
 
+    @property
+    def readers(self):
+        return self._readers
+
     def __str__(self):
         return self.source_path or repr(self)
 
@@ -334,6 +342,30 @@ def _get_intrasite_link_regex(self):
             \2""".format(intrasite_link_regex)
         return re.compile(regex, re.X)
 
+    def _path_replacer(self, path, relative_dir=None):
+        """
+        Update path depending on whether this is an absolute
+        or relative value.
+        """
+        if not relative_dir:
+            relative_dir = self.relative_dir
+
+        if path.startswith('/'):
+            path = path[1:]
+        else:
+            # relative to the source path of this content
+            path = self.get_relative_source_path(
+                os.path.join(relative_dir, path)
+            )
+
+        if path not in self._context['filenames']:
+            unquoted_path = unquote(path)
+
+            if unquoted_path in self._context['filenames']:
+                path = unquoted_path
+
+        return path.replace('%20', ' ')
+
     def _update_content(self, content, siteurl):
         """Update the content attribute.
 
@@ -373,6 +405,73 @@ def get_static_links(self):
     def get_siteurl(self):
         return self._context.get('localsiteurl', '')
 
+    def _update_includes(self, content, source_path=None):
+        """Replace each ``{include}some.file`` marker by that file's text.
+
+        An included file is rendered by the reader registered for its
+        extension and may itself contain ``{include}`` markers, which are
+        expanded recursively.  A marker whose file is missing or has no
+        reader is left in place (as ``{include}path``) and a warning is
+        logged.
+
+        :param content: the text in which includes are expanded.
+        :param source_path: directory against which relative includes in
+            ``content`` are resolved (``self.relative_dir`` if not set).
+        :raises RuntimeError: if a file includes itself, directly or via
+            other includes.
+        """
+        hrefs = re.compile(r'[{|]include[|}](?P<path>[\w./]+)')
+        # no Readers instance means nothing can be rendered
+        reader_classes = getattr(self.readers, 'reader_classes', {})
+        # Files being expanded right now, outermost first.  Only this chain
+        # matters for loop detection, so one snippet may be included many
+        # times as long as it never ends up including itself.
+        include_stack = []
+        # Python 2 has no `nonlocal`, so the directory that relative paths
+        # are resolved against lives in a one-element list which
+        # `replacer()` can update while it recurses.
+        source_dir = [source_path]
+
+        def replacer(m):
+            unresolved = '{include}' + m.group('path')
+            path = self._path_replacer(m.group('path'), source_dir[0])
+            path = posixize_path(
+                os.path.abspath(
+                    os.path.join(self.settings['PATH'], path)
+                )
+            )
+
+            if not os.path.isfile(path):
+                logger.warning("Unable to find `%s`, skipping include.", path)
+                return unresolved
+
+            # the extension, without its leading dot, selects the reader
+            ext = os.path.splitext(path)[1][1:]
+            if ext not in reader_classes:
+                logger.warning("Unable to read `%s`, skipping include.", path)
+                return unresolved
+
+            if path in include_stack:
+                raise RuntimeError(
+                    "Circular inclusion detected for '%s'" % path)
+
+            text, _ = reader_classes[ext](self.settings).read(path)
+
+            # Nested includes are relative to the included file: switch the
+            # working directory for the recursion and always put the
+            # previous one back, so that later includes of the including
+            # file are still resolved against its own directory.
+            previous_source_dir = source_dir[0]
+            source_dir[0] = posixize_path(os.path.dirname(path))
+            include_stack.append(path)
+            try:
+                return hrefs.sub(replacer, text)
+            finally:
+                include_stack.pop()
+                source_dir[0] = previous_source_dir
+
+        return hrefs.sub(replacer, content)
+
     @memoized
     def get_content(self, siteurl):
         if hasattr(self, '_get_content'):
diff --git a/pelican/readers.py b/pelican/readers.py
index 0edfed0eca..5c615bf7d4 100644
--- a/pelican/readers.py
+++ b/pelican/readers.py
@@ -500,7 +500,12 @@ def read(self, filename):
         metadata = {}
         for k in parser.metadata:
             metadata[k] = self.process_metadata(k, parser.metadata[k])
-        return parser.body, metadata
+
+        if parser.body:
+            return parser.body, metadata
+        else:
+            # in case we're parsing HTML includes
+            return content, metadata
 
 
 class Readers(FileStampDataCacher):
@@ -637,7 +642,7 @@ def typogrify_wrapper(text):
         return content_class(content=content, metadata=metadata,
                              settings=self.settings,
source_path=path, - context=context) + context=context, readers=self) def find_empty_alt(content, path): diff --git a/pelican/tests/content/include.md b/pelican/tests/content/include.md new file mode 100644 index 0000000000..08f4b65992 --- /dev/null +++ b/pelican/tests/content/include.md @@ -0,0 +1,2 @@ +**this is Markdown** +Here is a [link](https://docs.getpelican.com). diff --git a/pelican/tests/content/include.unknown b/pelican/tests/content/include.unknown new file mode 100644 index 0000000000..08f4b65992 --- /dev/null +++ b/pelican/tests/content/include.unknown @@ -0,0 +1,2 @@ +**this is Markdown** +Here is a [link](https://docs.getpelican.com). diff --git a/pelican/tests/content/include/include3.html b/pelican/tests/content/include/include3.html new file mode 100644 index 0000000000..d65793c17e --- /dev/null +++ b/pelican/tests/content/include/include3.html @@ -0,0 +1,2 @@ +this file includes another in a different directory +{include}../include1.html diff --git a/pelican/tests/content/include/include4.html b/pelican/tests/content/include/include4.html new file mode 100644 index 0000000000..5aed913bad --- /dev/null +++ b/pelican/tests/content/include/include4.html @@ -0,0 +1,2 @@ +this file includes another via absolute path +{include}/include1.html diff --git a/pelican/tests/content/include1.html b/pelican/tests/content/include1.html new file mode 100644 index 0000000000..5d27544c65 --- /dev/null +++ b/pelican/tests/content/include1.html @@ -0,0 +1 @@ +this content has been included diff --git a/pelican/tests/content/include2.html b/pelican/tests/content/include2.html new file mode 100644 index 0000000000..69232fe48b --- /dev/null +++ b/pelican/tests/content/include2.html @@ -0,0 +1,2 @@ +this file includes another +{include}include1.html diff --git a/pelican/tests/content/include5.html b/pelican/tests/content/include5.html new file mode 100644 index 0000000000..b5018e3682 --- /dev/null +++ b/pelican/tests/content/include5.html @@ -0,0 +1 @@ 
+{include}include6.html diff --git a/pelican/tests/content/include6.html b/pelican/tests/content/include6.html new file mode 100644 index 0000000000..e70cf4e653 --- /dev/null +++ b/pelican/tests/content/include6.html @@ -0,0 +1 @@ +{include}include5.html diff --git a/pelican/tests/test_cache.py b/pelican/tests/test_cache.py index ceba649e30..08988e282c 100644 --- a/pelican/tests/test_cache.py +++ b/pelican/tests/test_cache.py @@ -162,8 +162,11 @@ def test_article_object_caching(self): - 2012-11-30_md_w_filename_meta#foo-bar.md - empty.md - empty_with_bom.md + + There are 5 more include* files which are HTML or Markdown snippets + and also not valid. """ - self.assertEqual(generator.readers.read_file.call_count, 6) + self.assertEqual(generator.readers.read_file.call_count, 11) @unittest.skipUnless(MagicMock, 'Needs Mock module') def test_article_reader_content_caching(self): diff --git a/pelican/tests/test_contents.py b/pelican/tests/test_contents.py index 104bc88902..d1b5752cb9 100644 --- a/pelican/tests/test_contents.py +++ b/pelican/tests/test_contents.py @@ -11,7 +11,8 @@ import six -from pelican.contents import Article, Author, Category, Page, Static +from pelican.contents import Article, Author, Category, Page, Static, Tag +from pelican.readers import Readers from pelican.settings import DEFAULT_CONFIG from pelican.signals import content_object_init from pelican.tests.support import LoggedTestCase, get_context, get_settings,\ @@ -23,6 +24,8 @@ TEST_CONTENT = str(generate_lorem_ipsum(n=1)) TEST_SUMMARY = generate_lorem_ipsum(n=1, html=False) +CONTENT_PATH = os.path.join(os.path.dirname(__file__), 'content') + class TestPage(LoggedTestCase): @@ -534,6 +537,199 @@ def test_intrasite_link_source_and_generated(self): 'source' ) + def test_include_markdown_from_html(self): + args = self.page_kwargs.copy() + args['source_path'] = 'fakepage.html' + args['content'] = ( + 'HTML includes Markdown ' + '{include}include.md\n' + 'Included content is above' + ) + content = 
Page(**args).get_content('') + self.assertEqual( + content, + 'HTML includes Markdown ' + '

this is Markdown\n' + 'Here is a link.

\n' + 'Included content is above' + ) + + def test_include_markdown_from_markdown(self): + args = self.page_kwargs.copy() + args['source_path'] = 'fakepage.md' + args['content'] = ( + '_HTML_ includes Markdown ' + '{include}include.md\n' + 'Included content is above' + ) + content = Page(**args).get_content('') + self.assertEqual( + content, + 'HTML includes Markdown ' + '

this is Markdown\n' + 'Here is a link.

\n' + 'Included content is above' + ) + + def test_include_unknown_type(self): + args = self.page_kwargs.copy() + args['settings'] = get_settings() + args['source_path'] = CONTENT_PATH + args['context']['filenames'] = {} + settings = get_settings() + settings['PATH'] = CONTENT_PATH + args['settings'] = settings + args['readers'] = Readers(settings) + args['content'] = ( + 'HTML includes Unknown ' + '{include}include.unknown' + ) + content = Page(**args).get_content('') + # we have a warning in this case + self.assertLogCountEqual( + count=1, + msg="Unable to read `.*`, skipping include\.", + level=logging.WARNING) + self.assertEqual( + content, + 'HTML includes Unknown ' + '{include}include.unknown' + ) + + def test_include_html_with_relative_path(self): + args = self.page_kwargs.copy() + args['settings'] = get_settings() + args['source_path'] = CONTENT_PATH + args['context']['filenames'] = {} + settings = get_settings() + settings['PATH'] = CONTENT_PATH + args['settings'] = settings + args['readers'] = Readers(settings) + args['content'] = ( + 'There is a simple include here ' + '{include}include1.html\n' + 'Included content is above' + ) + content = Page(**args).get_content('') + self.assertEqual( + content, + 'There is a simple include here ' + 'this content has been included\n\n' + 'Included content is above' + ) + + def test_include_nested_html(self): + args = self.page_kwargs.copy() + args['settings'] = get_settings() + args['source_path'] = CONTENT_PATH + args['context']['filenames'] = {} + settings = get_settings() + settings['PATH'] = CONTENT_PATH + args['settings'] = settings + args['readers'] = Readers(settings) + args['content'] = ( + 'There is a simple include here ' + '{include}include2.html\n' + 'Included content is above' + ) + content = Page(**args).get_content('') + self.assertEqual( + content, + 'There is a simple include here ' + 'this file includes another\n' + 'this content has been included\n\n\n' + 'Included content is above' + ) + + def 
test_include_html_with_full_path(self): + args = self.page_kwargs.copy() + args['settings'] = get_settings() + args['source_path'] = CONTENT_PATH + args['context']['filenames'] = {} + settings = get_settings() + settings['PATH'] = CONTENT_PATH + args['settings'] = settings + args['readers'] = Readers(settings) + args['content'] = ( + 'There is a simple include here ' + '{include}/include1.html' + ' Included content is above' + ) + content = Page(**args).get_content('') + self.assertEqual( + content, + 'There is a simple include here ' + 'this content has been included\n' + ' Included content is above' + ) + + def test_include_html_in_other_directory(self): + args = self.page_kwargs.copy() + args['settings'] = get_settings() + args['source_path'] = CONTENT_PATH + args['context']['filenames'] = {} + settings = get_settings() + settings['PATH'] = CONTENT_PATH + args['settings'] = settings + args['readers'] = Readers(settings) + args['content'] = ( + 'There is a simple include here ' + '{include}include/include3.html' + ' Included content is above' + ) + content = Page(**args).get_content('') + self.assertEqual( + content, + 'There is a simple include here ' + 'this file includes another in a different directory\n' + 'this content has been included\n\n' + ' Included content is above' + ) + + def test_include_non_existing_file(self): + args = self.page_kwargs.copy() + args['settings'] = get_settings() + args['source_path'] = CONTENT_PATH + args['context']['filenames'] = {} + settings = get_settings() + settings['PATH'] = CONTENT_PATH + args['settings'] = settings + args['readers'] = Readers(settings) + args['content'] = ( + 'There is a simple include here ' + '{include}missing.html' + ' Included content is above' + ) + content = Page(**args).get_content('') + # we have a warning in this case + self.assertLogCountEqual( + count=1, + msg="Unable to find `.*`, skipping include\.", + level=logging.WARNING) + self.assertEqual( + content, + 'There is a simple include here ' + 
'{include}missing.html' + ' Included content is above' + ) + + def test_include_with_recursion_loop(self): + args = self.page_kwargs.copy() + args['settings'] = get_settings() + args['source_path'] = CONTENT_PATH + args['context']['filenames'] = {} + settings = get_settings() + settings['PATH'] = CONTENT_PATH + args['settings'] = settings + args['readers'] = Readers(settings) + args['content'] = ( + 'There is a simple include here ' + '{include}include5.html' + ) + with self.assertRaisesRegex(RuntimeError, 'Circular inclusion detected'): + Page(**args).get_content('') + + def test_intrasite_link_to_static_content_with_filename(self): """Test linking to a static resource with deprecated {filename} """ @@ -550,11 +746,11 @@ def test_intrasite_link_to_static_content_with_filename(self): 'A simple test, with a link to a' 'poster' ) - content = Page(**args).get_content('http://notmyidea.org') + content = Page(**args).get_content('') self.assertEqual( content, 'A simple test, with a link to a' - 'poster' + 'poster' ) def test_multiple_authors(self):