Skip to content

Extended Meta Data Extension for Python-Markdown #1598

@DevTwilight

Description

@DevTwilight

I wrote an extended version of the Python-Markdown Meta Data extension with a small improvement to multiline metadata handling.

Here is the extended version code:

  # Meta Data Extension for Python-Markdown
  # =======================================
  
  # This extension adds Meta Data handling to markdown.
  
  # See https://Python-Markdown.github.io/extensions/meta_data
  # for documentation.
  
  # Original code Copyright 2007-2008 [Waylan Limberg](https://github.com/waylan).
  # Modifications Copyright 2026 DevTwilight
  
  # --------------------------------------------------------------------
  # Modifications by DevTwilight:
  # - Added YAML-style list support (e.g. "- item" under keys)
  # - Added LIST_ITEM_RE regex for parsing indented list items
  # - Extended MetaPreprocessor to recognize list-style metadata values
  # - Improved meta parsing robustness for multiline structured input
  # --------------------------------------------------------------------
  
  # All changes Copyright 2008-2014 The Python Markdown Project
  
  # License: [BSD](https://opensource.org/licenses/bsd-license.php)
  
  """
  This extension adds Meta Data handling to markdown.
  
  See the [documentation](https://Python-Markdown.github.io/extensions/meta_data)
  for details.
  """
  
  from __future__ import annotations
  
  from markdown import Extension
  from markdown.preprocessors import Preprocessor
  import re
  import logging
  from typing import Any
  
  # Module-level logger named 'MARKDOWN'; it is not referenced by the code
  # visible in this file.
  log = logging.getLogger('MARKDOWN')
  
  # Global Vars
  #
  # All patterns below are applied with `.match()`, i.e. anchored at the start
  # of a line but not at its end.

  # `key: value` line: up to 3 leading spaces, a key made of ASCII letters,
  # digits, `_` or `-`, a colon, then the rest of the line as the value.
  META_RE = re.compile(r'^[ ]{0,3}(?P<key>[A-Za-z0-9_-]+):\s*(?P<value>.*)')
  # Continuation line: indented by 4 or more spaces; everything after the
  # indentation is the value.
  META_MORE_RE = re.compile(r'^[ ]{4,}(?P<value>.*)')
  # YAML-style list item: 2 or more leading spaces, a dash, at least one
  # whitespace character, then the item text.
  LIST_ITEM_RE = re.compile(r'^[ ]{2,}-\s+(?P<value>.*)')  # NEW: YAML list support
  # Opening delimiter: a line beginning with `---`, optionally followed by
  # whitespace and arbitrary text.
  BEGIN_RE = re.compile(r'^-{3}(\s.*)?')
  # Closing delimiter: a line beginning with `---` or `...`, optionally
  # followed by whitespace and arbitrary text.
  END_RE = re.compile(r'^(-{3}|\.{3})(\s.*)?')
  
  
  class MetaExtension(Extension):
      """ Python-Markdown extension that enables Meta-Data parsing. """

      def extendMarkdown(self, md):
          """ Register this extension and a `MetaPreprocessor` on `md`. """
          md.registerExtension(self)
          # Keep a handle on the Markdown instance so `reset` can reach it.
          self.md = md
          preprocessor = MetaPreprocessor(md)
          md.preprocessors.register(preprocessor, 'meta', 27)

      def reset(self) -> None:
          """ Replace `Markdown.Meta` with a fresh, empty dict. """
          self.md.Meta = {}
  
  class MetaPreprocessor(Preprocessor):
      """ Extract the leading Meta-Data block and store it on the Markdown instance. """

      def run(self, lines: list[str]) -> list[str]:
          """
          Parse Meta-Data and store in `Markdown.Meta`.

          Consumes lines from the front of `lines` (the list is modified in
          place) until the Meta-Data block ends, then returns the remaining
          lines.

          Recognised line forms:

          * an optional opening `---` delimiter on the first line;
          * `key: value` lines (keys are lower-cased; a repeated key
            accumulates its values in order);
          * `- item` lines indented by two or more spaces, appended to the
            most recent key;
          * continuation lines indented by four or more spaces, appended to
            the most recent key.

          Parsing stops at a blank line or a closing `---` / `...` delimiter
          (both are consumed), or at the first line that is none of the above
          (which is left in place).

          Arguments:
              lines: The document split into lines.

          Returns:
              The lines that follow the Meta-Data block.
          """
          meta: dict[str, list[str]] = {}
          key: str | None = None
          # True while the last value stored for `key` is the empty string
          # produced by a bare `key:` line. A list item that follows replaces
          # that placeholder, so `key:` + `- a` yields `['a']`, not `['', 'a']`.
          blank_pending = False

          if lines and BEGIN_RE.match(lines[0]):
              lines.pop(0)

          while lines:
              line = lines.pop(0)

              # Blank line or closing delimiter: end of the Meta-Data block.
              if not line.strip() or END_RE.match(line):
                  break

              # Standard `key: value` line.
              m1 = META_RE.match(line)
              if m1:
                  key = m1.group('key').lower().strip()
                  value = m1.group('value').strip()
                  # Append so that a repeated key keeps every value instead of
                  # silently discarding the later ones.
                  meta.setdefault(key, []).append(value)
                  blank_pending = not value
                  continue

              if key is not None:
                  # YAML-style list item. Checked before the continuation
                  # pattern because an item indented by 4+ spaces matches both.
                  m3 = LIST_ITEM_RE.match(line)
                  if m3:
                      if blank_pending:
                          # Drop the '' placeholder left by the bare `key:` line.
                          meta[key].pop()
                          blank_pending = False
                      meta[key].append(m3.group('value').strip())
                      continue

                  # Multiline continuation of the current key.
                  m2 = META_MORE_RE.match(line)
                  if m2:
                      meta[key].append(m2.group('value').strip())
                      blank_pending = False
                      continue

              # Not Meta-Data: put the line back and stop parsing.
              lines.insert(0, line)
              break

          self.md.Meta = meta
          return lines
  
  
  def makeExtension(**kwargs):  # pragma: no cover
      """ Build a `MetaExtension`, forwarding all keyword arguments to it. """
      extension = MetaExtension(**kwargs)
      return extension

AI Disclosure

This code was prepared with assistance from an AI language model.

Could it be integrated into the library, so that multi-line YAML list support is available?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions