Mathics3 · mmatera · Feb 11, 2024 · Mar 13, 2024 · Mar 14, 2024 · Mar 14, 2024
diff --git a/mathics/doc/doc_entries.py b/mathics/doc/doc_entries.py
@@ -13,6 +13,7 @@
 from typing import Callable, List, Optional
 
 from mathics.core.evaluation import Message, Print
+from mathics.doc.rst_parser import normalize_indent, rst_to_native
 
 # Used for getting test results by test expression and chapter/section information.
 test_result_map = {}
@@ -69,6 +70,17 @@
 LIST_RE = re.compile(r"(?s)<(?P<tag>ul|ol)>(?P<content>.*?)</(?P=tag)>")
 MATHICS_RE = re.compile(r"(?<!\\)\'(.*?)(?<!\\)\'")
 
+
+# Markdown image: ![title](src)
+MD_IMG_RE = re.compile(r"!\[(?P<title>.*?)\]\((?P<src>.*?)\)")
+# Markdown image immediately followed by a label tag: ![title](src){#label}
+MD_IMG_LABEL_RE = re.compile(r"!\[(?P<title>.*?)\]\((?P<src>.*?)\)\{\#(?P<label>.*?)\}")
+# Double-backtick fence tagged "python" or "Python"; the captured code runs
+# from the line after the tag up to the next double backtick.
+MD_PYTHON_RE = re.compile(
+    r"``\s*[pP]ython\n(?P<pythoncode>.*?)``", re.DOTALL | re.MULTILINE
+)
+# Markdown link: [label](url). This also matches the "[title](src)" part of
+# an image, so image patterns have to be applied before this one.
+MD_REF_RE = re.compile(r"\[(?P<label>.*?)\]\((?P<url>.*?)\)")
+# Autolink written between angle brackets: <prot://url>
+# NOTE(review): `mail?` matches "mai" or "mail", both followed by "://";
+# presumably "mailto" was intended -- confirm.
+MD_URL_RE = re.compile(r"\<(?P<prot>http|https|ftp|mail?)\:\/\/(?P<url>.*?)\>")
+
+# Label tag: {#label}
+MD_TAG_RE = re.compile(r"[{]\#(?P<label>.*?)[}]")
+
 PYTHON_RE = re.compile(r"(?s)<python>(.*?)</python>")
 QUOTATIONS_RE = re.compile(r"\"([\w\s,]*?)\"")
 REF_RE = re.compile(r'<ref label="(?P<label>.*?)">')
@@ -96,6 +108,72 @@
 TESTCASE_OUT_RE = re.compile(r"^\s*([:|=])(.*)$")
 
 
+# TODO: Check whether it would be better to go in the opposite direction:
+# to use a reStructuredText- or Markdown-compliant syntax everywhere.
+def markdown_to_native(text):
+    """
+    Convert common markdown syntax into the Mathics XML native
+    documentation syntax.
+
+    The conversions are applied in this order:
+
+    * Python code fences are wrapped as ``<python>...</python>`` through
+      ``pre_sub`` and put back by ``post_sub`` at the end (presumably so
+      that the substitutions below do not touch the code -- confirm
+      against those helpers),
+    * RsT constructs are converted by ``rst_to_native``,
+    * ``![title](src){#label}`` and ``![title](src)`` become
+      ``<imgpng ...>`` tags,
+    * ``[label](url)`` becomes ``<url>:label:url</url>``,
+    * ``<prot://url>`` becomes ``<url>prot://url</url>``,
+    * ``{#label}`` becomes a LaTeX-style label command.
+
+    :param text: the documentation text to convert.
+    :return: the converted text.
+    """
+    text, post_substitutions = pre_sub(
+        MD_PYTHON_RE, text, lambda m: "<python>%s</python>" % m.group(1)
+    )
+
+    # First, convert some RsT syntax into the native format.
+    text = rst_to_native(text)
+
+    def repl_figs_with_label(match):
+        src = match.group("src")
+        caption = match.group("title")
+        label = match.group("label")
+        return f"<imgpng src='{src}' title='{caption}' label='{label}'>"
+
+    # Labelled images go first: MD_IMG_RE would otherwise consume the
+    # "![title](src)" prefix and leave the "{#label}" part behind.
+    text = MD_IMG_LABEL_RE.sub(repl_figs_with_label, text)
+
+    def repl_figs(match):
+        src = match.group("src")
+        caption = match.group("title")
+        return f"<imgpng src='{src}' title='{caption}'>"
+
+    text = MD_IMG_RE.sub(repl_figs, text)
+
+    def repl_ref(match):
+        label = match.group("label")
+        reference = match.group("url")
+        return f"<url>:{label}:{reference}</url>"
+
+    # Images are already converted at this point, so only plain links
+    # are left for MD_REF_RE.
+    text = MD_REF_RE.sub(repl_ref, text)
+
+    def repl_url(match):
+        prot = match.group("prot")
+        reference = match.group("url")
+        return f"<url>{prot}://{reference}</url>"
+
+    text = MD_URL_RE.sub(repl_url, text)
+
+    def repl_labels(match):
+        return " \\label{" + match.group("label") + "} "
+
+    text = MD_TAG_RE.sub(repl_labels, text)
+
+    # No second pass with MD_PYTHON_RE here: the former placeholder
+    # callback returned None, which makes re.sub (in CPython) drop the
+    # matched text altogether.
+    return post_sub(text, post_substitutions)
+
+
 def get_results_by_test(test_expr: str, full_test_key: list, doc_data: dict) -> dict:
     """
     Sometimes test numbering is off, either due to bugs or changes since the
@@ -214,10 +292,16 @@ def parse_docstring_to_DocumentationEntry_items(
 
     # Remove commented lines.
     doc = filter_comments(doc).strip(r"\s")
+    # Normalize the indent level.
+    text = normalize_indent(doc)
 
     # Remove leading <dl>...</dl>
     # doc = DL_RE.sub("", doc)
 
+    # Convert markdown syntax to XML native syntax.
+    # TODO: See if it wouldn't be better to go in the opposite way:
+    # convert the native syntax to a common-markdown compliant syntax.
+
     # pre-substitute Python code because it might contain tests
     doc, post_substitutions = pre_sub(
         PYTHON_RE, doc, lambda m: "<python>%s</python>" % m.group(1)
@@ -451,7 +535,7 @@ class DocText:
     """
 
     def __init__(self, text):
-        self.text = text
+        # Markdown and RsT constructs are translated into the native
+        # markup once, when the DocText is created.
+        self.text = markdown_to_native(text)
 
     def __str__(self) -> str:
+        """Return the text stored in this object."""
         return self.text

diff --git a/mathics/doc/documentation/1-Manual.mdoc b/mathics/doc/documentation/1-Manual.mdoc
@@ -10,9 +10,9 @@ The programming language and built-in functions of \Mathics tries to match the \
 
 \Mathics is in no way affiliated or supported by \Wolfram. \Mathics will probably never have the power to compete with \Mathematica in industrial applications; it is a free alternative though. It also invites community development at all levels.
 
-See the <url>:installation instructions: https://mathics-development-guide.readthedocs.io/en/latest/installing/index.html</url> for the most recent instructions for installing from PyPI, or the source.
+See the [installation instructions](https://mathics-development-guide.readthedocs.io/en/latest/installing/index.html) for the most recent instructions for installing from PyPI, or the source.
 
-For implementation details, please refer to the <url>:Developers Guide:https://mathics-development-guide.readthedocs.io/en/latest/</url>.
+For implementation details, please refer to the Developers Guide at <https://mathics-development-guide.readthedocs.io/en/latest/>.
 
 <section title="Why try to recreate Wolfram Language?">
-\Mathematica is great, but it a couple of disadvantages.
+\Mathematica is great, but it has a couple of disadvantages.
@@ -30,10 +30,10 @@ However, even if you are willing to pay hundreds of dollars for the software, yo
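-\Mathics combines the beauty of \Mathematica implemented in an open-source environment written in Python. The Python ecosystem includes libraries and toos like:
+\Mathics combines the beauty of \Mathematica implemented in an open-source environment written in Python. The Python ecosystem includes libraries and tools like: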
 
 <ul>
-  <li><url>:mpmath: https://mpmath.org/</url> for floating-point arithmetic with arbitrary precision,
-  <li><url>:NumPy: https://numpy.org</url> for numeric computation,
-  <li><url>:SymPy: https://sympy.org</url> for symbolic mathematics, and
-  <li><url>:SciPy: https://www.scipy.org/</url> for Scientific calculations.
+  <li>[mpmath](https://mpmath.org/) for floating-point arithmetic with arbitrary precision,
+  <li>[NumPy](https://numpy.org) for numeric computation,
+  <li>[SymPy](https://sympy.org) for symbolic mathematics, and
+  <li>optionally [SciPy](https://www.scipy.org/) for Scientific calculations.
 </ul>
 
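-Performance of \Mathics is not, right now, practical in large-scale projects and calculations. However can be used as a tool for exploration and education.
+Performance of \Mathics is not, right now, practical in large-scale projects and calculations. However, it can be used as a tool for exploration and education.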
@@ -53,9 +53,10 @@ Outside of the "core" \Mathics kernel (which has a only primitive command-line i
 <ul>
   <li>a <url>:command-line interface:https://pypi.org/project/mathicsscript/</url> using either <url>:prompt-toolkit:https://python-prompt-toolkit.readthedocs.io/en/master/</url>, or GNU Readline
   <li>a <url>:Django-based web server:https://pypi.org/project/Mathics-Django/</url>
-  <li>a <url>:Mathics3 module for Graphs:https://pypi.org/project/pymathics-graph/</url> (via <url>:NetworkX:https://networkx.org/</url>),
-  <li>a <url>:Mathics3 module for NLP:https://pypi.org/project/pymathics-natlang/</url> (via <url>:nltk:https://www.nltk.org/</url>, <url>:spacy:https://spacy.io/</url>, and others)
-  <li>a <url>:A docker container:https://hub.docker.com/r/mathicsorg/mathics</url> which bundles all of the above
+  <li>a command-line interface using either prompt-toolkit, or GNU Readline
+  <li>a [Mathics3 module for Graphs](https://pypi.org/project/pymathics-graph/) (via [NetworkX](https://networkx.org/)),
+  <li>a [Mathics3 module for NLP](https://pypi.org/project/pymathics-natlang/) (via [nltk](https://www.nltk.org/), [spacy](https://spacy.io/), and others)
+  <li>a [Docker container](https://hub.docker.com/r/mathicsorg/mathics) which bundles all of the above
 </ul>
 
 </section>
@@ -238,8 +239,7 @@ The relative uncertainty of '3.1416`3' is 10^-3. It is numerically equivalent, i
   >> 3.1416`3 == 3.1413`4
    = True
 
-
-We can get the precision of the number by using the \Mathics Built-in function <url>:'Precision': /doc/reference-of-built-in-symbols/atomic-elements-of-expressions/representation-of-numbers/precision</url>:
+We can get the precision of the number by using the \Mathics Built-in function <url>:'Precision': /doc/reference-of-built-in-symbols/atomic-elements-of-expressions/precision</url>:
 
     >> Precision[3.1413`4]
      = 4.

diff --git a/mathics/doc/rst_parser.py b/mathics/doc/rst_parser.py
@@ -0,0 +1,161 @@
+"""
+Minimal parser for ReStructuredText
+
+This module provides compatibility support for RsT syntax
+in the Mathics documentation system.
+
+We cannot use a standard library like docutils or sphinx because,
+for now, the documentation is written in a Mathics-specific syntax,
+and for a while both syntaxes will have to coexist.
+
+"""
+
+import re
+from typing import Optional
+
+# An RsT directive: a line starting with ".." and whitespace (the rest of
+# the line is captured as the head), followed by one or more lines that
+# are either indented with spaces or empty (captured as the block).
+RST_BLOCK_RE = re.compile(r"^\.\.\s+(.*)\n((?:^[ ]+.*\n|^\n)+)", re.MULTILINE)
+# An RsT hyperlink, `label <url>`_ ; a second trailing underscore, when
+# present, is captured in the "under" group.
+RST_URL_RE = re.compile(r"`(?P<label>.*?)\<(?P<url>.*?)\>`_(?P<under>_?)")
+
+
+# Maps a directive name to the function converting that directive. Each
+# function is called as f(head, block) by rst_to_native.
+PROCESS_RST_BLOCK = {}
+
+
+def indent_level(line_str: str) -> int:
+    """
+    Return how many whitespace characters precede the first
+    non-blank character of `line_str`.
+
+    A line that is empty or contains only whitespace reports 80.
+    """
+    stripped = line_str.lstrip()
+    return len(line_str) - len(stripped) if stripped else 80
+
+
+def normalize_indent(text: str, omit_first_line: bool = True) -> str:
+    """
+    Remove the indentation shared by the lines of `text`.
+
+    A docstring is usually indented as deep as the code it belongs to;
+    removing that common indentation makes the text easier to process.
+
+    The first line of a docstring normally starts right after the
+    opening quotes, so its indentation says nothing about the rest.
+    With `omit_first_line` set, that line is left out when the common
+    indentation is computed and is copied to the result unchanged.
+
+    The text is returned as is when it has a single line, when no
+    common indentation is found, or when that indentation is 80 or more.
+    """
+    all_lines = text.splitlines()
+    if len(all_lines) <= 1:
+        return text
+
+    body = all_lines[1:] if omit_first_line else all_lines
+
+    # Blank lines report 80, so they only win when every line is blank.
+    common = min(map(indent_level, body))
+    if common == 0 or common >= 80:
+        return text
+
+    dedented = [line[common:] for line in body]
+    if omit_first_line:
+        dedented.insert(0, all_lines[0])
+    return "\n".join(dedented)
+
+
+def process_image_block(head: str, block: str) -> str:
+    """
+    Convert an RsT ``image`` directive into a native ``<imgpng ...>`` tag.
+
+    :param head: the directive line without the leading dots,
+        e.g. ``"image:: figure.png"``.
+    :param block: the lines following the directive; each option is
+        written as ``:key: value``. Lines not in that form are ignored.
+    :return: the tag, with ``src`` first and one attribute per option,
+        e.g. ``<imgpng src='figure.png' width='100'>``.
+    """
+    # partition() instead of split("::")[1]: a head without "::" gives an
+    # empty source instead of raising IndexError. The source is stripped
+    # because the directive is written as "image:: file".
+    src = head.partition("::")[2].strip()
+    attributes = [f"src='{src}'"]
+    for line in block.splitlines():
+        option = line.strip()
+        if not option.startswith(":"):
+            continue
+        # Split at the first ":" after the key only, so that values which
+        # contain colons themselves (URLs, for instance) are kept.
+        key, sep, val = option[1:].partition(":")
+        if not sep or not key:
+            continue
+        attributes.append(f"{key}='{val.strip()}'")
+    return "<imgpng " + " ".join(attributes) + ">"
+
+
+PROCESS_RST_BLOCK["image"] = process_image_block
+
+
+def process_code_block(head: str, block: str) -> Optional[str]:
+    """
+    Convert an RsT ``code`` directive into the native syntax.
+
+    :param head: the directive line without the leading dots,
+        e.g. ``"code:: python"``.
+    :param block: the indented lines following the directive.
+    :return: ``<python>...</python>`` for Python code, a ``>>`` test
+        entry for Mathics code, and ``None`` when the block is blank or
+        the language is missing or not supported.
+    """
+    if block.strip() == "":
+        return None
+
+    # partition() never raises: a head without "::" just has no language.
+    lang = head.partition("::")[2].strip().lower()
+
+    if lang == "python":
+        lines = block.splitlines()
+        if len(lines) == 1:
+            # normalize_indent leaves one-line texts alone, so the
+            # directive indentation is removed here.
+            return f"""<python>{lines[0].strip()}</python>"""
+        code = normalize_indent(block, False)
+        return f"""<python>\n{code}</python>"""
+    if lang == "mathics":
+        # Continuation lines are aligned under the expression that
+        # follows the 7-character "    >> " prompt.
+        indentation = 7 * " "
+        lines = [
+            indentation + line.lstrip() if idx else line.lstrip()
+            for idx, line in enumerate(block.splitlines())
+        ]
+        return "    >> " + "\n".join(lines)
+    return None
+
+
+PROCESS_RST_BLOCK["code"] = process_code_block
+
+
+# TODO: Check whether it would be better to go in the opposite direction:
+# to use a reStructuredText- or Markdown-compliant syntax everywhere.
+def rst_to_native(text):
+    """
+    Convert RsT syntax to the Mathics XML native documentation syntax.
+
+    Two constructs are handled:
+
+    * hyperlinks written as `label <url>`_ (or with two trailing
+      underscores) become ``<url>:label:url</url>``, or ``<url>url</url>``
+      when the label is empty;
+    * directives (``.. name:: ...`` followed by an indented block) are
+      handed to the function registered under ``name`` in
+      ``PROCESS_RST_BLOCK``.
+
+    A directive with no registered function, or whose function returns
+    ``None``, is left in the text unchanged.
+
+    :param text: the documentation text to convert.
+    :return: the converted text.
+    """
+
+    def repl_url(match):
+        label = match.group("label").strip()
+        url = match.group("url").strip()
+        if not label:
+            # Nothing to show as link text: emit the bare URL.
+            return f"<url>{url}</url>"
+        return f"<url>:{label}:{url}</url>"
+
+    text = RST_URL_RE.sub(repl_url, text)
+
+    def repl_block(match):
+        head = match.group(1)
+        # RST_BLOCK_RE only captures indented or empty lines, so the whole
+        # captured block belongs to the directive.
+        block = match.group(2)
+        block_type = head.split(" ")[0].split("::")[0].strip()
+
+        handler = PROCESS_RST_BLOCK.get(block_type)
+        if handler is None:
+            # Unknown directive (or an RsT comment): keep the source text.
+            return match.group(0)
+
+        result = handler(head, block)
+        if result is None:
+            # Returning None from the callback would make re.sub (in
+            # CPython) drop the matched text; keep it instead.
+            return match.group(0)
+        return result + "\n"
+
+    text = RST_BLOCK_RE.sub(repl_block, text)
+
+    return text
diff --git a/test/doc/test_common.py b/test/doc/test_common.py
@@ -218,3 +218,30 @@ def test_load_mathics_documentation():
                 for subsection in section.subsections:
                     assert subsection.title not in visited_subsections
                     visited_subsections.add(subsection.title)
+
+
+def test_doc_parser():
+    """
+    Check that markdown images, labelled images, Python fences and
+    links in a docstring are converted to the native syntax.
+    """
+    location = ("part example", "chapter example", "section example")
+    cases = (
+        ("![figure](figure.png)", "<imgpng src='figure.png' title='figure'>"),
+        (
+            "![figure](figure.png){#figure-label}",
+            "<imgpng src='figure.png' title='figure' label='figure-label'>",
+        ),
+        # The "g[i](x)" inside the code must not be taken for a link.
+        (
+            "\n`` python\ndef f(x):\n   g[i](x)\n    return x + 2\n``\n",
+            "<python>def f(x):\n   g[i](x)\n    return x + 2\n</python>",
+        ),
+        ("[url de destino](/doc/algo)", "<url>:url de destino:/doc/algo</url>"),
+    )
+    for markdown, expected in cases:
+        items = parse_docstring_to_DocumentationEntry_items(
+            markdown, DocTests, DocTest, DocText, location
+        )
+        assert items[0].text == expected