Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RsT support in docstrings #1027

Draft
wants to merge 38 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
a4478bb
* more docstrings and annotations
mmatera Feb 11, 2024
9f462e0
Update docpipeline.py
mmatera Mar 13, 2024
00ba03e
black
mmatera Mar 14, 2024
42c160a
removing trailing code
mmatera Mar 14, 2024
4f89b07
* more docstrings and annotations
mmatera Feb 11, 2024
5f17c0e
Update docpipeline.py
mmatera Mar 13, 2024
4674e20
split mathics.doc.common_doc
mmatera Mar 14, 2024
1c5e6fb
mathics.doc.doctests->mathics.doc.doc_entries and fix pytests
mmatera Mar 14, 2024
612a47a
DRY test_section_in_chapter. Include the tests in chapter.doc.
mmatera Mar 14, 2024
2ab499b
black
mmatera Mar 14, 2024
d86487f
restore docpipeline break with the parameter -x
mmatera Mar 14, 2024
4445a12
test chapters by name
mmatera Mar 14, 2024
00d74f6
more doctests
mmatera Mar 16, 2024
993c0ad
split common_doc
mmatera Mar 16, 2024
b3777f1
django compatibility
mmatera Mar 16, 2024
cd9bb92
issue with subsection_re
mmatera Mar 16, 2024
1f274e8
more moving code around
mmatera Mar 16, 2024
47be542
more
mmatera Mar 16, 2024
715eb6c
Merge branch 'master' into more_docpipeline_fixes
mmatera Mar 24, 2024
2f6eabb
black
mmatera Mar 25, 2024
3750d5f
Adding support for markdown notation in figures and references. This …
mmatera Mar 25, 2024
d3dba50
markdown_to_native after processing tests
mmatera Mar 26, 2024
ae20c96
rst parser
mmatera Mar 27, 2024
76a894b
docstring
mmatera Mar 27, 2024
7339c7d
removing prints
mmatera Mar 27, 2024
30c56a5
* more docstrings and annotations
mmatera Feb 11, 2024
d9f64fa
split mathics.doc.common_doc
mmatera Mar 14, 2024
36cc410
mathics.doc.doctests->mathics.doc.doc_entries and fix pytests
mmatera Mar 14, 2024
af690a7
DRY test_section_in_chapter. Include the tests in chapter.doc.
mmatera Mar 14, 2024
e31d807
split common_doc
mmatera Mar 16, 2024
cd0d92c
django compatibility
mmatera Mar 16, 2024
cb203ce
merging more_docpipeline_fixes_2_revert
mmatera Mar 30, 2024
ceb48c0
More docpipeline fixes 2 revert (#1028)
mmatera Mar 30, 2024
bbb19cd
merge
mmatera Mar 30, 2024
b0bb40c
Merge branch 'more_docpipeline_fixes' into RsT_urls
mmatera Mar 30, 2024
010b908
Merge branch 'master' into RsT_urls
mmatera Aug 3, 2024
3d68b51
merge
mmatera Aug 3, 2024
6aa7b04
Merge branch 'master' into RsT_urls
mmatera Oct 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 85 additions & 1 deletion mathics/doc/doc_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from typing import Callable, List, Optional

from mathics.core.evaluation import Message, Print
from mathics.doc.rst_parser import normalize_indent, rst_to_native

# Used for getting test results by test expression and chapter/section information.
test_result_map = {}
Expand Down Expand Up @@ -69,6 +70,17 @@
LIST_RE = re.compile(r"(?s)<(?P<tag>ul|ol)>(?P<content>.*?)</(?P=tag)>")
MATHICS_RE = re.compile(r"(?<!\\)\'(.*?)(?<!\\)\'")


MD_IMG_RE = re.compile(r"!\[(?P<title>.*?)\]\((?P<src>.*?)\)")
MD_IMG_LABEL_RE = re.compile(r"!\[(?P<title>.*?)\]\((?P<src>.*?)\)\{\#(?P<label>.*?)\}")
MD_PYTHON_RE = re.compile(
r"``\s*[pP]ython\n(?P<pythoncode>.*?)``", re.DOTALL | re.MULTILINE
)
MD_REF_RE = re.compile(r"\[(?P<label>.*?)\]\((?P<url>.*?)\)")
MD_URL_RE = re.compile(r"\<(?P<prot>http|https|ftp|mail?)\:\/\/(?P<url>.*?)\>")

MD_TAG_RE = re.compile(r"[{]\#(?P<label>.*?)[}]")

PYTHON_RE = re.compile(r"(?s)<python>(.*?)</python>")
QUOTATIONS_RE = re.compile(r"\"([\w\s,]*?)\"")
REF_RE = re.compile(r'<ref label="(?P<label>.*?)">')
Expand Down Expand Up @@ -96,6 +108,72 @@
TESTCASE_OUT_RE = re.compile(r"^\s*([:|=])(.*)$")


# TODO: Check if it wouldn't be better to go in the opposite direction,
# to have a ReStructured markdown compliant syntax everywhere.
def markdown_to_native(text: str) -> str:
    """
    Convert common markdown syntax into the Mathics XML native
    documentation syntax.

    The following constructs are rewritten:

    * fenced Python code (``MD_PYTHON_RE``)   -> ``<python>...</python>``
    * RsT links and directive blocks          -> see ``rst_to_native``
    * ``![title](src){#label}``               -> ``<imgpng ... label=...>``
    * ``![title](src)``                       -> ``<imgpng ...>``
    * ``[label](url)``                        -> ``<url>:label:url</url>``
    * ``<prot://url>``                        -> ``<url>prot://url</url>``
    * ``{#label}``                            -> ``\\label{label}``

    The order of the substitutions matters: labelled images must be
    handled before plain images, and images before references, because
    each later pattern also matches a part of the earlier constructs.
    """
    # Park the Python code first, so that none of the substitutions
    # below can alter it. It is restored by ``post_sub`` at the end.
    # NOTE(review): this assumes ``pre_sub`` replaces every match of
    # MD_PYTHON_RE by a placeholder that ``post_sub`` restores, which is
    # why no second MD_PYTHON_RE pass is done here -- confirm against
    # their definitions.
    text, post_substitutions = pre_sub(
        MD_PYTHON_RE, text, lambda m: "<python>%s</python>" % m.group(1)
    )

    # Convert some RsT syntax into the native format.
    text = rst_to_native(text)

    def repl_figs_with_label(match):
        src = match.group("src")
        caption = match.group("title")
        label = match.group("label")
        return f"<imgpng src='{src}' title='{caption}' label='{label}'>"

    def repl_figs(match):
        src = match.group("src")
        caption = match.group("title")
        return f"<imgpng src='{src}' title='{caption}'>"

    def repl_ref(match):
        return f"<url>:{match.group('label')}:{match.group('url')}</url>"

    def repl_url(match):
        return f"<url>{match.group('prot')}://{match.group('url')}</url>"

    def repl_labels(match):
        # Surrounding blanks keep the label apart from adjacent text.
        return f" \\label{{{match.group('label')}}} "

    for pattern, replacement in (
        (MD_IMG_LABEL_RE, repl_figs_with_label),
        (MD_IMG_RE, repl_figs),
        (MD_REF_RE, repl_ref),
        (MD_URL_RE, repl_url),
        (MD_TAG_RE, repl_labels),
    ):
        text = pattern.sub(replacement, text)

    return post_sub(text, post_substitutions)


def get_results_by_test(test_expr: str, full_test_key: list, doc_data: dict) -> dict:
"""
Sometimes test numbering is off, either due to bugs or changes since the
Expand Down Expand Up @@ -214,10 +292,16 @@ def parse_docstring_to_DocumentationEntry_items(

# Remove commented lines.
doc = filter_comments(doc).strip(r"\s")
# Normalize the indent level.
text = normalize_indent(doc)

# Remove leading <dl>...</dl>
# doc = DL_RE.sub("", doc)

# Convert markdown syntax to XML native syntax.
# TODO: See if it wouldn't be better to go in the opposite way:
# convert the native syntax to a common-markdown compliant syntax.

# pre-substitute Python code because it might contain tests
doc, post_substitutions = pre_sub(
PYTHON_RE, doc, lambda m: "<python>%s</python>" % m.group(1)
Expand Down Expand Up @@ -451,7 +535,7 @@ class DocText:
"""

def __init__(self, text):
self.text = text
self.text = markdown_to_native(text)

def __str__(self) -> str:
return self.text
Expand Down
22 changes: 11 additions & 11 deletions mathics/doc/documentation/1-Manual.mdoc
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ The programming language and built-in functions of \Mathics tries to match the \

\Mathics is in no way affiliated with or supported by \Wolfram. \Mathics will probably never have the power to compete with \Mathematica in industrial applications; it is a free alternative though. It also invites community development at all levels.

See the <url>:installation instructions: https://mathics-development-guide.readthedocs.io/en/latest/installing/index.html</url> for the most recent instructions for installing from PyPI, or the source.
See the [installation instructions](https://mathics-development-guide.readthedocs.io/en/latest/installing/index.html) for the most recent instructions for installing from PyPI, or the source.

For implementation details, please refer to the <url>:Developers Guide:https://mathics-development-guide.readthedocs.io/en/latest/</url>.
For implementation details please refer to <https://mathics-development-guide.readthedocs.io/en/latest/>.

<section title="Why try to recreate Wolfram Language?">
\Mathematica is great, but it has a couple of disadvantages.
Expand All @@ -30,10 +30,10 @@ However, even if you are willing to pay hundreds of dollars for the software, yo
\Mathics combines the beauty of \Mathematica implemented in an open-source environment written in Python. The Python ecosystem includes libraries and tools like:

<ul>
<li><url>:mpmath: https://mpmath.org/</url> for floating-point arithmetic with arbitrary precision,
<li><url>:NumPy: https://numpy.org</url> for numeric computation,
<li><url>:SymPy: https://sympy.org</url> for symbolic mathematics, and
<li><url>:SciPy: https://www.scipy.org/</url> for Scientific calculations.
<li>[mpmath](https://mpmath.org/) for floating-point arithmetic with arbitrary precision,
<li>[numpy](https://numpy.org/numpy) for numeric computation,
<li>[SymPy](https://sympy.org) for symbolic mathematics, and
<li>optionally [SciPy](https://www.scipy.org/) for Scientific calculations.
</ul>

Performance of \Mathics is not, right now, practical in large-scale projects and calculations. However, it can be used as a tool for exploration and education.
Expand All @@ -53,9 +53,10 @@ Outside of the "core" \Mathics kernel (which has a only primitive command-line i
<ul>
<li>a <url>:command-line interface:https://pypi.org/project/mathicsscript/</url> using either <url>:prompt-toolkit:https://python-prompt-toolkit.readthedocs.io/en/master/</url>, or GNU Readline
<li>a <url>:Django-based web server:https://pypi.org/project/Mathics-Django/</url>
<li>a <url>:Mathics3 module for Graphs:https://pypi.org/project/pymathics-graph/</url> (via <url>:NetworkX:https://networkx.org/</url>),
<li>a <url>:Mathics3 module for NLP:https://pypi.org/project/pymathics-natlang/</url> (via <url>:nltk:https://www.nltk.org/</url>, <url>:spacy:https://spacy.io/</url>, and others)
<li>a <url>:A docker container:https://hub.docker.com/r/mathicsorg/mathics</url> which bundles all of the above
<li>a command-line interface using either prompt-toolkit, or GNU Readline
<li>a [Mathics3 module for Graphs](https://pypi.org/project/pymathics-graph/) (via [NetworkX](https://networkx.org/)),
<li>a [Mathics3 module for NLP](https://pypi.org/project/pymathics-natlang/) (via [nltk](https://www.nltk.org/), [spacy](https://spacy.io/), and others)
<li>a [Docker container](https://hub.docker.com/r/mathicsorg/mathics) which bundles all of the above
</ul>

</section>
Expand Down Expand Up @@ -238,8 +239,7 @@ The relative uncertainty of '3.1416`3' is 10^-3. It is numerically equivalent, i
>> 3.1416`3 == 3.1413`4
= True


We can get the precision of the number by using the \Mathics Built-in function <url>:'Precision': /doc/reference-of-built-in-symbols/atomic-elements-of-expressions/representation-of-numbers/precision</url>:
We can get the precision of the number by using the \Mathics Built-in function <url>:'Precision': /doc/reference-of-built-in-symbols/atomic-elements-of-expressions/precision</url>:

>> Precision[3.1413`4]
= 4.
Expand Down
161 changes: 161 additions & 0 deletions mathics/doc/rst_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
"""
Minimal parser for ReStructuredText

This module provides a compatibility support for RsT syntax
in the Mathics documentation system.

We cannot use a standard library like docutils or sphinx since,
for now, the documentation is written in a Mathics-specific syntax,
and for a while, both syntaxes will have to coexist.

"""

import re
from typing import Optional

RST_BLOCK_RE = re.compile(r"^\.\.\s+(.*)\n((?:^[ ]+.*\n|^\n)+)", re.MULTILINE)
RST_URL_RE = re.compile(r"`(?P<label>.*?)\<(?P<url>.*?)\>`_(?P<under>_?)")


PROCESS_RST_BLOCK = {}


def indent_level(line_str: str) -> int:
    """
    Return the number of whitespace characters at the start of
    `line_str`.

    Empty and whitespace-only lines carry no indentation information,
    so for them the value 80 is returned as a "no constraint" marker.
    """
    stripped = line_str.lstrip()
    return len(line_str) - len(stripped) if stripped else 80


def normalize_indent(text: str, omit_first_line: bool = True) -> str:
    """
    Normalize the indentation level of the text.

    Usually, a docstring has an indentation equal to that of the code
    where it belongs. For processing the documentation, it is useful
    to remove that common indentation.

    Usually, in a docstring, the first line has a different indentation
    level just because its "indentation" lies before the quotes.
    `omit_first_line` controls whether that line is excluded when the
    common indentation is computed; an omitted first line is copied to
    the result unchanged.

    Blank (empty or whitespace-only) lines are ignored when computing
    the common indentation. If there is nothing to remove, `text` is
    returned unchanged; otherwise the lines are joined back with
    ``"\\n"``, without a trailing newline.
    """
    lines = text.splitlines()
    body = lines[1:] if omit_first_line else lines

    # Only lines with visible content constrain the common indentation.
    widths = [len(line) - len(line.lstrip()) for line in body if line.strip()]
    margin = min(widths, default=0)
    if margin == 0:
        return text

    # Slicing a line shorter than the margin (a blank line) yields "".
    dedented = [line[margin:] for line in body]
    if omit_first_line:
        dedented.insert(0, lines[0])
    return "\n".join(dedented)


def process_image_block(head: str, block: str) -> str:
    """
    Convert an RsT ``image`` directive into a native ``<imgpng ...>`` tag.

    `head` is the directive line without the leading ``..`` (for
    example ``image:: figure.png``); the text after ``::`` is used as
    the ``src`` attribute. `block` is the indented body of the
    directive; each line of the form ``:key: value`` adds the attribute
    ``key='value'`` to the tag. Lines that do not have that shape are
    ignored.
    """
    head_parts = head.split("::")
    src = head_parts[1].strip() if len(head_parts) > 1 else ""
    attributes = [f"src='{src}'"]
    for line in block.splitlines():
        # Split on the first two colons only, so that values which
        # contain colons themselves (like URLs) are kept whole.
        fields = line.strip().split(":", 2)
        if len(fields) != 3:
            continue
        _, key, val = fields
        attributes.append(f"{key}='{val.strip()}'")
    return f"<imgpng {' '.join(attributes)}>"


PROCESS_RST_BLOCK["image"] = process_image_block


def process_code_block(head: str, block: str) -> Optional[str]:
    """
    Convert an RsT ``code`` directive into the native syntax.

    `head` is the directive line without the leading ``..`` (for
    example ``code:: python``); the text after ``::`` is the language.
    `block` is the indented body of the directive.

    Python code is wrapped in ``<python>...</python>``. Mathics code is
    turned into a ``>>`` test entry, with its continuation lines
    re-indented. ``None`` is returned when the block is empty, or when
    the language is missing or not supported.
    """
    if block.strip() == "":
        return None

    # A head without "::" has no language: treat it as unsupported
    # instead of failing.
    head_parts = head.split("::")
    lang = head_parts[1].strip().lower() if len(head_parts) > 1 else ""

    if lang == "python":
        lines = block.splitlines()
        if len(lines) == 1:
            return f"""<python>{lines[0]}</python>"""
        code = normalize_indent(block, False)
        return f"""<python>\n{code}</python>"""
    if lang == "mathics":
        # NOTE(review): the continuation indent is 7 columns wide while
        # the prompt prefix below is 4 characters wide -- confirm that
        # the prefix has the intended width.
        indentation = 7 * " "
        lines = [
            indentation + line.lstrip() if idx else line.lstrip()
            for idx, line in enumerate(block.splitlines())
        ]
        return " >> " + "\n".join(lines)
    return None


PROCESS_RST_BLOCK["code"] = process_code_block


# TODO: Check if it wouldn't be better to go in the opposite direction,
# to have a ReStructured markdown compliant syntax everywhere.
def rst_to_native(text: str) -> str:
    """
    Convert the supported RsT syntax into the Mathics XML
    native documentation syntax.

    Two kinds of constructs are rewritten:

    * hyperlinks matched by ``RST_URL_RE`` become ``<url>...</url>``
      entries;
    * directive blocks matched by ``RST_BLOCK_RE`` are passed to the
      handler registered for their type in ``PROCESS_RST_BLOCK``.

    Blocks whose directive type has no registered handler are left
    untouched. Blocks for which the handler returns ``None`` are removed
    from the text.
    """

    def repl_url(match):
        label = match.group("label").strip()
        url = match.group("url").strip()
        private = match.group("under") == "_"
        if label == "" and private:
            return f"<url>{url}</url>"
        return f"<url>:{label}:{url}</url>"

    text = RST_URL_RE.sub(repl_url, text)

    def repl_block(match):
        head = match.group(1)
        block = match.group(2)
        block_type = head.split(" ")[0].split("::")[0].strip()

        handler = PROCESS_RST_BLOCK.get(block_type)
        if handler is None:
            # Unknown directive (or an RsT comment): keep the text as is.
            return match.group(0)

        lines = block.splitlines()
        last_line = lines[-1]
        if last_line and last_line[0] != " ":
            # The last line is not part of the indented body: hand the
            # handler the body only, and put that line back afterwards.
            lines = lines[:-1]
            block = "\n".join(lines)
        else:
            last_line = ""

        result = handler(head, block)
        if result is None:
            # The handler has nothing to emit for this block: drop it.
            return ""
        return result + "\n" + last_line

    text = RST_BLOCK_RE.sub(repl_block, text)

    return text
27 changes: 27 additions & 0 deletions test/doc/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,3 +218,30 @@ def test_load_mathics_documentation():
for subsection in section.subsections:
assert subsection.title not in visited_subsections
visited_subsections.add(subsection.title)


def test_doc_parser():
    """
    Check that markdown constructs in a docstring are converted into
    the native documentation syntax.
    """
    doc_location = (
        "part example",
        "chapter example",
        "section example",
    )
    # Pairs of (markdown input, expected native text).
    cases = (
        ("![figure](figure.png)", "<imgpng src='figure.png' title='figure'>"),
        (
            "![figure](figure.png){#figure-label}",
            "<imgpng src='figure.png' title='figure' label='figure-label'>",
        ),
        (
            "\n`` python\ndef f(x):\n g[i](x)\n return x + 2\n``\n",
            "<python>def f(x):\n g[i](x)\n return x + 2\n</python>",
        ),
        ("[url de destino](/doc/algo)", "<url>:url de destino:/doc/algo</url>"),
    )
    for input_str, output_str in cases:
        items = parse_docstring_to_DocumentationEntry_items(
            input_str, DocTests, DocTest, DocText, doc_location
        )
        first_item = items[0]
        assert first_item.text == output_str
Loading