## Mistletoe

In [15]:
%load_ext autoreload
%autoreload 2

In [21]:
import mistletoe
from mistletoe import Document, HtmlRenderer
from mistletoe.ast_renderer import ASTRenderer
from mistletoe.contrib.mathjax import MathJaxRenderer
from mistletoe.latex_renderer import LaTeXRenderer
from mistletoe import block_token, span_token
from mistletoe.block_token import tokenize, BlockToken
from mistletoe.span_token import SpanToken

from rich import print

In [22]:
with open("text.md", "r+") as f:
    content = f.read()

print(content)

In [23]:
        
class CustomHtmlRenderer(HtmlRenderer, LaTeXRenderer):
    def __init__(self):
        super().__init__(TripleCommaDiv, HTMLInMD, process_html_tokens=True)

    def render_math(self, token):
        """
        Convert single dollar sign enclosed math expressions to the ``\\(...\\)`` syntax, to support
        the default MathJax settings which ignore single dollar signs as described at
        https://docs.mathjax.org/en/latest/basic/mathematics.html#tex-and-latex-input.
        """
        if token.content.startswith('$$'):
            return self.render_raw_text(token)
        # return '\\({}\\)'.format(self.render_raw_text(token).strip('$'))
        return self.render_raw_text(token)
    
    def render_triple_comma_div(self, token):
        inner = self.render_inner(token)
        return f'<div class="{token.classes}">{inner}</div>'
    
    def render_html_in_md(self, token):
        return token.children

    # def escape_html_text(self, s: str) -> str:
    #     """
    #     Like `html.escape()`, but this  looks into the current rendering options
    #     to decide which of the quotes (double, single, or both) to escape.

    #     Intended for escaping text content. To escape content of an attribute,
    #     simply call `html.escape()`.
    #     """
    #     # print(f"S = {s}")
    #     # s = s.replace("&", "&amp;")  # Must be done first!
    #     # s = s.replace("<", "&lt;")
    #     # s = s.replace(">", "&gt;")
    #     if self.html_escape_double_quotes:
    #         s = s.replace('"', "&quot;")
    #     if self.html_escape_single_quotes:
    #         s = s.replace('\'', "&#x27;")
        
    #     if s[-1] == "\\":
    #         s+= "\\"
        
    #     # print(f"S = {s}")
        
    #     return s
    
    # @staticmethod
    # def render_line_break(token: span_token.LineBreak) -> str:
    #     # print(token)
    #     return '\n' # if token.soft else '<br />\n'

class HTMLInMD(BlockToken):
    @staticmethod
    def start(line):
        return line.startswith("!!!")

    @staticmethod
    def read(lines):
        first_line = next(lines)
        delimiter = "!!!"
        child_lines = []
        for line in lines:
            # print(line)
            if line.startswith(delimiter):
                if line[len(delimiter)] != ":":
                    # End block found:
                    break
            child_lines.append(line)
        # children = tokenize(child_lines)
        children = "".join(child_lines)
        return children

    def __init__(self, match):
        self.children = match

class TripleCommaDiv(BlockToken):
    @staticmethod
    def start(line):
        return line.startswith(":::")

    @staticmethod
    def read(lines):
        first_line = next(lines)
        # Get class of the div
        classes = first_line.lstrip(":").strip()
        delimiter = ":::"
        child_lines = []
        for line in lines:
            if line.startswith(delimiter):
                if line[len(delimiter)] != ":":
                    # End block found:
                    break
            child_lines.append(line)
        children = tokenize(child_lines)
        return classes, children

    def __init__(self, match):
        self.classes, self.children = match

In [25]:

print(mistletoe.markdown(content, CustomHtmlRenderer))

In [20]:
# with open("text1.md", "r+") as f:
#     content = f.read()

# print(content)

# print(mistletoe.markdown(content, MathJaxRenderer))


In [245]:
doc = Document(content) #mistletoe.markdown(content, ASTRenderer))
print(doc.children)

In [93]:
doc.children[1].children

[<mistletoe.span_token.RawText content='$$' at 0x72a552b6c2f0>,
 <mistletoe.span_token.LineBreak content='' soft=True at 0x72a552b6dd30>,
 <mistletoe.span_token.RawText content='\\begin{aligned}' at 0x72a552b6de50>,
 <mistletoe.span_token.LineBreak content='' soft=True at 0x72a552b6eed0>,
 <mistletoe.span_token.RawText content='f &=  ma \\' at 0x72a552b6cb00>,
 <mistletoe.span_token.LineBreak content='\\' soft=False at 0x72a552b6d820>,
 <mistletoe.span_token.RawText content='\\end{aligned}' at 0x72a552b6fe90>,
 <mistletoe.span_token.LineBreak content='' soft=True at 0x72a552b6e900>,
 <mistletoe.span_token.RawText content='$$' at 0x72a552b6e300>]

In [95]:
print(mistletoe.markdown(content, CustomHtmlRenderer))



AttributeError: 'CustomHtmlRenderer' object has no attribute 'render_html_in_md'

In [318]:
import re
# s = "\begin{aligned}"
S = r"f &=  ma \\"
re.sub('\\\\$', '\\', S)

error: bad escape (end of pattern) at position 0