diff --git a/docs/source/topics/text-formatting.rst b/docs/source/topics/text-formatting.rst index 0f072c24a7..3ab0a2d95d 100644 --- a/docs/source/topics/text-formatting.rst +++ b/docs/source/topics/text-formatting.rst @@ -59,7 +59,23 @@ To strictly use this mode, pass :obj:`~pyrogram.enums.ParseMode.MARKDOWN` to the ~~strike~~ - > blockquote + >blockquote + + |>escaped blockquote + + >First line of multi line blockquote + >Block quotation continued + >Block quotation continued + >Block quotation continued + >The last line of the block quotation + + **> + The expandable block quotation started right after the previous block quotation + It is separated from the previous block quotation by expandable syntax + Expandable block quotation continued + Hidden by default part of the expandable block quotation started + Expandable block quotation continued + The last line of the expandable block quotation with the expandability mark<** `inline fixed-width code` @@ -80,7 +96,7 @@ To strictly use this mode, pass :obj:`~pyrogram.enums.ParseMode.MARKDOWN` to the .. 
code-block:: python - from pyrogram import enums + from pyrogram.enums import ParseMode await app.send_message( chat_id="me", @@ -96,8 +112,27 @@ To strictly use this mode, pass :obj:`~pyrogram.enums.ParseMode.MARKDOWN` to the "for i in range(10):\n" " print(i)" "```" + + ">blockquote\n" + + "|>escaped blockquote\n" + + ">First line of multi line blockquote\n" + ">Block quotation continued\n" + ">Block quotation continued\n" + ">Block quotation continued\n" + ">The last line of the block quotation\n" + + "**>\n" + "The expandable block quotation started right after the previous block quotation\n" + "It is separated from the previous block quotation by expandable syntax\n" + "Expandable block quotation continued\n" + "Hidden by default part of the expandable block quotation started\n" + "Expandable block quotation continued\n" + "The last line of the expandable block quotation with the expandability mark<**" + ), - parse_mode=enums.ParseMode.MARKDOWN + parse_mode=ParseMode.MARKDOWN ) HTML Style @@ -136,24 +171,40 @@ To strictly use this mode, pass :obj:`~pyrogram.enums.HTML` to the *parse_mode* .. code-block:: python - from pyrogram import enums + from pyrogram.enums import ParseMode await app.send_message( chat_id="me", text=( - "bold, " - "italic, " - "underline, " - "strike, " - "spoiler, " - "URL, " - "code\n\n" + "bold, bold" + "italic, italic" + "underline, underline" + "strike, strike, strike" + "spoiler\n\n" + + "bold italic bold italic bold strike italic bold strike spoiler underline italic bold bold\n\n" + + "inline URL " + "inline mention of a user\n" + "👍 " + "inline fixed-width code " + "
pre-formatted fixed-width code block
\n\n" "
"
             "for i in range(10):\n"
             "    print(i)"
-            "
" + "\n\n" + + "
Block quotation started" + "Block quotation continued" + "The last line of the block quotation
" + "
Expandable block quotation started" + "Expandable block quotation continued" + "Expandable block quotation continued" + "Hidden by default part of the block quotation started" + "Expandable block quotation continued" + "The last line of the block quotation
" ), - parse_mode=enums.ParseMode.HTML + parse_mode=ParseMode.HTML ) .. note:: @@ -194,10 +245,10 @@ If you don't like this behaviour you can always choose to only enable either Mar .. code-block:: python - from pyrogram import enums + from pyrogram.enums import ParseMode - await app.send_message(chat_id="me", text="**bold**, italic", parse_mode=enums.ParseMode.MARKDOWN) - await app.send_message(chat_id="me", text="**bold**, italic", parse_mode=enums.ParseMode.HTML) + await app.send_message(chat_id="me", text="**bold**, italic", parse_mode=ParseMode.MARKDOWN) + await app.send_message(chat_id="me", text="**bold**, italic", parse_mode=ParseMode.HTML) Result: @@ -210,9 +261,9 @@ The text will be sent as-is. .. code-block:: python - from pyrogram import enums + from pyrogram.enums import ParseMode - await app.send_message(chat_id="me", text="**bold**, italic", parse_mode=enums.ParseMode.DISABLED) + await app.send_message(chat_id="me", text="**bold**, italic", parse_mode=ParseMode.DISABLED) Result: diff --git a/pyrogram/parser/markdown.py b/pyrogram/parser/markdown.py index fdcf80f8f7..3c35ba3e2f 100644 --- a/pyrogram/parser/markdown.py +++ b/pyrogram/parser/markdown.py @@ -17,12 +17,12 @@ # along with Pyrogram. If not, see . import html -import logging import re from typing import Optional import pyrogram from pyrogram.enums import MessageEntityType + from . 
import utils from .html import HTML @@ -34,28 +34,37 @@ CODE_DELIM = "`" PRE_DELIM = "```" BLOCKQUOTE_DELIM = ">" +BLOCKQUOTE_ESCAPE_DELIM = "|>" BLOCKQUOTE_EXPANDABLE_DELIM = "**>" - -MARKDOWN_RE = re.compile(r"({d})|(!?)\[(.+?)\]\((.+?)\)".format( - d="|".join( - ["".join(i) for i in [ - [rf"\{j}" for j in i] - for i in [ - PRE_DELIM, - CODE_DELIM, - STRIKE_DELIM, - UNDERLINE_DELIM, - ITALIC_DELIM, - BOLD_DELIM, - SPOILER_DELIM +BLOCKQUOTE_EXPANDABLE_END_DELIM = "<**" + + +MARKDOWN_RE = re.compile( + r"({d})|(!?)\[(.+?)\]\((.+?)\)".format( + d="|".join( + [ + "".join(i) + for i in [ + [rf"\{j}" for j in i] + for i in [ + PRE_DELIM, + CODE_DELIM, + STRIKE_DELIM, + UNDERLINE_DELIM, + ITALIC_DELIM, + BOLD_DELIM, + SPOILER_DELIM, + ] + ] ] - ]] - ))) + ) + ) +) OPENING_TAG = "<{}>" CLOSING_TAG = "" URL_MARKUP = '{}' -EMOJI_MARKUP = '{}' +EMOJI_MARKUP = "{}" FIXED_WIDTH_DELIMS = [CODE_DELIM, PRE_DELIM] @@ -63,50 +72,105 @@ class Markdown: def __init__(self, client: Optional["pyrogram.Client"]): self.html = HTML(client) - def _parse_blockquotes(self, text: str): - text = html.unescape(text) - lines = text.split('\n') - result = [] - in_blockquote = False - is_expandable_blockquote = False - current_blockquote = [] + @staticmethod + def escape_and_create_quotes(text: str, strict: bool): + text_lines: list[str | None] = text.splitlines() - for line in lines: - if line.startswith(BLOCKQUOTE_DELIM): - in_blockquote = True - current_blockquote.append(line[1:].strip()) - elif line.startswith(BLOCKQUOTE_EXPANDABLE_DELIM): - in_blockquote = True - is_expandable_blockquote = True - current_blockquote.append(line[3:].strip()) - else: - if in_blockquote: - in_blockquote = False - result.append( - (f"
" if is_expandable_blockquote else OPENING_TAG.format("blockquote")) + - '\n'.join(current_blockquote) + - CLOSING_TAG.format("blockquote") - ) - current_blockquote = [] - result.append(line) - - if in_blockquote: - result.append( - (f"
" if is_expandable_blockquote else OPENING_TAG.format("blockquote")) + - '\n'.join(current_blockquote) + - CLOSING_TAG.format("blockquote") + # Indexes of Already escaped lines + html_escaped_list: list[int] = [] + + # Temporary Queue to hold lines to be quoted + to_quote_list: list[tuple[int, str]] = [] + + def create_blockquote(quote_type: str = "") -> None: + """ + Merges all lines in quote_queue into first line of queue + Encloses that line in html quote + Replaces rest of the lines with None placeholders to preserve indexes + """ + if len(to_quote_list) == 0: + return + + joined_lines = "\n".join([i[1] for i in to_quote_list]) + + first_line_index, _ = to_quote_list[0] + text_lines[first_line_index] = ( + f"{joined_lines}
" ) - return '\n'.join(result) - async def parse(self, text: str, strict: bool = False): - if strict: - text = html.escape(text) - + for line_to_remove in to_quote_list[1:]: + text_lines[line_to_remove[0]] = None + + to_quote_list.clear() + + # Handle Expandable Quote + inside_blockquote = False + for index, line in enumerate(text_lines): + if line.startswith(BLOCKQUOTE_EXPANDABLE_DELIM): + delim_stripped_line = line[3:] + parsed_line = ( + html.escape(delim_stripped_line) if strict else delim_stripped_line + ) + + to_quote_list.append((index, parsed_line)) + html_escaped_list.append(index) + + inside_blockquote = True + continue + + elif line.endswith(BLOCKQUOTE_EXPANDABLE_END_DELIM): + delim_stripped_line = line[:-3] + parsed_line = ( + html.escape(delim_stripped_line) if strict else delim_stripped_line + ) - text = self._parse_blockquotes(text) + to_quote_list.append((index, parsed_line)) + html_escaped_list.append(index) - text = self._parse_blockquotes(text) + inside_blockquote = False + create_blockquote(quote_type=" expandable") + + if inside_blockquote: + parsed_line = html.escape(line) if strict else line + to_quote_list.append((index, parsed_line)) + html_escaped_list.append(index) + + # Handle Single line/Continued Quote + for index, line in enumerate(text_lines): + if line is None: + continue + + if line.startswith(BLOCKQUOTE_ESCAPE_DELIM): + text_lines[index] = line[1:] + create_blockquote() + continue + + if line.startswith(BLOCKQUOTE_DELIM): + delim_stripped_line = line[1:] + parsed_line = ( + html.escape(delim_stripped_line) if strict else delim_stripped_line + ) + + to_quote_list.append((index, parsed_line)) + html_escaped_list.append(index) + + elif len(to_quote_list) > 0: + create_blockquote() + else: + create_blockquote() + + if strict: + for idx, line in enumerate(text_lines): + if idx not in html_escaped_list: + text_lines[idx] = html.escape(line) + + return "\n".join( + [valid_line for valid_line in text_lines if valid_line is not None] + ) + 
+ async def parse(self, text: str, strict: bool = False): + text = self.escape_and_create_quotes(text, strict=strict) delims = set() is_fixed_width = False @@ -122,13 +186,17 @@ async def parse(self, text: str, strict: bool = False): continue if not is_emoji and text_url: - text = utils.replace_once(text, full, URL_MARKUP.format(url, text_url), start) + text = utils.replace_once( + text, full, URL_MARKUP.format(url, text_url), start + ) continue if is_emoji: emoji = text_url emoji_id = url.lstrip("tg://emoji?id=") - text = utils.replace_once(text, full, EMOJI_MARKUP.format(emoji_id, emoji), start) + text = utils.replace_once( + text, full, EMOJI_MARKUP.format(emoji_id, emoji), start + ) continue if delim == BOLD_DELIM: @@ -156,9 +224,11 @@ async def parse(self, text: str, strict: bool = False): tag = CLOSING_TAG.format(tag) if delim == PRE_DELIM and delim in delims: - delim_and_language = text[text.find(PRE_DELIM):].split("\n")[0] - language = delim_and_language[len(PRE_DELIM):] - text = utils.replace_once(text, delim_and_language, f'
', start)
+                delim_and_language = text[text.find(PRE_DELIM) :].split("\n")[0]
+                language = delim_and_language[len(PRE_DELIM) :]
+                text = utils.replace_once(
+                    text, delim_and_language, f'
', start
+                )
                 continue
 
             text = utils.replace_once(text, delim, tag, start)
@@ -199,12 +269,22 @@ def unparse(text: str, entities: list):
                 for line in lines:
                     if len(line) == 0 and last_length == end:
                         continue
-                    start_offset = start+last_length
-                    last_length = last_length+len(line)
-                    end_offset = start_offset+last_length
-                    entities_offsets.append((start_tag, start_offset,))
-                    entities_offsets.append((end_tag, end_offset,))
-                    last_length = last_length+1
+                    start_offset = start + last_length
+                    last_length = last_length + len(line)
+                    end_offset = start_offset + last_length
+                    entities_offsets.append(
+                        (
+                            start_tag,
+                            start_offset,
+                        )
+                    )
+                    entities_offsets.append(
+                        (
+                            end_tag,
+                            end_offset,
+                        )
+                    )
+                    last_length = last_length + 1
                 continue
             elif entity_type == MessageEntityType.SPOILER:
                 start_tag = end_tag = SPOILER_DELIM
@@ -223,16 +303,24 @@ def unparse(text: str, entities: list):
             else:
                 continue
 
-            entities_offsets.append((start_tag, start,))
-            entities_offsets.append((end_tag, end,))
+            entities_offsets.append(
+                (
+                    start_tag,
+                    start,
+                )
+            )
+            entities_offsets.append(
+                (
+                    end_tag,
+                    end,
+                )
+            )
 
         entities_offsets = map(
             lambda x: x[1],
             sorted(
-                enumerate(entities_offsets),
-                key=lambda x: (x[1][1], x[0]),
-                reverse=True
-            )
+                enumerate(entities_offsets), key=lambda x: (x[1][1], x[0]), reverse=True
+            ),
         )
 
         for entity, offset in entities_offsets: