From 1ea879c421ee9924b287a79d12948c394bbc269e Mon Sep 17 00:00:00 2001 From: Ryuk <88324835+anonymousx97@users.noreply.github.com> Date: Fri, 7 Feb 2025 16:16:59 +0530 Subject: [PATCH 1/4] Update markdown.py: Fix Blockquote. --- pyrogram/parser/markdown.py | 231 +++++++++++++++++++++++++----------- 1 file changed, 159 insertions(+), 72 deletions(-) diff --git a/pyrogram/parser/markdown.py b/pyrogram/parser/markdown.py index fdcf80f8f7..397a4a122c 100644 --- a/pyrogram/parser/markdown.py +++ b/pyrogram/parser/markdown.py @@ -17,12 +17,12 @@ # along with Pyrogram. If not, see . import html -import logging import re from typing import Optional import pyrogram from pyrogram.enums import MessageEntityType + from . import utils from .html import HTML @@ -34,28 +34,37 @@ CODE_DELIM = "`" PRE_DELIM = "```" BLOCKQUOTE_DELIM = ">" +BLOCKQUOTE_ESCAPE_DELIM = "|>" BLOCKQUOTE_EXPANDABLE_DELIM = "**>" - -MARKDOWN_RE = re.compile(r"({d})|(!?)\[(.+?)\]\((.+?)\)".format( - d="|".join( - ["".join(i) for i in [ - [rf"\{j}" for j in i] - for i in [ - PRE_DELIM, - CODE_DELIM, - STRIKE_DELIM, - UNDERLINE_DELIM, - ITALIC_DELIM, - BOLD_DELIM, - SPOILER_DELIM +BLOCKQUOTE_EXPANDABLE_END_DELIM = "<**" + + +MARKDOWN_RE = re.compile( + r"({d})|(!?)\[(.+?)\]\((.+?)\)".format( + d="|".join( + [ + "".join(i) + for i in [ + [rf"\{j}" for j in i] + for i in [ + PRE_DELIM, + CODE_DELIM, + STRIKE_DELIM, + UNDERLINE_DELIM, + ITALIC_DELIM, + BOLD_DELIM, + SPOILER_DELIM, + ] + ] ] - ]] - ))) + ) + ) +) OPENING_TAG = "<{}>" CLOSING_TAG = "" URL_MARKUP = '{}' -EMOJI_MARKUP = '{}' +EMOJI_MARKUP = "{}" FIXED_WIDTH_DELIMS = [CODE_DELIM, PRE_DELIM] @@ -63,50 +72,104 @@ class Markdown: def __init__(self, client: Optional["pyrogram.Client"]): self.html = HTML(client) - def _parse_blockquotes(self, text: str): - text = html.unescape(text) - lines = text.split('\n') - result = [] - in_blockquote = False - is_expandable_blockquote = False - current_blockquote = [] + @staticmethod + def 
escape_and_create_quotes(text: str, strict: bool): + text_lines: list[str | None] = text.splitlines() - for line in lines: - if line.startswith(BLOCKQUOTE_DELIM): - in_blockquote = True - current_blockquote.append(line[1:].strip()) - elif line.startswith(BLOCKQUOTE_EXPANDABLE_DELIM): - in_blockquote = True - is_expandable_blockquote = True - current_blockquote.append(line[3:].strip()) - else: - if in_blockquote: - in_blockquote = False - result.append( - (f"
" if is_expandable_blockquote else OPENING_TAG.format("blockquote")) + - '\n'.join(current_blockquote) + - CLOSING_TAG.format("blockquote") - ) - current_blockquote = [] - result.append(line) - - if in_blockquote: - result.append( - (f"
" if is_expandable_blockquote else OPENING_TAG.format("blockquote")) + - '\n'.join(current_blockquote) + - CLOSING_TAG.format("blockquote") + # Indexes of Already escaped lines + html_escaped_list: list[int] = [] + + # Temporary Queue to hold lines to be quoted + to_quote_list: list[tuple[int, str]] = [] + + def create_blockquote(quote_type: str = "") -> None: + """ + Merges all lines in quote_queue into first line of queue + Encloses that line in html quote + Replaces rest of the lines with None placeholders to preserve indexes + """ + if len(to_quote_list) == 0: + return + + joined_lines = "\n".join([i[1] for i in to_quote_list]) + + first_line_index, _ = to_quote_list[0] + text_lines[first_line_index] = ( + f"{joined_lines}
" ) - return '\n'.join(result) - async def parse(self, text: str, strict: bool = False): - if strict: - text = html.escape(text) - + for line_to_remove in to_quote_list[1:]: + text_lines[line_to_remove[0]] = None + + to_quote_list.clear() + + # Handle Expandable Quote + inside_blockquote = False + for index, line in enumerate(text_lines): + if line.startswith(BLOCKQUOTE_EXPANDABLE_DELIM): + delim_stripped_line = line[3:] + parsed_line = ( + html.escape(delim_stripped_line) if strict else delim_stripped_line + ) + + to_quote_list.append((index, parsed_line)) + html_escaped_list.append(index) + + inside_blockquote = True + continue + + elif line.endswith(BLOCKQUOTE_EXPANDABLE_END_DELIM): + delim_stripped_line = line[:-3] + parsed_line = ( + html.escape(delim_stripped_line) if strict else delim_stripped_line + ) - text = self._parse_blockquotes(text) + to_quote_list.append((index, parsed_line)) + html_escaped_list.append(index) - text = self._parse_blockquotes(text) + inside_blockquote = False + create_blockquote(quote_type=" expandable") + + if inside_blockquote: + parsed_line = html.escape(line) if strict else line + to_quote_list.append((index, parsed_line)) + html_escaped_list.append(index) + + # Handle Single line/Continued Quote + for index, line in enumerate(text_lines): + if line is None: + continue + + if line.startswith(BLOCKQUOTE_ESCAPE_DELIM): + text_lines[index] = line[1:] + continue + + if line.startswith(BLOCKQUOTE_DELIM): + delim_stripped_line = line[1:] + parsed_line = ( + html.escape(delim_stripped_line) if strict else delim_stripped_line + ) + + to_quote_list.append((index, parsed_line)) + html_escaped_list.append(index) + + elif len(to_quote_list) > 0: + create_blockquote() + else: + create_blockquote() + + if strict: + for idx, line in enumerate(text_lines): + if idx not in html_escaped_list: + text_lines[idx] = html.escape(line) + + return "\n".join( + [valid_line for valid_line in text_lines if valid_line is not None] + ) + + async def 
parse(self, text: str, strict: bool = False): + text = self.escape_and_create_quotes(text, strict=strict) delims = set() is_fixed_width = False @@ -122,13 +185,17 @@ async def parse(self, text: str, strict: bool = False): continue if not is_emoji and text_url: - text = utils.replace_once(text, full, URL_MARKUP.format(url, text_url), start) + text = utils.replace_once( + text, full, URL_MARKUP.format(url, text_url), start + ) continue if is_emoji: emoji = text_url emoji_id = url.lstrip("tg://emoji?id=") - text = utils.replace_once(text, full, EMOJI_MARKUP.format(emoji_id, emoji), start) + text = utils.replace_once( + text, full, EMOJI_MARKUP.format(emoji_id, emoji), start + ) continue if delim == BOLD_DELIM: @@ -156,9 +223,11 @@ async def parse(self, text: str, strict: bool = False): tag = CLOSING_TAG.format(tag) if delim == PRE_DELIM and delim in delims: - delim_and_language = text[text.find(PRE_DELIM):].split("\n")[0] - language = delim_and_language[len(PRE_DELIM):] - text = utils.replace_once(text, delim_and_language, f'
', start)
+                delim_and_language = text[text.find(PRE_DELIM) :].split("\n")[0]
+                language = delim_and_language[len(PRE_DELIM) :]
+                text = utils.replace_once(
+                    text, delim_and_language, f'
', start
+                )
                 continue
 
             text = utils.replace_once(text, delim, tag, start)
@@ -199,12 +268,22 @@ def unparse(text: str, entities: list):
                 for line in lines:
                     if len(line) == 0 and last_length == end:
                         continue
-                    start_offset = start+last_length
-                    last_length = last_length+len(line)
-                    end_offset = start_offset+last_length
-                    entities_offsets.append((start_tag, start_offset,))
-                    entities_offsets.append((end_tag, end_offset,))
-                    last_length = last_length+1
+                    start_offset = start + last_length
+                    last_length = last_length + len(line)
+                    end_offset = start_offset + last_length
+                    entities_offsets.append(
+                        (
+                            start_tag,
+                            start_offset,
+                        )
+                    )
+                    entities_offsets.append(
+                        (
+                            end_tag,
+                            end_offset,
+                        )
+                    )
+                    last_length = last_length + 1
                 continue
             elif entity_type == MessageEntityType.SPOILER:
                 start_tag = end_tag = SPOILER_DELIM
@@ -223,16 +302,24 @@ def unparse(text: str, entities: list):
             else:
                 continue
 
-            entities_offsets.append((start_tag, start,))
-            entities_offsets.append((end_tag, end,))
+            entities_offsets.append(
+                (
+                    start_tag,
+                    start,
+                )
+            )
+            entities_offsets.append(
+                (
+                    end_tag,
+                    end,
+                )
+            )
 
         entities_offsets = map(
             lambda x: x[1],
             sorted(
-                enumerate(entities_offsets),
-                key=lambda x: (x[1][1], x[0]),
-                reverse=True
-            )
+                enumerate(entities_offsets), key=lambda x: (x[1][1], x[0]), reverse=True
+            ),
         )
 
         for entity, offset in entities_offsets:

From 10de284adc0858fab4013a9ee1b8c401f9926f6a Mon Sep 17 00:00:00 2001
From: Ryuk <88324835+anonymousx97@users.noreply.github.com>
Date: Fri, 7 Feb 2025 21:25:45 +0530
Subject: [PATCH 2/4] Update text-formatting.rst: add documentation for MD
 blockquote

---
 docs/source/topics/text-formatting.rst | 37 +++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/docs/source/topics/text-formatting.rst b/docs/source/topics/text-formatting.rst
index 0f072c24a7..e7b40be684 100644
--- a/docs/source/topics/text-formatting.rst
+++ b/docs/source/topics/text-formatting.rst
@@ -59,7 +59,23 @@ To strictly use this mode, pass :obj:`~pyrogram.enums.ParseMode.MARKDOWN` to the
 
     ~~strike~~
 
-    > blockquote
+    >blockquote
+
+    |>escaped blockquote 
+
+    >First line of multi line blockquote
+    >Block quotation continued
+    >Block quotation continued
+    >Block quotation continued
+    >The last line of the block quotation
+
+    **>
+    The expandable block quotation started right after the previous block quotation
+    It is separated from the previous block quotation by expandable syntax 
+    Expandable block quotation continued
+    Hidden by default part of the expandable block quotation started
+    Expandable block quotation continued
+    The last line of the expandable block quotation with the expandability mark<**
 
     `inline fixed-width code`
 
@@ -96,6 +112,25 @@ To strictly use this mode, pass :obj:`~pyrogram.enums.ParseMode.MARKDOWN` to the
             "for i in range(10):\n"
             "    print(i)"
             "```"
+
+            "\n>blockquote\n"
+
+            "|>escaped blockquote\n"
+
+            ">First line of multi line blockquote\n"
+            ">Block quotation continued\n"
+            ">Block quotation continued\n"
+            ">Block quotation continued\n"
+            ">The last line of the block quotation\n"
+
+            "**>\n"
+            "The expandable block quotation started right after the previous block quotation\n"
+            "It is separated from the previous block quotation by expandable syntax\n"
+            "Expandable block quotation continued\n"
+            "Hidden by default part of the expandable block quotation started\n"
+            "Expandable block quotation continued\n"
+            "The last line of the expandable block quotation with the expandability mark<**"
+
         ),
         parse_mode=enums.ParseMode.MARKDOWN
     )

From b9e1a89f7e6b390c4848c97f976e19c145d8c46a Mon Sep 17 00:00:00 2001
From: Ryuk <88324835+anonymousx97@users.noreply.github.com>
Date: Fri, 7 Feb 2025 22:11:12 +0530
Subject: [PATCH 3/4] Update markdown.py: fix an edgecase regarding quote
 escape character.

---
 pyrogram/parser/markdown.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyrogram/parser/markdown.py b/pyrogram/parser/markdown.py
index 397a4a122c..3c35ba3e2f 100644
--- a/pyrogram/parser/markdown.py
+++ b/pyrogram/parser/markdown.py
@@ -143,6 +143,7 @@ def create_blockquote(quote_type: str = "") -> None:
 
             if line.startswith(BLOCKQUOTE_ESCAPE_DELIM):
                 text_lines[index] = line[1:]
+                create_blockquote()
                 continue
 
             if line.startswith(BLOCKQUOTE_DELIM):

From b75cc915e4abd13c7cfc6e1d6fbc0f94b890b354 Mon Sep 17 00:00:00 2001
From: shriMADhav U k 
Date: Fri, 7 Feb 2025 17:51:04 +0100
Subject: [PATCH 4/4] minor update

---
 docs/source/topics/text-formatting.rst | 50 +++++++++++++++++---------
 1 file changed, 33 insertions(+), 17 deletions(-)

diff --git a/docs/source/topics/text-formatting.rst b/docs/source/topics/text-formatting.rst
index e7b40be684..3ab0a2d95d 100644
--- a/docs/source/topics/text-formatting.rst
+++ b/docs/source/topics/text-formatting.rst
@@ -96,7 +96,7 @@ To strictly use this mode, pass :obj:`~pyrogram.enums.ParseMode.MARKDOWN` to the
 
 .. code-block:: python
 
-    from pyrogram import enums
+    from pyrogram.enums import ParseMode
 
     await app.send_message(
         chat_id="me",
@@ -132,7 +132,7 @@ To strictly use this mode, pass :obj:`~pyrogram.enums.ParseMode.MARKDOWN` to the
             "The last line of the expandable block quotation with the expandability mark<**"
 
         ),
-        parse_mode=enums.ParseMode.MARKDOWN
+        parse_mode=ParseMode.MARKDOWN
     )
 
 HTML Style
@@ -171,24 +171,40 @@ To strictly use this mode, pass :obj:`~pyrogram.enums.HTML` to the *parse_mode*
 
 .. code-block:: python
 
-    from pyrogram import enums
+    from pyrogram.enums import ParseMode
 
     await app.send_message(
         chat_id="me",
         text=(
-            "bold, "
-            "italic, "
-            "underline, "
-            "strike, "
-            "spoiler, "
-            "URL, "
-            "code\n\n"
+            "bold, bold"
+            "italic, italic"
+            "underline, underline"
+            "strike, strike, strike"
+            "spoiler\n\n"
+
+            "bold italic bold italic bold strike italic bold strike spoiler underline italic bold bold\n\n"
+
+            "inline URL "
+            "inline mention of a user\n"
+            "👍 "
+            "inline fixed-width code "
+            "
pre-formatted fixed-width code block
\n\n" "
"
             "for i in range(10):\n"
             "    print(i)"
-            "
" + "
\n\n" + + "
Block quotation started" + "Block quotation continued" + "The last line of the block quotation
" + "
Expandable block quotation started" + "Expandable block quotation continued" + "Expandable block quotation continued" + "Hidden by default part of the block quotation started" + "Expandable block quotation continued" + "The last line of the block quotation
" ), - parse_mode=enums.ParseMode.HTML + parse_mode=ParseMode.HTML ) .. note:: @@ -229,10 +245,10 @@ If you don't like this behaviour you can always choose to only enable either Mar .. code-block:: python - from pyrogram import enums + from pyrogram.enums import ParseMode - await app.send_message(chat_id="me", text="**bold**, italic", parse_mode=enums.ParseMode.MARKDOWN) - await app.send_message(chat_id="me", text="**bold**, italic", parse_mode=enums.ParseMode.HTML) + await app.send_message(chat_id="me", text="**bold**, italic", parse_mode=ParseMode.MARKDOWN) + await app.send_message(chat_id="me", text="**bold**, italic", parse_mode=ParseMode.HTML) Result: @@ -245,9 +261,9 @@ The text will be sent as-is. .. code-block:: python - from pyrogram import enums + from pyrogram.enums import ParseMode - await app.send_message(chat_id="me", text="**bold**, italic", parse_mode=enums.ParseMode.DISABLED) + await app.send_message(chat_id="me", text="**bold**, italic", parse_mode=ParseMode.DISABLED) Result: