Fix link recognition for check command

The check command now also accepts the weird Slack links. Additionally, this improved how Slack links are recognized and extracted.
GrafeasGroup · Feb 28, 2022 · 0fe405a · 0fe405a
1 parent 5d8c139
commit 0fe405a
Show file tree

Hide file tree

Showing 4 changed files with 99 additions and 18 deletions.
diff --git a/api/slack/commands.py b/api/slack/commands.py
@@ -7,7 +7,12 @@
 from api.serializers import VolunteerSerializer
 from api.slack import client
 from api.slack.transcription_check.messages import send_check_message
-from api.slack.utils import clean_links, dict_to_table, get_message
+from api.slack.utils import (
+    dict_to_table,
+    extract_text_from_link,
+    extract_url_from_link,
+    get_message,
+)
 from api.views.find import find_by_url, normalize_url
 from api.views.misc import Summary
 from authentication.models import BlossomUser
@@ -115,7 +120,7 @@ def reset_cmd(channel: str, message: str) -> None:
         # they didn't give a username
         msg = i18n["slack"]["errors"]["missing_username"]
     elif len(parsed_message) == 2:
-        username = clean_links(parsed_message[1])
+        username = extract_text_from_link(parsed_message[1])
         if user := BlossomUser.objects.filter(username__iexact=username).first():
             if user.accepted_coc:
                 user.accepted_coc = False
@@ -142,7 +147,7 @@ def watch_cmd(channel: str, message: str) -> None:
         # they didn't give a username
         msg = i18n["slack"]["errors"]["missing_username"]
     elif len(parsed_message) <= 3:
-        username = clean_links(parsed_message[1])
+        username = extract_text_from_link(parsed_message[1])
         if user := BlossomUser.objects.filter(username__iexact=username).first():
             if len(parsed_message) == 2:
                 # they didn't give a percentage, default to 100%
@@ -198,7 +203,7 @@ def unwatch_cmd(channel: str, message: str) -> None:
         # they didn't give a username
         msg = i18n["slack"]["errors"]["missing_username"]
     elif len(parsed_message) == 2:
-        username = clean_links(parsed_message[1])
+        username = extract_text_from_link(parsed_message[1])
         if user := BlossomUser.objects.filter(username__iexact=username).first():
             # Set the check percentage back to automatic
             user.overwrite_check_percentage = None
@@ -222,7 +227,7 @@ def watchstatus_cmd(channel: str, message: str) -> None:
         # they didn't give a username
         msg = i18n["slack"]["errors"]["missing_username"]
     elif len(parsed_message) == 2:
-        username = clean_links(parsed_message[1])
+        username = extract_text_from_link(parsed_message[1])
         if user := BlossomUser.objects.filter(username__iexact=username).first():
             status = user.transcription_check_reason(ignore_low_activity=True)
             msg = i18n["slack"]["watchstatus"]["success"].format(
@@ -327,7 +332,7 @@ def blacklist_cmd(channel: str, message: str) -> None:
         # they didn't give a username
         msg = i18n["slack"]["errors"]["missing_username"]
     elif len(parsed_message) == 2:
-        username = clean_links(parsed_message[1])
+        username = extract_text_from_link(parsed_message[1])
         if user := BlossomUser.objects.filter(username__iexact=username).first():
             if user.blacklisted:
                 user.blacklisted = False
@@ -360,7 +365,7 @@ def check_cmd(channel: str, message: str) -> None:
         return
 
     url = parsed_message[1]
-    normalized_url = normalize_url(url)
+    normalized_url = normalize_url(extract_url_from_link(url))
 
     # Check if the URL is valid
     if normalized_url is None:
@@ -426,10 +431,10 @@ def check_cmd(channel: str, message: str) -> None:
                     tor_url=find_response["submission"].tor_url,
                 ),
             )
-            pass
+            return
     else:
         # URL not found
         client.chat_postMessage(
-            channel=channel, text=i18n["slack"]["check"]["not_found"].format(url=url,),
+            channel=channel, text=i18n["slack"]["check"]["not_found"].format(url=url),
         )
-        pass
+        return
diff --git a/api/slack/utils.py b/api/slack/utils.py
@@ -1,5 +1,6 @@
 import logging
 import re
+from re import Match
 from typing import Dict, List, Optional
 
 from django.conf import settings
@@ -17,18 +18,37 @@
 # find a link in the slack format, then strip out the text at the end.
 # they're formatted like this: <https://example.com|Text!>
 SLACK_TEXT_EXTRACTOR = re.compile(
-    r"<(?:https?://)?[\w-]+(?:\.[\w-]+)+\.?(?::\d+)?(?:/\S*)?\|([^>]+)>"
+    # Allow long lines for the regex
+    # flake8: noqa: E501
+    r"<(?P<url>(?:https?://)?[\w-]+(?:\.[\w-]+)+\.?(?::\d+)?(?:/[^\s|]*)?)(?:\|(?P<text>[^>]+))?>"
 )
 
 
-def clean_links(text: str) -> str:
+def extract_text_from_link(text: str) -> str:
     """Strip link out of auto-generated slack fancy URLS and return the text only."""
     results = [_ for _ in re.finditer(SLACK_TEXT_EXTRACTOR, text)]
     # we'll replace things going backwards so that we don't mess up indexing
     results.reverse()
 
+    def extract_text(mx: Match) -> str:
+        return mx["text"] or mx["url"]
+
+    for match in results:
+        text = text[: match.start()] + extract_text(match) + text[match.end() :]
+    return text
+
+
+def extract_url_from_link(text: str) -> str:
+    """Strip link out of auto-generated slack fancy URLS and return the link only."""
+    results = [_ for _ in re.finditer(SLACK_TEXT_EXTRACTOR, text)]
+    # we'll replace things going backwards so that we don't mess up indexing
+    results.reverse()
+
+    def extract_link(m: Match) -> str:
+        return m["url"]
+
     for match in results:
-        text = text[: match.start()] + match.groups()[0] + text[match.end() :]
+        text = text[: match.start()] + extract_link(match) + text[match.end() :]
     return text
 
 

diff --git a/api/tests/slack/test_commands.py b/api/tests/slack/test_commands.py
@@ -394,8 +394,14 @@ def test_dadjoke_target(message: str) -> None:
     "url",
     [
         "https://reddit.com/r/TranscribersOfReddit/comments/t31715/curatedtumblr_image_linguistics_fax/",
-        "https://reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/"
+        "https://reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/",
         "https://www.reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/hypuw2r/",
+        "<https://reddit.com/r/TranscribersOfReddit/comments/t31715/curatedtumblr_image_linguistics_fax/>",
+        "<https://reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/>",
+        "<https://www.reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/hypuw2r/>",
+        "<https://reddit.com/r/TranscribersOfReddit/comments/t31715/curatedtumblr_image_linguistics_fax/|Tor_Post>",
+        "<https://reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/|Partner_Post>",
+        "<https://www.reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/hypuw2r/|Transcription>",
     ],
 )
 def test_check_cmd(client: Client, url: str) -> None:
@@ -410,7 +416,7 @@ def test_check_cmd(client: Client, url: str) -> None:
     transcription = create_transcription(
         submission=submission,
         user=user,
-        url="https://www.reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/hypuw2r/",
+        url="https://reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/hypuw2r/",
     )
 
     with patch("api.slack.commands.client.chat_postMessage") as message_mock, patch(
@@ -423,15 +429,14 @@ def test_check_cmd(client: Client, url: str) -> None:
         assert link_mock.call_count == 1
 
         checks = TranscriptionCheck.objects.filter(transcription=transcription)
-
         assert len(checks) == 1
 
 
 @pytest.mark.parametrize(
     "url",
     [
         "https://reddit.com/r/TranscribersOfReddit/comments/t31715/curatedtumblr_image_linguistics_fax/",
-        "https://reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/"
+        "https://reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/",
         "https://www.reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/hypuw2r/",
     ],
 )

diff --git a/api/tests/slack/test_utils.py b/api/tests/slack/test_utils.py
@@ -1,11 +1,18 @@
+# Disable line length restrictions to allow long URLs
+# flake8: noqa: E501
 from typing import Dict
 from unittest.mock import MagicMock, PropertyMock, patch
 
 import pytest
 from django.test import Client
 
 from api.slack import client as slack_client
-from api.slack.utils import dict_to_table, send_transcription_check
+from api.slack.utils import (
+    dict_to_table,
+    extract_text_from_link,
+    extract_url_from_link,
+    send_transcription_check,
+)
 from blossom.strings import translation
 from utils.test_helpers import (
     create_submission,
@@ -117,3 +124,47 @@ def test_send_transcription_to_slack(
 
         actual_message = mock.call_args[1]["text"]
         assert actual_message == message
+
+
+@pytest.mark.parametrize(
+    "text,expected",
+    [
+        ("Normal text", "Normal text"),
+        (
+            "<https://www.reddit.com/user/transcribersofreddit|u/transcribersofreddit>",
+            "u/transcribersofreddit",
+        ),
+        (
+            "<https://www.reddit.com/user/transcribersofreddit>",
+            "https://www.reddit.com/user/transcribersofreddit",
+        ),
+    ],
+)
+def test_extract_text_from_link(text: str, expected: str) -> None:
+    """Test that the text is extracted from a Slack link."""
+    actual = extract_text_from_link(text)
+    assert actual == expected
+
+
+@pytest.mark.parametrize(
+    "text,expected",
+    [
+        ("Normal text", "Normal text"),
+        (
+            "<https://www.reddit.com/user/transcribersofreddit|u/transcribersofreddit>",
+            "https://www.reddit.com/user/transcribersofreddit",
+        ),
+        (
+            "<https://www.reddit.com/user/transcribersofreddit>",
+            "https://www.reddit.com/user/transcribersofreddit",
+        ),
+        (
+            "<https://reddit.com/r/TranscribersOfReddit/comments/t31715/curatedtumblr_image_linguistics_fax/|Tor Post>",
+            "https://reddit.com/r/TranscribersOfReddit/comments/t31715/curatedtumblr_image_linguistics_fax/",
+        ),
+    ],
+)
+def test_extract_url_from_link(text: str, expected: str) -> None:
+    """Test that the URL is extracted from a Slack link."""
+    actual = extract_url_from_link(text)
+    assert actual == expected