Skip to content

Commit

Permalink
Fix link recognition for check command
Browse files Browse the repository at this point in the history
The check command now also accepts the weird Slack links.
Additionally, this improved how Slack links are recognized and extracted.
  • Loading branch information
TimJentzsch committed Feb 28, 2022
1 parent 5d8c139 commit 0fe405a
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 18 deletions.
25 changes: 15 additions & 10 deletions api/slack/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@
from api.serializers import VolunteerSerializer
from api.slack import client
from api.slack.transcription_check.messages import send_check_message
from api.slack.utils import clean_links, dict_to_table, get_message
from api.slack.utils import (
dict_to_table,
extract_text_from_link,
extract_url_from_link,
get_message,
)
from api.views.find import find_by_url, normalize_url
from api.views.misc import Summary
from authentication.models import BlossomUser
Expand Down Expand Up @@ -115,7 +120,7 @@ def reset_cmd(channel: str, message: str) -> None:
# they didn't give a username
msg = i18n["slack"]["errors"]["missing_username"]
elif len(parsed_message) == 2:
username = clean_links(parsed_message[1])
username = extract_text_from_link(parsed_message[1])
if user := BlossomUser.objects.filter(username__iexact=username).first():
if user.accepted_coc:
user.accepted_coc = False
Expand All @@ -142,7 +147,7 @@ def watch_cmd(channel: str, message: str) -> None:
# they didn't give a username
msg = i18n["slack"]["errors"]["missing_username"]
elif len(parsed_message) <= 3:
username = clean_links(parsed_message[1])
username = extract_text_from_link(parsed_message[1])
if user := BlossomUser.objects.filter(username__iexact=username).first():
if len(parsed_message) == 2:
# they didn't give a percentage, default to 100%
Expand Down Expand Up @@ -198,7 +203,7 @@ def unwatch_cmd(channel: str, message: str) -> None:
# they didn't give a username
msg = i18n["slack"]["errors"]["missing_username"]
elif len(parsed_message) == 2:
username = clean_links(parsed_message[1])
username = extract_text_from_link(parsed_message[1])
if user := BlossomUser.objects.filter(username__iexact=username).first():
# Set the check percentage back to automatic
user.overwrite_check_percentage = None
Expand All @@ -222,7 +227,7 @@ def watchstatus_cmd(channel: str, message: str) -> None:
# they didn't give a username
msg = i18n["slack"]["errors"]["missing_username"]
elif len(parsed_message) == 2:
username = clean_links(parsed_message[1])
username = extract_text_from_link(parsed_message[1])
if user := BlossomUser.objects.filter(username__iexact=username).first():
status = user.transcription_check_reason(ignore_low_activity=True)
msg = i18n["slack"]["watchstatus"]["success"].format(
Expand Down Expand Up @@ -327,7 +332,7 @@ def blacklist_cmd(channel: str, message: str) -> None:
# they didn't give a username
msg = i18n["slack"]["errors"]["missing_username"]
elif len(parsed_message) == 2:
username = clean_links(parsed_message[1])
username = extract_text_from_link(parsed_message[1])
if user := BlossomUser.objects.filter(username__iexact=username).first():
if user.blacklisted:
user.blacklisted = False
Expand Down Expand Up @@ -360,7 +365,7 @@ def check_cmd(channel: str, message: str) -> None:
return

url = parsed_message[1]
normalized_url = normalize_url(url)
normalized_url = normalize_url(extract_url_from_link(url))

# Check if the URL is valid
if normalized_url is None:
Expand Down Expand Up @@ -426,10 +431,10 @@ def check_cmd(channel: str, message: str) -> None:
tor_url=find_response["submission"].tor_url,
),
)
pass
return
else:
# URL not found
client.chat_postMessage(
channel=channel, text=i18n["slack"]["check"]["not_found"].format(url=url,),
channel=channel, text=i18n["slack"]["check"]["not_found"].format(url=url),
)
pass
return
26 changes: 23 additions & 3 deletions api/slack/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import re
from re import Match
from typing import Dict, List, Optional

from django.conf import settings
Expand All @@ -17,18 +18,37 @@
# find a link in the slack format, then strip out the text at the end.
# they're formatted like this: <https://example.com|Text!>
SLACK_TEXT_EXTRACTOR = re.compile(
r"<(?:https?://)?[\w-]+(?:\.[\w-]+)+\.?(?::\d+)?(?:/\S*)?\|([^>]+)>"
# Allow long lines for the regex
# flake8: noqa: E501
r"<(?P<url>(?:https?://)?[\w-]+(?:\.[\w-]+)+\.?(?::\d+)?(?:/[^\s|]*)?)(?:\|(?P<text>[^>]+))?>"
)


def clean_links(text: str) -> str:
def extract_text_from_link(text: str) -> str:
"""Strip link out of auto-generated slack fancy URLS and return the text only."""
results = [_ for _ in re.finditer(SLACK_TEXT_EXTRACTOR, text)]
# we'll replace things going backwards so that we don't mess up indexing
results.reverse()

def extract_text(mx: Match) -> str:
return mx["text"] or mx["url"]

for match in results:
text = text[: match.start()] + extract_text(match) + text[match.end() :]
return text


def extract_url_from_link(text: str) -> str:
"""Strip link out of auto-generated slack fancy URLS and return the link only."""
results = [_ for _ in re.finditer(SLACK_TEXT_EXTRACTOR, text)]
# we'll replace things going backwards so that we don't mess up indexing
results.reverse()

def extract_link(m: Match) -> str:
return m["url"]

for match in results:
text = text[: match.start()] + match.groups()[0] + text[match.end() :]
text = text[: match.start()] + extract_link(match) + text[match.end() :]
return text


Expand Down
13 changes: 9 additions & 4 deletions api/tests/slack/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,8 +394,14 @@ def test_dadjoke_target(message: str) -> None:
"url",
[
"https://reddit.com/r/TranscribersOfReddit/comments/t31715/curatedtumblr_image_linguistics_fax/",
"https://reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/"
"https://reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/",
"https://www.reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/hypuw2r/",
"<https://reddit.com/r/TranscribersOfReddit/comments/t31715/curatedtumblr_image_linguistics_fax/>",
"<https://reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/>",
"<https://www.reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/hypuw2r/>",
"<https://reddit.com/r/TranscribersOfReddit/comments/t31715/curatedtumblr_image_linguistics_fax/|Tor_Post>",
"<https://reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/|Partner_Post>",
"<https://www.reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/hypuw2r/|Transcription>",
],
)
def test_check_cmd(client: Client, url: str) -> None:
Expand All @@ -410,7 +416,7 @@ def test_check_cmd(client: Client, url: str) -> None:
transcription = create_transcription(
submission=submission,
user=user,
url="https://www.reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/hypuw2r/",
url="https://reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/hypuw2r/",
)

with patch("api.slack.commands.client.chat_postMessage") as message_mock, patch(
Expand All @@ -423,15 +429,14 @@ def test_check_cmd(client: Client, url: str) -> None:
assert link_mock.call_count == 1

checks = TranscriptionCheck.objects.filter(transcription=transcription)

assert len(checks) == 1


@pytest.mark.parametrize(
"url",
[
"https://reddit.com/r/TranscribersOfReddit/comments/t31715/curatedtumblr_image_linguistics_fax/",
"https://reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/"
"https://reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/",
"https://www.reddit.com/r/CuratedTumblr/comments/t315gq/linguistics_fax/hypuw2r/",
],
)
Expand Down
53 changes: 52 additions & 1 deletion api/tests/slack/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
# Disable line length restrictions to allow long URLs
# flake8: noqa: E501
from typing import Dict
from unittest.mock import MagicMock, PropertyMock, patch

import pytest
from django.test import Client

from api.slack import client as slack_client
from api.slack.utils import dict_to_table, send_transcription_check
from api.slack.utils import (
dict_to_table,
extract_text_from_link,
extract_url_from_link,
send_transcription_check,
)
from blossom.strings import translation
from utils.test_helpers import (
create_submission,
Expand Down Expand Up @@ -117,3 +124,47 @@ def test_send_transcription_to_slack(

actual_message = mock.call_args[1]["text"]
assert actual_message == message


@pytest.mark.parametrize(
"text,expected",
[
("Normal text", "Normal text"),
(
"<https://www.reddit.com/user/transcribersofreddit|u/transcribersofreddit>",
"u/transcribersofreddit",
),
(
"<https://www.reddit.com/user/transcribersofreddit>",
"https://www.reddit.com/user/transcribersofreddit",
),
],
)
def test_extract_text_from_link(text: str, expected: str) -> None:
"""Test that the text is extracted from a Slack link."""
actual = extract_text_from_link(text)
assert actual == expected


@pytest.mark.parametrize(
"text,expected",
[
("Normal text", "Normal text"),
(
"<https://www.reddit.com/user/transcribersofreddit|u/transcribersofreddit>",
"https://www.reddit.com/user/transcribersofreddit",
),
(
"<https://www.reddit.com/user/transcribersofreddit>",
"https://www.reddit.com/user/transcribersofreddit",
),
(
"<https://reddit.com/r/TranscribersOfReddit/comments/t31715/curatedtumblr_image_linguistics_fax/|Tor Post>",
"https://reddit.com/r/TranscribersOfReddit/comments/t31715/curatedtumblr_image_linguistics_fax/",
),
],
)
def test_extract_url_from_link(text: str, expected: str) -> None:
"""Test that the URL is extracted from a Slack link."""
actual = extract_url_from_link(text)
assert actual == expected

0 comments on commit 0fe405a

Please sign in to comment.