Add submission/subreddits endpoint (#471)

* Add submission/subreddits endpoint to count submissions by subreddit
* Add tests for submission/subreddits endpoint
* Make the database do the work

---------

Co-authored-by: Joe Kaufeld <opensource@joekaufeld.com>
GrafeasGroup · Feb 28, 2023 · 1c19c00 · 1c19c00
1 parent 8e0ef60
commit 1c19c00
Show file tree

Hide file tree

Showing 3 changed files with 148 additions and 2 deletions.
diff --git a/blossom/api/models.py b/blossom/api/models.py
@@ -287,7 +287,8 @@ def get_subreddit_name(self) -> str:
         """
         if self.source.name != "reddit":
             return self.source.name
-        return f'/r/{self.url.split("/r/")[1].split("/")[0]}'
+
+        return f"/r/{extract_subreddit_from_url(str(self.url))}"
 
 
 class Transcription(models.Model):
@@ -496,3 +497,22 @@ def revert(self) -> None:
         self.affected_submissions.update(
             claimed_by=self.old_user, completed_by=self.old_user
         )
+
+
+def extract_subreddit_from_url(url: str) -> Optional[str]:
+    """
+    Given a Reddit URL, extract the subreddit.
+
+    The name will be without the `r/` prefix. Anything following the name
+    (further path segments, a query string or a fragment) is discarded.
+
+    :param url: The URL to inspect.
+    :returns: The subreddit name or `None` if it's not a Reddit URL or the
+        URL does not name a subreddit.
+    """
+    if "reddit.com" not in url:
+        return None
+
+    _, marker, remainder = url.partition("/r/")
+
+    if not marker:
+        return None
+
+    # The name ends at the next path, query or fragment delimiter, so
+    # `/r/testing?sort=new` yields `testing` rather than `testing?sort=new`.
+    name = remainder
+    for delimiter in "/?#":
+        name = name.split(delimiter, 1)[0]
+
+    # A bare `.../r/` carries no subreddit; report that as `None`, not `""`.
+    return name or None
diff --git a/blossom/api/tests/submissions/test_submission_subreddits.py b/blossom/api/tests/submissions/test_submission_subreddits.py
@@ -0,0 +1,89 @@
+# Disable line length restrictions to allow long URLs
+# flake8: noqa: E501
+from collections import OrderedDict
+
+from django.test import Client
+from django.urls import reverse
+from rest_framework import status
+
+from blossom.utils.test_helpers import create_submission, setup_user_client
+
+
+class TestSubreddits:
+    """Tests to validate that the subreddit data is generated correctly."""
+
+    def test_subreddit_extraction(self, client: Client) -> None:
+        """Test that the subreddit of a single submission is determined correctly."""
+        client, headers, user = setup_user_client(client, accepted_coc=True, id=123456)
+
+        create_submission(
+            url="https://reddit.com/r/ProgrammerHumor/comments/11e845g/think_smart_not_hard/"
+        )
+
+        result = client.get(
+            reverse("submission-subreddits"),
+            content_type="application/json",
+            **headers,
+        )
+
+        assert result.status_code == status.HTTP_200_OK
+
+        expected_subreddits = OrderedDict(ProgrammerHumor=1)
+        subreddits = result.json()
+        assert subreddits == expected_subreddits
+
+    def test_subreddit_aggregation(self, client: Client) -> None:
+        """Test that multiple submissions from the same subreddit are aggregated.
+
+        Also checks that the subreddits are returned sorted descending by count.
+        """
+        client, headers, user = setup_user_client(client, accepted_coc=True, id=123456)
+
+        create_submission(
+            url="https://reddit.com/r/ProgrammerHumor/comments/11e845g/think_smart_not_hard/"
+        )
+        create_submission(
+            url="https://reddit.com/r/ProgrammerHumor/comments/11e88ls/then_what_do_you_do/"
+        )
+        create_submission(
+            url="https://reddit.com/r/CuratedTumblr/comments/11e232j/life_is_nuanced_and_complex/"
+        )
+        create_submission(
+            url="https://reddit.com/r/ProgrammerHumor/comments/11e42w6/yes_i_know_about_transactions_and_backups/"
+        )
+        create_submission(
+            url="https://reddit.com/r/CuratedTumblr/comments/11ds7gc/big_boss_was_down_bad/"
+        )
+
+        result = client.get(
+            reverse("submission-subreddits"),
+            content_type="application/json",
+            **headers,
+        )
+
+        assert result.status_code == status.HTTP_200_OK
+
+        expected_subreddits = OrderedDict(ProgrammerHumor=3, CuratedTumblr=2)
+        subreddits = result.json()
+        assert subreddits == expected_subreddits
+        # Comparing a dict with an OrderedDict ignores the key order, so the
+        # descending sort has to be verified separately.
+        assert list(subreddits) == list(expected_subreddits)
+
+    def test_submission_filters(self, client: Client) -> None:
+        """Test that the normal submission filters work."""
+        client, headers, user = setup_user_client(client, accepted_coc=True, id=123456)
+
+        # Only this submission matches the `completed_by` filter used below
+        create_submission(
+            url="https://reddit.com/r/ProgrammerHumor/comments/11e845g/think_smart_not_hard/",
+            completed_by=user,
+        )
+        create_submission(
+            url="https://reddit.com/r/ProgrammerHumor/comments/11e88ls/then_what_do_you_do/"
+        )
+
+        result = client.get(
+            reverse("submission-subreddits") + "?completed_by=123456",
+            content_type="application/json",
+            **headers,
+        )
+
+        assert result.status_code == status.HTTP_200_OK
+
+        expected_subreddits = OrderedDict(ProgrammerHumor=1)
+        subreddits = result.json()
+        assert subreddits == expected_subreddits
diff --git a/blossom/api/views/submission.py b/blossom/api/views/submission.py
@@ -1,15 +1,18 @@
 """Views that specifically relate to submissions."""
 import datetime
 import logging
+from collections import OrderedDict
 from datetime import timedelta
 from typing import Union
 
 from django.conf import settings
-from django.db.models import Count, F
+from django.db.models import Count, F, Value
 from django.db.models.functions import (
     ExtractHour,
     ExtractIsoWeekDay,
     Length,
+    StrIndex,
+    Substr,
     TruncDate,
     TruncDay,
     TruncHour,
@@ -388,6 +391,40 @@ def heatmap(self, request: Request) -> Response:
 
         return Response(heatmap)
 
+    @csrf_exempt
+    @swagger_auto_schema(
+        operation_summary="Get the submission count by subreddit.",
+    )
+    @action(detail=False, methods=["get"])
+    def subreddits(self, request: Request) -> Response:
+        """
+        Count the submissions by subreddit.
+
+        The submissions are narrowed down by the view's filters first. Only
+        URLs starting with `https://reddit.com/r/` are counted.
+
+        :returns: A mapping of subreddit name (without the `r/` prefix) to
+            submission count, sorted descending by count.
+        """
+        # Imported locally so that this method does not depend on a change to
+        # the module-level import block.
+        from django.db.models.functions import Concat
+
+        subreddit_query = (
+            self.filter_queryset(Submission.objects)
+            # https://reddit.com/r/testing/comments/12345/hi/
+            .filter(url__startswith="https://reddit.com/r/")
+            # testing/comments/12345/hi//
+            # The prefix is 21 characters long, so the name starts at position 22.
+            # A sentinel slash is appended so that a URL without anything after
+            # the name (https://reddit.com/r/testing) still contains one.
+            # Without it the slash index is 0 and the length below becomes -1,
+            # which PostgreSQL rejects as a negative substring length.
+            .annotate(subreddit=Concat(Substr("url", 22, 50), Value("/")))
+            # 8
+            .annotate(slash_index=StrIndex("subreddit", Value("/")))
+            # testing
+            .annotate(subreddit_name=Substr("subreddit", 1, F("slash_index") - 1))
+            .values("subreddit_name")
+            .annotate(Count("subreddit_name"))
+        )
+
+        # Make the query response a little prettier
+        subreddit_counts: dict[str, int] = {
+            item["subreddit_name"]: item["subreddit_name__count"]
+            for item in subreddit_query
+        }
+
+        # Sort descending by submission count
+        sorted_subreddits = OrderedDict(
+            sorted(subreddit_counts.items(), key=lambda x: x[1], reverse=True)
+        )
+
+        return Response(sorted_subreddits)
+
     @csrf_exempt
     @swagger_auto_schema(
         request_body=Schema(