From 9b0969f573a61609afaf938b1771c28ea7737ede Mon Sep 17 00:00:00 2001 From: Joe Kaufeld Date: Tue, 28 Feb 2023 13:13:32 -0500 Subject: [PATCH] Make the database do the work --- blossom/api/views/submission.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/blossom/api/views/submission.py b/blossom/api/views/submission.py index 1dbf67f4..c9359a96 100644 --- a/blossom/api/views/submission.py +++ b/blossom/api/views/submission.py @@ -6,11 +6,13 @@ from typing import Union from django.conf import settings -from django.db.models import Count, F +from django.db.models import Count, F, Value from django.db.models.functions import ( ExtractHour, ExtractIsoWeekDay, Length, + StrIndex, + Substr, TruncDate, TruncDay, TruncHour, @@ -37,13 +39,7 @@ from blossom.api.authentication import BlossomApiPermission from blossom.api.helpers import validate_request -from blossom.api.models import ( - Source, - Submission, - Transcription, - TranscriptionCheck, - extract_subreddit_from_url, -) +from blossom.api.models import Source, Submission, Transcription, TranscriptionCheck from blossom.api.pagination import StandardResultsSetPagination from blossom.api.serializers import SubmissionSerializer from blossom.api.slack import client as slack @@ -404,19 +400,23 @@ def subreddits(self, request: Request) -> Response: """Count the submissions by subreddit.""" subreddit_query = ( self.filter_queryset(Submission.objects) - # Only include Reddit posts with the expected URL format + # https://reddit.com/r/testing/comments/12345/hi/ .filter(url__startswith="https://reddit.com/r/") - # Return only the relevant values - .values("id", "url") + # testing/comments/12345/hi/ + .annotate(subreddit=Substr("url", 22, 50)) + # 8 + .annotate(slash_index=StrIndex("subreddit", Value("/"))) + # testing + .annotate(subreddit_name=Substr("subreddit", 1, F("slash_index") - 1)) + .values("subreddit_name") + .annotate(Count("subreddit_name")) ) subreddit_counts: dict[str, int] = dict() - # Count the number of submissions per subreddit - # TODO: Make this more efficient (at the database level) + # Make the query response a little prettier for item in subreddit_query: - subreddit = extract_subreddit_from_url(item["url"]) - subreddit_counts[subreddit] = subreddit_counts.get(subreddit, 0) + 1 + subreddit_counts[item["subreddit_name"]] = item["subreddit_name__count"] # Sort descending by submission count sorted_subreddits = OrderedDict(