From d075c43d395beb3e0e3a67951494efe4e207fd32 Mon Sep 17 00:00:00 2001 From: Saliou Diallo Date: Tue, 27 Sep 2022 12:03:25 -0400 Subject: [PATCH] [PAY-631] Update filter logic for trending premium content (#3904) * Update filter logic for trending premium content * Reduce db round trip * Fix queries Co-authored-by: Saliou Diallo --- .../src/queries/get_trending_tracks.py | 82 +++++++++++-------- .../src/queries/get_underground_trending.py | 27 +++--- 2 files changed, 61 insertions(+), 48 deletions(-) diff --git a/discovery-provider/src/queries/get_trending_tracks.py b/discovery-provider/src/queries/get_trending_tracks.py index 0f78e6f1462..82ac3351327 100644 --- a/discovery-provider/src/queries/get_trending_tracks.py +++ b/discovery-provider/src/queries/get_trending_tracks.py @@ -2,6 +2,7 @@ from sqlalchemy import desc from sqlalchemy.orm.session import Session +from src.models.tracks.track import Track from src.models.tracks.track_trending_score import TrackTrendingScore from src.premium_content.premium_content_constants import ( SHOULD_TRENDING_EXCLUDE_PREMIUM_TRACKS, @@ -48,30 +49,35 @@ def generate_unpopulated_trending( strategy.get_track_score(time_range, track) for track in trending_tracks["listen_counts"] ] + + # If exclude_premium is true, then filter out track ids + # belonging to premium tracks before applying the limit. + if exclude_premium: + ids = [track["track_id"] for track in track_scores] + non_premium_track_ids = ( + session.query(Track.track_id) + .filter( + Track.track_id.in_(ids), + Track.is_current == True, + Track.is_delete == False, + Track.is_premium == False, + ) + .all() + ) + non_premium_track_id_set = set(map(lambda t: t[0], non_premium_track_ids)) + track_scores = list( + filter(lambda t: t["track_id"] in non_premium_track_id_set, track_scores) + ) + sorted_track_scores = sorted( track_scores, key=lambda k: (k["score"], k["track_id"]), reverse=True ) - - # Re apply the limit just in case we did decide to include more tracks in the scoring than the limit - # Only limit the number of sorted tracks here if we are not later - # filtering out the premium tracks. Otherwise, the number of - # tracks we return later may be smaller than the limit. - # If we don't limit it here, we limit it later after getting the - # unpopulated tracks. - should_apply_limit_early = True # not exclude_premium - if should_apply_limit_early: - sorted_track_scores = sorted_track_scores[:limit] + sorted_track_scores = sorted_track_scores[:limit] # Get unpopulated metadata track_ids = [track["track_id"] for track in sorted_track_scores] tracks = get_unpopulated_tracks(session, track_ids, exclude_premium=exclude_premium) - # Make sure to apply the limit if not previously applied - # because of the filtering out of premium tracks - if not should_apply_limit_early: - tracks = tracks[:limit] - track_ids = [track["track_id"] for track in tracks] - return (tracks, track_ids) @@ -103,13 +109,33 @@ def generate_unpopulated_trending_from_mat_views( TrackTrendingScore.genre == genre ) - # Only limit the number of sorted tracks here if we are not later - # filtering out the premium tracks. Otherwise, the number of - # tracks we return later may be smaller than the limit. - # If we don't limit it here, we limit it later after getting the - # unpopulated tracks. - should_apply_limit_early = True # not exclude_premium - if should_apply_limit_early: + # If exclude_premium is true, then filter out track ids belonging to + # premium tracks before applying the limit. + if exclude_premium: + trending_track_ids_subquery = trending_track_ids_query.subquery() + trending_track_ids = ( + session.query( + trending_track_ids_subquery.c.track_id, + trending_track_ids_subquery.c.score, + Track.track_id, + ) + .join( + trending_track_ids_subquery, + Track.track_id == trending_track_ids_subquery.c.track_id, + ) + .filter( + Track.is_current == True, + Track.is_delete == False, + Track.is_premium == False, + ) + .order_by( + desc(trending_track_ids_subquery.c.score), + desc(trending_track_ids_subquery.c.track_id), + ) + .limit(limit) + .all() + ) + else: trending_track_ids = ( trending_track_ids_query.order_by( desc(TrackTrendingScore.score), desc(TrackTrendingScore.track_id) @@ -117,21 +143,11 @@ def generate_unpopulated_trending_from_mat_views( .limit(limit) .all() ) - else: - trending_track_ids = trending_track_ids_query.order_by( - desc(TrackTrendingScore.score), desc(TrackTrendingScore.track_id) - ).all() # Get unpopulated metadata track_ids = [track_id[0] for track_id in trending_track_ids] tracks = get_unpopulated_tracks(session, track_ids, exclude_premium=exclude_premium) - # Make sure to apply the limit if not previously applied - # because of the filtering out of premium tracks - if not should_apply_limit_early: - tracks = tracks[:limit] - track_ids = [track["track_id"] for track in tracks] - return (tracks, track_ids) diff --git a/discovery-provider/src/queries/get_underground_trending.py b/discovery-provider/src/queries/get_underground_trending.py index f5f48c01434..cfe5fc9bebd 100644 --- a/discovery-provider/src/queries/get_underground_trending.py +++ b/discovery-provider/src/queries/get_underground_trending.py @@ -64,6 +64,7 @@ def get_scorable_track_data(session, redis_instance, strategy): "karma": number "listens": number "owner_verified": boolean + "is_premium": boolean } """ @@ -108,6 +109,7 @@ def get_scorable_track_data(session, redis_instance, strategy): AggregatePlay.count, Track.created_at, follower_query.c.is_verified, + Track.is_premium, ) .join(Track, Track.track_id == AggregatePlay.play_item_id) .join(follower_query, follower_query.c.user_id == Track.owner_id) @@ -139,6 +141,7 @@ def get_scorable_track_data(session, redis_instance, strategy): "karma": 1, "listens": record[3], "owner_verified": record[5], + "is_premium": record[6], } for record in base_query } @@ -192,19 +195,19 @@ def make_get_unpopulated_tracks(session, redis_instance, strategy): def wrapped(): # Score and sort track_scoring_data = get_scorable_track_data(session, redis_instance, strategy) + + # If SHOULD_TRENDING_EXCLUDE_PREMIUM_TRACKS is true, then filter out track ids + # belonging to premium tracks before applying the limit. + if SHOULD_TRENDING_EXCLUDE_PREMIUM_TRACKS: + track_scoring_data = list( + filter(lambda item: not item["is_premium"], track_scoring_data) + ) + scored_tracks = [ strategy.get_track_score("week", track) for track in track_scoring_data ] sorted_tracks = sorted(scored_tracks, key=lambda k: k["score"], reverse=True) - - # Only limit the number of sorted tracks here if we are not later - # filtering out the premium tracks. Otherwise, the number of - # tracks we return later may be smaller than the limit. - # If we don't limit it here, we limit it later after getting the - # unpopulated tracks. - should_apply_limit_early = True # not SHOULD_TRENDING_EXCLUDE_PREMIUM_TRACKS - if should_apply_limit_early: - sorted_tracks = sorted_tracks[:UNDERGROUND_TRENDING_LENGTH] + sorted_tracks = sorted_tracks[:UNDERGROUND_TRENDING_LENGTH] # Get unpopulated metadata track_ids = [track["track_id"] for track in sorted_tracks] @@ -214,12 +217,6 @@ def wrapped(): exclude_premium=SHOULD_TRENDING_EXCLUDE_PREMIUM_TRACKS, ) - # Make sure to apply the limit if not previously applied - # because of the filtering out of premium tracks - if not should_apply_limit_early: - tracks = tracks[:UNDERGROUND_TRENDING_LENGTH] - track_ids = [track["track_id"] for track in tracks] - return (tracks, track_ids) return wrapped