Skip to content

Commit

Permalink
[PAY-631] Update filter logic for trending premium content (#3904)
Browse files Browse the repository at this point in the history
* Update filter logic for trending premium content

* Reduce db round trip

* Fix queries

Co-authored-by: Saliou Diallo <saliou@audius.co>
  • Loading branch information
sddioulde and Saliou Diallo committed Sep 27, 2022
1 parent 13d48e5 commit d075c43
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 48 deletions.
82 changes: 49 additions & 33 deletions discovery-provider/src/queries/get_trending_tracks.py
Expand Up @@ -2,6 +2,7 @@

from sqlalchemy import desc
from sqlalchemy.orm.session import Session
from src.models.tracks.track import Track
from src.models.tracks.track_trending_score import TrackTrendingScore
from src.premium_content.premium_content_constants import (
SHOULD_TRENDING_EXCLUDE_PREMIUM_TRACKS,
Expand Down Expand Up @@ -48,30 +49,35 @@ def generate_unpopulated_trending(
strategy.get_track_score(time_range, track)
for track in trending_tracks["listen_counts"]
]

# If exclude_premium is true, then filter out track ids
# belonging to premium tracks before applying the limit.
if exclude_premium:
ids = [track["track_id"] for track in track_scores]
non_premium_track_ids = (
session.query(Track.track_id)
.filter(
Track.track_id.in_(ids),
Track.is_current == True,
Track.is_delete == False,
Track.is_premium == False,
)
.all()
)
non_premium_track_id_set = set(map(lambda t: t[0], non_premium_track_ids))
track_scores = list(
filter(lambda t: t["track_id"] in non_premium_track_id_set, track_scores)
)

sorted_track_scores = sorted(
track_scores, key=lambda k: (k["score"], k["track_id"]), reverse=True
)

# Re apply the limit just in case we did decide to include more tracks in the scoring than the limit
# Only limit the number of sorted tracks here if we are not later
# filtering out the premium tracks. Otherwise, the number of
# tracks we return later may be smaller than the limit.
# If we don't limit it here, we limit it later after getting the
# unpopulated tracks.
should_apply_limit_early = True # not exclude_premium
if should_apply_limit_early:
sorted_track_scores = sorted_track_scores[:limit]
sorted_track_scores = sorted_track_scores[:limit]

# Get unpopulated metadata
track_ids = [track["track_id"] for track in sorted_track_scores]
tracks = get_unpopulated_tracks(session, track_ids, exclude_premium=exclude_premium)

# Make sure to apply the limit if not previously applied
# because of the filtering out of premium tracks
if not should_apply_limit_early:
tracks = tracks[:limit]
track_ids = [track["track_id"] for track in tracks]

return (tracks, track_ids)


Expand Down Expand Up @@ -103,35 +109,45 @@ def generate_unpopulated_trending_from_mat_views(
TrackTrendingScore.genre == genre
)

# Only limit the number of sorted tracks here if we are not later
# filtering out the premium tracks. Otherwise, the number of
# tracks we return later may be smaller than the limit.
# If we don't limit it here, we limit it later after getting the
# unpopulated tracks.
should_apply_limit_early = True # not exclude_premium
if should_apply_limit_early:
# If exclude_premium is true, then filter out track ids belonging to
# premium tracks before applying the limit.
if exclude_premium:
trending_track_ids_subquery = trending_track_ids_query.subquery()
trending_track_ids = (
session.query(
trending_track_ids_subquery.c.track_id,
trending_track_ids_subquery.c.score,
Track.track_id,
)
.join(
trending_track_ids_subquery,
Track.track_id == trending_track_ids_subquery.c.track_id,
)
.filter(
Track.is_current == True,
Track.is_delete == False,
Track.is_premium == False,
)
.order_by(
desc(trending_track_ids_subquery.c.score),
desc(trending_track_ids_subquery.c.track_id),
)
.limit(limit)
.all()
)
else:
trending_track_ids = (
trending_track_ids_query.order_by(
desc(TrackTrendingScore.score), desc(TrackTrendingScore.track_id)
)
.limit(limit)
.all()
)
else:
trending_track_ids = trending_track_ids_query.order_by(
desc(TrackTrendingScore.score), desc(TrackTrendingScore.track_id)
).all()

# Get unpopulated metadata
track_ids = [track_id[0] for track_id in trending_track_ids]
tracks = get_unpopulated_tracks(session, track_ids, exclude_premium=exclude_premium)

# Make sure to apply the limit if not previously applied
# because of the filtering out of premium tracks
if not should_apply_limit_early:
tracks = tracks[:limit]
track_ids = [track["track_id"] for track in tracks]

return (tracks, track_ids)


Expand Down
27 changes: 12 additions & 15 deletions discovery-provider/src/queries/get_underground_trending.py
Expand Up @@ -64,6 +64,7 @@ def get_scorable_track_data(session, redis_instance, strategy):
"karma": number
"listens": number
"owner_verified": boolean
"is_premium": boolean
}
"""

Expand Down Expand Up @@ -108,6 +109,7 @@ def get_scorable_track_data(session, redis_instance, strategy):
AggregatePlay.count,
Track.created_at,
follower_query.c.is_verified,
Track.is_premium,
)
.join(Track, Track.track_id == AggregatePlay.play_item_id)
.join(follower_query, follower_query.c.user_id == Track.owner_id)
Expand Down Expand Up @@ -139,6 +141,7 @@ def get_scorable_track_data(session, redis_instance, strategy):
"karma": 1,
"listens": record[3],
"owner_verified": record[5],
"is_premium": record[6],
}
for record in base_query
}
Expand Down Expand Up @@ -192,19 +195,19 @@ def make_get_unpopulated_tracks(session, redis_instance, strategy):
def wrapped():
# Score and sort
track_scoring_data = get_scorable_track_data(session, redis_instance, strategy)

# If SHOULD_TRENDING_EXCLUDE_PREMIUM_TRACKS is true, then filter out track ids
# belonging to premium tracks before applying the limit.
if SHOULD_TRENDING_EXCLUDE_PREMIUM_TRACKS:
track_scoring_data = list(
filter(lambda item: not item["is_premium"], track_scoring_data)
)

scored_tracks = [
strategy.get_track_score("week", track) for track in track_scoring_data
]
sorted_tracks = sorted(scored_tracks, key=lambda k: k["score"], reverse=True)

# Only limit the number of sorted tracks here if we are not later
# filtering out the premium tracks. Otherwise, the number of
# tracks we return later may be smaller than the limit.
# If we don't limit it here, we limit it later after getting the
# unpopulated tracks.
should_apply_limit_early = True # not SHOULD_TRENDING_EXCLUDE_PREMIUM_TRACKS
if should_apply_limit_early:
sorted_tracks = sorted_tracks[:UNDERGROUND_TRENDING_LENGTH]
sorted_tracks = sorted_tracks[:UNDERGROUND_TRENDING_LENGTH]

# Get unpopulated metadata
track_ids = [track["track_id"] for track in sorted_tracks]
Expand All @@ -214,12 +217,6 @@ def wrapped():
exclude_premium=SHOULD_TRENDING_EXCLUDE_PREMIUM_TRACKS,
)

# Make sure to apply the limit if not previously applied
# because of the filtering out of premium tracks
if not should_apply_limit_early:
tracks = tracks[:UNDERGROUND_TRENDING_LENGTH]
track_ids = [track["track_id"] for track in tracks]

return (tracks, track_ids)

return wrapped
Expand Down

0 comments on commit d075c43

Please sign in to comment.