Skip to content

Commit

Permalink
adding data to /trending/<time> (#304)
Browse files Browse the repository at this point in the history
* adding repost counts with respect to time field

* removing trailing whitespace

* adding trailing whitespace

* renaming time_key

* using text() for relying on postgres dates over sqlalchemy dates

* removing unused constants

* adding comment

* adding windowed_save_count field

* addressing method order comments + additional comments on code

* setting default time in repost and save methods + changing var name + change api call + comments
  • Loading branch information
vicky-g committed Mar 2, 2020
1 parent 718a082 commit ddd6412
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 7 deletions.
36 changes: 30 additions & 6 deletions discovery-provider/src/queries/query_helpers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging # pylint: disable=C0302
import json
import requests
from sqlalchemy import func, desc
from sqlalchemy import func, desc, text
from urllib.parse import urljoin

from flask import request
Expand All @@ -12,6 +12,8 @@
from src.utils import helpers
from src.utils.config import shared_config

from datetime import datetime, timedelta

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -501,8 +503,7 @@ def populate_playlist_metadata(session, playlist_ids, playlists, repost_types, s

return playlists


def get_repost_counts(session, query_by_user_flag, query_repost_type_flag, filter_ids, repost_types, max_block_number=None):
def get_repost_counts_query(session, query_by_user_flag, query_repost_type_flag, filter_ids, repost_types, max_block_number=None):
query_col = Repost.user_id if query_by_user_flag else Repost.repost_item_id

repost_counts_query = None
Expand Down Expand Up @@ -546,10 +547,22 @@ def get_repost_counts(session, query_by_user_flag, query_repost_type_flag, filte
repost_counts_query = repost_counts_query.filter(
Repost.blocknumber <= max_block_number
)
return repost_counts_query.all()

return repost_counts_query

# Gets the repost count for users or tracks with the filters specified in the params.
# The time param {day, week, month, year} is used in generate_trending to create a
# windowed time frame for repost counts: when given, only reposts created within the
# last 1 <time> (relative to the database's NOW()) are counted.
# Raises ValueError if time is provided but is not one of the supported windows.
# Returns the rows produced by the underlying repost counts query.
def get_repost_counts(session, query_by_user_flag, query_repost_type_flag, filter_ids, repost_types, max_block_number=None, time=None):
    # `time` is interpolated into a raw SQL fragment below, so it must be checked
    # against a fixed allowlist first to rule out SQL injection via the URL param.
    if time is not None and time not in ('day', 'week', 'month', 'year'):
        raise ValueError(
            "Invalid time window '{}'; expected one of day, week, month, year".format(time)
        )

    repost_counts_query = get_repost_counts_query(
        session,
        query_by_user_flag,
        query_repost_type_flag,
        filter_ids,
        repost_types,
        max_block_number
    )

    if time is not None:
        # Rely on postgres date arithmetic (not python datetimes) for the window start
        interval = "NOW() - interval '1 {}'".format(time)
        repost_counts_query = repost_counts_query.filter(
            Repost.created_at >= text(interval)
        )
    return repost_counts_query.all()

def get_save_counts(session, query_by_user_flag, query_save_type_flag, filter_ids, save_types, max_block_number=None):
def get_save_counts_query(session, query_by_user_flag, query_save_type_flag, filter_ids, save_types, max_block_number=None):
query_col = Save.user_id if query_by_user_flag else Save.save_item_id

save_counts_query = None
Expand Down Expand Up @@ -594,8 +607,19 @@ def get_save_counts(session, query_by_user_flag, query_save_type_flag, filter_id
Save.blocknumber <= max_block_number
)

return save_counts_query.all()
return save_counts_query

# Gets the save count for users or tracks with the filters specified in the params.
# The time param {day, week, month, year} is used in generate_trending to create a
# windowed time frame for save counts: when given, only saves created within the
# last 1 <time> (relative to the database's NOW()) are counted.
# Raises ValueError if time is provided but is not one of the supported windows.
# Returns the rows produced by the underlying save counts query.
def get_save_counts(session, query_by_user_flag, query_save_type_flag, filter_ids, save_types, max_block_number=None, time=None):
    # `time` is interpolated into a raw SQL fragment below, so it must be checked
    # against a fixed allowlist first to rule out SQL injection via the URL param.
    if time is not None and time not in ('day', 'week', 'month', 'year'):
        raise ValueError(
            "Invalid time window '{}'; expected one of day, week, month, year".format(time)
        )

    save_counts_query = get_save_counts_query(
        session,
        query_by_user_flag,
        query_save_type_flag,
        filter_ids,
        save_types,
        max_block_number
    )

    if time is not None:
        # Rely on postgres date arithmetic (not python datetimes) for the window start
        interval = "NOW() - interval '1 {}'".format(time)
        save_counts_query = save_counts_query.filter(
            Save.created_at >= text(interval)
        )
    return save_counts_query.all()

def get_followee_count_dict(session, user_ids):
# build dict of user id --> followee count
Expand Down
3 changes: 3 additions & 0 deletions discovery-provider/src/queries/response_name_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
created_at = 'created_at' # datetime - time track was created
repost_count = 'repost_count' # integer - total count of reposts by given user
track_blocknumber = 'track_blocknumber' # integer - blocknumber of latest track for user
windowed_repost_count = 'windowed_repost_count' # integer - count of reposts created within the requested trending time window
windowed_save_count = 'windowed_save_count' # integer - count of saves created within the requested trending time window

# current user specific
does_current_user_follow = 'does_current_user_follow' # boolean - does current user follow given user
current_user_followee_follow_count = 'current_user_followee_follow_count' # integer - number of followees of current user that also follow given user
Expand Down
40 changes: 39 additions & 1 deletion discovery-provider/src/tasks/generate_trending.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from src.models import Track, RepostType, Follow, SaveType
from src.utils.config import shared_config
from src.queries import response_name_constants
from src.queries.query_helpers import get_repost_counts, get_save_counts, get_genre_list
from src.queries.query_helpers import \
get_repost_counts, get_save_counts, get_genre_list

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -85,12 +86,23 @@ def generate_trending(db, time, genre, limit, offset):
not_deleted_track_ids = set([record[0] for record in not_deleted_track_ids]) # pylint: disable=R1718
# Query repost counts
repost_counts = get_repost_counts(session, False, True, not_deleted_track_ids, None)
# Generate track_id --> repost_count mapping
track_repost_counts = {
repost_item_id: repost_count
for (repost_item_id, repost_count, repost_type) in repost_counts
if repost_type == RepostType.track
}

# Query repost count with respect to rolling time frame in URL (e.g. /trending/week -> window = rolling week)
track_repost_counts_for_time = \
get_repost_counts(session, False, True, not_deleted_track_ids, None, None, time)
# Generate track_id --> windowed_repost_count mapping
track_repost_counts_for_time = {
repost_item_id: repost_count
for (repost_item_id, repost_count, repost_type) in track_repost_counts_for_time
if repost_type == RepostType.track
}

# Query follower info for each track owner
# Query each track owner
track_owners_query = (
Expand Down Expand Up @@ -126,13 +138,24 @@ def generate_trending(db, time, genre, limit, offset):
follower_count_dict = \
{user_id: follower_count for (user_id, follower_count) in follower_counts}

# Query save counts
save_counts = get_save_counts(session, False, True, not_deleted_track_ids, None)
# Generate track_id --> save_count mapping
track_save_counts = {
save_item_id: save_count
for (save_item_id, save_count, save_type) in save_counts
if save_type == SaveType.track
}

# Query save counts with respect to rolling time frame in URL (e.g. /trending/week -> window = rolling week)
save_counts_for_time = get_save_counts(session, False, True, not_deleted_track_ids, None, None, time)
# Generate track_id --> windowed_save_count mapping
track_save_counts_for_time = {
save_item_id: save_count
for (save_item_id, save_count, save_type) in save_counts_for_time
if save_type == SaveType.track
}

trending_tracks = []
for track_entry in listen_counts:
# Skip over deleted tracks
Expand All @@ -146,13 +169,27 @@ def generate_trending(db, time, genre, limit, offset):
else:
track_entry[response_name_constants.repost_count] = 0

# Populate repost counts with respect to time
if track_entry[response_name_constants.track_id] in track_repost_counts_for_time:
track_entry[response_name_constants.windowed_repost_count] = \
track_repost_counts_for_time[track_entry[response_name_constants.track_id]]
else:
track_entry[response_name_constants.windowed_repost_count] = 0

# Populate save counts
if track_entry[response_name_constants.track_id] in track_save_counts:
track_entry[response_name_constants.save_count] = \
track_save_counts[track_entry[response_name_constants.track_id]]
else:
track_entry[response_name_constants.save_count] = 0

# Populate save counts with respect to time
if track_entry[response_name_constants.track_id] in track_save_counts_for_time:
track_entry[response_name_constants.windowed_save_count] = \
track_save_counts_for_time[track_entry[response_name_constants.track_id]]
else:
track_entry[response_name_constants.windowed_save_count] = 0

# Populate listen counts
owner_id = track_owner_dict[track_entry[response_name_constants.track_id]]
owner_follow_count = 0
Expand All @@ -173,3 +210,4 @@ def generate_trending(db, time, genre, limit, offset):
final_resp = {}
final_resp['listen_counts'] = trending_tracks
return final_resp

0 comments on commit ddd6412

Please sign in to comment.