Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Discovery Sitemap Endpoint (#3973)
* Add sitemap methods to discovery * Fix sitemap urls and add tests * rename route for sitemap to be more consistent * Fix double set * Fix integration test
- Loading branch information
Showing
5 changed files
with
419 additions
and
2 deletions.
There are no files selected for viewing
122 changes: 122 additions & 0 deletions
122
discovery-provider/integration_tests/queries/test_get_sitemap.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
import logging | ||
from unittest import mock | ||
|
||
from integration_tests.utils import populate_mock_db | ||
from src.queries.get_sitemap import ( | ||
build_default, | ||
get_playlist_page, | ||
get_playlist_root, | ||
get_track_page, | ||
get_track_root, | ||
get_user_page, | ||
get_user_root, | ||
) | ||
from src.utils.db_session import get_db | ||
|
||
# Module-level logger named after this test module.
logger = logging.getLogger(__name__)
|
||
|
||
# Decorators are applied bottom-up, so the patch for set_base_url is passed as
# the first mock argument and the patch for get_client_base_url as the second.
@mock.patch("src.queries.get_sitemap.get_client_base_url")
@mock.patch("src.queries.get_sitemap.set_base_url")
def test_get_sitemaps(mock_set_base_url, mock_get_client_base_url, app):
    """Check the XML produced by every sitemap builder against fixed fixtures.

    Both base-URL helpers are patched to return the same host, so every
    expected <loc> below starts with https://discoveryprovider.audius.co.
    The database is seeded with 10 playlists, 10 tracks (each with a track
    route) and 20 users, with ids starting at 0.

    NOTE(review): the expected literals show one space of indentation per
    nesting level; confirm this matches lxml's pretty_print output and was not
    collapsed when this file was extracted.
    """
    with app.app_context():
        db = get_db()

    mock_set_base_url.return_value = "https://discoveryprovider.audius.co"
    mock_get_client_base_url.return_value = "https://discoveryprovider.audius.co"

    test_entities = {
        "playlists": [
            {
                "playlist_id": i,
                "playlist_owner_id": i,
                "playlist_name": f"p_name_{i}",
                # Even ids are albums, odd ids are playlists.
                "is_album": i % 2 == 0,
            }
            for i in range(10)
        ],
        "tracks": [{"track_id": i, "owner_id": i} for i in range(10)],
        "track_routes": [
            {
                "track_id": i,
                "owner_id": i,
                "slug": f"slug_{i}",
                "title_slug": f"title_slug_{i}",
            }
            for i in range(10)
        ],
        "users": [{"user_id": i, "handle": f"user_{i}"} for i in range(20)],
    }

    populate_mock_db(db, test_entities)

    with db.scoped_session() as session:
        # The default sitemap lists every static/app route under the client base URL.
        default_sitemap = build_default()
        assert (
            default_sitemap
            == b'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n <sitemap>\n <loc>https://discoveryprovider.audius.co/legal/privacy-policy</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/legal/terms-of-use</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/download</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/feed</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/trending</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/explore</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/explore/playlists</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/explore/underground</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/explore/top-albums</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/explore/remixables</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/explore/feeling-lucky</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/explore/chill</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/explore/upbeat</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/explore/intense</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/explore/provoking</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/explore/intimate</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/signup</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/signin</loc>\n </sitemap>\n</urlset>\n'
        )

        # Validate that there are 4 track sitemaps: 10 tracks at 3 tracks per
        # sitemap, rounded up = 4
        track_root = get_track_root(session, 3)
        assert (
            track_root
            == b'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n <sitemap>\n <loc>https://discoveryprovider.audius.co/sitemaps/track/1.xml</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/sitemaps/track/2.xml</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/sitemaps/track/3.xml</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/sitemaps/track/4.xml</loc>\n </sitemap>\n</urlset>\n'
        )

        # Validate that there are 5 playlist sitemaps: 10 playlists at 2
        # playlists per sitemap = 5
        playlist_root = get_playlist_root(session, 2)
        assert (
            playlist_root
            == b'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n <sitemap>\n <loc>https://discoveryprovider.audius.co/sitemaps/playlist/1.xml</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/sitemaps/playlist/2.xml</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/sitemaps/playlist/3.xml</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/sitemaps/playlist/4.xml</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/sitemaps/playlist/5.xml</loc>\n </sitemap>\n</urlset>\n'
        )

        # Validate that there are 2 user sitemaps: 20 users at 12 users per
        # sitemap, rounded up = 2
        user_root = get_user_root(session, 12)
        assert (
            user_root
            == b'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n <sitemap>\n <loc>https://discoveryprovider.audius.co/sitemaps/user/1.xml</loc>\n </sitemap>\n <sitemap>\n <loc>https://discoveryprovider.audius.co/sitemaps/user/2.xml</loc>\n </sitemap>\n</urlset>\n'
        )

        # Validate that page 1 with a limit of 6 returns the first 6 track slugs
        track_page_1 = get_track_page(session, 1, 6)
        assert (
            track_page_1
            == b'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n <url>\n <loc>https://discoveryprovider.audius.co/user_0/slug_0</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_1/slug_1</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_2/slug_2</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_3/slug_3</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_4/slug_4</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_5/slug_5</loc>\n </url>\n</urlset>\n'
        )

        # Validate that page 2 returns the remaining 4 track slugs, starting at track 6
        track_page_2 = get_track_page(session, 2, 6)
        assert (
            track_page_2
            == b'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n <url>\n <loc>https://discoveryprovider.audius.co/user_6/slug_6</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_7/slug_7</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_8/slug_8</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_9/slug_9</loc>\n </url>\n</urlset>\n'
        )

        # Validate that a limit larger than the table returns all 10 playlists,
        # with "album" or "playlist" in the path according to is_album
        playlist_page_1 = get_playlist_page(session, 1, 100)
        assert (
            playlist_page_1
            == b'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n <url>\n <loc>https://discoveryprovider.audius.co/user_0/album/p_name_0-0</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_1/playlist/p_name_1-1</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_2/album/p_name_2-2</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_3/playlist/p_name_3-3</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_4/album/p_name_4-4</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_5/playlist/p_name_5-5</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_6/album/p_name_6-6</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_7/playlist/p_name_7-7</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_8/album/p_name_8-8</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_9/playlist/p_name_9-9</loc>\n </url>\n</urlset>\n'
        )

        # Validate that page 1 starts at user 0 and returns 8 user slugs
        user_page_1 = get_user_page(session, 1, 8)
        assert (
            user_page_1
            == b'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n <url>\n <loc>https://discoveryprovider.audius.co/user_0</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_1</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_2</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_3</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_4</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_5</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_6</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_7</loc>\n </url>\n</urlset>\n'
        )

        # Validate that page 2 starts at user 8*1=8 and returns 8 user slugs
        user_page_2 = get_user_page(session, 2, 8)
        assert (
            user_page_2
            == b'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n <url>\n <loc>https://discoveryprovider.audius.co/user_8</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_9</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_10</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_11</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_12</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_13</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_14</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_15</loc>\n </url>\n</urlset>\n'
        )

        # Validate that page 3 starts at user 8*2=16 and returns only the
        # remaining 4 user slugs (fewer than the limit of 8)
        user_page_3 = get_user_page(session, 3, 8)
        assert (
            user_page_3
            == b'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n <url>\n <loc>https://discoveryprovider.audius.co/user_16</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_17</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_18</loc>\n </url>\n <url>\n <loc>https://discoveryprovider.audius.co/user_19</loc>\n </url>\n</urlset>\n'
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,224 @@ | ||
import logging | ||
from typing import List, Tuple | ||
|
||
from lxml import etree | ||
from sqlalchemy import asc, func | ||
from sqlalchemy.orm.session import Session | ||
from src.models.playlists.playlist import Playlist | ||
from src.models.tracks.track import Track | ||
from src.models.tracks.track_route import TrackRoute | ||
from src.models.users.user import User | ||
from src.utils.get_all_other_nodes import get_node_endpoint | ||
|
||
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Relative routes of the top-level sitemap documents: the static defaults plus
# one index per entity type. No function visible in this module reads this
# list. NOTE(review): presumably consumed by the API layer; these paths also
# differ from the "sitemaps/{type}/{n}.xml" URLs built by get_dynamic_root,
# so confirm they are still accurate.
root_site_maps_routes = [
    "defaults.xml",
    "tracks/index.xml",
    "collections/index.xml",
    "users/index.xml",
]
|
||
|
||
def get_client_base_url():
    """Return the base URL that client-facing sitemap links are built on."""
    client_base_url = "https://audius.co"
    return client_base_url
|
||
|
||
def set_base_url():
    """Return the endpoint reported by get_node_endpoint().

    Despite the ``set_`` prefix, nothing is stored here: the endpoint is
    looked up on every call and handed straight back to the caller.
    """
    return get_node_endpoint()
|
||
|
||
def create_client_url(route):
    """Return ``route`` appended to the client base URL, separated by a slash.

    ``route`` is expected to be relative (no leading slash); it is
    interpolated as-is.
    """
    return f"{get_client_base_url()}/{route}"
|
||
|
||
def create_xml_url(route):
    """Return ``route`` appended to this node's base URL, separated by a slash.

    Used for links to sitemap XML documents, which are built on the value of
    set_base_url() rather than on the client base URL.
    """
    return f"{set_base_url()}/{route}"
|
||
|
||
# Fixed client routes listed by build_default(). Each entry is relative to the
# client base URL and has no leading slash.
default_routes = [
    # static
    "legal/privacy-policy",
    "legal/terms-of-use",
    "download",
    # app
    "feed",
    "trending",
    "explore",
    "explore/playlists",
    "explore/underground",
    "explore/top-albums",
    "explore/remixables",
    "explore/feeling-lucky",
    "explore/chill",
    "explore/upbeat",
    "explore/intense",
    "explore/provoking",
    "explore/intimate",
    "signup",
    "signin",
]

# The maximum number of URLs that can be in a single sitemap. Used as the
# default ``limit`` (page size) by the root and page builders in this module.
LIMIT = 50_000
|
||
|
||
def build_default():
    """Build the sitemap XML for the fixed routes in ``default_routes``.

    Returns:
        Pretty-printed XML bytes: a <urlset> root holding one
        <sitemap>/<loc> entry per default route, each resolved against the
        client base URL.

    NOTE(review): these entries point at pages, not at other sitemap files;
    confirm against the sitemaps.org protocol whether they should be <url>
    elements. Output is kept unchanged because existing tests pin the bytes.
    """
    urlset = etree.Element("urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")
    for route in default_routes:
        entry = etree.SubElement(urlset, "sitemap")
        location = etree.SubElement(entry, "loc")
        location.text = create_client_url(route)

    return etree.tostring(urlset, pretty_print=True)
|
||
|
||
def get_max_track_count(session: Session) -> int:
    """Return the number of current tracks that are not stems.

    Despite "max" in the name this is a row count, not a maximum id.
    """
    count_query = session.query(func.count(Track.track_id)).filter(
        Track.is_current == True, Track.stem_of == None
    )
    # The count query yields exactly one single-column row.
    (track_count,) = count_query.one()
    return track_count
|
||
|
||
def get_max_user_count(session: Session) -> int:
    """Return the number of current users that are not deactivated.

    Despite "max" in the name this is a row count, not a maximum id.
    """
    count_query = session.query(func.count(User.user_id)).filter(
        User.is_current == True, User.is_deactivated == False
    )
    # The count query yields exactly one single-column row.
    (user_count,) = count_query.one()
    return user_count
|
||
|
||
def get_max_playlist_count(session: Session) -> int:
    """Return the number of current, public, non-deleted playlists.

    Despite "max" in the name this is a row count, not a maximum id.
    """
    count_query = session.query(func.count(Playlist.playlist_id)).filter(
        Playlist.is_current == True,
        Playlist.is_private == False,
        Playlist.is_delete == False,
    )
    # The count query yields exactly one single-column row.
    (playlist_count,) = count_query.one()
    return playlist_count
|
||
|
||
def get_dynamic_root(max: int, base_route: str, limit: int = LIMIT):
    """Build the root sitemap XML that lists every page file for one entity type.

    Args:
        max: Total number of entities to spread across pages. The name shadows
            the builtin but is kept so keyword callers keep working.
        base_route: Path segment identifying the entity type in the page URLs.
        limit: Maximum number of entities per page.

    Returns:
        Pretty-printed XML bytes: a <urlset> root holding one <sitemap>/<loc>
        entry per page, pointing at ``sitemaps/{base_route}/{page}.xml`` on
        this node's base URL. Pages are numbered from 1; a count of 0 yields
        an empty root.

    Raises:
        ZeroDivisionError: If ``limit`` is 0.
    """
    # Ceiling division using integers only. The previous form fell back to
    # int(max / limit), which goes through float division.
    num_pages = -(-max // limit)

    root = etree.Element("urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")
    for page in range(1, num_pages + 1):
        sitemap_el = etree.Element("sitemap")
        loc = etree.Element("loc")
        loc.text = create_xml_url(f"sitemaps/{base_route}/{page}.xml")
        sitemap_el.append(loc)
        root.append(sitemap_el)

    return etree.tostring(root, pretty_print=True)
|
||
|
||
def get_entity_page(slugs: List[str]):
    """Build one sitemap page from a list of client-relative routes.

    Args:
        slugs: Routes relative to the client base URL, one per entity.

    Returns:
        Pretty-printed XML bytes: a <urlset> root holding one <url>/<loc>
        entry per slug, in the order given.
    """
    urlset = etree.Element("urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")
    for route in slugs:
        url_el = etree.SubElement(urlset, "url")
        location = etree.SubElement(url_el, "loc")
        location.text = create_client_url(route)
    return etree.tostring(urlset, pretty_print=True)
|
||
|
||
def get_track_slugs(session: Session, limit: int, offset: int):
    """Return one page of ``"{handle_lc}/{slug}"`` routes for tracks.

    Track routes are joined to their track and to the owning user; only
    current, non-stem tracks with a current owner row are included. Rows are
    ordered by ascending track id so that pages are stable.

    Args:
        session: Database session to query with.
        limit: Maximum number of rows to return.
        offset: Number of rows to skip before the first returned row.
    """
    page_query = (
        session.query(User.handle_lc, TrackRoute.slug)
        .join(Track, TrackRoute.track_id == Track.track_id)
        .join(User, TrackRoute.owner_id == User.user_id)
        .filter(
            Track.is_current == True, Track.stem_of == None, User.is_current == True
        )
        .order_by(asc(Track.track_id))
        .limit(limit)
        .offset(offset)
    )
    # Each row is a (handle_lc, slug) pair.
    rows: List[Tuple[str, str]] = page_query.all()

    return [f"{handle}/{slug}" for handle, slug in rows]
|
||
|
||
def get_playlist_slugs(session: Session, limit: int, offset: int):
    """Return one page of client routes for playlists and albums.

    Each route has the form ``"{handle_lc}/{album|playlist}/{name}-{id}"``,
    where the middle segment is chosen by ``is_album``. Only current, public,
    non-deleted playlists are included, which is the same set that
    get_max_playlist_count() counts, so page contents agree with the number
    of pages advertised by the root sitemap. The owning user is restricted to
    its current row, as get_track_slugs() does for the same kind of join.

    NOTE(review): playlist_name is interpolated as-is; confirm whether the
    client route expects a slugified or URL-encoded name.

    Args:
        session: Database session to query with.
        limit: Maximum number of rows to return.
        offset: Number of rows to skip before the first returned row.
    """
    # Each row is (handle_lc, playlist_name, playlist_id, is_album).
    playlists: List[Tuple[str, str, int, bool]] = (
        session.query(
            User.handle_lc,
            Playlist.playlist_name,
            Playlist.playlist_id,
            Playlist.is_album,
        )
        .join(User, User.user_id == Playlist.playlist_owner_id)
        .filter(
            Playlist.is_current == True,
            Playlist.is_private == False,
            # Deleted playlists are excluded from the count, so exclude them
            # here too instead of listing them.
            Playlist.is_delete == False,
            User.is_current == True,
        )
        .order_by(asc(Playlist.playlist_id))
        .limit(limit)
        .offset(offset)
        .all()
    )

    return [
        f"{handle}/{'album' if is_album else 'playlist'}/{name}-{playlist_id}"
        for handle, name, playlist_id, is_album in playlists
    ]
|
||
|
||
def get_user_slugs(session: Session, limit: int, offset: int):
    """Return one page of user routes, each being the user's ``handle_lc``.

    Only current, non-deactivated users with a non-null ``handle_lc`` are
    included. Rows are ordered by ascending user id so that pages are stable.

    Args:
        session: Database session to query with.
        limit: Maximum number of rows to return.
        offset: Number of rows to skip before the first returned row.
    """
    page_query = (
        session.query(User.handle_lc)
        .filter(
            User.is_current == True,
            User.is_deactivated == False,
            User.handle_lc != None,
        )
        .order_by(asc(User.user_id))
        .limit(limit)
        .offset(offset)
    )
    rows = page_query.all()

    # Each row is a single-column result; take its first element.
    return [row[0] for row in rows]
|
||
|
||
def get_track_root(session: Session, limit: int = LIMIT):
    """Return the root sitemap XML listing every track page file.

    The number of pages is derived from get_max_track_count() and ``limit``.
    """
    return get_dynamic_root(get_max_track_count(session), "track", limit)
|
||
|
||
def get_playlist_root(session: Session, limit: int = LIMIT):
    """Return the root sitemap XML listing every playlist page file.

    The number of pages is derived from get_max_playlist_count() and ``limit``.
    """
    playlist_count = get_max_playlist_count(session)
    return get_dynamic_root(playlist_count, "playlist", limit)
|
||
|
||
def get_user_root(session: Session, limit: int = LIMIT):
    """Return the root sitemap XML listing every user page file.

    The number of pages is derived from get_max_user_count() and ``limit``.
    """
    return get_dynamic_root(get_max_user_count(session), "user", limit)
|
||
|
||
def get_track_page(session: Session, page: int, limit: int = LIMIT):
    """Return the sitemap XML for one page of track URLs.

    Args:
        session: Database session to query with.
        page: 1-based page number; page 1 starts at the first row.
        limit: Maximum number of URLs on the page.
    """
    rows_to_skip = (page - 1) * limit
    return get_entity_page(get_track_slugs(session, limit, rows_to_skip))
|
||
|
||
def get_playlist_page(session: Session, page: int, limit: int = LIMIT):
    """Return the sitemap XML for one page of playlist and album URLs.

    Args:
        session: Database session to query with.
        page: 1-based page number; page 1 starts at the first row.
        limit: Maximum number of URLs on the page.
    """
    rows_to_skip = (page - 1) * limit
    return get_entity_page(get_playlist_slugs(session, limit, rows_to_skip))
|
||
|
||
def get_user_page(session: Session, page: int, limit: int = LIMIT):
    """Return the sitemap XML for one page of user URLs.

    Args:
        session: Database session to query with.
        page: 1-based page number; page 1 starts at the first row.
        limit: Maximum number of URLs on the page.
    """
    rows_to_skip = (page - 1) * limit
    return get_entity_page(get_user_slugs(session, limit, rows_to_skip))
Oops, something went wrong.