Skip to content

Commit

Permalink
fix: long description not found robustness
Browse files Browse the repository at this point in the history
  • Loading branch information
Morriz committed May 24, 2024
1 parent ee3a208 commit 66a2318
Show file tree
Hide file tree
Showing 3 changed files with 200 additions and 208 deletions.
12 changes: 6 additions & 6 deletions api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,38 +45,38 @@ async def search_media(
@app.get("/media-videos", response_model=List[Video])
async def get_youtube_search(
query: Annotated[
Optional[str],
str,
Query(
title="Query string",
description="Query string used to match independent news channels and do a youtube search with in those channels.",
min_length=3,
example="@aljazeeraenglish,@DemocracyNow",
example="israel",
),
] = None,
channels: Annotated[
Optional[str],
str,
Query(
title="Channels to search in",
description="A string of comma-separated Youtube channels to search in.",
example="@aljazeeraenglish,@DemocracyNow",
),
] = None,
period_days: Annotated[
Optional[int],
int,
Query(
title="Period in days",
description="The period in days since now that we want to search videos for.",
),
] = 3,
max_channels: Annotated[
Optional[int],
int,
Query(
title="Max channels",
description="Maximum number of channels that we want to match. Needed when no channels were provided.",
),
] = 12,
max_videos_per_channel: Annotated[
Optional[int],
int,
Query(
title="Max videos per channel",
description="The maximum number of videos per channel that we want from each channel search.",
Expand Down
23 changes: 13 additions & 10 deletions api/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,29 +118,32 @@ def _parse_html_list(html: str, max_results: int) -> List[Video]:


def _parse_html_video(html: str) -> Dict[str, str]:
result: Dict[str, str] = {}
result: Dict[str, str] = {"long_desc": None}
start = html.index("ytInitialData") + len("ytInitialData") + 3
end = html.index("};", start) + 1
json_str = html[start:end]
data = json.loads(json_str)
obj = munchify(data)
result["long_desc"] = (
obj.contents.twoColumnWatchNextResults.results.results.contents[
1
].videoSecondaryInfoRenderer.attributedDescription.content
)
try:
result["long_desc"] = (
obj.contents.twoColumnWatchNextResults.results.results.contents[
1
].videoSecondaryInfoRenderer.attributedDescription.content
)
except:
pass
return result


@async_threadsafe_ttl_cache(ttl=3600)
async def youtube_search(
query: str = None,
channels: str = None,
get_descriptions: bool = False,
get_transcripts: bool = False,
period_days: int = 3,
max_channels: int = None,
max_videos_per_channel: int = 3,
period_days: int = 3,
query: str = None,
get_descriptions: bool = False,
get_transcripts: bool = False,
) -> List[Video]:
if channels:
channels_arr = [
Expand Down
Loading

0 comments on commit 66a2318

Please sign in to comment.