fix: robustness when the long description is not found

Morriz · May 24, 2024 · 66a2318 · 66a2318
1 parent ee3a208
commit 66a2318
Show file tree

Hide file tree

Showing 3 changed files with 200 additions and 208 deletions.
diff --git a/api/main.py b/api/main.py
@@ -45,38 +45,38 @@ async def search_media(
 @app.get("/media-videos", response_model=List[Video])
 async def get_youtube_search(
     query: Annotated[
-        Optional[str],
+        str,
         Query(
             title="Query string",
             description="Query string used to match independent news channels and do a youtube search with in those channels.",
             min_length=3,
-            example="@aljazeeraenglish,@DemocracyNow",
+            example="israel",
         ),
     ] = None,
     channels: Annotated[
-        Optional[str],
+        str,
         Query(
             title="Channels to search in",
             description="A string of comma-separated Youtube channels to search in.",
             example="@aljazeeraenglish,@DemocracyNow",
         ),
     ] = None,
     period_days: Annotated[
-        Optional[int],
+        int,
         Query(
             title="Period in days",
             description="The period in days since now that we want to search videos for.",
         ),
     ] = 3,
     max_channels: Annotated[
-        Optional[int],
+        int,
         Query(
             title="Max channels",
             description="Maximum number of channels that we want to match. Needed when no channels were provided.",
         ),
     ] = 12,
     max_videos_per_channel: Annotated[
-        Optional[int],
+        int,
         Query(
             title="Max videos per channel",
             description="The maximum number of videos per channel that we want from each channel search.",

diff --git a/api/youtube.py b/api/youtube.py
@@ -118,29 +118,32 @@ def _parse_html_list(html: str, max_results: int) -> List[Video]:
 
 
 def _parse_html_video(html: str) -> Dict[str, str]:
-    result: Dict[str, str] = {}
+    result: Dict[str, str] = {"long_desc": None}
     start = html.index("ytInitialData") + len("ytInitialData") + 3
     end = html.index("};", start) + 1
     json_str = html[start:end]
     data = json.loads(json_str)
     obj = munchify(data)
-    result["long_desc"] = (
-        obj.contents.twoColumnWatchNextResults.results.results.contents[
-            1
-        ].videoSecondaryInfoRenderer.attributedDescription.content
-    )
+    try:
+        result["long_desc"] = (
+            obj.contents.twoColumnWatchNextResults.results.results.contents[
+                1
+            ].videoSecondaryInfoRenderer.attributedDescription.content
+        )
+    except:
+        pass
     return result
 
 
 @async_threadsafe_ttl_cache(ttl=3600)
 async def youtube_search(
+    query: str = None,
     channels: str = None,
-    get_descriptions: bool = False,
-    get_transcripts: bool = False,
+    period_days: int = 3,
     max_channels: int = None,
     max_videos_per_channel: int = 3,
-    period_days: int = 3,
-    query: str = None,
+    get_descriptions: bool = False,
+    get_transcripts: bool = False,
 ) -> List[Video]:
     if channels:
         channels_arr = [