Identify articles using only the first 400 chars of their ID

Some feeds, like Google News, have very long article IDs because they contain base64-encoded information. Truncating the ID to its first 400 chars seems enough to avoid duplicates while keeping the database index reasonably sized.
NicolasLM · May 6, 2019 · a810953 · a810953
1 parent 100a2c7
commit a810953
Showing 1 changed file with 2 additions and 2 deletions.
diff --git a/reader/tasks.py b/reader/tasks.py
@@ -126,7 +126,7 @@ def synchronize_parsed_feed(feed: models.Feed, parsed_feed: ParsedFeed):
     articles_to_uncache = list()
     existing_articles = (
         models.Article.objects.filter(feed=feed)
-        .filter(id_in_feed__in={a.id for a in parsed_feed.articles})
+        .filter(id_in_feed__in={a.id[:400] for a in parsed_feed.articles})
         .select_related('feed')
         .prefetch_related('attachment_set')
     )
@@ -136,7 +136,7 @@ def synchronize_parsed_feed(feed: models.Feed, parsed_feed: ParsedFeed):
         article, created, modified = create_or_update_if_needed(
             models.Article,
             existing_articles,
-            id_in_feed=parsed_article.id,
+            id_in_feed=parsed_article.id[:400],
             feed=feed,
             defaults={
                 'uri': parsed_article.link or '',