Skip to content

Commit

Permalink
De-duplicate also if episodes have different but similar media type (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
ByteHamster committed Nov 26, 2023
1 parent 95f431f commit 6177cc2
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public static boolean seemDuplicates(FeedItem item1, FeedItem item2) {
return titlesLookSimilar(item1, item2)
&& datesLookSimilar(item1, item2)
&& durationsLookSimilar(media1, media2)
&& TextUtils.equals(media1.getMime_type(), media2.getMime_type());
&& mimeTypeLooksSimilar(media1, media2);
}

private static boolean sameAndNotEmpty(String string1, String string2) {
Expand All @@ -52,6 +52,19 @@ private static boolean durationsLookSimilar(FeedMedia media1, FeedMedia media2)
return Math.abs(media1.getDuration() - media2.getDuration()) < 10 * 60L * 1000L;
}

/**
 * Decides whether the MIME types of two media objects are close enough for
 * the media to be considered the same content.
 *
 * <p>If either MIME type is {@code null}, the types are treated as similar.
 * When both MIME types contain a {@code '/'}, only the part before the first
 * slash (the top-level type, e.g. {@code audio} in {@code audio/mpeg}) is
 * compared, so {@code audio/mpeg} and {@code audio/mp3} count as similar.
 * Otherwise the full strings are compared.
 *
 * <p>The comparison ignores case, because MIME type names are
 * case-insensitive (RFC 2045, section 5.1).
 *
 * @param media1 first media object
 * @param media2 second media object
 * @return {@code true} if the MIME types look similar, {@code false} otherwise
 */
private static boolean mimeTypeLooksSimilar(FeedMedia media1, FeedMedia media2) {
    String mimeType1 = media1.getMime_type();
    String mimeType2 = media2.getMime_type();
    if (mimeType1 == null || mimeType2 == null) {
        // Missing information must not prevent de-duplication.
        return true;
    }
    if (mimeType1.contains("/") && mimeType2.contains("/")) {
        // Reduce both values to their top-level type before comparing.
        mimeType1 = mimeType1.substring(0, mimeType1.indexOf('/'));
        mimeType2 = mimeType2.substring(0, mimeType2.indexOf('/'));
    }
    // Both strings are non-null here, so a direct case-insensitive compare is safe.
    return mimeType1.equalsIgnoreCase(mimeType2);
}

/**
 * Checks whether the titles of two feed items match after both have been
 * passed through {@code canonicalizeTitle}.
 *
 * @param item1 first feed item
 * @param item2 second feed item
 * @return the result of {@code sameAndNotEmpty} applied to the two canonicalized titles
 */
private static boolean titlesLookSimilar(FeedItem item1, FeedItem item2) {
    final String canonicalTitle1 = canonicalizeTitle(item1.getTitle());
    final String canonicalTitle2 = canonicalizeTitle(item2.getTitle());
    return sameAndNotEmpty(canonicalTitle1, canonicalTitle2);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ public void testOtherAttributes() {
assertFalse(FeedItemDuplicateGuesser.seemDuplicates(
item("id1", "Title", "example.com/episode1", 10, 5 * MINUTES, "audio/*"),
item("id2", "Title", "example.com/episode2", 10, 5 * MINUTES, "video/*")));
assertTrue(FeedItemDuplicateGuesser.seemDuplicates(
item("id1", "Title", "example.com/episode1", 10, 5 * MINUTES, "audio/mpeg"),
item("id2", "Title", "example.com/episode2", 10, 5 * MINUTES, "audio/mp3")));
assertFalse(FeedItemDuplicateGuesser.seemDuplicates(
item("id1", "Title", "example.com/episode1", 5 * DAYS, 5 * MINUTES, "audio/*"),
item("id2", "Title", "example.com/episode2", 2 * DAYS, 5 * MINUTES, "audio/*")));
Expand Down

0 comments on commit 6177cc2

Please sign in to comment.