Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement youtube subscription import from Google takeout #452

Merged
merged 1 commit into from
Nov 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,126 +1,71 @@
package org.schabi.newpipe.extractor.services.youtube.extractors;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;

import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.services.youtube.YoutubeService;
import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
import org.schabi.newpipe.extractor.subscription.SubscriptionItem;
import org.schabi.newpipe.extractor.utils.Parser;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import javax.annotation.Nonnull;

import static org.schabi.newpipe.extractor.subscription.SubscriptionExtractor.ContentSource.INPUT_STREAM;

/**
* Extract subscriptions from a YouTube export (OPML format supported)
* Extract subscriptions from a Google takeout export (the user has to get the JSON out of the zip)
*/
public class YoutubeSubscriptionExtractor extends SubscriptionExtractor {
private static final String BASE_CHANNEL_URL = "https://www.youtube.com/channel/";

public YoutubeSubscriptionExtractor(YoutubeService service) {
super(service, Collections.singletonList(INPUT_STREAM));
public YoutubeSubscriptionExtractor(final YoutubeService youtubeService) {
super(youtubeService, Collections.singletonList(INPUT_STREAM));
}

@Override
public String getRelatedUrl() {
return "https://www.youtube.com/subscription_manager?action_takeout=1";
return "https://takeout.google.com/takeout/custom/youtube";
}

@Override
public List<SubscriptionItem> fromInputStream(InputStream contentInputStream) throws ExtractionException {
if (contentInputStream == null) throw new InvalidSourceException("input stream is null");

return getItemsFromOPML(contentInputStream);
}

/*//////////////////////////////////////////////////////////////////////////
// OPML implementation
//////////////////////////////////////////////////////////////////////////*/

private static final String ID_PATTERN = "/videos.xml\\?channel_id=([A-Za-z0-9_-]*)";
private static final String BASE_CHANNEL_URL = "https://www.youtube.com/channel/";

private List<SubscriptionItem> getItemsFromOPML(InputStream contentInputStream) throws ExtractionException {
final List<SubscriptionItem> result = new ArrayList<>();

final String contentString = readFromInputStream(contentInputStream);
Document document = Jsoup.parse(contentString, "", org.jsoup.parser.Parser.xmlParser());

if (document.select("opml").isEmpty()) {
throw new InvalidSourceException("document does not have OPML tag");
}

if (document.select("outline").isEmpty()) {
throw new InvalidSourceException("document does not have at least one outline tag");
}

for (Element outline : document.select("outline[type=rss]")) {
String title = outline.attr("title");
String xmlUrl = outline.attr("abs:xmlUrl");

try {
String id = Parser.matchGroup1(ID_PATTERN, xmlUrl);
result.add(new SubscriptionItem(service.getServiceId(), BASE_CHANNEL_URL + id, title));
} catch (Parser.RegexException ignored) { /* ignore invalid subscriptions */ }
}

return result;
}

/*//////////////////////////////////////////////////////////////////////////
// Utils
//////////////////////////////////////////////////////////////////////////*/

/**
* Throws an exception if the string does not have the right tag/string from a valid export.
*/
private void throwIfTagIsNotFound(String content) throws InvalidSourceException {
if (!content.trim().contains("<opml")) {
throw new InvalidSourceException("input stream does not have OPML tag");
}
}

private String readFromInputStream(InputStream inputStream) throws InvalidSourceException {
StringBuilder contentBuilder = new StringBuilder();
boolean hasTag = false;
public List<SubscriptionItem> fromInputStream(@Nonnull final InputStream contentInputStream)
throws ExtractionException {
final JsonArray subscriptions;
try {
byte[] buffer = new byte[16 * 1024];
int read;
while ((read = inputStream.read(buffer)) != -1) {
String currentPartOfContent = new String(buffer, 0, read, "UTF-8");
contentBuilder.append(currentPartOfContent);
subscriptions = JsonParser.array().from(contentInputStream);
} catch (JsonParserException e) {
throw new InvalidSourceException("Invalid json input stream", e);
}

// Fail-fast in case of reading a long unsupported input stream
if (!hasTag && contentBuilder.length() > 128) {
throwIfTagIsNotFound(contentBuilder.toString());
hasTag = true;
}
boolean foundInvalidSubscription = false;
final List<SubscriptionItem> subscriptionItems = new ArrayList<>();
for (final Object subscriptionObject : subscriptions) {
if (!(subscriptionObject instanceof JsonObject)) {
foundInvalidSubscription = true;
continue;
}
} catch (InvalidSourceException e) {
throw e;
} catch (Throwable e) {
throw new InvalidSourceException(e);
} finally {
try {
inputStream.close();
} catch (IOException ignored) {

final JsonObject subscription = ((JsonObject) subscriptionObject).getObject("snippet");
final String id = subscription.getObject("resourceId").getString("channelId", "");
if (id.length() != 24) { // e.g. UCsXVk37bltHxD1rDPwtNM8Q
foundInvalidSubscription = true;
continue;
}
}

final String fileContent = contentBuilder.toString().trim();
if (fileContent.isEmpty()) {
throw new InvalidSourceException("Empty input stream");
subscriptionItems.add(new SubscriptionItem(service.getServiceId(),
BASE_CHANNEL_URL + id, subscription.getString("title", "")));
}

if (!hasTag) {
throwIfTagIsNotFound(fileContent);
if (foundInvalidSubscription && subscriptionItems.isEmpty()) {
throw new InvalidSourceException("Found only invalid channel ids");
}

return fileContent;
return subscriptionItems;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.io.InputStream;
Expand Down Expand Up @@ -71,8 +72,9 @@ public List<SubscriptionItem> fromChannelUrl(String channelUrl) throws IOExcepti
*
* @throws InvalidSourceException when the content read from the InputStream is invalid and can not be parsed
*/
@SuppressWarnings("RedundantThrows")
public List<SubscriptionItem> fromInputStream(InputStream contentInputStream) throws IOException, ExtractionException {
throw new UnsupportedOperationException("Service " + service.getServiceInfo().getName() + " doesn't support extracting from an InputStream");
public List<SubscriptionItem> fromInputStream(@Nonnull final InputStream contentInputStream)
throws ExtractionException {
throw new UnsupportedOperationException("Service " + service.getServiceInfo().getName()
+ " doesn't support extracting from an InputStream");
}
}
15 changes: 15 additions & 0 deletions extractor/src/test/java/org/schabi/newpipe/FileUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,21 @@ private static void writeFile(final String filename, final String content) throw
writer.close();
}

/**
 * Locates a test resource file by its filename. The file is first looked up under
 * {@code extractor/src/test/resources/}; when nothing exists at that location, a file
 * under {@code src/test/resources/} is returned instead (its existence is not checked).
 * @param filename the resource filename
 * @return the resource file
 */
public static File resolveTestResource(final String filename) {
    // Path used when the working directory is the repository root.
    final File fromRepositoryRoot = new File("extractor/src/test/resources/" + filename);
    return fromRepositoryRoot.exists()
            ? fromRepositoryRoot
            : new File("src/test/resources/" + filename);
}

/**
* Convert a JSON object to String
* toString() does not produce a valid JSON string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,16 @@
import org.schabi.newpipe.extractor.subscription.SubscriptionItem;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;

import static org.junit.Assert.*;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.schabi.newpipe.FileUtils.resolveTestResource;

/**
* Test for {@link YoutubeSubscriptionExtractor}
Expand All @@ -34,81 +38,75 @@ public static void setupClass() {

@Test
public void testFromInputStream() throws Exception {
File testFile = new File("extractor/src/test/resources/youtube_export_test.xml");
if (!testFile.exists()) testFile = new File("src/test/resources/youtube_export_test.xml");
final List<SubscriptionItem> subscriptionItems = subscriptionExtractor.fromInputStream(
new FileInputStream(resolveTestResource("youtube_takeout_import_test.json")));
assertEquals(7, subscriptionItems.size());

List<SubscriptionItem> subscriptionItems = subscriptionExtractor.fromInputStream(new FileInputStream(testFile));
assertTrue("List doesn't have exactly 8 items (had " + subscriptionItems.size() + ")", subscriptionItems.size() == 8);

for (SubscriptionItem item : subscriptionItems) {
for (final SubscriptionItem item : subscriptionItems) {
assertNotNull(item.getName());
assertNotNull(item.getUrl());
assertTrue(urlHandler.acceptUrl(item.getUrl()));
assertFalse(item.getServiceId() == -1);
assertEquals(ServiceList.YouTube.getServiceId(), item.getServiceId());
}
}

@Test
public void testEmptySourceException() throws Exception {
String emptySource = "<opml version=\"1.1\"><body>" +
"<outline text=\"Testing\" title=\"123\" />" +
"</body></opml>";

List<SubscriptionItem> items = subscriptionExtractor.fromInputStream(new ByteArrayInputStream(emptySource.getBytes("UTF-8")));
final List<SubscriptionItem> items = subscriptionExtractor.fromInputStream(
new ByteArrayInputStream("[]".getBytes(StandardCharsets.UTF_8)));
assertTrue(items.isEmpty());
}

@Test
public void testSubscriptionWithEmptyTitleInSource() throws Exception {
String channelId = "AA0AaAa0AaaaAAAAAA0aa0AA";
String source = "<opml version=\"1.1\"><body><outline text=\"YouTube Subscriptions\" title=\"YouTube Subscriptions\">" +
"<outline text=\"\" title=\"\" type=\"rss\" xmlUrl=\"https://www.youtube.com/feeds/videos.xml?channel_id=" + channelId + "\" />" +
"</outline></body></opml>";

List<SubscriptionItem> items = subscriptionExtractor.fromInputStream(new ByteArrayInputStream(source.getBytes("UTF-8")));
assertTrue("List doesn't have exactly 1 item (had " + items.size() + ")", items.size() == 1);
assertTrue("Item does not have an empty title (had \"" + items.get(0).getName() + "\")", items.get(0).getName().isEmpty());
assertTrue("Item does not have the right channel id \"" + channelId + "\" (the whole url is \"" + items.get(0).getUrl() + "\")", items.get(0).getUrl().endsWith(channelId));
final String source = "[{\"snippet\":{\"resourceId\":{\"channelId\":\"UCEOXxzW2vU0P-0THehuIIeg\"}}}]";
final List<SubscriptionItem> items = subscriptionExtractor.fromInputStream(
new ByteArrayInputStream(source.getBytes(StandardCharsets.UTF_8)));

assertEquals(1, items.size());
assertEquals(ServiceList.YouTube.getServiceId(), items.get(0).getServiceId());
assertEquals("https://www.youtube.com/channel/UCEOXxzW2vU0P-0THehuIIeg", items.get(0).getUrl());
assertEquals("", items.get(0).getName());
}

@Test
public void testSubscriptionWithInvalidUrlInSource() throws Exception {
String source = "<opml version=\"1.1\"><body><outline text=\"YouTube Subscriptions\" title=\"YouTube Subscriptions\">" +
"<outline text=\"invalid\" title=\"url\" type=\"rss\" xmlUrl=\"https://www.youtube.com/feeds/videos.xml?channel_not_id=|||||||\"/>" +
"<outline text=\"fail\" title=\"fail\" type=\"rss\" xmlUgrl=\"invalidTag\"/>" +
"<outline text=\"invalid\" title=\"url\" type=\"rss\" xmlUrl=\"\"/>" +
"<outline text=\"\" title=\"\" type=\"rss\" xmlUrl=\"\"/>" +
"</outline></body></opml>";

List<SubscriptionItem> items = subscriptionExtractor.fromInputStream(new ByteArrayInputStream(source.getBytes("UTF-8")));
assertTrue(items.isEmpty());
final String source = "[{\"snippet\":{\"resourceId\":{\"channelId\":\"gibberish\"},\"title\":\"name1\"}}," +
"{\"snippet\":{\"resourceId\":{\"channelId\":\"UCEOXxzW2vU0P-0THehuIIeg\"},\"title\":\"name2\"}}]";
final List<SubscriptionItem> items = subscriptionExtractor.fromInputStream(
new ByteArrayInputStream(source.getBytes(StandardCharsets.UTF_8)));

assertEquals(1, items.size());
assertEquals(ServiceList.YouTube.getServiceId(), items.get(0).getServiceId());
assertEquals("https://www.youtube.com/channel/UCEOXxzW2vU0P-0THehuIIeg", items.get(0).getUrl());
assertEquals("name2", items.get(0).getName());
}

@Test
public void testInvalidSourceException() {
List<String> invalidList = Arrays.asList(
"<xml><notvalid></notvalid></xml>",
"<opml><notvalid></notvalid></opml>",
"<opml><body></body></opml>",
"{\"a\":\"b\"}",
"[{}]",
"[\"\", 5]",
"[{\"snippet\":{\"title\":\"name\"}}]",
"[{\"snippet\":{\"resourceId\":{\"channelId\":\"gibberish\"}}}]",
"",
null,
"\uD83D\uDC28\uD83D\uDC28\uD83D\uDC28",
"gibberish");

for (String invalidContent : invalidList) {
try {
if (invalidContent != null) {
byte[] bytes = invalidContent.getBytes("UTF-8");
subscriptionExtractor.fromInputStream(new ByteArrayInputStream(bytes));
fail("Extracting from \"" + invalidContent + "\" didn't throw an exception");
} else {
subscriptionExtractor.fromInputStream(null);
fail("Extracting from null String didn't throw an exception");
byte[] bytes = invalidContent.getBytes(StandardCharsets.UTF_8);
subscriptionExtractor.fromInputStream(new ByteArrayInputStream(bytes));
fail("Extracting from \"" + invalidContent + "\" didn't throw an exception");
} catch (final Exception e) {
boolean correctType = e instanceof SubscriptionExtractor.InvalidSourceException;
if (!correctType) {
e.printStackTrace();
}
} catch (Exception e) {
// System.out.println(" -> " + e);
boolean isExpectedException = e instanceof SubscriptionExtractor.InvalidSourceException;
assertTrue("\"" + e.getClass().getSimpleName() + "\" is not the expected exception", isExpectedException);
assertTrue(e.getClass().getSimpleName() + " is not InvalidSourceException", correctType);
}
}
}
Expand Down
23 changes: 0 additions & 23 deletions extractor/src/test/resources/youtube_export_test.xml

This file was deleted.

Loading