Skip to content

Commit

Permalink
Implement youtube subscription import from Google takeout
Browse files Browse the repository at this point in the history
  • Loading branch information
Stypox committed Nov 1, 2020
1 parent 2f02c0e commit bfdb1d1
Show file tree
Hide file tree
Showing 6 changed files with 307 additions and 160 deletions.
Original file line number Diff line number Diff line change
@@ -1,126 +1,71 @@
package org.schabi.newpipe.extractor.services.youtube.extractors;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;

import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.services.youtube.YoutubeService;
import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
import org.schabi.newpipe.extractor.subscription.SubscriptionItem;
import org.schabi.newpipe.extractor.utils.Parser;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import javax.annotation.Nonnull;

import static org.schabi.newpipe.extractor.subscription.SubscriptionExtractor.ContentSource.INPUT_STREAM;

/**
* Extract subscriptions from a YouTube export (OPML format supported)
* Extract subscriptions from a Google takout export (the user has to get the JSON out of the zip)
*/
public class YoutubeSubscriptionExtractor extends SubscriptionExtractor {
private static final String BASE_CHANNEL_URL = "https://www.youtube.com/channel/";

public YoutubeSubscriptionExtractor(YoutubeService service) {
super(service, Collections.singletonList(INPUT_STREAM));
public YoutubeSubscriptionExtractor(final YoutubeService youtubeService) {
super(youtubeService, Collections.singletonList(INPUT_STREAM));
}

@Override
public String getRelatedUrl() {
return "https://www.youtube.com/subscription_manager?action_takeout=1";
return "https://takeout.google.com/takeout/custom/youtube?continue=https%3A%2F%2Fmyaccount.google.com%2Fyourdata%2Fyoutube%3Fhl%3Den_GB%26authuser%3D0";
}

@Override
public List<SubscriptionItem> fromInputStream(InputStream contentInputStream) throws ExtractionException {
if (contentInputStream == null) throw new InvalidSourceException("input stream is null");

return getItemsFromOPML(contentInputStream);
}

/*//////////////////////////////////////////////////////////////////////////
// OPML implementation
//////////////////////////////////////////////////////////////////////////*/

private static final String ID_PATTERN = "/videos.xml\\?channel_id=([A-Za-z0-9_-]*)";
private static final String BASE_CHANNEL_URL = "https://www.youtube.com/channel/";

private List<SubscriptionItem> getItemsFromOPML(InputStream contentInputStream) throws ExtractionException {
final List<SubscriptionItem> result = new ArrayList<>();

final String contentString = readFromInputStream(contentInputStream);
Document document = Jsoup.parse(contentString, "", org.jsoup.parser.Parser.xmlParser());

if (document.select("opml").isEmpty()) {
throw new InvalidSourceException("document does not have OPML tag");
}

if (document.select("outline").isEmpty()) {
throw new InvalidSourceException("document does not have at least one outline tag");
}

for (Element outline : document.select("outline[type=rss]")) {
String title = outline.attr("title");
String xmlUrl = outline.attr("abs:xmlUrl");

try {
String id = Parser.matchGroup1(ID_PATTERN, xmlUrl);
result.add(new SubscriptionItem(service.getServiceId(), BASE_CHANNEL_URL + id, title));
} catch (Parser.RegexException ignored) { /* ignore invalid subscriptions */ }
}

return result;
}

/*//////////////////////////////////////////////////////////////////////////
// Utils
//////////////////////////////////////////////////////////////////////////*/

/**
* Throws an exception if the string does not have the right tag/string from a valid export.
*/
private void throwIfTagIsNotFound(String content) throws InvalidSourceException {
if (!content.trim().contains("<opml")) {
throw new InvalidSourceException("input stream does not have OPML tag");
}
}

private String readFromInputStream(InputStream inputStream) throws InvalidSourceException {
StringBuilder contentBuilder = new StringBuilder();
boolean hasTag = false;
public List<SubscriptionItem> fromInputStream(@Nonnull final InputStream contentInputStream)
throws ExtractionException {
final JsonArray subscriptions;
try {
byte[] buffer = new byte[16 * 1024];
int read;
while ((read = inputStream.read(buffer)) != -1) {
String currentPartOfContent = new String(buffer, 0, read, "UTF-8");
contentBuilder.append(currentPartOfContent);
subscriptions = JsonParser.array().from(contentInputStream);
} catch (JsonParserException e) {
throw new InvalidSourceException("Invalid json input stream", e);
}

// Fail-fast in case of reading a long unsupported input stream
if (!hasTag && contentBuilder.length() > 128) {
throwIfTagIsNotFound(contentBuilder.toString());
hasTag = true;
}
boolean foundInvalidSubscription = false;
final List<SubscriptionItem> subscriptionItems = new ArrayList<>();
for (final Object subscriptionObject : subscriptions) {
if (!(subscriptionObject instanceof JsonObject)) {
foundInvalidSubscription = true;
continue;
}
} catch (InvalidSourceException e) {
throw e;
} catch (Throwable e) {
throw new InvalidSourceException(e);
} finally {
try {
inputStream.close();
} catch (IOException ignored) {

final JsonObject subscription = ((JsonObject) subscriptionObject).getObject("snippet");
final String id = subscription.getObject("resourceId").getString("channelId", "");
if (id.length() != 24) { // e.g. UCsXVk37bltHxD1rDPwtNM8Q
foundInvalidSubscription = true;
continue;
}
}

final String fileContent = contentBuilder.toString().trim();
if (fileContent.isEmpty()) {
throw new InvalidSourceException("Empty input stream");
subscriptionItems.add(new SubscriptionItem(service.getServiceId(),
BASE_CHANNEL_URL + id, subscription.getString("title", "")));
}

if (!hasTag) {
throwIfTagIsNotFound(fileContent);
if (foundInvalidSubscription && subscriptionItems.isEmpty()) {
throw new InvalidSourceException("Found only invalid channel ids");
}

return fileContent;
return subscriptionItems;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.io.InputStream;
Expand Down Expand Up @@ -71,8 +72,9 @@ public List<SubscriptionItem> fromChannelUrl(String channelUrl) throws IOExcepti
*
* @throws InvalidSourceException when the content read from the InputStream is invalid and can not be parsed
*/
@SuppressWarnings("RedundantThrows")
public List<SubscriptionItem> fromInputStream(InputStream contentInputStream) throws IOException, ExtractionException {
throw new UnsupportedOperationException("Service " + service.getServiceInfo().getName() + " doesn't support extracting from an InputStream");
public List<SubscriptionItem> fromInputStream(@Nonnull final InputStream contentInputStream)
throws ExtractionException {
throw new UnsupportedOperationException("Service " + service.getServiceInfo().getName()
+ " doesn't support extracting from an InputStream");
}
}
15 changes: 15 additions & 0 deletions extractor/src/test/java/org/schabi/newpipe/FileUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,21 @@ private static void writeFile(final String filename, final String content) throw
writer.close();
}

/**
* Resolves the test resource file based on its filename. Looks in
* {@code extractor/src/test/resources/} and {@code src/test/resources/}
* @param filename the resource filename
* @return the resource file
*/
public static File resolveTestResource(final String filename) {
final File file = new File("extractor/src/test/resources/" + filename);
if (file.exists()) {
return file;
} else {
return new File("src/test/resources/" + filename);
}
}

/**
* Convert a JSON object to String
* toString() does not produce a valid JSON string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import org.junit.BeforeClass;
import org.junit.Test;
import org.schabi.newpipe.DownloaderTestImpl;
import org.schabi.newpipe.FileUtils;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.ServiceList;
import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
Expand All @@ -13,10 +14,12 @@
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;

import static org.junit.Assert.*;
import static org.schabi.newpipe.FileUtils.resolveTestResource;

/**
* Test for {@link YoutubeSubscriptionExtractor}
Expand All @@ -34,81 +37,75 @@ public static void setupClass() {

@Test
public void testFromInputStream() throws Exception {
File testFile = new File("extractor/src/test/resources/youtube_export_test.xml");
if (!testFile.exists()) testFile = new File("src/test/resources/youtube_export_test.xml");
final List<SubscriptionItem> subscriptionItems = subscriptionExtractor.fromInputStream(
new FileInputStream(resolveTestResource("youtube_takeout_import_test.json")));
assertEquals(7, subscriptionItems.size());

List<SubscriptionItem> subscriptionItems = subscriptionExtractor.fromInputStream(new FileInputStream(testFile));
assertTrue("List doesn't have exactly 8 items (had " + subscriptionItems.size() + ")", subscriptionItems.size() == 8);

for (SubscriptionItem item : subscriptionItems) {
for (final SubscriptionItem item : subscriptionItems) {
assertNotNull(item.getName());
assertNotNull(item.getUrl());
assertTrue(urlHandler.acceptUrl(item.getUrl()));
assertFalse(item.getServiceId() == -1);
assertEquals(ServiceList.YouTube.getServiceId(), item.getServiceId());
}
}

@Test
public void testEmptySourceException() throws Exception {
String emptySource = "<opml version=\"1.1\"><body>" +
"<outline text=\"Testing\" title=\"123\" />" +
"</body></opml>";

List<SubscriptionItem> items = subscriptionExtractor.fromInputStream(new ByteArrayInputStream(emptySource.getBytes("UTF-8")));
final List<SubscriptionItem> items = subscriptionExtractor.fromInputStream(
new ByteArrayInputStream("[]".getBytes(StandardCharsets.UTF_8)));
assertTrue(items.isEmpty());
}

@Test
public void testSubscriptionWithEmptyTitleInSource() throws Exception {
String channelId = "AA0AaAa0AaaaAAAAAA0aa0AA";
String source = "<opml version=\"1.1\"><body><outline text=\"YouTube Subscriptions\" title=\"YouTube Subscriptions\">" +
"<outline text=\"\" title=\"\" type=\"rss\" xmlUrl=\"https://www.youtube.com/feeds/videos.xml?channel_id=" + channelId + "\" />" +
"</outline></body></opml>";

List<SubscriptionItem> items = subscriptionExtractor.fromInputStream(new ByteArrayInputStream(source.getBytes("UTF-8")));
assertTrue("List doesn't have exactly 1 item (had " + items.size() + ")", items.size() == 1);
assertTrue("Item does not have an empty title (had \"" + items.get(0).getName() + "\")", items.get(0).getName().isEmpty());
assertTrue("Item does not have the right channel id \"" + channelId + "\" (the whole url is \"" + items.get(0).getUrl() + "\")", items.get(0).getUrl().endsWith(channelId));
final String source = "[{\"snippet\":{\"resourceId\":{\"channelId\":\"UCEOXxzW2vU0P-0THehuIIeg\"}}}]";
final List<SubscriptionItem> items = subscriptionExtractor.fromInputStream(
new ByteArrayInputStream(source.getBytes(StandardCharsets.UTF_8)));

assertEquals(1, items.size());
assertEquals(ServiceList.YouTube.getServiceId(), items.get(0).getServiceId());
assertEquals("https://www.youtube.com/channel/UCEOXxzW2vU0P-0THehuIIeg", items.get(0).getUrl());
assertEquals("", items.get(0).getName());
}

@Test
public void testSubscriptionWithInvalidUrlInSource() throws Exception {
String source = "<opml version=\"1.1\"><body><outline text=\"YouTube Subscriptions\" title=\"YouTube Subscriptions\">" +
"<outline text=\"invalid\" title=\"url\" type=\"rss\" xmlUrl=\"https://www.youtube.com/feeds/videos.xml?channel_not_id=|||||||\"/>" +
"<outline text=\"fail\" title=\"fail\" type=\"rss\" xmlUgrl=\"invalidTag\"/>" +
"<outline text=\"invalid\" title=\"url\" type=\"rss\" xmlUrl=\"\"/>" +
"<outline text=\"\" title=\"\" type=\"rss\" xmlUrl=\"\"/>" +
"</outline></body></opml>";

List<SubscriptionItem> items = subscriptionExtractor.fromInputStream(new ByteArrayInputStream(source.getBytes("UTF-8")));
assertTrue(items.isEmpty());
final String source = "[{\"snippet\":{\"resourceId\":{\"channelId\":\"gibberish\"},\"title\":\"name1\"}}," +
"{\"snippet\":{\"resourceId\":{\"channelId\":\"UCEOXxzW2vU0P-0THehuIIeg\"},\"title\":\"name2\"}}]";
final List<SubscriptionItem> items = subscriptionExtractor.fromInputStream(
new ByteArrayInputStream(source.getBytes(StandardCharsets.UTF_8)));

assertEquals(1, items.size());
assertEquals(ServiceList.YouTube.getServiceId(), items.get(0).getServiceId());
assertEquals("https://www.youtube.com/channel/UCEOXxzW2vU0P-0THehuIIeg", items.get(0).getUrl());
assertEquals("name2", items.get(0).getName());
}

@Test
public void testInvalidSourceException() {
List<String> invalidList = Arrays.asList(
"<xml><notvalid></notvalid></xml>",
"<opml><notvalid></notvalid></opml>",
"<opml><body></body></opml>",
"{\"a\":\"b\"}",
"[{}]",
"[\"\", 5]",
"[{\"snippet\":{\"title\":\"name\"}}]",
"[{\"snippet\":{\"resourceId\":{\"channelId\":\"gibberish\"}}}]",
"",
null,
"\uD83D\uDC28\uD83D\uDC28\uD83D\uDC28",
"gibberish");

for (String invalidContent : invalidList) {
try {
if (invalidContent != null) {
byte[] bytes = invalidContent.getBytes("UTF-8");
subscriptionExtractor.fromInputStream(new ByteArrayInputStream(bytes));
fail("Extracting from \"" + invalidContent + "\" didn't throw an exception");
} else {
subscriptionExtractor.fromInputStream(null);
fail("Extracting from null String didn't throw an exception");
byte[] bytes = invalidContent.getBytes(StandardCharsets.UTF_8);
subscriptionExtractor.fromInputStream(new ByteArrayInputStream(bytes));
fail("Extracting from \"" + invalidContent + "\" didn't throw an exception");
} catch (final Exception e) {
boolean correctType = e instanceof SubscriptionExtractor.InvalidSourceException;
if (!correctType) {
e.printStackTrace();
}
} catch (Exception e) {
// System.out.println(" -> " + e);
boolean isExpectedException = e instanceof SubscriptionExtractor.InvalidSourceException;
assertTrue("\"" + e.getClass().getSimpleName() + "\" is not the expected exception", isExpectedException);
assertTrue(e.getClass().getSimpleName() + " is not InvalidSourceException", correctType);
}
}
}
Expand Down
23 changes: 0 additions & 23 deletions extractor/src/test/resources/youtube_export_test.xml

This file was deleted.

Loading

0 comments on commit bfdb1d1

Please sign in to comment.