Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Google search redirect url #362

Merged
merged 3 commits into from
Oct 31, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import org.schabi.newpipe.extractor.stream.StreamExtractor;
import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
import org.schabi.newpipe.extractor.suggestion.SuggestionExtractor;
import org.schabi.newpipe.extractor.utils.Utils;

import javax.annotation.Nullable;
import java.util.Collections;
Expand Down Expand Up @@ -277,18 +278,19 @@ public CommentsExtractor getCommentsExtractor(String url) throws ExtractionExcep
* Figures out where the link is pointing to (a channel, a video, a playlist, etc.)
* @param url the url whose link type should be determined
* @return the link type of url
* @throws ParsingException
*/
public final LinkType getLinkTypeByUrl(String url) throws ParsingException {
LinkHandlerFactory sH = getStreamLHFactory();
LinkHandlerFactory cH = getChannelLHFactory();
LinkHandlerFactory pH = getPlaylistLHFactory();
public final LinkType getLinkTypeByUrl(final String url) throws ParsingException {
final String polishedUrl = Utils.followGoogleRedirectIfNeeded(url);

if (sH != null && sH.acceptUrl(url)) {
final LinkHandlerFactory sH = getStreamLHFactory();
final LinkHandlerFactory cH = getChannelLHFactory();
final LinkHandlerFactory pH = getPlaylistLHFactory();

if (sH != null && sH.acceptUrl(polishedUrl)) {
return LinkType.STREAM;
} else if (cH != null && cH.acceptUrl(url)) {
} else if (cH != null && cH.acceptUrl(polishedUrl)) {
return LinkType.CHANNEL;
} else if (pH != null && pH.acceptUrl(url)) {
} else if (pH != null && pH.acceptUrl(polishedUrl)) {
return LinkType.PLAYLIST;
} else {
return LinkType.NONE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,29 @@ public String getUrl(String id, String baseUrl) throws ParsingException {
// Logic
///////////////////////////////////

public LinkHandler fromUrl(String url) throws ParsingException {
if (url == null) throw new IllegalArgumentException("url can not be null");
final String baseUrl = Utils.getBaseUrl(url);
return fromUrl(url, baseUrl);
/**
 * Builds a {@link LinkHandler} from a url.<br>
 * Google search redirects are resolved first, then the base url is extracted from the
 * resulting url and both are handed to {@link #fromUrl(String, String)}.<br>
 * Be sure to call {@link Utils#followGoogleRedirectIfNeeded(String)} on the url if overriding
 * this function.
 * @param url the url to extract path and id from, must not be {@code null}
 * @return a {@link LinkHandler} complete with information
 * @throws IllegalArgumentException if {@code url} is {@code null}
 * @throws ParsingException if the base url cannot be extracted from the url, or if it is
 *                          thrown by {@link #fromUrl(String, String)}
 */
public LinkHandler fromUrl(final String url) throws ParsingException {
    // Fail fast with a clear message: without this guard a null url would only blow up
    // later, while trying to extract the base url.
    if (url == null) {
        throw new IllegalArgumentException("url can not be null");
    }

    final String polishedUrl = Utils.followGoogleRedirectIfNeeded(url);
    final String baseUrl = Utils.getBaseUrl(polishedUrl);
    return fromUrl(polishedUrl, baseUrl);
}

/**
* Builds a {@link LinkHandler} from a url and a base url. The url is expected to be already
* polished from google search redirects (otherwise how could {@code baseUrl} have been
* extracted?).<br>
* So do not call {@link Utils#followGoogleRedirectIfNeeded(String)} on the url if overriding
* this function, since that should be done in {@link #fromUrl(String)}.
* @param url the url without google search redirects to extract id from
* @param baseUrl the base url
* @return a {@link LinkHandler} complete with information
*/
public LinkHandler fromUrl(String url, String baseUrl) throws ParsingException {
if (url == null) throw new IllegalArgumentException("url can not be null");
if (!acceptUrl(url)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,10 @@ public String getUrl(String id, List<String> contentFilter, String sortFilter, S
///////////////////////////////////

@Override
public ListLinkHandler fromUrl(String url) throws ParsingException {
String baseUrl = Utils.getBaseUrl(url);
return fromUrl(url, baseUrl);
public ListLinkHandler fromUrl(final String url) throws ParsingException {
    // Resolve a possible Google search redirect first, so that the base url is computed
    // from the real target url and not from the redirect url.
    final String resolvedUrl = Utils.followGoogleRedirectIfNeeded(url);
    return fromUrl(resolvedUrl, Utils.getBaseUrl(resolvedUrl));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,6 @@ public class YoutubeParsingHelper {
// Private no-op constructor: prevents this class from being instantiated from outside.
private YoutubeParsingHelper() {
}

/**
* The official youtube app supports intents in this format, where after the ':' is the videoId.
* Accordingly there are other apps sharing streams in this format.
*/
public final static String BASE_YOUTUBE_INTENT_URL = "vnd.youtube";

private static final String HARDCODED_CLIENT_VERSION = "2.20200214.04.00";
private static String clientVersion;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
package org.schabi.newpipe.extractor.services.youtube.linkHandler;

import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.BASE_YOUTUBE_INTENT_URL;

import org.schabi.newpipe.extractor.exceptions.FoundAdException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;

import java.util.List;
Expand All @@ -17,15 +14,6 @@ public static YoutubeCommentsLinkHandlerFactory getInstance() {
return instance;
}

@Override
public ListLinkHandler fromUrl(String url) throws ParsingException {
if (url.startsWith(BASE_YOUTUBE_INTENT_URL)){
return super.fromUrl(url, BASE_YOUTUBE_INTENT_URL);
} else {
return super.fromUrl(url);
}
}

@Override
public String getUrl(String id) {
return "https://m.youtube.com/watch?v=" + id;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import org.schabi.newpipe.extractor.exceptions.FoundAdException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.utils.Utils;
Expand All @@ -15,8 +14,6 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.BASE_YOUTUBE_INTENT_URL;

/*
* Created by Christian Schabesberger on 02.02.16.
*
Expand Down Expand Up @@ -67,15 +64,6 @@ private static String assertIsId(@Nullable final String id) throws ParsingExcept
}
}

@Override
public LinkHandler fromUrl(String url) throws ParsingException {
if (url.startsWith(BASE_YOUTUBE_INTENT_URL)) {
return super.fromUrl(url, BASE_YOUTUBE_INTENT_URL);
} else {
return super.fromUrl(url);
}
}

@Override
public String getUrl(String id) {
return "https://www.youtube.com/watch?v=" + id;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,14 +181,39 @@ public static String removeUTF8BOM(String s) {
return s;
}

public static String getBaseUrl(String url) throws ParsingException {
URL uri;
public static String getBaseUrl(final String url) throws ParsingException {
try {
uri = stringToURL(url);
} catch (MalformedURLException e) {
final URL uri = stringToURL(url);
return uri.getProtocol() + "://" + uri.getAuthority();
} catch (final MalformedURLException e) {
final String message = e.getMessage();
if (message.startsWith("unknown protocol: ")) {
// return just the protocol (e.g. vnd.youtube)
return message.substring("unknown protocol: ".length());
}

throw new ParsingException("Malformed url: " + url, e);
}
return uri.getProtocol() + "://" + uri.getAuthority();
}

/**
 * If the provided url is a Google search redirect, then the actual url is extracted from the
 * {@code url=} query value and returned, otherwise the original url is returned.<br>
 * A url is considered a Google search redirect if its host contains {@code google} and its
 * path is exactly {@code /url}. The {@code url} parameter is found regardless of whether it
 * is the first query parameter ({@code ?url=}) or a later one ({@code &url=}).
 * @param url the url which can possibly be a Google search redirect
 * @return a url with no Google search redirects; the original url if it is not a redirect
 *         or if anything goes wrong while extracting the target
 */
public static String followGoogleRedirectIfNeeded(final String url) {
    // if the url is a redirect from a Google search, extract the actual url
    try {
        final URL decoded = Utils.stringToURL(url);
        if (decoded.getHost().contains("google") && decoded.getPath().equals("/url")) {
            // accept both "?url=" (first parameter) and "&url=" (any later parameter)
            return URLDecoder.decode(
                    Parser.matchGroup1("[?&]url=([^&]+)(?:&|$)", url), "UTF-8");
        }
    } catch (final Exception ignored) {
        // deliberately best-effort: a malformed url, a missing url= parameter or a decoding
        // failure all just mean that the url is returned untouched below
    }

    // url is not a google search redirect
    return url;
}

public static boolean isNullOrEmpty(final String str) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import static org.junit.Assert.*;
import static org.schabi.newpipe.extractor.NewPipe.getServiceByUrl;
import static org.schabi.newpipe.extractor.ServiceList.SoundCloud;
import static org.schabi.newpipe.extractor.ServiceList.YouTube;

public class NewPipeTest {
Expand Down Expand Up @@ -39,8 +40,10 @@ public void getServiceWithUrl() throws Exception {
assertEquals(getServiceByUrl("https://www.youtube.com/watch?v=_r6CgaFNAGg"), YouTube);
assertEquals(getServiceByUrl("https://www.youtube.com/channel/UCi2bIyFtz-JdI-ou8kaqsqg"), YouTube);
assertEquals(getServiceByUrl("https://www.youtube.com/playlist?list=PLRqwX-V7Uu6ZiZxtDDRCi6uhfTH4FilpH"), YouTube);
assertEquals(getServiceByUrl("https://www.google.it/url?sa=t&rct=j&q=&esrc=s&cd=&cad=rja&uact=8&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DHu80uDzh8RY&source=video"), YouTube);

assertNotEquals(getServiceByUrl("https://soundcloud.com/pegboardnerds"), YouTube);
assertEquals(getServiceByUrl("https://soundcloud.com/pegboardnerds"), SoundCloud);
assertEquals(getServiceByUrl("https://www.google.com/url?sa=t&url=https%3A%2F%2Fsoundcloud.com%2Fciaoproduction&rct=j&q=&esrc=s&source=web&cd="), SoundCloud);
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,28 @@ public void testMixedNumberWordToLong() throws ParsingException {
public void testJoin() {
assertEquals("some,random,stuff", Utils.join(",", Arrays.asList("some", "random", "stuff")));
}

@Test
public void testGetBaseUrl() throws ParsingException {
    // each row is {expected base url, input url}
    final String[][] expectedAndInput = {
            {"https://www.youtube.com", "https://www.youtube.com/watch?v=Hu80uDzh8RY"},
            {"vnd.youtube", "vnd.youtube://www.youtube.com/watch?v=jZViOEv90dI"},
            {"vnd.youtube", "vnd.youtube:jZViOEv90dI"},
            {"vnd.youtube", "vnd.youtube://n8X9_MgEdCg"},
            {"https://music.youtube.com", "https://music.youtube.com/watch?v=O0EDx9WAelc"},
    };

    for (final String[] row : expectedAndInput) {
        assertEquals(row[0], Utils.getBaseUrl(row[1]));
    }
}

@Test
public void testFollowGoogleRedirect() {
    // each row is {expected url, input url}
    final String[][] expectedAndInput = {
            // Google search redirects: the target url must be extracted and decoded
            {"https://www.youtube.com/watch?v=Hu80uDzh8RY",
                    "https://www.google.it/url?sa=t&rct=j&q=&esrc=s&cd=&cad=rja&uact=8&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DHu80uDzh8RY&source=video"},
            {"https://www.youtube.com/watch?v=0b6cFWG45kA",
                    "https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=video&cd=&cad=rja&uact=8&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3D0b6cFWG45kA"},
            {"https://soundcloud.com/ciaoproduction",
                    "https://www.google.com/url?sa=t&url=https%3A%2F%2Fsoundcloud.com%2Fciaoproduction&rct=j&q=&esrc=s&source=web&cd="},

            // not Google search redirects: the url must be returned unchanged
            {"https://www.youtube.com/watch?v=Hu80uDzh8RY&param=xyz",
                    "https://www.youtube.com/watch?v=Hu80uDzh8RY&param=xyz"},
            {"https://www.youtube.com/watch?v=Hu80uDzh8RY&url=hello",
                    "https://www.youtube.com/watch?v=Hu80uDzh8RY&url=hello"},
    };

    for (final String[] row : expectedAndInput) {
        assertEquals(row[0], Utils.followGoogleRedirectIfNeeded(row[1]));
    }
}
}