Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support fedilinks link previews #4429

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 126 additions & 1 deletion crates/api_common/src/request.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,20 @@ pub async fn fetch_link_metadata(
context: &LemmyContext,
) -> Result<LinkMetadata, LemmyError> {
info!("Fetching site metadata for url: {}", url);
let response = context.client().get(url.as_str()).send().await?;

// fedilinks support (see https://fedilinks.org/4 )
let request_url = std::borrow::Cow::Borrowed(url.as_str());
let request_url = if request_url.starts_with("web+") {
if let Ok(preview_url) = get_fallback(&request_url) {
std::borrow::Cow::Owned(preview_url)
} else {
request_url
}
} else {
request_url
};

let response = context.client().get(&*request_url).send().await?;

let content_type: Option<Mime> = response
.headers()
Expand Down Expand Up @@ -308,6 +321,103 @@ async fn is_image_content_type(client: &ClientWithMiddleware, url: &Url) -> Resu
}
}

// Fedi-To get_fallback implementation
// adapted to use urlencoding instead of percent_encoding
// see also https://fedilinks.org/4

/// Reasons why [`get_fallback`] could not produce a fallback protocol
/// handler URL.
#[derive(Copy, Clone, Debug)]
enum FallbackError {
    /// The target is a valid URL, but it does not provide a fallback
    /// handler.
    NoHandler,
    /// The target is not an appropriate `web+` URL.
    NotAnUrl,
}

impl std::fmt::Display for FallbackError {
    /// Writes the fixed, human-readable description of the error variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Each variant maps to exactly one static message.
        let message = match *self {
            Self::NoHandler => "url does not contain a fallback handler",
            Self::NotAnUrl => "url is not an appropriate web+ url",
        };
        f.write_str(message)
    }
}

// No source error is wrapped, so the default trait methods are sufficient.
impl std::error::Error for FallbackError {}

/// Reports whether the `scheme` part of a `web+scheme` URL scheme violates
/// the required constraints.
///
/// A valid scheme is non-empty and made up solely of ASCII lowercase
/// letters; anything else (empty, digits, punctuation, uppercase, non-ASCII)
/// is reported as invalid.
fn is_scheme_invalid(scheme: &str) -> bool {
    // A single offending character is enough to reject the scheme.
    let has_forbidden_char = scheme.chars().any(|c| !c.is_ascii_lowercase());
    scheme.is_empty() || has_forbidden_char
}

/// Attempts to find a fallback protocol handler for the given target URL.
///
/// The target is assumed to be normalized, as per the WHATWG URL spec. (Note
/// that Fedi-To doesn't actually check that it is, but that's a Fedi-To
/// issue.)
fn get_fallback(target: &str) -> Result<String, FallbackError> {
use FallbackError::*;
// find the scheme
let scheme = {
let colon = target.find(':').ok_or(NotAnUrl)?;
let scheme = target
.get(..colon)
.expect("find returned a bogus value - broken std");
if !scheme.starts_with("web+") {
return Err(NotAnUrl);
}
let scheme = scheme
.get(4..)
.expect("starts_with returned a bogus value - broken std");
if is_scheme_invalid(scheme) {
return Err(NotAnUrl);
}
scheme
};
// replace web+scheme with https
// this allows us to handle web+ URLs with the semantics we actually
// want, which is roughly the same as https, with a few differences
let mut as_if_https = target.to_string();
as_if_https.replace_range(0..4 + scheme.len(), "https");
// the main difference is that unlike https, authority is optional.
// so, first check that there should be an authority.
if !as_if_https.starts_with("https://") {
return Err(NoHandler);
}
// then also check that the authority actually exists.
// this is necessary so we don't end up parsing web+example:///bar as
// web+example://bar/ (which would be wrong).
// note that we do parse web+example://bar\ as an authority! (but
// everything else - like the path - we treat as opaque to us)
if as_if_https.starts_with("https:///") || as_if_https.starts_with("https://\\") {
return Err(NoHandler);
}
// NOTE: we only do this parse to extract the domain/port, it is up to
// the protocol-handler to deal with malformed or malicious input.
// NOTE: this is the same URL parser as used by browsers when handling
// `href` so this is correct.
let mut url = url::Url::parse(&as_if_https).map_err(|_| NoHandler)?;
url.set_path("/.well-known/protocol-handler");
let _ = url.set_username("");
let _ = url.set_password(None);
let mut params = "target=".to_owned();
params.push_str(&encode(target));
url.set_query(Some(&*params));
url.set_fragment(None);
Ok(url.into())
}

#[cfg(test)]
mod tests {
#![allow(clippy::unwrap_used)]
Expand Down Expand Up @@ -354,6 +464,21 @@ mod tests {
assert_eq!(None, sample_res.thumbnail);
}

/// Fetches link metadata for a `web+ap` fedilink and checks that an HTML
/// page with an OpenGraph title comes back.
///
/// NOTE(review): this test needs network access and depends on the remote
/// host in the sample URL being reachable.
#[tokio::test]
#[serial]
async fn test_fedilinks_metadata() {
    let ctx = LemmyContext::init_test_context_with_networking().await;
    let fedilink = Url::parse("web+ap://is-a.cat/@ar").unwrap();

    let metadata = fetch_link_metadata(&fedilink, false, &ctx).await.unwrap();

    // The preview must carry a title ...
    let title = &metadata.opengraph_data.title;
    assert!(title.is_some());

    // ... and must have been served as UTF-8 HTML.
    let expected_content_type = Some(mime::TEXT_HTML_UTF_8.to_string());
    assert_eq!(expected_content_type, metadata.content_type);
}

// #[test]
// fn test_pictshare() {
// let res = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpg");
Expand Down
15 changes: 12 additions & 3 deletions crates/utils/src/utils/validation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,10 +276,16 @@ pub fn check_site_visibility_valid(

pub fn check_url_scheme(url: &Option<Url>) -> LemmyResult<()> {
if let Some(url) = url {
if !ALLOWED_POST_URL_SCHEMES.contains(&url.scheme()) {
Err(LemmyErrorType::InvalidUrlScheme.into())
} else {
if ALLOWED_POST_URL_SCHEMES.contains(&url.scheme())
|| (url
.scheme()
.trim_end_matches(|c: char| c.is_ascii_lowercase())
== "web+"
&& url.scheme() != "web+")
{
Ok(())
} else {
Err(LemmyErrorType::InvalidUrlScheme.into())
}
} else {
Ok(())
Expand Down Expand Up @@ -531,6 +537,9 @@ mod tests {
assert!(check_url_scheme(&Some(Url::parse("http://example.com").unwrap())).is_ok());
assert!(check_url_scheme(&Some(Url::parse("https://example.com").unwrap())).is_ok());
assert!(check_url_scheme(&Some(Url::parse("https://example.com").unwrap())).is_ok());
assert!(check_url_scheme(&Some(Url::parse("web+ap://example.com").unwrap())).is_ok());
assert!(check_url_scheme(&Some(Url::parse("web+://example.com").unwrap())).is_err());
assert!(check_url_scheme(&Some(Url::parse("web+no-way://example.com").unwrap())).is_err());
assert!(check_url_scheme(&Some(Url::parse("ftp://example.com").unwrap())).is_err());
assert!(check_url_scheme(&Some(Url::parse("javascript:void").unwrap())).is_err());

Expand Down