Skip to content

Commit

Permalink
Generate post thumbnail/metadata in background (ref #4529)
Browse files Browse the repository at this point in the history
  • Loading branch information
Nutomic committed Mar 25, 2024
1 parent ef4bb3c commit 456054f
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 104 deletions.
6 changes: 0 additions & 6 deletions crates/api_common/src/post.rs
Expand Up @@ -30,9 +30,6 @@ pub struct CreatePost {
pub honeypot: Option<String>,
pub nsfw: Option<bool>,
pub language_id: Option<LanguageId>,
#[cfg_attr(feature = "full", ts(type = "string"))]
/// Instead of fetching a thumbnail, use a custom one.
pub custom_thumbnail: Option<Url>,
}

#[derive(Debug, Serialize, Deserialize, Clone)]
Expand Down Expand Up @@ -122,9 +119,6 @@ pub struct EditPost {
pub alt_text: Option<String>,
pub nsfw: Option<bool>,
pub language_id: Option<LanguageId>,
#[cfg_attr(feature = "full", ts(type = "string"))]
/// Instead of fetching a thumbnail, use a custom one.
pub custom_thumbnail: Option<Url>,
}

#[derive(Debug, Serialize, Deserialize, Clone, Copy, Default, PartialEq, Eq, Hash)]
Expand Down
41 changes: 39 additions & 2 deletions crates/api_common/src/request.rs
@@ -1,16 +1,22 @@
use crate::{
context::LemmyContext,
lemmy_db_schema::traits::Crud,
post::{LinkMetadata, OpenGraphData},
utils::proxy_image_link,
utils::{local_site_opt_to_sensitive, proxy_image_link},
};
use encoding::{all::encodings, DecoderTrap};
use lemmy_db_schema::{
newtypes::DbUrl,
source::images::{LocalImage, LocalImageForm},
source::{
images::{LocalImage, LocalImageForm},
local_site::LocalSite,
post::{Post, PostUpdateForm},
},
};
use lemmy_utils::{
error::{LemmyError, LemmyErrorType},
settings::structs::{PictrsImageMode, Settings},
spawn_try_task,
version::VERSION,
REQWEST_TIMEOUT,
};
Expand Down Expand Up @@ -83,6 +89,37 @@ pub async fn fetch_link_metadata_opt(
}
}

/// Fetch link metadata (and optionally a thumbnail) for `post` and write the result back to the
/// post row.
///
/// The work is handed to `spawn_try_task` instead of being awaited by the caller, because some
/// sites can be very slow to respond.
///
/// * `post` - the already stored post whose `url` is inspected and whose `id` is updated.
/// * `local_site` - passed to `local_site_opt_to_sensitive` to decide whether thumbnails may be
///   generated for nsfw posts.
/// * `context` - owned context used for the metadata fetch, the image proxying and the db pool.
pub fn generate_post_link_metadata(
  post: Post,
  local_site: Option<LocalSite>,
  context: LemmyContext,
) {
  spawn_try_task(async move {
    // A thumbnail may be generated when the site allows sensitive content or the post is not nsfw.
    let may_generate_thumbnail = local_site_opt_to_sensitive(&local_site) || !post.nsfw;

    let link: Option<Url> = post.url.map(Into::into);
    let fetched = fetch_link_metadata_opt(link.as_ref(), may_generate_thumbnail, &context).await;

    // Only touch the thumbnail column when the fetch produced a thumbnail; the outer `None`
    // presumably means "leave unchanged" in the update form -- TODO confirm against
    // `PostUpdateForm`.
    let thumbnail_url = match fetched.thumbnail {
      Some(thumbnail) => Some(Some(proxy_image_link(thumbnail.into(), &context).await?)),
      None => None,
    };

    let opengraph = fetched.opengraph_data;
    let update = PostUpdateForm {
      embed_title: Some(opengraph.title),
      embed_description: Some(opengraph.description),
      embed_video_url: Some(opengraph.embed_video_url),
      thumbnail_url,
      url_content_type: Some(fetched.content_type),
      ..Default::default()
    };

    Post::update(&mut context.pool(), post.id, &update).await?;
    Ok(())
  });
}

/// Extract site metadata from HTML Opengraph attributes.
fn extract_opengraph_data(html_bytes: &[u8], url: &Url) -> Result<OpenGraphData, LemmyError> {
let html = String::from_utf8_lossy(html_bytes);
Expand Down
26 changes: 7 additions & 19 deletions crates/api_crud/src/post/create.rs
Expand Up @@ -4,7 +4,7 @@ use lemmy_api_common::{
build_response::build_post_response,
context::LemmyContext,
post::{CreatePost, PostResponse},
request::fetch_link_metadata_opt,
request::generate_post_link_metadata,
send_activity::{ActivityChannel, SendActivityData},
utils::{
check_community_user_action,
Expand Down Expand Up @@ -67,14 +67,12 @@ pub async fn create_post(
let body = process_markdown_opt(&data.body, &slur_regex, &url_blocklist, &context).await?;
let data_url = data.url.as_ref();
let url = data_url.map(clean_url_params); // TODO no good way to handle a "clear"
let custom_thumbnail = data.custom_thumbnail.as_ref().map(clean_url_params);

is_valid_post_title(&data.name)?;
is_valid_body_field(&body, true)?;
is_valid_alt_text_field(&data.alt_text)?;
is_url_blocked(&url, &url_blocklist)?;
check_url_scheme(&url)?;
check_url_scheme(&custom_thumbnail)?;

check_community_user_action(
&local_user_view.person,
Expand All @@ -97,18 +95,7 @@ pub async fn create_post(
Err(LemmyErrorType::OnlyModsCanPostInCommunity)?
}
}

// Only generate the thumbnail if there's no custom thumbnail provided,
// otherwise it will save it in pictrs
let generate_thumbnail = custom_thumbnail.is_none();

// Fetch post links and pictrs cached image
let metadata = fetch_link_metadata_opt(url.as_ref(), generate_thumbnail, &context).await;
let url = proxy_image_link_opt_apub(url, &context).await?;
let thumbnail_url = proxy_image_link_opt_apub(custom_thumbnail, &context)
.await?
.map(Into::into)
.or(metadata.thumbnail);

// Only need to check if language is allowed in case user set it explicitly. When using default
// language, it already only returns allowed languages.
Expand All @@ -134,24 +121,25 @@ pub async fn create_post(

let post_form = PostInsertForm::builder()
.name(data.name.trim().to_string())
.url_content_type(metadata.content_type)
.url(url)
.body(body)
.alt_text(data.alt_text.clone())
.community_id(data.community_id)
.creator_id(local_user_view.person.id)
.nsfw(data.nsfw)
.embed_title(metadata.opengraph_data.title)
.embed_description(metadata.opengraph_data.description)
.embed_video_url(metadata.opengraph_data.embed_video_url)
.language_id(language_id)
.thumbnail_url(thumbnail_url)
.build();

let inserted_post = Post::create(&mut context.pool(), &post_form)
.await
.with_lemmy_type(LemmyErrorType::CouldntCreatePost)?;

generate_post_link_metadata(
inserted_post.clone(),
Some(local_site),
context.app_data().clone(),
);

let inserted_post_id = inserted_post.id;
let protocol_and_hostname = context.settings().get_protocol_and_hostname();
let apub_id = generate_local_apub_endpoint(
Expand Down
44 changes: 7 additions & 37 deletions crates/api_crud/src/post/update.rs
Expand Up @@ -4,7 +4,7 @@ use lemmy_api_common::{
build_response::build_post_response,
context::LemmyContext,
post::{EditPost, PostResponse},
request::fetch_link_metadata,
request::generate_post_link_metadata,
send_activity::{ActivityChannel, SendActivityData},
utils::{
check_community_user_action,
Expand Down Expand Up @@ -51,7 +51,6 @@ pub async fn update_post(
// TODO No good way to handle a clear.
// Issue link: https://github.com/LemmyNet/lemmy/issues/2287
let url = data.url.as_ref().map(clean_url_params);
let custom_thumbnail = data.custom_thumbnail.as_ref().map(clean_url_params);

let url_blocklist = get_url_blocklist(&context).await?;

Expand All @@ -67,7 +66,6 @@ pub async fn update_post(
is_valid_alt_text_field(&data.alt_text)?;
is_url_blocked(&url, &url_blocklist)?;
check_url_scheme(&url)?;
check_url_scheme(&custom_thumbnail)?;

let post_id = data.post_id;
let orig_post = Post::read(&mut context.pool(), post_id).await?;
Expand All @@ -84,40 +82,11 @@ pub async fn update_post(
Err(LemmyErrorType::NoPostEditAllowed)?
}

// Fetch post links and thumbnail if url was updated
let (embed_title, embed_description, embed_video_url, metadata_thumbnail, metadata_content_type) =
match &url {
Some(url) => {
// Only generate the thumbnail if there's no custom thumbnail provided,
// otherwise it will save it in pictrs
let generate_thumbnail = custom_thumbnail.is_none() || orig_post.thumbnail_url.is_none();

let metadata = fetch_link_metadata(url, generate_thumbnail, &context).await?;
(
Some(metadata.opengraph_data.title),
Some(metadata.opengraph_data.description),
Some(metadata.opengraph_data.embed_video_url),
Some(metadata.thumbnail),
Some(metadata.content_type),
)
}
_ => Default::default(),
};

let url = match url {
Some(url) => Some(proxy_image_link_opt_apub(Some(url), &context).await?),
_ => Default::default(),
};

let custom_thumbnail = match custom_thumbnail {
Some(custom_thumbnail) => {
Some(proxy_image_link_opt_apub(Some(custom_thumbnail), &context).await?)
}
_ => Default::default(),
};

let thumbnail_url = custom_thumbnail.or(metadata_thumbnail);

let language_id = data.language_id;
CommunityLanguage::is_allowed_community_language(
&mut context.pool(),
Expand All @@ -129,15 +98,10 @@ pub async fn update_post(
let post_form = PostUpdateForm {
name: data.name.clone(),
url,
url_content_type: metadata_content_type,
body: diesel_option_overwrite(body),
alt_text: diesel_option_overwrite(data.alt_text.clone()),
nsfw: data.nsfw,
embed_title,
embed_description,
embed_video_url,
language_id: data.language_id,
thumbnail_url,
updated: Some(Some(naive_now())),
..Default::default()
};
Expand All @@ -147,6 +111,12 @@ pub async fn update_post(
.await
.with_lemmy_type(LemmyErrorType::CouldntUpdatePost)?;

generate_post_link_metadata(
updated_post.clone(),
Some(local_site),
context.app_data().clone(),
);

ActivityChannel::submit_activity(SendActivityData::UpdatePost(updated_post), &context).await?;

build_post_response(
Expand Down
62 changes: 22 additions & 40 deletions crates/apub/src/objects/post.rs
Expand Up @@ -24,10 +24,9 @@ use chrono::{DateTime, Utc};
use html2text::{from_read_with_decorator, render::text_renderer::TrivialDecorator};
use lemmy_api_common::{
context::LemmyContext,
request::fetch_link_metadata_opt,
request::generate_post_link_metadata,
utils::{
get_url_blocklist,
local_site_opt_to_sensitive,
local_site_opt_to_slur_regex,
process_markdown_opt,
proxy_image_link_opt_apub,
Expand Down Expand Up @@ -219,6 +218,8 @@ impl Object for ApubPost {

let first_attachment = page.attachment.first();

let local_site = LocalSite::read(&mut context.pool()).await.ok();

let form = if !page.is_mod_action(context).await? {
let url = if let Some(attachment) = first_attachment.cloned() {
Some(attachment.url())
Expand All @@ -231,21 +232,8 @@ impl Object for ApubPost {
check_url_scheme(&url)?;

let alt_text = first_attachment.cloned().and_then(Attachment::alt_text);
let local_site = LocalSite::read(&mut context.pool()).await.ok();
let allow_sensitive = local_site_opt_to_sensitive(&local_site);
let page_is_sensitive = page.sensitive.unwrap_or(false);
let allow_generate_thumbnail = allow_sensitive || !page_is_sensitive;
let mut thumbnail_url = page.image.map(|i| i.url);
let do_generate_thumbnail = thumbnail_url.is_none() && allow_generate_thumbnail;

// Generate local thumbnail only if no thumbnail was federated and 'sensitive' attributes allow it.
let metadata = fetch_link_metadata_opt(url.as_ref(), do_generate_thumbnail, context).await;
if let Some(thumbnail_url_) = metadata.thumbnail {
thumbnail_url = Some(thumbnail_url_.into());
}
let url = proxy_image_link_opt_apub(url, context).await?;
let thumbnail_url = proxy_image_link_opt_apub(thumbnail_url, context).await?;

let url = proxy_image_link_opt_apub(url, context).await?;
let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let url_blocklist = get_url_blocklist(context).await?;

Expand All @@ -254,30 +242,22 @@ impl Object for ApubPost {
let language_id =
LanguageTag::to_language_id_single(page.language, &mut context.pool()).await?;

PostInsertForm {
name,
url: url.map(Into::into),
body,
alt_text,
creator_id: creator.id,
community_id: community.id,
removed: None,
locked: page.comments_enabled.map(|e| !e),
published: page.published.map(Into::into),
updated: page.updated.map(Into::into),
deleted: Some(false),
nsfw: page.sensitive,
embed_title: metadata.opengraph_data.title,
embed_description: metadata.opengraph_data.description,
embed_video_url: metadata.opengraph_data.embed_video_url,
thumbnail_url,
ap_id: Some(page.id.clone().into()),
local: Some(false),
language_id,
featured_community: None,
featured_local: None,
url_content_type: metadata.content_type,
}
PostInsertForm::builder()
.name(name)
.url(url.map(Into::into))
.body(body)
.alt_text(alt_text)
.creator_id(creator.id)
.community_id(community.id)
.locked(page.comments_enabled.map(|e| !e))
.published(page.published.map(Into::into))
.updated(page.updated.map(Into::into))
.deleted(Some(false))
.nsfw(page.sensitive)
.ap_id(Some(page.id.clone().into()))
.local(Some(false))
.language_id(language_id)
.build()
} else {
// if is mod action, only update locked/stickied fields, nothing else
PostInsertForm::builder()
Expand All @@ -292,6 +272,8 @@ impl Object for ApubPost {

let post = Post::create(&mut context.pool(), &form).await?;

generate_post_link_metadata(post.clone(), local_site, context.app_data().clone());

// write mod log entry for lock
if Page::is_locked_changed(&old_post, &page.comments_enabled) {
let form = ModLockPostForm {
Expand Down

0 comments on commit 456054f

Please sign in to comment.