Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cache federation blocklist #3486

Merged
merged 2 commits into from
Jul 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
293 changes: 287 additions & 6 deletions Cargo.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions crates/apub/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ once_cell = { workspace = true }
html2md = "0.2.14"
serde_with = { workspace = true }
enum_delegate = "0.2.0"
moka = { version = "0.11", features = ["future"] }

[dev-dependencies]
serial_test = { workspace = true }
Expand Down
70 changes: 45 additions & 25 deletions crates/apub/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::fetcher::post_or_comment::PostOrComment;
use activitypub_federation::config::{Data, UrlVerifier};
use async_trait::async_trait;
use futures::future::join3;
use lemmy_api_common::context::LemmyContext;
use lemmy_db_schema::{
source::{
Expand All @@ -11,9 +12,11 @@ use lemmy_db_schema::{
traits::Crud,
utils::DbPool,
};
use lemmy_utils::{error::LemmyError, settings::structs::Settings};
use lemmy_utils::error::{LemmyError, LemmyResult};
use moka::future::Cache;
use once_cell::sync::Lazy;
use serde::Serialize;
use std::{sync::Arc, time::Duration};
use url::Url;

pub mod activities;
Expand All @@ -27,6 +30,11 @@ pub mod objects;
pub mod protocol;

pub const FEDERATION_HTTP_FETCH_LIMIT: u32 = 50;
/// All incoming and outgoing federation actions read the blocklist/allowlist and slur filters
/// multiple times. This causes a huge number of database reads if we hit the db directly. So we
/// cache these values for a short time, which will already make a huge difference and ensures that
/// changes take effect quickly.
const BLOCKLIST_CACHE_DURATION: Duration = Duration::from_secs(60);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can def live with this short of a cache.


static CONTEXT: Lazy<Vec<serde_json::Value>> = Lazy::new(|| {
serde_json::from_str(include_str!("../assets/lemmy/context.json")).expect("parse context")
Expand All @@ -38,7 +46,7 @@ pub struct VerifyUrlData(pub DbPool);
#[async_trait]
impl UrlVerifier for VerifyUrlData {
async fn verify(&self, url: &Url) -> Result<(), &'static str> {
let local_site_data = fetch_local_site_data(&self.0)
let local_site_data = local_site_data_cached(&self.0)
.await
.expect("read local site data");
check_apub_id_valid(url, &local_site_data)?;
Expand All @@ -53,9 +61,6 @@ impl UrlVerifier for VerifyUrlData {
/// - the correct scheme (either http or https)
/// - URL being in the allowlist (if it is active)
/// - URL not being in the blocklist (if it is active)
///
/// `use_strict_allowlist` should be true only when parsing a remote community, or when parsing a
/// post/comment in a local community.
#[tracing::instrument(skip(local_site_data))]
fn check_apub_id_valid(apub_id: &Url, local_site_data: &LocalSiteData) -> Result<(), &'static str> {
let domain = apub_id.domain().expect("apud id has domain").to_string();
Expand Down Expand Up @@ -97,36 +102,50 @@ pub(crate) struct LocalSiteData {
blocked_instances: Vec<Instance>,
}

pub(crate) async fn fetch_local_site_data(
pool: &DbPool,
) -> Result<LocalSiteData, diesel::result::Error> {
// LocalSite may be missing
let local_site = LocalSite::read(pool).await.ok();
let allowed_instances = Instance::allowlist(pool).await?;
let blocked_instances = Instance::blocklist(pool).await?;

Ok(LocalSiteData {
local_site,
allowed_instances,
blocked_instances,
})
pub(crate) async fn local_site_data_cached(pool: &DbPool) -> LemmyResult<Arc<LocalSiteData>> {
static CACHE: Lazy<Cache<(), Arc<LocalSiteData>>> = Lazy::new(|| {
Cache::builder()
.max_capacity(1)
.time_to_live(BLOCKLIST_CACHE_DURATION)
.build()
});
Ok(
CACHE
.try_get_with((), async {
let (local_site, allowed_instances, blocked_instances) = join3(
LocalSite::read(pool),
Instance::allowlist(pool),
Instance::blocklist(pool),
)
.await;

Ok::<_, diesel::result::Error>(Arc::new(LocalSiteData {
// LocalSite may be missing
local_site: local_site.ok(),
allowed_instances: allowed_instances?,
blocked_instances: blocked_instances?,
}))
})
.await?,
)
}

#[tracing::instrument(skip(settings, local_site_data))]
pub(crate) fn check_apub_id_valid_with_strictness(
pub(crate) async fn check_apub_id_valid_with_strictness(
apub_id: &Url,
is_strict: bool,
local_site_data: &LocalSiteData,
settings: &Settings,
context: &LemmyContext,
) -> Result<(), LemmyError> {
let domain = apub_id.domain().expect("apud id has domain").to_string();
let local_instance = settings
let local_instance = context
.settings()
.get_hostname_without_port()
.expect("local hostname is valid");
if domain == local_instance {
return Ok(());
}
check_apub_id_valid(apub_id, local_site_data).map_err(LemmyError::from_message)?;

let local_site_data = local_site_data_cached(context.pool()).await?;
check_apub_id_valid(apub_id, &local_site_data).map_err(LemmyError::from_message)?;

// Only check allowlist if this is a community, and there are instances in the allowlist
if is_strict && !local_site_data.allowed_instances.is_empty() {
Expand All @@ -137,7 +156,8 @@ pub(crate) fn check_apub_id_valid_with_strictness(
.iter()
.map(|i| i.domain.clone())
.collect::<Vec<String>>();
let local_instance = settings
let local_instance = context
.settings()
.get_hostname_without_port()
.expect("local hostname is valid");
allowed_and_local.push(local_instance);
Expand Down
9 changes: 1 addition & 8 deletions crates/apub/src/objects/comment.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use crate::{
activities::{verify_is_public, verify_person_in_community},
check_apub_id_valid_with_strictness,
fetch_local_site_data,
mentions::collect_non_local_mentions,
objects::{read_from_string_or_source, verify_is_remote_object},
protocol::{
Expand Down Expand Up @@ -132,14 +131,8 @@ impl Object for ApubComment {
verify_domains_match(note.attributed_to.inner(), note.id.inner())?;
verify_is_public(&note.to, &note.cc)?;
let community = note.community(context).await?;
let local_site_data = fetch_local_site_data(context.pool()).await?;

check_apub_id_valid_with_strictness(
note.id.inner(),
community.local,
&local_site_data,
context.settings(),
)?;
check_apub_id_valid_with_strictness(note.id.inner(), community.local, context).await?;
verify_is_remote_object(note.id.inner(), context.settings())?;
verify_person_in_community(&note.attributed_to, &community, context).await?;
let (post, _) = note.get_parents(context).await?;
Expand Down
11 changes: 4 additions & 7 deletions crates/apub/src/objects/community.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
check_apub_id_valid_with_strictness,
fetch_local_site_data,
check_apub_id_valid,
local_site_data_cached,
objects::instance::fetch_instance_actor_for_object,
protocol::{
objects::{group::Group, Endpoints, LanguageTag},
Expand Down Expand Up @@ -187,7 +187,7 @@ impl ApubCommunity {
) -> Result<Vec<Url>, LemmyError> {
let id = self.id;

let local_site_data = fetch_local_site_data(context.pool()).await?;
let local_site_data = local_site_data_cached(context.pool()).await?;
let follows = CommunityFollowerView::for_community(context.pool(), id).await?;
let inboxes: Vec<Url> = follows
.into_iter()
Expand All @@ -201,10 +201,7 @@ impl ApubCommunity {
.unique()
.filter(|inbox: &Url| inbox.host_str() != Some(&context.settings().hostname))
// Don't send to blocked instances
.filter(|inbox| {
check_apub_id_valid_with_strictness(inbox, false, &local_site_data, context.settings())
.is_ok()
})
.filter(|inbox| check_apub_id_valid(inbox, &local_site_data).is_ok())
.collect();

Ok(inboxes)
Expand Down
9 changes: 4 additions & 5 deletions crates/apub/src/objects/instance.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
check_apub_id_valid_with_strictness,
fetch_local_site_data,
local_site_data_cached,
objects::read_from_string_or_source_opt,
protocol::{
objects::{instance::Instance, LanguageTag},
Expand Down Expand Up @@ -113,15 +113,14 @@ impl Object for ApubSite {
expected_domain: &Url,
data: &Data<Self::DataType>,
) -> Result<(), LemmyError> {
let local_site_data = fetch_local_site_data(data.pool()).await?;

check_apub_id_valid_with_strictness(apub.id.inner(), true, &local_site_data, data.settings())?;
check_apub_id_valid_with_strictness(apub.id.inner(), true, data).await?;
verify_domains_match(expected_domain, apub.id.inner())?;

let local_site_data = local_site_data_cached(data.pool()).await?;
let slur_regex = &local_site_opt_to_slur_regex(&local_site_data.local_site);

check_slurs(&apub.name, slur_regex)?;
check_slurs_opt(&apub.summary, slur_regex)?;

Ok(())
}

Expand Down
12 changes: 3 additions & 9 deletions crates/apub/src/objects/person.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
check_apub_id_valid_with_strictness,
fetch_local_site_data,
local_site_data_cached,
objects::{instance::fetch_instance_actor_for_object, read_from_string_or_source_opt},
protocol::{
objects::{
Expand Down Expand Up @@ -118,19 +118,13 @@ impl Object for ApubPerson {
expected_domain: &Url,
context: &Data<Self::DataType>,
) -> Result<(), LemmyError> {
let local_site_data = fetch_local_site_data(context.pool()).await?;
let local_site_data = local_site_data_cached(context.pool()).await?;
let slur_regex = &local_site_opt_to_slur_regex(&local_site_data.local_site);

check_slurs(&person.preferred_username, slur_regex)?;
check_slurs_opt(&person.name, slur_regex)?;

verify_domains_match(person.id.inner(), expected_domain)?;
check_apub_id_valid_with_strictness(
person.id.inner(),
false,
&local_site_data,
context.settings(),
)?;
check_apub_id_valid_with_strictness(person.id.inner(), false, context).await?;

let bio = read_from_string_or_source_opt(&person.summary, &None, &person.source);
check_slurs_opt(&bio, slur_regex)?;
Expand Down
12 changes: 3 additions & 9 deletions crates/apub/src/objects/post.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{
activities::{verify_is_public, verify_person_in_community},
check_apub_id_valid_with_strictness,
fetch_local_site_data,
local_site_data_cached,
objects::{read_from_string_or_source_opt, verify_is_remote_object},
protocol::{
objects::{
Expand Down Expand Up @@ -143,17 +143,11 @@ impl Object for ApubPost {
verify_is_remote_object(page.id.inner(), context.settings())?;
};

let local_site_data = fetch_local_site_data(context.pool()).await?;

let community = page.community(context).await?;
check_apub_id_valid_with_strictness(
page.id.inner(),
community.local,
&local_site_data,
context.settings(),
)?;
check_apub_id_valid_with_strictness(page.id.inner(), community.local, context).await?;
verify_person_in_community(&page.creator()?, &community, context).await?;

let local_site_data = local_site_data_cached(context.pool()).await?;
let slur_regex = &local_site_opt_to_slur_regex(&local_site_data.local_site);
check_slurs_opt(&page.name, slur_regex)?;

Expand Down
10 changes: 1 addition & 9 deletions crates/apub/src/objects/private_message.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use crate::{
check_apub_id_valid_with_strictness,
fetch_local_site_data,
objects::read_from_string_or_source,
protocol::{
objects::chat_message::{ChatMessage, ChatMessageType},
Expand Down Expand Up @@ -102,14 +101,7 @@ impl Object for ApubPrivateMessage {
verify_domains_match(note.id.inner(), expected_domain)?;
verify_domains_match(note.attributed_to.inner(), note.id.inner())?;

let local_site_data = fetch_local_site_data(context.pool()).await?;

check_apub_id_valid_with_strictness(
note.id.inner(),
false,
&local_site_data,
context.settings(),
)?;
check_apub_id_valid_with_strictness(note.id.inner(), false, context).await?;
let person = note.attributed_to.dereference(context).await?;
if person.banned {
return Err(LemmyError::from_message("Person is banned from site"));
Expand Down
12 changes: 3 additions & 9 deletions crates/apub/src/protocol/objects/group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::{
community_moderators::ApubCommunityModerators,
community_outbox::ApubCommunityOutbox,
},
fetch_local_site_data,
local_site_data_cached,
objects::{community::ApubCommunity, read_from_string_or_source_opt},
protocol::{
objects::{Endpoints, LanguageTag},
Expand Down Expand Up @@ -80,16 +80,10 @@ impl Group {
expected_domain: &Url,
context: &LemmyContext,
) -> Result<(), LemmyError> {
let local_site_data = fetch_local_site_data(context.pool()).await?;

check_apub_id_valid_with_strictness(
self.id.inner(),
true,
&local_site_data,
context.settings(),
)?;
check_apub_id_valid_with_strictness(self.id.inner(), true, context).await?;
verify_domains_match(expected_domain, self.id.inner())?;

let local_site_data = local_site_data_cached(context.pool()).await?;
let slur_regex = &local_site_opt_to_slur_regex(&local_site_data.local_site);

check_slurs(&self.preferred_username, slur_regex)?;
Expand Down