Skip to content

Commit

Permalink
move federation blocklist cache to #3486
Browse files Browse the repository at this point in the history
  • Loading branch information
Nutomic committed Jul 5, 2023
1 parent 93b6410 commit a898aca
Show file tree
Hide file tree
Showing 12 changed files with 95 additions and 67 deletions.
9 changes: 6 additions & 3 deletions crates/apub/src/activities/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use crate::{
insert_activity,
objects::{community::ApubCommunity, person::ApubPerson},
CONTEXT,
DB_QUERY_CACHE_DURATION,
};
use activitypub_federation::{
activity_queue::send_activity,
Expand All @@ -23,7 +22,7 @@ use lemmy_utils::error::LemmyError;
use moka::future::Cache;
use once_cell::sync::Lazy;
use serde::Serialize;
use std::{ops::Deref, sync::Arc};
use std::{ops::Deref, sync::Arc, time::Duration};
use tracing::info;
use url::{ParseError, Url};
use uuid::Uuid;
Expand All @@ -36,6 +35,10 @@ pub mod following;
pub mod unfederated;
pub mod voting;

/// Amount of time that the list of dead instances is cached. This is only updated once a day,
/// so there is no harm in caching it for a longer time.
pub static DEAD_INSTANCE_LIST_CACHE_DURATION: Duration = Duration::from_secs(30 * 60);

/// Checks that the specified Url actually identifies a Person (by fetching it), and that the person
/// doesn't have a site ban.
#[tracing::instrument(skip_all)]
Expand Down Expand Up @@ -167,7 +170,7 @@ where
static CACHE: Lazy<Cache<(), Arc<Vec<String>>>> = Lazy::new(|| {
Cache::builder()
.max_capacity(1)
.time_to_live(DB_QUERY_CACHE_DURATION)
.time_to_live(DEAD_INSTANCE_LIST_CACHE_DURATION)
.build()
});
let dead_instances = CACHE
Expand Down
63 changes: 25 additions & 38 deletions crates/apub/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,9 @@ use lemmy_db_schema::{
traits::Crud,
utils::DbPool,
};
use lemmy_utils::error::{LemmyError, LemmyResult};
use moka::future::Cache;
use lemmy_utils::{error::LemmyError, settings::structs::Settings};
use once_cell::sync::Lazy;
use serde::Serialize;
use std::{sync::Arc, time::Duration};
use url::Url;

pub mod activities;
Expand All @@ -29,8 +27,6 @@ pub mod objects;
pub mod protocol;

pub const FEDERATION_HTTP_FETCH_LIMIT: u32 = 50;
/// Amount of time that common db queries are cached (blocklist and dead instances)
const DB_QUERY_CACHE_DURATION: Duration = Duration::from_secs(30 * 60);

static CONTEXT: Lazy<Vec<serde_json::Value>> = Lazy::new(|| {
serde_json::from_str(include_str!("../assets/lemmy/context.json")).expect("parse context")
Expand All @@ -42,7 +38,7 @@ pub struct VerifyUrlData(pub DbPool);
#[async_trait]
impl UrlVerifier for VerifyUrlData {
async fn verify(&self, url: &Url) -> Result<(), &'static str> {
let local_site_data = local_site_data_cached(&self.0)
let local_site_data = fetch_local_site_data(&self.0)
.await
.expect("read local site data");
check_apub_id_valid(url, &local_site_data)?;
Expand All @@ -57,6 +53,9 @@ impl UrlVerifier for VerifyUrlData {
/// - the correct scheme (either http or https)
/// - URL being in the allowlist (if it is active)
/// - URL not being in the blocklist (if it is active)
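///
/// This function takes no strictness flag. The strict allowlist check is applied by
/// `check_apub_id_valid_with_strictness`, whose `is_strict` argument should be true only when
/// parsing a remote community, or when parsing a post/comment in a local community.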
#[tracing::instrument(skip(local_site_data))]
fn check_apub_id_valid(apub_id: &Url, local_site_data: &LocalSiteData) -> Result<(), &'static str> {
let domain = apub_id.domain().expect("apud id has domain").to_string();
Expand Down Expand Up @@ -98,47 +97,36 @@ pub(crate) struct LocalSiteData {
blocked_instances: Vec<Instance>,
}

pub(crate) async fn local_site_data_cached(pool: &DbPool) -> LemmyResult<Arc<LocalSiteData>> {
static CACHE: Lazy<Cache<(), Arc<LocalSiteData>>> = Lazy::new(|| {
Cache::builder()
.max_capacity(1)
.time_to_live(DB_QUERY_CACHE_DURATION)
.build()
});
Ok(
CACHE
.try_get_with((), async {
// LocalSite may be missing
let local_site = LocalSite::read(pool).await.ok();
let allowed_instances = Instance::allowlist(pool).await?;
let blocked_instances = Instance::blocklist(pool).await?;

Ok::<_, diesel::result::Error>(Arc::new(LocalSiteData {
local_site,
allowed_instances,
blocked_instances,
}))
})
.await?,
)
/// Reads the site-level data needed for federation URL checks from the database.
///
/// The three reads are issued on every call, in order: the `LocalSite` row, the instance
/// allowlist and the instance blocklist. Nothing is cached inside this function.
///
/// # Errors
///
/// Returns the `diesel::result::Error` from the allowlist or blocklist query. A failure to read
/// `LocalSite` is not an error; it yields `local_site: None`.
pub(crate) async fn fetch_local_site_data(
pool: &DbPool,
) -> Result<LocalSiteData, diesel::result::Error> {
// LocalSite may be missing, so any read error is turned into `None` via `.ok()` rather than
// being propagated.
let local_site = LocalSite::read(pool).await.ok();
let allowed_instances = Instance::allowlist(pool).await?;
let blocked_instances = Instance::blocklist(pool).await?;

Ok(LocalSiteData {
local_site,
allowed_instances,
blocked_instances,
})
}

pub(crate) async fn check_apub_id_valid_with_strictness(
#[tracing::instrument(skip(settings, local_site_data))]
pub(crate) fn check_apub_id_valid_with_strictness(
apub_id: &Url,
is_strict: bool,
context: &LemmyContext,
local_site_data: &LocalSiteData,
settings: &Settings,
) -> Result<(), LemmyError> {
let domain = apub_id.domain().expect("apud id has domain").to_string();
let local_instance = context
.settings()
let local_instance = settings
.get_hostname_without_port()
.expect("local hostname is valid");
if domain == local_instance {
return Ok(());
}

let local_site_data = local_site_data_cached(context.pool()).await?;
check_apub_id_valid(apub_id, &local_site_data).map_err(LemmyError::from_message)?;
check_apub_id_valid(apub_id, local_site_data).map_err(LemmyError::from_message)?;

// Only check allowlist if this is a community, and there are instances in the allowlist
if is_strict && !local_site_data.allowed_instances.is_empty() {
Expand All @@ -149,8 +137,7 @@ pub(crate) async fn check_apub_id_valid_with_strictness(
.iter()
.map(|i| i.domain.clone())
.collect::<Vec<String>>();
let local_instance = context
.settings()
let local_instance = settings
.get_hostname_without_port()
.expect("local hostname is valid");
allowed_and_local.push(local_instance);
Expand Down
9 changes: 8 additions & 1 deletion crates/apub/src/objects/comment.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::{
activities::{verify_is_public, verify_person_in_community},
check_apub_id_valid_with_strictness,
fetch_local_site_data,
mentions::collect_non_local_mentions,
objects::{read_from_string_or_source, verify_is_remote_object},
protocol::{
Expand Down Expand Up @@ -131,8 +132,14 @@ impl Object for ApubComment {
verify_domains_match(note.attributed_to.inner(), note.id.inner())?;
verify_is_public(&note.to, &note.cc)?;
let community = note.community(context).await?;
let local_site_data = fetch_local_site_data(context.pool()).await?;

check_apub_id_valid_with_strictness(note.id.inner(), community.local, context).await?;
check_apub_id_valid_with_strictness(
note.id.inner(),
community.local,
&local_site_data,
context.settings(),
)?;
verify_is_remote_object(note.id.inner(), context.settings())?;
verify_person_in_community(&note.attributed_to, &community, context).await?;
let (post, _) = note.get_parents(context).await?;
Expand Down
11 changes: 7 additions & 4 deletions crates/apub/src/objects/community.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
check_apub_id_valid,
local_site_data_cached,
check_apub_id_valid_with_strictness,
fetch_local_site_data,
objects::instance::fetch_instance_actor_for_object,
protocol::{
objects::{group::Group, Endpoints, LanguageTag},
Expand Down Expand Up @@ -187,7 +187,7 @@ impl ApubCommunity {
) -> Result<Vec<Url>, LemmyError> {
let id = self.id;

let local_site_data = local_site_data_cached(context.pool()).await?;
let local_site_data = fetch_local_site_data(context.pool()).await?;
let follows = CommunityFollowerView::for_community(context.pool(), id).await?;
let inboxes: Vec<Url> = follows
.into_iter()
Expand All @@ -201,7 +201,10 @@ impl ApubCommunity {
.unique()
.filter(|inbox: &Url| inbox.host_str() != Some(&context.settings().hostname))
// Don't send to blocked instances
.filter(|inbox| check_apub_id_valid(inbox, &local_site_data).is_ok())
.filter(|inbox| {
check_apub_id_valid_with_strictness(inbox, false, &local_site_data, context.settings())
.is_ok()
})
.collect();

Ok(inboxes)
Expand Down
9 changes: 5 additions & 4 deletions crates/apub/src/objects/instance.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
check_apub_id_valid_with_strictness,
local_site_data_cached,
fetch_local_site_data,
objects::read_from_string_or_source_opt,
protocol::{
objects::{instance::Instance, LanguageTag},
Expand Down Expand Up @@ -113,14 +113,15 @@ impl Object for ApubSite {
expected_domain: &Url,
data: &Data<Self::DataType>,
) -> Result<(), LemmyError> {
check_apub_id_valid_with_strictness(apub.id.inner(), true, data).await?;
let local_site_data = fetch_local_site_data(data.pool()).await?;

check_apub_id_valid_with_strictness(apub.id.inner(), true, &local_site_data, data.settings())?;
verify_domains_match(expected_domain, apub.id.inner())?;

let local_site_data = local_site_data_cached(data.pool()).await?;
let slur_regex = &local_site_opt_to_slur_regex(&local_site_data.local_site);

check_slurs(&apub.name, slur_regex)?;
check_slurs_opt(&apub.summary, slur_regex)?;

Ok(())
}

Expand Down
12 changes: 9 additions & 3 deletions crates/apub/src/objects/person.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
check_apub_id_valid_with_strictness,
local_site_data_cached,
fetch_local_site_data,
objects::{instance::fetch_instance_actor_for_object, read_from_string_or_source_opt},
protocol::{
objects::{
Expand Down Expand Up @@ -118,13 +118,19 @@ impl Object for ApubPerson {
expected_domain: &Url,
context: &Data<Self::DataType>,
) -> Result<(), LemmyError> {
let local_site_data = local_site_data_cached(context.pool()).await?;
let local_site_data = fetch_local_site_data(context.pool()).await?;
let slur_regex = &local_site_opt_to_slur_regex(&local_site_data.local_site);

check_slurs(&person.preferred_username, slur_regex)?;
check_slurs_opt(&person.name, slur_regex)?;

verify_domains_match(person.id.inner(), expected_domain)?;
check_apub_id_valid_with_strictness(person.id.inner(), false, context).await?;
check_apub_id_valid_with_strictness(
person.id.inner(),
false,
&local_site_data,
context.settings(),
)?;

let bio = read_from_string_or_source_opt(&person.summary, &None, &person.source);
check_slurs_opt(&bio, slur_regex)?;
Expand Down
12 changes: 9 additions & 3 deletions crates/apub/src/objects/post.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{
activities::{verify_is_public, verify_person_in_community},
check_apub_id_valid_with_strictness,
local_site_data_cached,
fetch_local_site_data,
objects::{read_from_string_or_source_opt, verify_is_remote_object},
protocol::{
objects::{
Expand Down Expand Up @@ -143,11 +143,17 @@ impl Object for ApubPost {
verify_is_remote_object(page.id.inner(), context.settings())?;
};

let local_site_data = fetch_local_site_data(context.pool()).await?;

let community = page.community(context).await?;
check_apub_id_valid_with_strictness(page.id.inner(), community.local, context).await?;
check_apub_id_valid_with_strictness(
page.id.inner(),
community.local,
&local_site_data,
context.settings(),
)?;
verify_person_in_community(&page.creator()?, &community, context).await?;

let local_site_data = local_site_data_cached(context.pool()).await?;
let slur_regex = &local_site_opt_to_slur_regex(&local_site_data.local_site);
check_slurs_opt(&page.name, slur_regex)?;

Expand Down
10 changes: 9 additions & 1 deletion crates/apub/src/objects/private_message.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::{
check_apub_id_valid_with_strictness,
fetch_local_site_data,
objects::read_from_string_or_source,
protocol::{
objects::chat_message::{ChatMessage, ChatMessageType},
Expand Down Expand Up @@ -101,7 +102,14 @@ impl Object for ApubPrivateMessage {
verify_domains_match(note.id.inner(), expected_domain)?;
verify_domains_match(note.attributed_to.inner(), note.id.inner())?;

check_apub_id_valid_with_strictness(note.id.inner(), false, context).await?;
let local_site_data = fetch_local_site_data(context.pool()).await?;

check_apub_id_valid_with_strictness(
note.id.inner(),
false,
&local_site_data,
context.settings(),
)?;
let person = note.attributed_to.dereference(context).await?;
if person.banned {
return Err(LemmyError::from_message("Person is banned from site"));
Expand Down
14 changes: 10 additions & 4 deletions crates/apub/src/protocol/objects/group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::{
community_moderators::ApubCommunityModerators,
community_outbox::ApubCommunityOutbox,
},
local_site_data_cached,
fetch_local_site_data,
objects::{community::ApubCommunity, read_from_string_or_source_opt},
protocol::{
objects::{Endpoints, LanguageTag},
Expand Down Expand Up @@ -80,14 +80,20 @@ impl Group {
expected_domain: &Url,
context: &LemmyContext,
) -> Result<(), LemmyError> {
check_apub_id_valid_with_strictness(self.id.inner(), true, context).await?;
let local_site_data = fetch_local_site_data(context.pool()).await?;

check_apub_id_valid_with_strictness(
self.id.inner(),
true,
&local_site_data,
context.settings(),
)?;
verify_domains_match(expected_domain, self.id.inner())?;

let local_site_data = local_site_data_cached(context.pool()).await?;
let slur_regex = &local_site_opt_to_slur_regex(&local_site_data.local_site);

check_slurs(&self.preferred_username, slur_regex)?;
check_slurs_opt(&self.name, slur_regex)?;

let description = read_from_string_or_source_opt(&self.summary, &None, &self.source);
check_slurs_opt(&description, slur_regex)?;
Ok(())
Expand Down
7 changes: 5 additions & 2 deletions crates/db_schema/src/impls/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@ use crate::{
newtypes::InstanceId,
schema::{federation_allowlist, federation_blocklist, instance},
source::instance::{Instance, InstanceForm},
utils::{functions::coalesce_time, get_conn, naive_now, DbPool},
utils::{get_conn, naive_now, DbPool},
};
use diesel::{
dsl::{insert_into, now},
result::Error,
sql_types::{Nullable, Timestamp},
ExpressionMethods,
QueryDsl,
};
Expand Down Expand Up @@ -72,7 +73,7 @@ impl Instance {
let conn = &mut get_conn(pool).await?;
instance::table
.select(instance::domain)
.filter(coalesce_time(instance::updated, instance::published).lt(now - 3.days()))
.filter(coalesce(instance::updated, instance::published).lt(now - 3.days()))
.get_results(conn)
.await
}
Expand Down Expand Up @@ -110,3 +111,5 @@ impl Instance {
.await
}
}

// Declares a Diesel binding for the SQL `coalesce` function, typed here for a nullable timestamp
// `x` with a non-null timestamp `y` as the fallback, producing a non-null timestamp.
sql_function! { fn coalesce(x: Nullable<Timestamp>, y: Timestamp) -> Timestamp; }
4 changes: 1 addition & 3 deletions crates/db_schema/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,15 +272,13 @@ static EMAIL_REGEX: Lazy<Regex> = Lazy::new(|| {
});

pub mod functions {
use diesel::sql_types::{BigInt, Nullable, Text, Timestamp};
use diesel::sql_types::{BigInt, Text, Timestamp};

sql_function! {
fn hot_rank(score: BigInt, time: Timestamp) -> Integer;
}

sql_function!(fn lower(x: Text) -> Text);

sql_function! { fn coalesce_time(x: Nullable<Timestamp>, y: Timestamp) -> Timestamp; }
}

pub const DELETED_REPLACEMENT_TEXT: &str = "*Permanently Deleted*";
Expand Down

0 comments on commit a898aca

Please sign in to comment.