Skip to content

Commit

Permalink
Improved remote thread fetching (mastodon#10106)
Browse files Browse the repository at this point in the history
* Fetch up to 5 replies when discovering a new remote status

This is used for resolving threads downwards. The originating
server must add a “replies” attributes with such replies for it to
be useful.

* Add some tests for ActivityPub::FetchRepliesWorker

* Add specs for ActivityPub::FetchRepliesService

* Serialize up to 5 public self-replies for ActivityPub notes

* Add specs for ActivityPub::NoteSerializer

* Move exponential backoff logic to a worker concern

* Fetch first page of paginated collections when fetching thread replies

* Add specs for paginated collections in replies

* Move Note replies serialization to a first CollectionPage

The collection isn't actually paginable yet as it has no id nor
a `next` field. This may come in another PR.

* Use pluck(:uri) instead of map(&:uri) to improve performances

* Fix fetching replies when they are in a CollectionPage
  • Loading branch information
ClearlyClaire authored and hiyuki2578 committed Oct 2, 2019
1 parent 1bcdb53 commit 401ec89
Show file tree
Hide file tree
Showing 13 changed files with 333 additions and 7 deletions.
10 changes: 10 additions & 0 deletions app/lib/activitypub/activity/create.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def process_status
end

resolve_thread(@status)
fetch_replies(@status)
distribute(@status)
forward_for_reply if @status.public_visibility? || @status.unlisted_visibility?
end
Expand Down Expand Up @@ -213,6 +214,15 @@ def resolve_thread(status)
ThreadResolveWorker.perform_async(status.id, in_reply_to_uri)
end

def fetch_replies(status)
collection = @object['replies']
return if collection.nil?
replies = ActivityPub::FetchRepliesService.new.call(status, collection, false)
return if replies.present?
uri = value_or_id(collection)
ActivityPub::FetchRepliesWorker.perform_async(status.id, uri) unless uri.nil?
end

def conversation_from_uri(uri)
return nil if uri.nil?
return Conversation.find_by(id: OStatus::TagManager.instance.unique_tag_to_local_id(uri, 'Conversation')) if OStatus::TagManager.instance.local_id?(uri)
Expand Down
4 changes: 4 additions & 0 deletions app/models/concerns/status_threading_concern.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ def descendants(limit, account = nil, max_child_id = nil, since_child_id = nil,
find_statuses_from_tree_path(descendant_ids(limit, max_child_id, since_child_id, depth), account, promote: true)
end

def self_replies(limit)
account.statuses.where(in_reply_to_id: id, visibility: [:public, :unlisted]).reorder(id: :asc).limit(limit)
end

private

def ancestor_ids(limit)
Expand Down
2 changes: 1 addition & 1 deletion app/presenters/activitypub/collection_presenter.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# frozen_string_literal: true

class ActivityPub::CollectionPresenter < ActiveModelSerializers::Model
attributes :id, :type, :size, :items, :part_of, :first, :last, :next, :prev
attributes :id, :type, :size, :items, :page, :part_of, :first, :last, :next, :prev
end
5 changes: 3 additions & 2 deletions app/serializers/activitypub/collection_serializer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ def self.serializer_for(model, options)
super
end

attributes :id, :type
attribute :id, if: -> { object.id.present? }
attribute :type
attribute :total_items, if: -> { object.size.present? }
attribute :next, if: -> { object.next.present? }
attribute :prev, if: -> { object.prev.present? }
Expand Down Expand Up @@ -37,6 +38,6 @@ def ordered?
end

def page?
object.part_of.present?
object.part_of.present? || object.page.present?
end
end
13 changes: 13 additions & 0 deletions app/serializers/activitypub/note_serializer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ class ActivityPub::NoteSerializer < ActiveModel::Serializer
has_many :media_attachments, key: :attachment
has_many :virtual_tags, key: :tag

has_one :replies, serializer: ActivityPub::CollectionSerializer

def id
ActivityPub::TagManager.instance.uri_for(object)
end
Expand All @@ -33,6 +35,17 @@ def content_map
{ object.language => Formatter.instance.format(object) }
end

def replies
ActivityPub::CollectionPresenter.new(
type: :unordered,
first: ActivityPub::CollectionPresenter.new(
type: :unordered,
page: true,
items: object.self_replies(5).pluck(:uri)
)
)
end

def language?
object.language.present?
end
Expand Down
60 changes: 60 additions & 0 deletions app/services/activitypub/fetch_replies_service.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# frozen_string_literal: true

class ActivityPub::FetchRepliesService < BaseService
include JsonLdHelper

def call(parent_status, collection_or_uri, allow_synchronous_requests = true)
@account = parent_status.account
@allow_synchronous_requests = allow_synchronous_requests

@items = collection_items(collection_or_uri)
return if @items.nil?

FetchReplyWorker.push_bulk(filtered_replies)

@items
end

private

def collection_items(collection_or_uri)
collection = fetch_collection(collection_or_uri)
return unless collection.is_a?(Hash)

collection = fetch_collection(collection['first']) if collection['first'].present?
return unless collection.is_a?(Hash)

case collection['type']
when 'Collection', 'CollectionPage'
collection['items']
when 'OrderedCollection', 'OrderedCollectionPage'
collection['orderedItems']
end
end

def fetch_collection(collection_or_uri)
return collection_or_uri if collection_or_uri.is_a?(Hash)
return unless @allow_synchronous_requests
return if invalid_origin?(collection_or_uri)
collection = fetch_resource_without_id_validation(collection_or_uri)
raise Mastodon::UnexpectedResponseError if collection.nil?
collection
end

def filtered_replies
# Only fetch replies to the same server as the original status to avoid
# amplification attacks.

# Also limit to 5 fetched replies to limit potential for DoS.
@items.map { |item| value_or_id(item) }.reject { |uri| invalid_origin?(uri) }.take(5)
end

def invalid_origin?(url)
return true if unsupported_uri_scheme?(url)

needle = Addressable::URI.parse(url).host
haystack = Addressable::URI.parse(@account.uri).host

!haystack.casecmp(needle).zero?
end
end
12 changes: 12 additions & 0 deletions app/workers/activitypub/fetch_replies_worker.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# frozen_string_literal: true

class ActivityPub::FetchRepliesWorker
include Sidekiq::Worker
include ExponentialBackoff

sidekiq_options queue: 'pull', retry: 3

def perform(parent_status_id, replies_uri)
ActivityPub::FetchRepliesService.new.call(Status.find(parent_status_id), replies_uri)
end
end
11 changes: 11 additions & 0 deletions app/workers/concerns/exponential_backoff.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# frozen_string_literal: true

module ExponentialBackoff
extend ActiveSupport::Concern

included do
sidekiq_retry_in do |count|
15 + 10 * (count**4) + rand(10 * (count**4))
end
end
end
12 changes: 12 additions & 0 deletions app/workers/fetch_reply_worker.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# frozen_string_literal: true

class FetchReplyWorker
include Sidekiq::Worker
include ExponentialBackoff

sidekiq_options queue: 'pull', retry: 3

def perform(child_url)
FetchRemoteStatusService.new.call(child_url)
end
end
5 changes: 1 addition & 4 deletions app/workers/thread_resolve_worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,10 @@

class ThreadResolveWorker
include Sidekiq::Worker
include ExponentialBackoff

sidekiq_options queue: 'pull', retry: 3

sidekiq_retry_in do |count|
15 + 10 * (count**4) + rand(10 * (count**4))
end

def perform(child_status_id, parent_url)
child_status = Status.find(child_status_id)
parent_status = FetchRemoteStatusService.new.call(parent_url)
Expand Down
44 changes: 44 additions & 0 deletions spec/serializers/activitypub/note_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# frozen_string_literal: true

require 'rails_helper'

describe ActivityPub::NoteSerializer do
let!(:account) { Fabricate(:account) }
let!(:other) { Fabricate(:account) }
let!(:parent) { Fabricate(:status, account: account, visibility: :public) }
let!(:reply1) { Fabricate(:status, account: account, thread: parent, visibility: :public) }
let!(:reply2) { Fabricate(:status, account: account, thread: parent, visibility: :public) }
let!(:reply3) { Fabricate(:status, account: other, thread: parent, visibility: :public) }
let!(:reply4) { Fabricate(:status, account: account, thread: parent, visibility: :public) }
let!(:reply5) { Fabricate(:status, account: account, thread: parent, visibility: :direct) }

before(:each) do
@serialization = ActiveModelSerializers::SerializableResource.new(parent, serializer: ActivityPub::NoteSerializer, adapter: ActivityPub::Adapter)
end

subject { JSON.parse(@serialization.to_json) }

it 'has a Note type' do
expect(subject['type']).to eql('Note')
end

it 'has a replies collection' do
expect(subject['replies']['type']).to eql('Collection')
end

it 'has a replies collection with a first Page' do
expect(subject['replies']['first']['type']).to eql('CollectionPage')
end

it 'includes public self-replies in its replies collection' do
expect(subject['replies']['first']['items']).to include(reply1.uri, reply2.uri, reply4.uri)
end

it 'does not include replies from others in its replies collection' do
expect(subject['replies']['first']['items']).to_not include(reply3.uri)
end

it 'does not include replies with direct visibility in its replies collection' do
expect(subject['replies']['first']['items']).to_not include(reply5.uri)
end
end
122 changes: 122 additions & 0 deletions spec/services/activitypub/fetch_replies_service_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
require 'rails_helper'

RSpec.describe ActivityPub::FetchRepliesService, type: :service do
let(:actor) { Fabricate(:account, domain: 'example.com', uri: 'http://example.com/account') }
let(:status) { Fabricate(:status, account: actor) }
let(:collection_uri) { 'http://example.com/replies/1' }

let(:items) do
[
'http://example.com/self-reply-1',
'http://example.com/self-reply-2',
'http://example.com/self-reply-3',
'http://other.com/other-reply-1',
'http://other.com/other-reply-2',
'http://other.com/other-reply-3',
'http://example.com/self-reply-4',
'http://example.com/self-reply-5',
'http://example.com/self-reply-6',
]
end

let(:payload) do
{
'@context': 'https://www.w3.org/ns/activitystreams',
type: 'Collection',
id: collection_uri,
items: items,
}.with_indifferent_access
end

subject { described_class.new }

describe '#call' do
context 'when the payload is a Collection with inlined replies' do
context 'when passing the collection itself' do
it 'spawns workers for up to 5 replies on the same server' do
allow(FetchReplyWorker).to receive(:push_bulk)
subject.call(status, payload)
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5'])
end
end

context 'when passing the URL to the collection' do
before do
stub_request(:get, collection_uri).to_return(status: 200, body: Oj.dump(payload))
end

it 'spawns workers for up to 5 replies on the same server' do
allow(FetchReplyWorker).to receive(:push_bulk)
subject.call(status, collection_uri)
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5'])
end
end
end

context 'when the payload is an OrderedCollection with inlined replies' do
let(:payload) do
{
'@context': 'https://www.w3.org/ns/activitystreams',
type: 'OrderedCollection',
id: collection_uri,
orderedItems: items,
}.with_indifferent_access
end

context 'when passing the collection itself' do
it 'spawns workers for up to 5 replies on the same server' do
allow(FetchReplyWorker).to receive(:push_bulk)
subject.call(status, payload)
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5'])
end
end

context 'when passing the URL to the collection' do
before do
stub_request(:get, collection_uri).to_return(status: 200, body: Oj.dump(payload))
end

it 'spawns workers for up to 5 replies on the same server' do
allow(FetchReplyWorker).to receive(:push_bulk)
subject.call(status, collection_uri)
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5'])
end
end
end

context 'when the payload is a paginated Collection with inlined replies' do
let(:payload) do
{
'@context': 'https://www.w3.org/ns/activitystreams',
type: 'Collection',
id: collection_uri,
first: {
type: 'CollectionPage',
partOf: collection_uri,
items: items,
}
}.with_indifferent_access
end

context 'when passing the collection itself' do
it 'spawns workers for up to 5 replies on the same server' do
allow(FetchReplyWorker).to receive(:push_bulk)
subject.call(status, payload)
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5'])
end
end

context 'when passing the URL to the collection' do
before do
stub_request(:get, collection_uri).to_return(status: 200, body: Oj.dump(payload))
end

it 'spawns workers for up to 5 replies on the same server' do
allow(FetchReplyWorker).to receive(:push_bulk)
subject.call(status, collection_uri)
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5'])
end
end
end
end
end
Loading

0 comments on commit 401ec89

Please sign in to comment.