Skip to content
This repository has been archived by the owner on Sep 21, 2023. It is now read-only.

Commit

Permalink
Add full queue sync (#49)
Browse files Browse the repository at this point in the history
* Improve actions in auto report handling

The actions on Blossom are now always executed if necessary, even if the Reddit actions are not necessary

* Add sync of Blossom queue to archiving steps

* Fix formatting
  • Loading branch information
TimJentzsch committed Jun 30, 2023
1 parent 216a9d9 commit 08384ec
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 6 deletions.
6 changes: 4 additions & 2 deletions tor_archivist/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,16 @@ def foo(self):
# as expected because the lookup logic is replicated in __get__ for
# manual invocation.

def __init__(
    self, func: Callable, name: Optional[str] = None, doc: Optional[str] = None
) -> None:
    """Set up the cached property, mirroring *func*'s metadata.

    The wrapped function's name, module and docstring are copied onto the
    descriptor so introspection works; *name* and *doc* override the
    copied name/docstring when given (falsy values fall back to *func*'s).
    """
    self.func = func
    self.__module__ = func.__module__
    self.__name__ = name or func.__name__
    self.__doc__ = doc or func.__doc__

def __get__(self, obj: Any, _type: Any=None) -> Any:
def __get__(self, obj: Any, _type: Any = None) -> Any:
if obj is None:
return self
value = obj.__dict__.get(self.__name__, _missing)
Expand Down
56 changes: 53 additions & 3 deletions tor_archivist/core/queue_sync.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Functionality to sync the Blossom queue with the queue on Reddit."""
import logging
import time
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, Optional

from tor_archivist.core.blossom import (
Expand All @@ -20,6 +22,7 @@

NSFW_POST_REPORT_REASON = "Post should be marked as NSFW"
BOT_USERNAMES = ["tor_archivist", "blossom", "tor_tester"]
QUEUE_TIMEOUT = timedelta(hours=18)


def _get_report_reason(r_submission: Any) -> Optional[str]:
Expand Down Expand Up @@ -51,16 +54,26 @@ def _auto_report_handling(cfg: Config, r_submission: Any, b_submission: Dict, re
partner_submission = cfg.reddit.submission(url=r_submission.url)

# Check if the post is marked as NSFW on the partner sub
if not r_submission.over_18 and partner_submission.over_18:
nsfw_on_reddit(r_submission)
nsfw_on_blossom(cfg, b_submission)
if partner_submission.over_18:
if not r_submission.over_18:
nsfw_on_reddit(r_submission)
if not b_submission["nsfw"]:
nsfw_on_blossom(cfg, b_submission)

# Check if the post has been removed on the partner sub
if partner_submission.removed_by_category:
# Removed on the partner sub, it's safe to remove
# But only do it if the submission is not marked as removed already
if not r_submission.removed_by_category:
remove_on_reddit(r_submission)
if not b_submission["removed_from_queue"]:
remove_on_blossom(cfg, b_submission)
# We can ignore the report
return True

# Check if the post has been removed by a mod
if r_submission.removed_by_category:
if not b_submission["removed_from_reddit"]:
remove_on_blossom(cfg, b_submission)
# We can ignore the report
return True
Expand Down Expand Up @@ -132,3 +145,40 @@ def track_post_reports(cfg: Config) -> None:
continue

report_on_blossom(cfg, b_submission, reason)


def full_blossom_queue_sync(cfg: Config) -> None:
    """Make sure all posts in Blossom's queue still exist in Reddit.

    Pages through every unclaimed, not-yet-removed Blossom submission
    created within the last ``QUEUE_TIMEOUT`` and runs the auto-report
    handling on each, so removals/NSFW flags on Reddit get mirrored to
    Blossom. Aborts (with an error log) on the first failed Blossom request.

    :param cfg: the global config object providing the Blossom and Reddit clients.
    """
    # Only look at submissions young enough to still be in the queue.
    queue_start = datetime.now(tz=timezone.utc) - QUEUE_TIMEOUT

    page_size = 500
    page = 1

    # Fetch all unclaimed posts from the queue, one page at a time
    while True:
        queue_response = cfg.blossom.get(
            "submission/",
            params={
                "page_size": page_size,
                "page": page,
                "claimed_by__isnull": True,
                "removed_from_queue": False,
                "create_time__gte": queue_start.isoformat(),
            },
        )
        if not queue_response.ok:
            logging.error(f"Failed to get queue from Blossom:\n{queue_response}")
            return

        # Parse the response body exactly once per page; the previous version
        # called .json() a second time below to read "next", re-deserializing
        # the entire payload.
        body = queue_response.json()
        data = body["results"]
        page += 1

        # NOTE(review): handling a submission can set removed_from_queue=True,
        # which shifts this filtered listing while `page` advances — entries
        # may be skipped until the next full sync. Confirm this is acceptable.

        # Sync up the queue submissions
        for b_submission in data:
            logging.info(f"Syncing up Blossom queue for {b_submission['tor_url']}")
            r_submission = cfg.reddit.submission(url=b_submission["tor_url"])
            # Empty report reason: we only want the removal/NSFW sync effects.
            _auto_report_handling(cfg, r_submission, b_submission, "")
            # Throttle so we don't hammer the Reddit API.
            time.sleep(1)

        # A short page or an exhausted "next" cursor means we're done.
        if len(data) < page_size or body["next"] is None:
            break
10 changes: 9 additions & 1 deletion tor_archivist/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,11 @@
from tor_archivist.core.config import Config, config
from tor_archivist.core.helpers import get_id_from_url, run_until_dead
from tor_archivist.core.initialize import build_bot
from tor_archivist.core.queue_sync import track_post_removal, track_post_reports
from tor_archivist.core.queue_sync import (
full_blossom_queue_sync,
track_post_removal,
track_post_reports,
)
from tor_archivist.core.reddit import nsfw_on_reddit

with current_zipfile() as archive:
Expand Down Expand Up @@ -165,6 +169,10 @@ def run(cfg: Config) -> None:
process_expired_posts(cfg)
else:
logging.info("Archiving of expired posts is disabled!")

logging.info("Doing sync of Blossom queue...")
full_blossom_queue_sync(cfg)

# Reset counter
cfg.archive_run_step = 0
else:
Expand Down

0 comments on commit 08384ec

Please sign in to comment.