Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion admin/management/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,12 @@ def post(self, request):
class BulkResync(ManagementCommandPermissionView):

def post(self, request):
missing_dois_only = request.POST.get('missing_preprint_dois_only', False)
sync_doi_metadata.apply_async(kwargs={
'modified_date': timezone.now(),
'batch_size': None,
'dry_run': False
'dry_run': False,
'missing_preprint_dois_only': missing_dois_only
})
messages.success(request, 'Resyncing with CrossRef and DataCite! It will take some time.')
return redirect(reverse('management:commands'))
Expand Down
3 changes: 2 additions & 1 deletion admin/templates/management/commands.html
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ <h4> <u>Ban spam users by regular expression</u>
<label>Nodes:</label> <input type="checkbox" name="node_ban" checked /><br>
<label>Registrations:</label> <input type="checkbox" name="registration_ban" checked /><br>
<label>Preprints:</label> <input type="checkbox" name="preprint_ban" checked /><br>
<input class="btn btn-danger" type="submit" value="Run" style="color: red" />
<input class="btn btn-danger" type="submit" value="Run" style="color: white" />
</form>
</ul>
<section>
Expand Down Expand Up @@ -133,6 +133,7 @@ <h4><u>Resync with CrossRef and DataCite</u></h4>
<form method="post"
action="{% url 'management:bulk-resync'%}">
{% csrf_token %}
<label>Only preprints missing DOI:</label> <input type="checkbox" name="missing_preprint_dois_only"/><br>
<nav>
<input class="btn btn-success" type="submit" value="Run" />
</nav>
Expand Down
35 changes: 30 additions & 5 deletions osf/management/commands/sync_doi_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from django.contrib.contenttypes.models import ContentType
from django.core.management.base import BaseCommand
from osf.models import GuidMetadataRecord, Identifier, Registration
from osf.models import GuidMetadataRecord, Identifier, Registration, Preprint
from framework.celery_tasks import app

logger = logging.getLogger(__name__)
Expand All @@ -14,26 +14,30 @@
RATE_LIMIT_RETRY_DELAY = 60 * 5


@app.task(name='osf.management.commands.sync_doi_metadata', max_retries=5, default_retry_delay=RATE_LIMIT_RETRY_DELAY)
def sync_identifier_doi(identifier_id):
@app.task(name='osf.management.commands.sync_doi_metadata', bind=True, acks_late=True, max_retries=5, default_retry_delay=RATE_LIMIT_RETRY_DELAY)
def sync_identifier_doi(self, identifier_id):
try:
identifier = Identifier.objects.get(id=identifier_id)
identifier.referent.request_identifier_update('doi')
identifier.save()
logger.info(f'Doi update for {identifier.value} complete')
except Exception as err:
logger.warning(f'[{err.__class__.__name__}] Doi update for {identifier.value} failed because of error: {err}')
sync_identifier_doi.retry(exc=err, countdown=RATE_LIMIT_RETRY_DELAY)
self.retry()


@app.task(name='osf.management.commands.sync_doi_metadata_command', max_retries=5, default_retry_delay=RATE_LIMIT_RETRY_DELAY)
def sync_doi_metadata(modified_date, batch_size=100, dry_run=True, sync_private=False, rate_limit=100):
def sync_doi_metadata(modified_date, batch_size=100, dry_run=True, sync_private=False, rate_limit=100, missing_preprint_dois_only=False):
identifiers = Identifier.objects.filter(
category='doi',
deleted__isnull=True,
modified__lte=modified_date,
object_id__isnull=False,
)
if missing_preprint_dois_only:
sync_preprint_missing_dois.apply_async(kwargs={'rate_limit': rate_limit})
identifiers = identifiers.exclude(content_type=ContentType.objects.get_for_model(Preprint))

if batch_size:
identifiers = identifiers[:batch_size]
rate_limit = batch_size if batch_size > rate_limit else rate_limit
Expand All @@ -55,6 +59,27 @@ def sync_doi_metadata(modified_date, batch_size=100, dry_run=True, sync_private=
sync_identifier_doi.apply_async(kwargs={'identifier_id': identifier.id})


@app.task(name='osf.management.commands.sync_preprint_missing_dois', max_retries=5, default_retry_delay=RATE_LIMIT_RETRY_DELAY)
def sync_preprint_missing_dois(rate_limit):
    """Queue a DOI-creation task for every preprint whose ``preprint_doi_created`` is unset.

    :param rate_limit: batch length used for throttling; right before the
        ``rate_limit``-th, ``2 * rate_limit``-th, ... preprint is queued, the
        loop sleeps for ``RATE_LIMIT_RETRY_DELAY`` seconds.
    """
    preprints_without_doi = Preprint.objects.filter(preprint_doi_created=None)
    for position, preprint in enumerate(preprints_without_doi, start=1):
        # Pause on every ``rate_limit``-th record so we stay under the rate limit CrossRef enforces.
        reached_batch_boundary = position % rate_limit == 0
        if reached_batch_boundary:
            time.sleep(RATE_LIMIT_RETRY_DELAY)

        async_request_identifier_update.apply_async(kwargs={'preprint_id': preprint._id})


@app.task(name='osf.management.commands.async_request_identifier_update', bind=True, acks_late=True, max_retries=5, default_retry_delay=RATE_LIMIT_RETRY_DELAY)
def async_request_identifier_update(self, preprint_id):
    """Request creation of a DOI for a single preprint, retrying the task on failure.

    Any exception raised while requesting the identifier update is logged as a
    warning and the task is rescheduled through ``self.retry()`` (the task is
    declared with ``max_retries=5`` and a default delay of
    ``RATE_LIMIT_RETRY_DELAY`` seconds).

    :param preprint_id: value handed to ``Preprint.load`` to look the preprint up.
    """
    preprint = Preprint.load(preprint_id)
    try:
        preprint.request_identifier_update('doi', create=True)
    except Exception as err:
        # Log the id we were given rather than ``preprint._id``: if the lookup
        # produced no object (presumably ``load`` yields None for an unknown
        # id -- confirm), touching ``preprint`` here would raise a second error
        # that hides the original one and prevents the retry below.
        logger.warning(f'[{err.__class__.__name__}] Doi creation failed for the preprint with id {preprint_id} because of error: {err}')
        self.retry()


@app.task(name='osf.management.commands.sync_doi_empty_metadata_dataarchive_registrations_command', max_retries=5, default_retry_delay=RATE_LIMIT_RETRY_DELAY)
def sync_doi_empty_metadata_dataarchive_registrations(modified_date, batch_size=100, dry_run=True, sync_private=False, rate_limit=100):
registrations_ids = list(
Expand Down
Loading