Skip to content

Commit

Permalink
Merge branch 'master' into resending-email-invitations
Browse files Browse the repository at this point in the history
  • Loading branch information
alisonrclarke committed May 25, 2022
2 parents 2acb07f + 3040d67 commit 7bb4ecd
Show file tree
Hide file tree
Showing 11 changed files with 94 additions and 20 deletions.
2 changes: 1 addition & 1 deletion hepdata/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ def do_unload(records_to_unload):
@with_appcontext
def find_and_add_record_analyses():
"""Finds analyses such as Rivet and adds them to records."""
update_analyses.delay()
update_analyses()


@utils.command()
Expand Down
5 changes: 3 additions & 2 deletions hepdata/ext/elasticsearch/document_enhancers.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from dateutil.parser import parse
from flask import current_app

from hepdata.config import CFG_PUB_TYPE, CFG_DATA_TYPE, HISTFACTORY_FILE_TYPE, SITE_URL
from hepdata.config import CFG_PUB_TYPE, CFG_DATA_TYPE, HISTFACTORY_FILE_TYPE
from hepdata.ext.elasticsearch.config.record_mapping import mapping as es_mapping
from hepdata.modules.permissions.models import SubmissionParticipant
from hepdata.modules.submission.api import get_latest_hepsubmission
Expand Down Expand Up @@ -95,14 +95,15 @@ def add_analyses(doc):
:param doc:
:return:
"""
latest_submission = get_latest_hepsubmission(publication_recid=doc['recid'])
latest_submission = get_latest_hepsubmission(publication_recid=doc['recid'], overall_status='finished')

if latest_submission:
doc["analyses"] = []
for reference in latest_submission.resources:
if reference.file_type in current_app.config['ANALYSES_ENDPOINTS']:
doc["analyses"].append({'type': reference.file_type, 'analysis': reference.file_location})
elif reference.file_type == HISTFACTORY_FILE_TYPE:
SITE_URL = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
landing_page_url = f"{SITE_URL}/record/resource/{reference.id}?landing_page=True"
doc["analyses"].append({'type': reference.file_type, 'analysis': landing_page_url})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,9 @@


<div class="record-doi">
<a href="https://doi.org/{{ ctx.record.hepdata_doi }}"
target="_blank">https://doi.org/{{ ctx.record.hepdata_doi }}</a>
{% set hepdata_doi = ctx.record.hepdata_doi|replace('None', '10.17182/hepdata.' + ctx.record.recid|string) %}
<a href="https://doi.org/{{ hepdata_doi }}"
target="_blank">https://doi.org/{{ hepdata_doi }}</a>
</div>

<br/>
Expand Down
58 changes: 51 additions & 7 deletions hepdata/modules/records/utils/analyses.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,16 @@
import requests

from hepdata.ext.elasticsearch.api import index_record_ids
from hepdata.modules.submission.api import get_latest_hepsubmission, \
is_resource_added_to_submission
from hepdata.modules.submission.models import DataResource
from hepdata.modules.submission.api import get_latest_hepsubmission, is_resource_added_to_submission
from hepdata.modules.submission.models import DataResource, HEPSubmission, data_reference_link

logging.basicConfig()
log = logging.getLogger(__name__)


@shared_task
def update_analyses():
"""Update (Rivet) analyses and remove outdated resources."""
endpoints = current_app.config["ANALYSES_ENDPOINTS"]
for analysis_endpoint in endpoints:

Expand All @@ -49,10 +49,13 @@ def update_analyses():

response = requests.get(endpoints[analysis_endpoint]["endpoint_url"])

if response:
if response and response.status_code == 200:

analyses = response.json()

analysis_resources = DataResource.query.filter_by(file_type=analysis_endpoint).all()

# Check for missing analyses.
for record in analyses:
submission = get_latest_hepsubmission(inspire_id=record, overall_status='finished')

Expand All @@ -61,29 +64,70 @@ def update_analyses():

for analysis in analyses[record]:
_resource_url = endpoints[analysis_endpoint]["url_template"].format(analysis)

if not is_resource_added_to_submission(submission.publication_recid, submission.version,
_resource_url):
print('Adding {} analysis to ins{} with URL {}'
.format(analysis_endpoint, record, _resource_url))

log.info('Adding {} analysis to ins{} with URL {}'.format(
analysis_endpoint, record, _resource_url)
)
new_resource = DataResource(
file_location=_resource_url,
file_type=analysis_endpoint)

submission.resources.append(new_resource)
num_new_resources += 1

else:

# Remove resource from 'analysis_resources' list.
resource = list(filter(lambda a: a.file_location == _resource_url, analysis_resources))[0]
analysis_resources.remove(resource)

if num_new_resources:

try:
db.session.add(submission)
db.session.commit()
index_record_ids([submission.publication_recid])
latest_submission = get_latest_hepsubmission(inspire_id=record)
if submission.version == latest_submission.version:
index_record_ids([submission.publication_recid])
except Exception as e:
db.session.rollback()
log.error(e)

else:
log.debug("An analysis is available in {0} but with no equivalent in HEPData (ins{1}).".format(
analysis_endpoint, record))

if analysis_resources:
# Extra resources that were not found in the analyses JSON file.
# Need to delete extra resources then reindex affected submissions.
# Only take action if latest version is finished (most important case).
try:
recids_to_reindex = []
for extra_analysis_resource in analysis_resources:
query = db.select([data_reference_link.columns.submission_id]).where(
data_reference_link.columns.dataresource_id == extra_analysis_resource.id)
results = db.session.execute(query)
for result in results:
submission_id = result[0]
submission = HEPSubmission.query.filter_by(id=submission_id).first()
latest_submission = get_latest_hepsubmission(
publication_recid=submission.publication_recid, overall_status='finished'
)
if submission and latest_submission and submission.version == latest_submission.version:
log.info('Removing {} analysis with URL {} from submission {} version {}'
.format(analysis_endpoint, extra_analysis_resource.file_location,
submission.publication_recid, submission.version))
db.session.delete(extra_analysis_resource)
recids_to_reindex.append(submission.publication_recid)
db.session.commit()
if recids_to_reindex:
index_record_ids(list(set(recids_to_reindex))) # remove duplicates before indexing
except Exception as e:
db.session.rollback()
log.error(e)

else:
log.debug("No endpoint url configured for {0}".format(analysis_endpoint))
4 changes: 2 additions & 2 deletions hepdata/modules/records/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@

ALLOWED_EXTENSIONS = ('.zip', '.tar', '.tar.gz', '.tgz', '.oldhepdata', '.yaml', '.yaml.gz')

HISTFACTORY_EXTENSIONS = ALLOWED_EXTENSIONS[:4]
HISTFACTORY_TERMS = ("histfactory", "pyhf", "likelihoods")
HISTFACTORY_EXTENSIONS = ALLOWED_EXTENSIONS[:4] + ('.tar.xz', '.json')
HISTFACTORY_TERMS = ("histfactory", "pyhf", "likelihoods", "workspaces")


def contains_accepted_url(file):
Expand Down
3 changes: 2 additions & 1 deletion hepdata/modules/records/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import logging
import json
import time
import mimetypes
from dateutil import parser
from invenio_accounts.models import User
from flask_login import login_required, login_user
Expand Down Expand Up @@ -695,7 +696,7 @@ def get_resource(resource_id):
print("Resource is at: " + resource_obj.file_location)
try:
with open(resource_obj.file_location, 'r', encoding='utf-8') as resource_file:
contents = resource_file.read()
contents = resource_file.read() if mimetypes.guess_type(resource_obj.file_location)[0] != 'application/x-tar' else 'Binary'
except UnicodeDecodeError:
contents = 'Binary'

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ <h4 class="record-header">
<div class="analysis">
<a href="{{ analysis.analysis }}" target="_blank"><span
class="fa fa-line-chart"></span>
{{ analysis.type }}{% if analysis.type != config.HISTFACTORY_FILE_TYPE %}Analysis{% endif %}
{{ analysis.type }}{% if analysis.type != config.HISTFACTORY_FILE_TYPE %} Analysis{% endif %}
</a>
</div>
{% endfor %}
Expand Down
8 changes: 6 additions & 2 deletions hepdata/modules/submission/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
#

from flask import Blueprint, render_template, request, jsonify
from flask import Blueprint, render_template, request, jsonify, abort
from flask_login import login_required, current_user
from invenio_db import db

Expand All @@ -28,6 +28,7 @@
from hepdata.modules.records.utils.submission import \
get_or_create_hepsubmission
from hepdata.modules.records.utils.workflow import create_record
from hepdata.utils.users import user_is_admin_or_coordinator

blueprint = Blueprint(
'submission',
Expand All @@ -41,7 +42,10 @@
@blueprint.route('', methods=['GET'])
@login_required
def submit_ui():
return render_template('hepdata_submission/submit.html')
if user_is_admin_or_coordinator(current_user):
return render_template('hepdata_submission/submit.html')
else:
abort(403)


@blueprint.route('', methods=['POST'])
Expand Down
2 changes: 1 addition & 1 deletion hepdata/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@
and parsed by ``setup.py``.
"""

__version__ = "0.9.4dev20220505"
__version__ = "0.9.4dev20220520"
23 changes: 23 additions & 0 deletions tests/records_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
get_resource_mimetype, create_breadcrumb_text, format_submission, \
format_resource
from hepdata.modules.records.importer.api import import_records
from hepdata.modules.records.utils.analyses import update_analyses
from hepdata.modules.records.utils.submission import get_or_create_hepsubmission, process_submission_directory, do_finalise, unload_submission
from hepdata.modules.records.utils.common import get_record_by_id, get_record_contents
from hepdata.modules.records.utils.data_processing_utils import generate_table_structure
Expand Down Expand Up @@ -1013,3 +1014,25 @@ def test_create_breadcrumb_text():
assert ctx == {
'breadcrumb_text': 'Suzy Sheep et al.'
}


def test_update_analyses(app):
    """ Test update of Rivet analyses """

    def _rivet_resources():
        # All DataResource rows currently tagged as Rivet analyses.
        return DataResource.query.filter_by(file_type='rivet').all()

    # Import a record that already has a Rivet analysis attached (but with '#' in the URL)
    import_records(['ins1203852'], synchronous=True)
    resources = _rivet_resources()
    assert len(resources) == 1
    assert resources[0].file_location == 'http://rivet.hepforge.org/analyses#ATLAS_2012_I1203852'

    # First call to update_analyses(): should add the new resource and delete the old one.
    # Second call: should make no further changes (but covers more lines of code).
    for _ in range(2):
        update_analyses()
        resources = _rivet_resources()
        assert len(resources) == 1
        assert resources[0].file_location == 'http://rivet.hepforge.org/analyses/ATLAS_2012_I1203852'
2 changes: 1 addition & 1 deletion tests/submission_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def test_url_pattern():
("test.zip", "Some sort of file", "HistFactory", True),
("test.zip", "Some sort of file", "histfactory", True),
("pyhf.tar.gz", "A file", None, False),
("pyhf.json", "HistFactory JSON file", None, False),
("pyhf.json", "HistFactory JSON file", None, True),
("test.zip", "Some sort of file", "json", False),
]
)
Expand Down

0 comments on commit 7bb4ecd

Please sign in to comment.