Skip to content

Commit

Permalink
Merge branch 'master' into retry-reset-failed-submissions
Browse files Browse the repository at this point in the history
  • Loading branch information
alisonrclarke committed Feb 25, 2021
2 parents 50d1ca4 + ce81b54 commit 43b8090
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 27 deletions.
51 changes: 30 additions & 21 deletions hepdata/modules/converter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@

"""HEPData Converter."""

import contextlib
import tempfile
import zipfile
from shutil import rmtree

from shutil import move

from flask import current_app
from hepdata_converter_ws_client import convert

from hepdata.config import CFG_CONVERTER_URL, CFG_CONVERTER_TIMEOUT
Expand All @@ -43,30 +45,21 @@ def convert_zip_archive(input_archive, output_archive, options):
:param options:
:return: output_file
"""
input_root_dir = tempfile.mkdtemp()
with zipfile.ZipFile(input_archive, 'r') as zip_archive:
zip_archive.extractall(path=input_root_dir)

# Find the appropriate file/directory in the input archive
input = options.get('input_format', 'yaml')
validation = find_file_in_directory(
input_root_dir,
lambda x: x == 'submission.yaml' if input == 'yaml' else x.endswith('.oldhepdata')
)
if not validation:
return None
with prepare_data_folder(input_archive, input) as validation:
if not validation:
return None

input_directory, input_file = validation
input_directory, input_file = validation

successful = convert(
CFG_CONVERTER_URL,
input_directory if input == 'yaml' else input_file,
output=output_archive,
options=options,
extract=False,
timeout=CFG_CONVERTER_TIMEOUT,
)
rmtree(input_root_dir)
successful = convert(
CFG_CONVERTER_URL,
input_directory if input == 'yaml' else input_file,
output=output_archive,
options=options,
extract=False,
timeout=CFG_CONVERTER_TIMEOUT,
)

# Error occurred, the output is a HTML file
if not successful:
Expand Down Expand Up @@ -99,3 +92,19 @@ def convert_oldhepdata_to_yaml(input_path, output_path):
)

return successful


@contextlib.contextmanager
def prepare_data_folder(input_archive, input_format):
    """Extract *input_archive* into a temporary directory and locate its input.

    Context manager: yields the result of ``find_file_in_directory`` — a
    ``(directory, filename)`` tuple when a suitable file is found, or a falsy
    value otherwise — and removes the temporary directory on exit, even if
    extraction or the caller's body raises.

    :param input_archive: path to a zip archive containing the submission.
    :param input_format: ``'yaml'`` to search for ``submission.yaml``;
        any other value searches for a file ending in ``.oldhepdata``.
    """
    # TemporaryDirectory replaces the manual mkdtemp/try/finally/rmtree
    # pattern and guarantees cleanup on every exit path.
    with tempfile.TemporaryDirectory(dir=current_app.config['CFG_TMPDIR']) as input_root_dir:
        with zipfile.ZipFile(input_archive, 'r') as zip_archive:
            zip_archive.extractall(path=input_root_dir)

        # Find the appropriate file/directory in the input archive
        yield find_file_in_directory(
            input_root_dir,
            lambda x: x == 'submission.yaml' if input_format == 'yaml' else x.endswith('.oldhepdata')
        )
2 changes: 1 addition & 1 deletion hepdata/modules/converter/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ def create_original_with_resources(submission, data_filepath, output_path):
# There is a resources directory from when this record was imported
# from the old hepdata site. We need to create a new zip with the
# contents of data_filepath and resources
with tempfile.TemporaryDirectory() as tmpdir:
with tempfile.TemporaryDirectory(dir=current_app.config['CFG_TMPDIR']) as tmpdir:
# Copy resources directory into 'contents' dir in temp directory
contents_path = os.path.join(tmpdir, 'contents')
shutil.copytree(resource_location, contents_path)
Expand Down
37 changes: 33 additions & 4 deletions hepdata/modules/records/utils/submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@
from flask import current_app
from flask_celeryext import create_celery_app
from flask_login import current_user
from hepdata_converter_ws_client import get_data_size
from hepdata.config import CFG_DATA_TYPE, CFG_PUB_TYPE
from hepdata.ext.elasticsearch.admin_view.api import AdminIndexer
from hepdata.ext.elasticsearch.api import get_records_matching_field, \
delete_item_from_index, index_record_ids, push_data_keywords
from hepdata.modules.converter import prepare_data_folder
from hepdata.modules.converter.tasks import convert_and_store
from hepdata.modules.email.api import send_finalised_email
from hepdata.modules.permissions.models import SubmissionParticipant
Expand All @@ -47,7 +49,8 @@
get_license, infer_file_type, encode_string, zipdir, get_record_by_id, contains_accepted_url
from hepdata.modules.records.utils.common import get_or_create
from hepdata.modules.records.utils.data_files import get_data_path_for_record, \
cleanup_old_files, delete_all_files, delete_packaged_file
cleanup_old_files, delete_all_files, delete_packaged_file, \
find_submission_data_file_path
from hepdata.modules.records.utils.doi_minter import reserve_dois_for_data_submissions, reserve_doi_for_hepsubmission, \
generate_dois_for_submission
from hepdata.modules.records.utils.validators import get_data_validator, get_submission_validator
Expand Down Expand Up @@ -564,10 +567,36 @@ def process_submission_directory(basepath, submission_file_path, recid,

if len(errors) is 0:
errors = package_submission(basepath, recid, hepsubmission)
reserve_dois_for_data_submissions(publication_recid=recid, version=hepsubmission.version)

admin_indexer = AdminIndexer()
admin_indexer.index_submission(hepsubmission)
# Check the size of the upload to ensure it can be converted
data_filepath = find_submission_data_file_path(hepsubmission)
with prepare_data_folder(data_filepath, 'yaml') as filepaths:
input_directory, input_file = filepaths
# Create options that look like a worst-case (biggest)
# conversions (using yoda-like options as they include rivet
# analysis
dummy_inspire_id = hepsubmission.inspire_id or '0000000'
options = {
'input_format': 'yaml',
'output_format': 'yoda',
'filename': f'HEPData-ins{dummy_inspire_id}-v{hepsubmission.version}-yoda',
'validator_schema_version': '0.1.0',
'hepdata_doi': f'10.17182/hepdata.{recid}.v{hepsubmission.version}',
'rivet_analysis_name': f'ATLAS_2020_I{dummy_inspire_id}'
}
data_size = get_data_size(input_directory, options)
if data_size > current_app.config['UPLOAD_MAX_SIZE']:
errors["Archive"] = [{
"level": "error",
"message": "Archive is too big for conversion to other formats. (%s bytes would be sent to converter; maximum size is %s.)"
% (data_size, current_app.config['UPLOAD_MAX_SIZE'])
}]

if len(errors) == 0:
reserve_dois_for_data_submissions(publication_recid=recid, version=hepsubmission.version)

admin_indexer = AdminIndexer()
admin_indexer.index_submission(hepsubmission)

else:

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Flask-Cors==3.0.2
Flask-Login==0.3.2
gevent==1.4.0
gunicorn==19.5.0
hepdata-converter-ws-client==0.2.0
hepdata-converter-ws-client==0.2.1
hepdata-validator==0.2.3
idna<2.8,>=2.5 # Indirect ('invenio-search', 'email-validator')
invenio-access==1.0.2 # Indirect (needed by invenio-admin)
Expand Down
31 changes: 31 additions & 0 deletions tests/submission_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,37 @@ def test_invalid_data_yaml(app, admin_idx):
assert(errors['data1.yaml'][0]['message'].startswith("There was a problem parsing the file"))


def test_submission_too_big(app, mocker):
    """
    Verify that a submission whose data exceeds the configured maximum
    upload size is rejected with a single 'Archive' error.
    """
    record_id = 12345

    submission = HEPSubmission(publication_recid=record_id,
                               overall_status='todo',
                               version=1)
    db.session.add(submission)
    db.session.commit()

    # Shrink the allowed upload size so the test fixture exceeds it.
    mocker.patch.dict('flask.current_app.config',
                      {'UPLOAD_MAX_SIZE': 1000})

    submission_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'test_data/test_submission')

    errors = process_submission_directory(
        submission_dir,
        os.path.join(submission_dir, 'submission.yaml'),
        record_id
    )

    archive_errors = errors.get('Archive')
    assert archive_errors is not None
    assert len(archive_errors) == 1
    assert archive_errors[0]['level'] == 'error'
    assert archive_errors[0]['message'].startswith(
        "Archive is too big for conversion to other formats.")


def test_duplicate_table_names(app):
"""
Test that an error is returned for a submission.yaml file with duplicate table names.
Expand Down

0 comments on commit 43b8090

Please sign in to comment.