Skip to content

Commit

Permalink
Merge e6e771b into 1b71f57
Browse files Browse the repository at this point in the history
  • Loading branch information
alisonrclarke committed May 10, 2021
2 parents 1b71f57 + e6e771b commit b9ba176
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 44 deletions.
49 changes: 31 additions & 18 deletions hepdata/ext/elasticsearch/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,27 +128,40 @@ def search(query,

search = search.source(includes=include, excludes=exclude)
search = search[offset:offset+size]
pub_result = search.execute().to_dict()

parent_filter = {
"terms": {
"_id": [hit["_id"] for hit in pub_result['hits']['hits']]
}
}

data_search = RecordsSearch(using=es, index=index)
data_search = data_search.query('has_parent',
parent_type="parent_publication",
query=parent_filter)
if query:
data_search = data_search.query(QueryString(query=query))
try:
pub_result = search.execute().to_dict()

data_search_size = size * ELASTICSEARCH_MAX_RESULT_WINDOW // LIMIT_MAX_RESULTS_PER_PAGE
data_search = data_search[0:data_search_size]
data_result = data_search.execute().to_dict()
parent_filter = {
"terms": {
"_id": [hit["_id"] for hit in pub_result['hits']['hits']]
}
}

merged_results = merge_results(pub_result, data_result)
return map_result(merged_results, filters)
data_search = RecordsSearch(using=es, index=index)
data_search = data_search.query('has_parent',
parent_type="parent_publication",
query=parent_filter)
if query:
data_search = data_search.query(QueryString(query=query))

data_search_size = size * ELASTICSEARCH_MAX_RESULT_WINDOW // LIMIT_MAX_RESULTS_PER_PAGE
data_search = data_search[0:data_search_size]
data_result = data_search.execute().to_dict()

merged_results = merge_results(pub_result, data_result)
return map_result(merged_results, filters)
except TransportError as e:
# For search phase execution exceptions we pass the reason as it's
# likely to be user error (e.g. invalid search query)
if e.error == 'search_phase_execution_exception' and e.info \
and "error" in e.info and isinstance(e.info['error'], dict):
reason = e.info['error']['root_cause'][0]['reason']
# Otherwise we hide the details from the user
else:
log.error(f'An unexpected error occurred when searching: {e}')
reason = f'An unexpected error occurred: {e.error}'
return { 'error': reason }


@author_index
Expand Down
32 changes: 22 additions & 10 deletions hepdata/ext/elasticsearch/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@ def parse_query(query_string):
# would translate to data_keywords.observables:ASYM
mapping = {
"keys": {
"observables": "data_keywords.observables:{0}",
"cmenergies": "data_keywords.cmenergies:{0}",
"phrases": "data_keywords.phrases:{0}",
"reactions": "data_keywords.reactions:{0}"
"observables": "data_keywords.observables",
"cmenergies": "data_keywords.cmenergies",
"phrases": "data_keywords.phrases",
"reactions": "data_keywords.reactions"
}
}

Expand All @@ -59,11 +59,23 @@ def parse_query(query_string):
for query_part in re.split("AND|OR", query_string):
query_part = query_part.strip()
if ':' in query_part:
try:
_key_value = query_part.split(':')
_key = mapping['keys'][_key_value[0]].format(_key_value[1])
new_query_string = new_query_string.replace(query_part, "{0}".format(_key))
except KeyError:
continue
_key, _value = query_part.split(':')
_key = mapping['keys'].get(_key, _key)
_value = HEPDataQueryParser._quote_phrase(_value)
new_query_string = new_query_string.replace(query_part, f"{_key}:{_value}")
else:
new_query_string = new_query_string.replace(
query_part, HEPDataQueryParser()._quote_phrase(query_part)
)

return new_query_string

@staticmethod
def _quote_phrase(phrase):
    """Wrap a reaction or DOI-like phrase in double quotes.

    A phrase is quoted only when it does not already contain a double
    quote and the *whole* phrase is either:

    * a reaction, i.e. any text containing ``-->``; or
    * DOI-like, i.e. two runs of word characters/dots separated by a
      single ``/``.

    :param phrase: a single query term (or the value part of a
        ``key:value`` term).
    :return: ``phrase`` wrapped in double quotes if it matched,
        otherwise ``phrase`` unchanged.
    """
    # Raw string: ``\w``, ``\.`` and ``\/`` are regex escapes, not string
    # escapes. In a normal string literal they are invalid escape sequences
    # (DeprecationWarning, and SyntaxWarning from Python 3.12).
    pattern = re.compile(r"(.*-->.*|[\w\.]+\/[\w\.]+)")

    # fullmatch: the entire phrase must be a reaction or DOI, not merely
    # contain one.
    if '"' not in phrase and pattern.fullmatch(phrase):
        return f'"{phrase}"'
    return phrase
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,19 @@
</div>

<div class="search-results container-fluid">
{% include "hepdata_search/display_results_options.html" %}
{% if not no_results %}
{% include "hepdata_search/display_results_options.html" %}
{% endif %}
<div class="row-fluid">
{% if not no_results %}
{% include "hepdata_search/facet_column.html" %}
{% endif %}
<div class="col-md-10" style="padding-bottom: 3em;">
{% if no_results %}
{% if ctx.error %}
<p>Unable to search for <b>{{ctx.q}}</b>: {{ctx.error}}</p>
<p>Please see <a data-toggle="modal"
data-target="#searchHelpWidget">Advanced Search</a> for details of correct search syntax.</p>
{% elif no_results %}
<p>No results found. Please edit your search and try again.</p>
{% endif %}
{% if ctx.pages and not no_results %}
Expand Down
33 changes: 21 additions & 12 deletions hepdata/modules/search/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,21 +244,29 @@ def search():
sort_order=query_params['sorting_order'],
offset=query_params['offset'])

total_pages = calculate_total_pages(query_result, query_params['size'])

if query_params['current_page'] > total_pages:
query_params['current_page'] = total_pages

facets = filter_facets(query_result['facets'], query_result['total'])
facets = sort_facets(facets)

year_facet = process_year_facet(request, facets)

if ('format' in request.args and request.args['format'] == 'json') \
or 'json' in request.headers.get('accept', ''):
or 'json' in request.headers.get('accept', ''):
query_result['hits'] = {'total': query_result['total']}
return jsonify(query_result)

if 'error' in query_result:
ctx = {
'q': query_params['q'],
'error': query_result['error'],
'results': [],
'filters': {}
}
else:
total_pages = calculate_total_pages(query_result, query_params['size'])

if query_params['current_page'] > total_pages:
query_params['current_page'] = total_pages

facets = filter_facets(query_result['facets'], query_result['total'])
facets = sort_facets(facets)

year_facet = process_year_facet(request, facets)

ctx = {
'results': query_result['results'],
'total_hits': query_result['total'],
Expand All @@ -270,6 +278,7 @@ def search():
'total': total_pages,
'endpoint': '.search'},
'filters': dict(query_params['filters']),
'error': None
}

if query_params['min_date'] is not sys.maxsize:
Expand All @@ -278,7 +287,7 @@ def search():

ctx['modify_query'] = modify_query

return render_template('hepdata_search/search_results.html', ctx=ctx)
return render_template('hepdata_search/search_results.html', ctx=ctx)


@blueprint.route('/ids', methods=['GET'])
Expand Down
24 changes: 22 additions & 2 deletions tests/search_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,18 @@ def test_query_parser():
assert (parsed_query_string3 == "data_keywords.observables:ASYM "
"AND unknown_field:hello")

_test_query4 = 'reactions:P P --> LQ LQ X AND doi:10.1007/s100520000432'
parsed_query_string4 = HEPDataQueryParser.parse_query(_test_query4)

assert (parsed_query_string4 == 'data_keywords.reactions:"P P --> LQ LQ X"'
' AND doi:"10.1007/s100520000432"')

_test_query5 = 'P P --> LQ LQ X'
parsed_query_string5 = HEPDataQueryParser.parse_query(_test_query5)

assert (parsed_query_string5 == '"P P --> LQ LQ X"')



def test_search(app, load_default_data, identifiers):
"""
Expand Down Expand Up @@ -184,6 +196,14 @@ def test_search(app, load_default_data, identifiers):
for author in expected:
assert(author in results)

# Test a search query that ES can't parse
results = es_api.search('/', index=index)
assert results == {'error': 'Failed to parse query [/]'}

# Test a search query to an invalid index
results = es_api.search('hello', index='thisisnotanindex')
assert results == {'error': 'An unexpected error occurred: index_not_found_exception'}


def test_merge_results():
pub_result = {
Expand Down Expand Up @@ -376,8 +396,8 @@ def test_reindex_all(app, load_default_data, identifiers):
es.indices.delete(index=index)

# Check we can't search
with pytest.raises(NotFoundError, match=r"no such index "):
es_api.search('', index=index)
results = es_api.search('', index=index)
assert results == {'error': 'An unexpected error occurred: index_not_found_exception'}

# Reindex, recreating the index
es_api.reindex_all(index=index, recreate=True, synchronous=True)
Expand Down

0 comments on commit b9ba176

Please sign in to comment.