diff --git a/.circleci/config.yml b/.circleci/config.yml index 944ab45935..9a16be9d56 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -13,7 +13,7 @@ executors: - JAVA_HOME: /usr/lib/jvm/java-11-openjdk-amd64 - ES_JAVA_OPTS: -Xms2g -Xmx2g - ES_BIN: /usr/share/elasticsearch/bin - - ES_MAJOR_VERSION: 5 + - ES_MAJOR_VERSION: 7 - PG_VERSION: 10 - NODE_VERSION: 10 @@ -48,7 +48,7 @@ commands: postgresql-${PG_VERSION} \ ruby2.3 \ ruby2.3-dev - sudo chown -R circleci /etc/elasticsearch + sudo chown -R circleci /var/log/elasticsearch/ /var/lib/elasticsearch /etc/default/elasticsearch /etc/elasticsearch sed -i "1s;^;export PATH=${ES_BIN}:${PG_BIN}:$PATH\n;" $BASH_ENV sudo apt-get install -y python3.7-dev python3-pip sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.7 0 diff --git a/base.ini b/base.ini index aad6e6f68b..6dfc87a2c7 100644 --- a/base.ini +++ b/base.ini @@ -53,14 +53,6 @@ set queue_worker_processes = 16 set queue_worker_chunk_size = 1024 set queue_worker_batch_size = 2000000 -[composite:regionindexer] -use = egg:snovault#indexer -app = app -path = /index_file -timeout = 60 -set embed_cache.capacity = 5000 -set regionindexer = true - [filter:memlimit] use = egg:snovault#memlimit rss_limit = 500MB diff --git a/buildout.cfg b/buildout.cfg index 86254660c2..99306c27d7 100644 --- a/buildout.cfg +++ b/buildout.cfg @@ -44,7 +44,7 @@ pytest-bdd = git https://github.com/lrowe/pytest-bdd.git branch=allow-any-step-o [versions] # Hand set versions -elasticsearch = 5.4.0 +elasticsearch = 7.9.1 [snovault] recipe = zc.recipe.egg diff --git a/development.ini b/development.ini index f30543ec58..5858a2cefd 100644 --- a/development.ini +++ b/development.ini @@ -37,9 +37,6 @@ use = egg:rutter#urlmap [composite:indexer] use = config:base.ini#indexer -[composite:regionindexer] -use = config:base.ini#regionindexer - ### # wsgi server configuration ### diff --git a/production.ini.in b/production.ini.in index 7f43a07ff0..e2e8255c50 100644 --- a/production.ini.in +++ b/production.ini.in @@ -10,9 +10,6 @@ indexer.processes = ${indexer_processes} [composite:indexer] use = config:base.ini#indexer -[composite:regionindexer] -use = config:base.ini#regionindexer - [pipeline:main] pipeline = config:base.ini#memlimit diff --git a/scripts/embeds.py b/scripts/embeds.py index 42e9e74e94..6248a7a82d 100644 --- a/scripts/embeds.py +++ b/scripts/embeds.py @@ -22,7 +22,7 @@ def embeds_uuid(es, uuid, item_type): return { 'uuid': uuid, 'item_type': item_type, - 'embeds': res['hits']['total'], + 'embeds': res['hits']['total']['value'], 'buckets': res['aggregations']['item_type']['buckets'], } diff --git a/setup.py b/setup.py index 03c8aeebf7..edca5dea40 100644 --- a/setup.py +++ b/setup.py @@ -16,8 +16,8 @@ 'botocore', 'jmespath', 'boto3', - 'elasticsearch>=5.2', - 'elasticsearch-dsl==5.4.0', + 'elasticsearch==7.9.1', + 'elasticsearch-dsl==7.2.1', 'lucenequery', 'future', 'humanfriendly', diff --git a/src/snovault/elasticsearch/create_mapping.py b/src/snovault/elasticsearch/create_mapping.py index 8500f21ca6..9c5ea6f762 100644 --- a/src/snovault/elasticsearch/create_mapping.py +++ b/src/snovault/elasticsearch/create_mapping.py @@ -31,10 +31,6 @@ # An index to store non-content metadata META_MAPPING = { - '_all': { - 'enabled': False, - 'analyzer': 'snovault_search_analyzer' - }, 'dynamic_templates': [ { 'store_generic': { @@ -52,9 +48,6 @@ PATH_FIELDS = ['submitted_file_name'] NON_SUBSTRING_FIELDS = ['uuid', '@id', 'submitted_by', 'md5sum', 'references', 'submitted_file_name'] -KEYWORD_FIELDS = ['schema_version', 'uuid', 'accession', 'alternate_accessions', - 'aliases', 'status', 'date_created', 'submitted_by', - 'internal_status', 'target', 'biosample_type'] TEXT_FIELDS = ['pipeline_error_detail', 'description', 'notes'] @@ -66,7 +59,7 @@ def sorted_dict(d): return json.loads(json.dumps(d), object_pairs_hook=sorted_pairs_hook) -def schema_mapping(name, schema): +def schema_mapping(name, schema, depth=0, parent=None): # If a mapping is explicitly defined, use it if 'mapping' in schema: return schema['mapping'] @@ -80,19 +73,27 @@ def schema_mapping(name, schema): # Elasticsearch handles multiple values for a field if type_ == 'array': - return schema_mapping(name, schema['items']) + return schema_mapping(name, + schema['items'], + depth=depth+1, + parent=type_) if type_ == 'object': properties = {} for k, v in schema.get('properties', {}).items(): - mapping = schema_mapping(k, v) + mapping = schema_mapping(k, v, depth=depth+1) if mapping is not None: properties[k] = mapping - return { - 'type': 'object', - 'include_in_all': False, - 'properties': properties, - } + if properties: + return { + 'type': 'object', + 'properties': properties, + } + else: + return { + 'type': 'object', + 'enabled': False + } if type_ == ["number", "string"]: return { @@ -122,9 +123,7 @@ def schema_mapping(name, schema): if type_ == 'string': - if name in KEYWORD_FIELDS: - field_type = 'keyword' - elif name in TEXT_FIELDS: + if name in TEXT_FIELDS: field_type = 'text' else: field_type = 'keyword' @@ -133,10 +132,12 @@ def schema_mapping(name, schema): 'type': field_type } - # these fields are unintentially partially matching some small search - # keywords because fields are analyzed by nGram analyzer - if name in NON_SUBSTRING_FIELDS: - sub_mapping['include_in_all'] = False + if name not in NON_SUBSTRING_FIELDS: + if depth == 1 or (depth == 2 and parent == 'array'): + sub_mapping.update({ + 'copy_to': '_all' + }) + return sub_mapping if type_ == 'number': @@ -167,12 +168,13 @@ def index_settings(): 'settings': { 'index.max_result_window': 99999, 'index.mapping.total_fields.limit': 5000, - 'index.number_of_shards': 5, - 'index.number_of_replicas': 2, + 'index.number_of_shards': 1, + 'index.number_of_replicas': 0, + 'index.max_ngram_diff': 32, 'analysis': { 'filter': { 'substring': { - 'type': 'nGram', + 'type': 'ngram', 'min_gram': 1, 'max_gram': 33 }, @@ -273,10 +275,6 @@ def audit_mapping(): def es_mapping(mapping): return { - '_all': { - 'enabled': True, - 'analyzer': 'snovault_search_analyzer' - }, 'dynamic_templates': [ { 'template_principals_allowed': { @@ -325,16 +323,19 @@ def es_mapping(mapping): } }, }, - } + }, ], 'properties': { + '_all': { + 'type': 'text', + 'store': False, + 'analyzer': 'snovault_search_analyzer' + }, 'uuid': { 'type': 'keyword', - 'include_in_all': False, }, 'tid': { 'type': 'keyword', - 'include_in_all': False, }, 'item_type': { 'type': 'keyword', @@ -343,33 +344,26 @@ def es_mapping(mapping): 'object': { 'type': 'object', 'enabled': False, - 'include_in_all': False, }, 'properties': { 'type': 'object', 'enabled': False, - 'include_in_all': False, }, 'propsheets': { 'type': 'object', 'enabled': False, - 'include_in_all': False, }, 'embedded_uuids': { 'type': 'keyword', - 'include_in_all': False, }, 'linked_uuids': { 'type': 'keyword', - 'include_in_all': False, }, 'paths': { 'type': 'keyword', - 'include_in_all': False, }, 'audit': { 'type': 'object', - 'include_in_all': False, 'properties': { 'ERROR': { 'type': 'object', @@ -447,7 +441,7 @@ def type_mapping(types, item_type, embed=True): # Check if mapping for property is already an object # multiple subobjects may be embedded, so be carful here if m['properties'][p]['type'] in ['keyword', 'text']: - m['properties'][p] = schema_mapping(p, s) + m['properties'][p] = schema_mapping(p, s, depth=1) m = m['properties'][p] @@ -465,10 +459,7 @@ def type_mapping(types, item_type, embed=True): for prop in props: new_mapping = new_mapping[prop]['properties'] new_mapping[last]['boost'] = boost - if last in NON_SUBSTRING_FIELDS: - new_mapping[last]['include_in_all'] = False - else: - new_mapping[last]['include_in_all'] = True + new_mapping[last]['copy_to'] = '_all' return mapping @@ -476,8 +467,8 @@ def create_elasticsearch_index(es, index, body): es.indices.create(index=index, body=body, wait_for_active_shards=1, ignore=[400, 404], master_timeout='5m', request_timeout=300) -def set_index_mapping(es, index, doc_type, mapping): - es.indices.put_mapping(index=index, doc_type=doc_type, body=mapping, ignore=[400], request_timeout=300) +def set_index_mapping(es, index, mapping): + es.indices.put_mapping(index=index, body=mapping, ignore=[400], request_timeout=300) def create_snovault_index_alias(es, indices): @@ -497,20 +488,19 @@ def run(app, collections=None, dry_run=False): indices = [] for collection_name in collections: if collection_name == 'meta': - doc_type = 'meta' mapping = META_MAPPING else: - index = doc_type = collection_name + index = collection_name collection = registry[COLLECTIONS].by_item_type[collection_name] mapping = es_mapping(type_mapping(registry[TYPES], collection.type_info.item_type)) if mapping is None: continue # Testing collections if dry_run: - print(json.dumps(sorted_dict({index: {doc_type: mapping}}), indent=4)) + print(json.dumps(sorted_dict({index: {collection_name: mapping}}), indent=4)) continue create_elasticsearch_index(es, index, index_settings()) - set_index_mapping(es, index, doc_type, {doc_type: mapping}) + set_index_mapping(es, index, mapping) if collection_name != 'meta': indices.append(index) diff --git a/src/snovault/elasticsearch/esstorage.py b/src/snovault/elasticsearch/esstorage.py index d3dfd0b730..9227bf56d5 100644 --- a/src/snovault/elasticsearch/esstorage.py +++ b/src/snovault/elasticsearch/esstorage.py @@ -151,7 +151,7 @@ def get_by_uuid(self, uuid): 'version': True } result = self.es.search(index=self.index, body=query, _source=True, size=1) - if result['hits']['total'] == 0: + if result['hits']['total']['value'] == 0: return None hit = result['hits']['hits'][0] return CachedModel(hit) diff --git a/src/snovault/elasticsearch/indexer.py b/src/snovault/elasticsearch/indexer.py index 2fb9aab14c..c966e5a676 100644 --- a/src/snovault/elasticsearch/indexer.py +++ b/src/snovault/elasticsearch/indexer.py @@ -135,13 +135,11 @@ def get_related_uuids(request, es, updated, renamed): { 'terms': { 'embedded_uuids': updated, - '_cache': False, }, }, { 'terms': { 'linked_uuids': renamed, - '_cache': False, }, }, ], @@ -151,7 +149,7 @@ def get_related_uuids(request, es, updated, renamed): } res = es.search(index=RESOURCES_INDEX, size=SEARCH_MAX, request_timeout=60, body=query) - if res['hits']['total'] > SEARCH_MAX: + if res['hits']['total']['value'] > SEARCH_MAX: return (list(all_uuids(request.registry)), True) # guaranteed unique related_set = {hit['_id'] for hit in res['hits']['hits']} @@ -387,7 +385,6 @@ def _load_indexing(request, session, connection, indexer_state): else: status = request.registry[ELASTIC_SEARCH].get( index=request.registry.settings['snovault.elasticsearch.index'], - doc_type='meta', id='indexing', ignore=[400, 404] ) @@ -512,7 +509,6 @@ def _run_indexing( try: request.registry[ELASTIC_SEARCH].index( index=request.registry.settings['snovault.elasticsearch.index'], - doc_type='meta', body=result, id='indexing' ) @@ -521,7 +517,6 @@ def _run_indexing( del result['errors'] request.registry[ELASTIC_SEARCH].index( index=request.registry.settings['snovault.elasticsearch.index'], - doc_type='meta', body=result, id='indexing' ) @@ -535,7 +530,7 @@ def _run_indexing( request.registry[ELASTIC_SEARCH].indices.refresh(RESOURCES_INDEX) if flush: try: - request.registry[ELASTIC_SEARCH].indices.flush_synced(index=RESOURCES_INDEX) # Faster recovery on ES restart + request.registry[ELASTIC_SEARCH].indices.flush(index=RESOURCES_INDEX) # Faster recovery on ES restart except ConflictError: pass return result, indexing_update_infos @@ -983,9 +978,9 @@ def update_object(encoded_es, request, uuid, xmin, restart=False): } try: encoded_es.index( - index=doc['item_type'], doc_type=doc['item_type'], body=doc, - id=str(uuid), version=xmin, version_type='external_gte', - request_timeout=30, + index=doc['item_type'], body=doc, + id=str(uuid), version=xmin, + version_type='external_gte', request_timeout=30 ) except StatementError: # Can't reconnect until invalid transaction is rolled back diff --git a/src/snovault/elasticsearch/indexer_state.py b/src/snovault/elasticsearch/indexer_state.py index 09925bd4e7..e4157eb2b4 100644 --- a/src/snovault/elasticsearch/indexer_state.py +++ b/src/snovault/elasticsearch/indexer_state.py @@ -209,22 +209,22 @@ def log_reindex_init_state(self): log.info('%s is initially indexing', self.title) # Private-ish primitives... - def get_obj(self, id, doc_type='meta'): + def get_obj(self, id): try: - return self.es.get(index=self.index, doc_type=doc_type, id=id).get('_source',{}) # TODO: snovault/meta + return self.es.get(index=self.index, id=id).get('_source',{}) # TODO: snovault/meta except: return {} - def put_obj(self, id, obj, doc_type='meta'): + def put_obj(self, id, obj): try: - self.es.index(index=self.index, doc_type=doc_type, id=id, body=obj) + self.es.index(index=self.index, id=id, body=obj) except: log.warn("Failed to save to es: " + id, exc_info=True) - def delete_objs(self, ids, doc_type='meta'): + def delete_objs(self, ids): for id in ids: try: - self.es.delete(index=self.index, doc_type=doc_type, id=id) + self.es.delete(index=self.index, id=id) except: pass @@ -512,7 +512,7 @@ def set_notices(self, from_host, who=None, bot_token=None, which=None): else: return "ERROR: unknown indexer to monitor: %s" % (which) - notify = self.get_obj('notify', 'default') + notify = self.get_obj('notify') if bot_token is not None: notify['bot_token'] = bot_token @@ -539,7 +539,7 @@ def set_notices(self, from_host, who=None, bot_token=None, which=None): notify[which] = indexer_notices # either self.state_id: {who: [...]} or 'all_indexers': {'indexers': [...], 'who': [...]} - self.put_obj('notify', notify, 'default') + self.put_obj('notify', notify) if user_warns != '': user_warns = 'Unknown users: ' + user_warns[2:] if 'bot_token' not in notify: @@ -551,7 +551,7 @@ def set_notices(self, from_host, who=None, bot_token=None, which=None): def get_notices(self, full=False): '''Get the notifications''' - notify = self.get_obj('notify','default') + notify = self.get_obj('notify') if full: return notify notify.pop('bot_token', None) @@ -577,7 +577,7 @@ def get_notices(self, full=False): def send_notices(self): '''Sends notifications when indexer is done.''' # https://slack.com/api/chat.postMessage?token=xoxb-1974789...&channel=U1KPQK1HN&text=Yay! - notify = self.get_obj('notify','default') + notify = self.get_obj('notify') if not notify: return if 'bot_token' not in notify or 'from' not in notify: @@ -636,7 +636,7 @@ def send_notices(self): log.warn("Failed to notify via slack: [%s]" % (msg)) if changed: # alter notify even if error, so the same error doesn't flood log. - self.put_obj('notify', notify, 'default') + self.put_obj('notify', notify) def display(self, uuids=None): display = {} diff --git a/src/snovault/elasticsearch/searches/fields.py b/src/snovault/elasticsearch/searches/fields.py index 955a5526c6..6f2e463463 100644 --- a/src/snovault/elasticsearch/searches/fields.py +++ b/src/snovault/elasticsearch/searches/fields.py @@ -141,7 +141,7 @@ def _format_results(self): { GRAPH: self.results.to_graph(), FACETS: self.results.to_facets(), - TOTAL: self.results.results.hits.total + TOTAL: self.results.results.hits.total.value } ) @@ -239,7 +239,7 @@ def _format_results(self): { FACETS: self.results.to_facets(), MATRIX: self.results.to_matrix(), - TOTAL: self.results.results.hits.total + TOTAL: self.results.results.hits.total.value } ) diff --git a/src/snovault/elasticsearch/searches/queries.py b/src/snovault/elasticsearch/searches/queries.py index 4c93fc436e..c38ad84ee3 100644 --- a/src/snovault/elasticsearch/searches/queries.py +++ b/src/snovault/elasticsearch/searches/queries.py @@ -850,6 +850,11 @@ def add_sort(self): *sort_by ) + def add_exact_counting(self): + self.search = self._get_or_create_search().extra( + track_total_hits=True + ) + def build_query(self): ''' Public method to be implemented by children. @@ -869,6 +874,7 @@ def build_query(self): self.add_filters() self.add_post_filters() self.add_source() + self.add_exact_counting() self.add_slice() return self.search @@ -1051,6 +1057,7 @@ def build_query(self): self.add_query_string_query() self.add_filters() self.add_post_filters() + self.add_exact_counting() self.add_slice() self.add_aggregations_and_aggregation_filters() self.add_matrix_aggregations() diff --git a/src/snovault/elasticsearch/tests/test_indexer_simple.py b/src/snovault/elasticsearch/tests/test_indexer_simple.py index ab7103e2c4..fd7e2a3a40 100644 --- a/src/snovault/elasticsearch/tests/test_indexer_simple.py +++ b/src/snovault/elasticsearch/tests/test_indexer_simple.py @@ -48,7 +48,6 @@ class MockES(object): # pylint: disable=too-few-public-methods @staticmethod def index( index=None, - doc_type=None, body=None, id=None, version=None, diff --git a/src/snovault/tests/elasticsearch_fixture.py b/src/snovault/tests/elasticsearch_fixture.py index 3994e099e6..52692ff082 100644 --- a/src/snovault/tests/elasticsearch_fixture.py +++ b/src/snovault/tests/elasticsearch_fixture.py @@ -15,13 +15,6 @@ def server_process(datadir, host='127.0.0.1', port=9201, prefix='', echo=False): '-Epath.data=%s' % os.path.join(datadir, 'data'), '-Epath.logs=%s' % os.path.join(datadir, 'logs'), ] - if os.environ.get('TRAVIS'): - print('IN TRAVIS') - echo = True - args.append('-Epath.conf=%s/conf' % os.environ['TRAVIS_BUILD_DIR']) - elif os.path.exists('/etc/elasticsearch'): - print('NOT IN TRAVIS') - args.append('-Epath.conf=./conf') print(args) process = subprocess.Popen( args, diff --git a/src/snovault/tests/serverfixtures.py b/src/snovault/tests/serverfixtures.py index 05f064d8fc..c1f0235991 100644 --- a/src/snovault/tests/serverfixtures.py +++ b/src/snovault/tests/serverfixtures.py @@ -70,8 +70,7 @@ def elasticsearch_server(request, elasticsearch_host_port): host, port = elasticsearch_host_port tmpdir = request.config._tmpdirhandler.mktemp('elasticsearch', numbered=True) tmpdir = str(tmpdir) - process = server_process(str(tmpdir), host=host, port=9201, echo=True) - print('PORT CHANGED') + process = server_process(str(tmpdir), host=host, port=9201, echo=False) yield 'http://%s:%d' % (host, 9201) if 'process' in locals() and process.poll() is None: diff --git a/src/snovault/tests/test_searches_fields.py b/src/snovault/tests/test_searches_fields.py index 511ca9fa92..075ff6bfe4 100644 --- a/src/snovault/tests/test_searches_fields.py +++ b/src/snovault/tests/test_searches_fields.py @@ -559,7 +559,6 @@ def test_searches_fields_debug_query_response_field(dummy_parent, mocker): from snovault.elasticsearch.searches.fields import DebugQueryResponseField dbr = DebugQueryResponseField() r = dbr.render(parent=dummy_parent) - assert 'query' in r['debug']['raw_query'] assert 'post_filter' in r['debug']['raw_query'] diff --git a/src/snovault/tests/test_searches_queries.py b/src/snovault/tests/test_searches_queries.py index c07b4f626c..747709af44 100644 --- a/src/snovault/tests/test_searches_queries.py +++ b/src/snovault/tests/test_searches_queries.py @@ -1474,7 +1474,7 @@ def test_searches_queries_abstract_query_factory_make_bool_filter_and_query_cont } ] } - } + }, } fa = aq._make_filter_aggregation( filter_context=aq._make_must_equal_terms_query( @@ -2240,7 +2240,7 @@ def test_searches_queries_abstract_query_factory_add_must_equal_terms_filter(par } ] } - } + }, } @@ -2252,9 +2252,6 @@ def test_searches_queries_abstract_query_factory_add_must_equal_terms_post_filte terms=['released', 'archived'] ) assert aq.search.to_dict() == { - 'query': { - 'match_all': {} - }, 'post_filter': { 'terms': {'status': ['released', 'archived']}} } @@ -2268,9 +2265,6 @@ def test_searches_queries_abstract_query_factory_add_must_not_equal_terms_post_f terms=['released', 'archived'] ) assert aq.search.to_dict() == { - 'query': { - 'match_all': {} - }, 'post_filter': { 'bool': { 'filter': [ @@ -2307,7 +2301,7 @@ def test_searches_queries_abstract_query_factory_add_must_not_equal_terms_filter } ] } - } + }, } @@ -2322,7 +2316,7 @@ def test_searches_queries_abstract_query_factory_add_field_must_exist_filter(par 'bool': { 'filter': [{'exists': {'field': 'embedded.status'}}] } - } + }, } @@ -2339,9 +2333,6 @@ def test_searches_queries_abstract_query_factory_add_field_must_exist_post_filte {'exists': {'field': 'embedded.status'}}] } }, - 'query': { - 'match_all': {} - } } @@ -2362,7 +2353,7 @@ def test_searches_queries_abstract_query_factory_add_field_must_exist_filter_mul {'exists': {'field': 'embedded.lab'}} ] } - } + }, } @@ -2377,7 +2368,7 @@ def test_searches_queries_abstract_query_factory_add_field_must_not_exist_filter 'bool': { 'filter': [{'bool': {'must_not': [{'exists': {'field': 'embedded.file_size'}}]}}] } - } + }, } @@ -2388,9 +2379,6 @@ def test_searches_queries_abstract_query_factory_add_field_must_not_exist_post_f 'embedded.file_size' ) assert aq.search.to_dict() == { - 'query': { - 'match_all': {} - }, 'post_filter': { 'bool': { 'filter': [ @@ -2417,7 +2405,7 @@ def test_searches_queries_abstract_query_factory_add_field_must_and_must_not_exi {'bool': {'must_not': [{'exists': {'field': 'embedded.file_size'}}]}} ] } - } + }, } @@ -2434,7 +2422,6 @@ def test_searches_queries_abstract_query_factory_add_terms_aggregation(params_pa } } }, - 'query': {'match_all': {}} } @@ -2524,9 +2511,6 @@ def test_searches_queries_abstract_query_factory_add_must_equal_terms_post_filte terms=['released', 'archived'] ) assert aq.search.to_dict() == { - 'query': { - 'match_all': {} - }, 'post_filter': { 'terms': {'status': ['released', 'archived']}} } @@ -2542,9 +2526,6 @@ def test_searches_queries_abstract_query_factory_add_must_not_equal_terms_post_f terms=['released', 'archived'] ) assert aq.search.to_dict() == { - 'query': { - 'match_all': {} - }, 'post_filter': { 'bool': { 'filter': [ @@ -2583,7 +2564,7 @@ def test_searches_queries_abstract_query_factory_add_must_not_equal_terms_filter } ] } - } + }, } @@ -2600,7 +2581,7 @@ def test_searches_queries_abstract_query_factory_add_field_must_exist_filter(par 'bool': { 'filter': [{'exists': {'field': 'embedded.status'}}] } - } + }, } @@ -2619,9 +2600,6 @@ def test_searches_queries_abstract_query_factory_add_field_must_exist_post_filte {'exists': {'field': 'embedded.status'}}] } }, - 'query': { - 'match_all': {} - } } @@ -2644,7 +2622,7 @@ def test_searches_queries_abstract_query_factory_add_field_must_exist_filter_mul {'exists': {'field': 'embedded.lab'}} ] } - } + }, } @@ -2661,7 +2639,7 @@ def test_searches_queries_abstract_query_factory_add_field_must_not_exist_filter 'bool': { 'filter': [{'bool': {'must_not': [{'exists': {'field': 'embedded.file_size'}}]}}] } - } + }, } @@ -2674,9 +2652,6 @@ def test_searches_queries_abstract_query_factory_add_field_must_not_exist_post_f 'embedded.file_size' ) assert aq.search.to_dict() == { - 'query': { - 'match_all': {} - }, 'post_filter': { 'bool': { 'filter': [ @@ -2705,7 +2680,7 @@ def test_searches_queries_abstract_query_factory_add_field_must_and_must_not_exi {'bool': {'must_not': [{'exists': {'field': 'embedded.file_size'}}]}} ] } - } + }, } @@ -2724,7 +2699,6 @@ def test_searches_queries_abstract_query_factory_add_terms_aggregation(params_pa } } }, - 'query': {'match_all': {}} } @@ -2744,7 +2718,6 @@ def test_searches_queries_abstract_query_factory_add_terms_aggregation_with_excl } } }, - 'query': {'match_all': {}} } @@ -2779,7 +2752,6 @@ def test_searches_queries_abstract_query_factory_add_exists_aggregation(params_p } } }, - 'query': {'match_all': {}} } @@ -2803,7 +2775,7 @@ def test_searches_queries_abstract_query_factory_add_filters(params_parser, mock } ] } - } + }, } @@ -2928,6 +2900,12 @@ def test_searches_queries_abstract_query_factory_add_source_object(dummy_request assert all([e in actual for e in expected]) assert len(expected) == len(actual) +def test_searches_queries_abstract_query_factory_add_exact_counting(params_parser, mocker): + from snovault.elasticsearch.searches.queries import AbstractQueryFactory + aq = AbstractQueryFactory(params_parser) + mocker.patch.object(AbstractQueryFactory, '_get_index') + aq.add_exact_counting() + assert aq.search.to_dict().get('track_total_hits', False) def test_searches_queries_abstract_query_factory_add_slice(params_parser, dummy_request, mocker): from snovault.elasticsearch.searches.queries import AbstractQueryFactory @@ -2936,35 +2914,35 @@ def test_searches_queries_abstract_query_factory_add_slice(params_parser, dummy_ AbstractQueryFactory._get_index.return_value = 'snovault-resources' aq = AbstractQueryFactory(params_parser) aq.add_slice() - assert aq.search.to_dict() == {'from': 0, 'size': 10, 'query': {'match_all': {}}} + assert aq.search.to_dict() == {'from': 0, 'size': 10} dummy_request.environ['QUERY_STRING'] = ( 'searchTerm=chip-seq&type=TestingSearchSchema&frame=object&limit=all' ) params_parser = ParamsParser(dummy_request) aq = AbstractQueryFactory(params_parser) aq.add_slice() - assert aq.search.to_dict() == {'from': 0, 'size': 25, 'query': {'match_all': {}}} + assert aq.search.to_dict() == {'from': 0, 'size': 25} dummy_request.environ['QUERY_STRING'] = ( 'searchTerm=chip-seq&type=TestingSearchSchema&frame=object&limit=3000' ) params_parser = ParamsParser(dummy_request) aq = AbstractQueryFactory(params_parser) aq.add_slice() - assert aq.search.to_dict() == {'from': 0, 'size': 3000, 'query': {'match_all': {}}} + assert aq.search.to_dict() == {'from': 0, 'size': 3000} dummy_request.environ['QUERY_STRING'] = ( 'searchTerm=chip-seq&type=TestingSearchSchema&frame=object&limit=blah' ) params_parser = ParamsParser(dummy_request) aq = AbstractQueryFactory(params_parser) aq.add_slice() - assert aq.search.to_dict() == {'from': 0, 'size': 25, 'query': {'match_all': {}}} + assert aq.search.to_dict() == {'from': 0, 'size': 25} dummy_request.environ['QUERY_STRING'] = ( 'searchTerm=chip-seq&type=TestingSearchSchema&frame=object&limit=10000' ) params_parser = ParamsParser(dummy_request) aq = AbstractQueryFactory(params_parser) aq.add_slice() - assert aq.search.to_dict() == {'from': 0, 'size': 25, 'query': {'match_all': {}}} + assert aq.search.to_dict() == {'from': 0, 'size': 25} def test_searches_queries_abstract_query_factory_subaggregation_factory(params_parser_snovault_types): @@ -2986,9 +2964,6 @@ def test_searches_queries_abstract_query_factory_add_aggregations_and_aggregatio aq = AbstractQueryFactory(params_parser_snovault_types) aq.add_aggregations_and_aggregation_filters() expected = { - 'query': { - 'match_all': {} - }, 'aggs': { 'Audit category: WARNING': { 'aggs': { @@ -3790,9 +3765,6 @@ def test_searches_queries_basic_matrix_query_factory_with_facets_add_matrix_aggr } } }, - 'query': { - 'match_all': {} - } } actual = bmqf.search.to_dict() assert all([e in actual for e in expected]) @@ -4008,6 +3980,7 @@ def test_searches_queries_basic_matrix_query_factory_with_facets_build_query(par ] } }, + 'track_total_hits': True, 'size': 0 } actual = bmqf.search.to_dict() @@ -4187,9 +4160,6 @@ def test_searches_queries_missing_matrix_query_factory_with_facets_add_matrix_ag } } }, - 'query': { - 'match_all': {} - } } actual = mmqf.search.to_dict() assert all([e in actual for e in expected]) @@ -4298,9 +4268,6 @@ def test_searches_queries_missing_matrix_query_factory_with_facets_add_matrix_ag } } }, - 'query': { - 'match_all': {} - } } actual = mmqf.search.to_dict() assert all([e in actual for e in expected]) diff --git a/src/snowflakes/tests/test_searchv2.py b/src/snowflakes/tests/test_searchv2.py index eabe58ec4a..84ee94e572 100644 --- a/src/snowflakes/tests/test_searchv2.py +++ b/src/snowflakes/tests/test_searchv2.py @@ -413,7 +413,7 @@ def test_reportv2_view_values_no_type(workbook, testapp): def test_matrixv2_raw_view_raw_response(workbook, testapp): r = testapp.get('/matrixv2_raw/?type=Snowball') assert 'hits' in r.json - assert r.json['hits']['total'] >= 22 + assert r.json['hits']['total']['value'] >= 22 assert len(r.json['hits']['hits']) == 0 assert 'aggregations' in r.json assert 'x' in r.json['aggregations']