diff --git a/share/apps.py b/share/apps.py index 98e98fc3f..dbf4e2f76 100644 --- a/share/apps.py +++ b/share/apps.py @@ -1,6 +1,6 @@ from django.apps import AppConfig from django.db.models.signals import post_migrate -from share.signals import post_migrate_load_sources +from share.signals import ensure_latest_elastic_mappings, post_migrate_load_sources class ShareConfig(AppConfig): @@ -8,3 +8,4 @@ class ShareConfig(AppConfig): def ready(self): post_migrate.connect(post_migrate_load_sources, sender=self) + post_migrate.connect(ensure_latest_elastic_mappings, sender=self) diff --git a/share/bin/search.py b/share/bin/search.py index 754692276..e0f7db2c8 100644 --- a/share/bin/search.py +++ b/share/bin/search.py @@ -61,6 +61,14 @@ def setup(args, argv): elastic_manager.update_primary_alias(primary_index) +@search.subcommand('Update mappings for an existing index') +def update_mappings(args, argv): + """ + Usage: {0} search update_mappings + """ + ElasticManager().update_mappings(args['']) + + @search.subcommand('Set the "primary" index used to serve search results') def set_primary(args, argv): """ diff --git a/share/metadata_formats/sharev2_elastic.py b/share/metadata_formats/sharev2_elastic.py index 030ab611b..6f6902d5c 100644 --- a/share/metadata_formats/sharev2_elastic.py +++ b/share/metadata_formats/sharev2_elastic.py @@ -90,6 +90,7 @@ def format(self, normalized_datum): 'retracted': bool(central_work['withdrawn']), 'title': central_work['title'], 'withdrawn': central_work['withdrawn'], + 'open_practice_badges': central_work['open_practice_badges'], 'date': ( central_work['date_published'] diff --git a/share/schema/schema-spec.yaml b/share/schema/schema-spec.yaml index 04b29380a..7d76f49dc 100644 --- a/share/schema/schema-spec.yaml +++ b/share/schema/schema-spec.yaml @@ -45,6 +45,8 @@ data_type: boolean - name: justification data_type: string + - name: open_practice_badges + data_type: object - name: extra data_type: object relations: diff --git a/share/search/elastic_manager.py b/share/search/elastic_manager.py index fec529c7b..be96df23d 100644 --- a/share/search/elastic_manager.py +++ b/share/search/elastic_manager.py @@ -49,10 +49,18 @@ def create_index(self, index_name): body={'settings': index_setup.index_settings}, ) + logger.info('Putting Elasticsearch mappings') + self.update_mappings(index_name) + + self.es_client.indices.refresh(index_name) + logger.debug('Waiting for yellow status') self.es_client.cluster.health(wait_for_status='yellow') + logger.info('Finished setting up Elasticsearch index %s', index_name) + + def update_mappings(self, index_name): + index_setup = self.get_index_setup(index_name) - logger.info('Putting Elasticsearch mappings') for doc_type, mapping in index_setup.index_mappings.items(): logger.debug('Putting mapping for %s', doc_type) self.es_client.indices.put_mapping( @@ -61,10 +69,6 @@ def create_index(self, index_name): index=index_name, ) - self.es_client.indices.refresh(index_name) - - logger.info('Finished setting up Elasticsearch index %s', index_name) - def stream_actions(self, actions): stream = elastic_helpers.streaming_bulk( self.es_client, diff --git a/share/search/index_setup/postrend_backcompat.py b/share/search/index_setup/postrend_backcompat.py index f31dd9b1c..4c157b11d 100644 --- a/share/search/index_setup/postrend_backcompat.py +++ b/share/search/index_setup/postrend_backcompat.py @@ -102,6 +102,7 @@ def index_mappings(self): 'identifiers': {'type': 'text', 'fields': exact_field}, 'justification': {'type': 'text', 'include_in_all': False}, 'language': {'type': 'keyword', 'include_in_all': False}, + 'open_practice_badges': {'type': 'object', 'dynamic': True, 'include_in_all': False}, 'publishers': {'type': 'text', 'fields': exact_field}, 'registration_type': {'type': 'keyword', 'include_in_all': False}, 'retracted': {'type': 'boolean', 'include_in_all': False}, diff --git a/share/signals.py b/share/signals.py index afaae1e30..6f6eaaaf8 100644 --- a/share/signals.py +++ b/share/signals.py @@ -9,3 +9,11 @@ def post_migrate_load_sources(sender, **kwargs): except ProgrammingError: return management.call_command('loadsources') + + +def ensure_latest_elastic_mappings(sender, **kwargs): + from share.search.elastic_manager import ElasticManager + elastic_manager = ElasticManager() + + for index_name in elastic_manager.get_primary_indexes(): + elastic_manager.update_mappings(index_name) diff --git a/share/util/graph.py b/share/util/graph.py index 21c7094fb..3dc373328 100644 --- a/share/util/graph.py +++ b/share/util/graph.py @@ -81,7 +81,7 @@ def from_jsonld(cls, nodes): node_id = v elif k == '@type': node_type = v - elif isinstance(v, dict) and k != 'extra': + elif isinstance(v, dict) and set(v.keys()) == {'@id', '@type'}: graph.add_node(v['@id'], v['@type']) attrs[k] = v['@id'] elif isinstance(v, list): @@ -459,7 +459,7 @@ def __getitem__(self, key): If key is the name of incoming edges, return a list of MutableNodes those edges come from """ field = resolve_field(self.type, key) - if field and field.is_relation and field.name != 'extra': + if field and field.is_relation: if field.relation_shape == RelationShape.MANY_TO_ONE: return self.graph.resolve_named_out_edge(self.id, field.name) if field.relation_shape == RelationShape.ONE_TO_MANY: diff --git a/tests/share/bin/test_sharectl.py b/tests/share/bin/test_sharectl.py index 634fe70e5..3af98d4e4 100644 --- a/tests/share/bin/test_sharectl.py +++ b/tests/share/bin/test_sharectl.py @@ -66,6 +66,12 @@ def test_set_primary(self): run_sharectl('search', 'set_primary', 'blazblat') assert mock_elastic_manager.update_primary_alias.mock_calls == [mock.call('blazblat')] + def test_update_mappings(self): + mock_elastic_manager = mock.Mock() + with mock.patch('share.bin.search.ElasticManager', return_value=mock_elastic_manager): + run_sharectl('search', 'update_mappings', 'blazblat') + assert mock_elastic_manager.update_mappings.mock_calls == [mock.call('blazblat')] + def test_daemon(self, settings): expected_indexes = ['bliz', 'blaz', 'bluz'] settings.ELASTICSEARCH['ACTIVE_INDEXES'] = expected_indexes diff --git a/tests/share/metadata_formats/base.py b/tests/share/metadata_formats/base.py index 1938b8420..98b7e4385 100644 --- a/tests/share/metadata_formats/base.py +++ b/tests/share/metadata_formats/base.py @@ -323,6 +323,47 @@ }, }, }, + 'with-open-badges': { + 'suid_id': 99, + 'source_name': 'OsfProbably', + 'raw_datum_kwargs': { + 'date_created': dateutil.parser.isoparse('2017-04-07T21:09:05.023090+00:00'), + }, + 'normalized_datum_kwargs': { + 'created_at': dateutil.parser.isoparse('2017-04-07T21:09:05.023090+00:00'), + 'data': { + '@graph': [ + { + '@id': '_:p', + '@type': 'person', + 'name': 'Open McOperton', + }, + { + '@id': '_:c', + '@type': 'creator', + 'agent': {'@id': '_:p', '@type': 'person'}, + 'creative_work': {'@id': '_:w', '@type': 'creativework'}, + 'cited_as': 'Open McOperton', + 'order_cited': 0, + }, + { + '@id': '_:i', + '@type': 'workidentifier', + 'creative_work': {'@id': '_:w', '@type': 'creativework'}, + 'uri': 'https://example.com/open', + }, + { + '@id': '_:w', + '@type': 'creativework', + 'title': 'So open', + 'date_updated': '2017-03-31T05:39:48+00:00', + 'open_practice_badges': {'foo': True, 'bar': False}, + }, + ], + '@context': {} + }, + }, + }, } diff --git a/tests/share/metadata_formats/test_oai_dc_formatter.py b/tests/share/metadata_formats/test_oai_dc_formatter.py index 5f1bc9b5a..d22b51f46 100644 --- a/tests/share/metadata_formats/test_oai_dc_formatter.py +++ b/tests/share/metadata_formats/test_oai_dc_formatter.py @@ -81,4 +81,18 @@ def assert_formatter_outputs_equal(self, actual_output, expected_output): http://staging.osf.io/vroom/ ''', + 'with-open-badges': ''' + + So open + Open McOperton + 2017-03-31T05:39:48Z + creativework + https://example.com/open + + ''', } diff --git a/tests/share/metadata_formats/test_sharev2_elastic_formatter.py b/tests/share/metadata_formats/test_sharev2_elastic_formatter.py index 91ae75722..0cf82e92f 100644 --- a/tests/share/metadata_formats/test_sharev2_elastic_formatter.py +++ b/tests/share/metadata_formats/test_sharev2_elastic_formatter.py @@ -214,4 +214,47 @@ def assert_formatter_outputs_equal(self, actual_output, expected_output): 'publishers': [], }, }, + 'with-open-badges': { + 'affiliations': [], + 'contributors': ['Open McOperton'], + 'date': '2017-03-31T05:39:48+00:00', + 'date_created': '2017-04-07T21:09:05.023090+00:00', + 'date_modified': '2017-04-07T21:09:05.023090+00:00', + 'date_updated': '2017-03-31T05:39:48+00:00', + 'id': 'encoded-99', + 'identifiers': ['https://example.com/open'], + 'sources': ['OsfProbably'], + 'subject_synonyms': [], + 'subjects': [], + 'title': 'So open', + 'type': 'creative work', + 'types': ['creative work'], + 'retracted': False, + 'funders': [], + 'hosts': [], + 'publishers': [], + 'tags': [], + 'open_practice_badges': { + 'foo': True, + 'bar': False, + }, + 'lists': { + 'affiliations': [], + 'contributors': [ + { + 'cited_as': 'Open McOperton', + 'identifiers': [], + 'name': 'Open McOperton', + 'order_cited': 0, + 'relation': 'creator', + 'type': 'person', + 'types': ['person', 'agent'], + }, + ], + 'lineage': [], + 'funders': [], + 'hosts': [], + 'publishers': [], + }, + }, } diff --git a/tests/share/schema/test_schema.py b/tests/share/schema/test_schema.py index 05eb6cb7a..833d70eb7 100644 --- a/tests/share/schema/test_schema.py +++ b/tests/share/schema/test_schema.py @@ -45,6 +45,7 @@ 'tags', 'related_agents', 'related_works', + 'open_practice_badges', } AGENT_TYPES = {