From 7afc7da276c6f2ae294a6ddcc7f598ee91a730fc Mon Sep 17 00:00:00 2001 From: Abram Booth Date: Mon, 19 Feb 2018 09:49:57 -0500 Subject: [PATCH] Last bit of cleanup maybe (#748) --- api/normalizeddata/views.py | 8 ++++---- bots/elasticsearch/bot.py | 8 ++++---- osf_oauth2_adapter/provider.py | 1 + osf_oauth2_adapter/views.py | 1 + share/admin/jobs.py | 2 +- share/bin/__init__.py | 1 + share/bin/util.py | 1 + share/graphql/agent.py | 1 + share/migrations/0049_jobs.py | 10 +++++----- share/models/core.py | 1 - share/models/creative.py | 1 + share/models/fields.py | 1 + share/models/fuzzycount.py | 1 + share/models/jobs.py | 6 ++++-- share/tasks/jobs.py | 6 +++--- share/transformers/com_mendeley_data.py | 1 + share/util/__init__.py | 12 ++++++------ tests/api/test_generated_endpoints.py | 1 + tests/share/normalize/factories.py | 1 + whitepapers/Tables.md | 4 ++-- whitepapers/tasks/Ingest.md | 4 ++-- 21 files changed, 42 insertions(+), 30 deletions(-) diff --git a/api/normalizeddata/views.py b/api/normalizeddata/views.py index c5e6d8217..add059454 100644 --- a/api/normalizeddata/views.py +++ b/api/normalizeddata/views.py @@ -69,16 +69,16 @@ def create(self, request, *args, **kwargs): ingester = Ingester(serializer.validated_data['data']).as_user(request.user).ingest(apply_changes=False) ingester.job.reschedule(claim=True) - nm_instance = models.NormalizedData.objects.filter( + nd_id = models.NormalizedData.objects.filter( raw=ingester.raw, - ingest_job=ingester.job - ).order_by('-created_at').first() + ingest_jobs=ingester.job + ).order_by('-created_at').values_list('id', flat=True).first() async_result = ingest.delay(job_id=ingester.job.id) # TODO Use an actual serializer return Response({ - 'id': IDObfuscator.encode(nm_instance), + 'id': IDObfuscator.encode_id(nd_id, models.NormalizedData), 'type': 'NormalizedData', 'attributes': { 'task': async_result.id, diff --git a/bots/elasticsearch/bot.py b/bots/elasticsearch/bot.py index 3b0fc5d4f..ff043ae4e 100644 --- a/bots/elasticsearch/bot.py +++ b/bots/elasticsearch/bot.py @@ -20,12 +20,12 @@ def chunk(iterable, size): iterable = iter(iterable) try: while True: - l = [] + chunk = [] for _ in range(size): - l.append(next(iterable)) - yield l + chunk.append(next(iterable)) + yield chunk except StopIteration: - yield l + yield chunk class ElasticSearchBot: diff --git a/osf_oauth2_adapter/provider.py b/osf_oauth2_adapter/provider.py index 8f8feab74..4b1681c92 100644 --- a/osf_oauth2_adapter/provider.py +++ b/osf_oauth2_adapter/provider.py @@ -46,4 +46,5 @@ def extract_uid(self, data): def get_default_scope(self): return OsfOauth2AdapterConfig.default_scopes + provider_classes = [OSFProvider] diff --git a/osf_oauth2_adapter/views.py b/osf_oauth2_adapter/views.py index e1974d48c..654a9461f 100644 --- a/osf_oauth2_adapter/views.py +++ b/osf_oauth2_adapter/views.py @@ -63,6 +63,7 @@ def complete_login(self, request, app, access_token, **kwargs): extra_data.json() ) + oauth2_login = OAuth2LoginView.adapter_view(OSFOAuth2Adapter) oauth2_callback = OAuth2CallbackView.adapter_view(OSFOAuth2Adapter) diff --git a/share/admin/jobs.py b/share/admin/jobs.py index cbe1a54ba..8f851e8a6 100644 --- a/share/admin/jobs.py +++ b/share/admin/jobs.py @@ -89,7 +89,7 @@ class IngestJobAdmin(BaseJobAdmin): list_display = ('id', 'source_config_', 'suid_', 'status_', 'date_started', 'share_version', ) list_select_related = BaseJobAdmin.list_select_related + ('suid',) readonly_fields = BaseJobAdmin.readonly_fields + ('suid', 'raw', 'transformer_version', 'regulator_version', ) - fake_readonly_fields = ('transformed_data', 'regulated_data') + fake_readonly_fields = ('transformed_datum', 'regulated_datum') formfield_overrides = { DateTimeAwareJSONField: { 'widget': PrettyJSONWidget(attrs={ diff --git a/share/bin/__init__.py b/share/bin/__init__.py index b44ac67a0..c1d9d9e11 100644 --- a/share/bin/__init__.py +++ b/share/bin/__init__.py @@ -25,5 +25,6 @@ def main(argv): execute_cmd(argv[1:]) + if __name__ == '__main__': main(sys.argv) diff --git a/share/bin/util.py b/share/bin/util.py index ebf7ea014..617fc4378 100644 --- a/share/bin/util.py +++ b/share/bin/util.py @@ -80,5 +80,6 @@ def _execute_cmd(args, argv): See '{0} --help' for more information on a specific command.""" return 0 + execute_cmd = Command(_execute_cmd, '') command = execute_cmd.subcommand diff --git a/share/graphql/agent.py b/share/graphql/agent.py index d1447473d..7d18d13b7 100644 --- a/share/graphql/agent.py +++ b/share/graphql/agent.py @@ -56,6 +56,7 @@ def resolve_outgoing_agent_relations(self, limit=None, offset=None): offset = (offset or 0) + limit return self.outgoing_agent_relations.all()[offset:limit] + for klass in models.Agent.get_type_classes(): locals()[klass.__name__] = type(klass.__name__, (DjangoObjectType, ), { 'id': graphene.String(), diff --git a/share/migrations/0049_jobs.py b/share/migrations/0049_jobs.py index 1e31efb98..2ba7e66ec 100644 --- a/share/migrations/0049_jobs.py +++ b/share/migrations/0049_jobs.py @@ -78,8 +78,8 @@ class Migration(migrations.Migration): ('source_config_version', models.PositiveIntegerField()), ('transformer_version', models.PositiveIntegerField()), ('regulator_version', models.PositiveIntegerField()), - ('transformed_data', share.models.fields.DateTimeAwareJSONField(null=True)), - ('regulated_data', share.models.fields.DateTimeAwareJSONField(null=True)), + ('transformed_datum', share.models.fields.DateTimeAwareJSONField(null=True)), + ('regulated_datum', share.models.fields.DateTimeAwareJSONField(null=True)), ('retries', models.IntegerField(null=True)), ], ), @@ -158,9 +158,9 @@ class Migration(migrations.Migration): field=models.ForeignKey(editable=False, on_delete=django.db.models.deletion.CASCADE, related_name='ingest_jobs', to='share.SourceUniqueIdentifier'), ), migrations.AddField( - model_name='normalizeddata', - name='ingest_job', - field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, related_name='ingested_normalized_data', to='share.IngestJob'), + model_name='ingestjob', + name='ingested_normalized_data', + field=models.ManyToManyField(related_name='ingest_jobs', to='share.NormalizedData'), ), migrations.AlterUniqueTogether( name='ingestjob', diff --git a/share/models/core.py b/share/models/core.py index a2832b717..3d8579f45 100644 --- a/share/models/core.py +++ b/share/models/core.py @@ -191,7 +191,6 @@ class NormalizedData(models.Model): data = DateTimeAwareJSONField(validators=[JSONLDValidator(), ]) source = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE) tasks = models.ManyToManyField('CeleryTaskResult') - ingest_job = models.ForeignKey('IngestJob', null=True, related_name='ingested_normalized_data', on_delete=models.CASCADE) class JSONAPIMeta(BaseJSONAPIMeta): pass diff --git a/share/models/creative.py b/share/models/creative.py index d0ed0c3b1..643cb0530 100644 --- a/share/models/creative.py +++ b/share/models/creative.py @@ -78,6 +78,7 @@ def defrankenize(self, *_, im_really_sure_about_this=False): def __str__(self): return self.title + generator = ModelGenerator(field_types={ 'text': models.TextField, 'boolean': models.NullBooleanField, # Has to be nullable for types models :( diff --git a/share/models/fields.py b/share/models/fields.py index 632dfd449..feb4c4796 100644 --- a/share/models/fields.py +++ b/share/models/fields.py @@ -91,6 +91,7 @@ def validate(self, value, model_instance): params={'value': value}, ) + DatetimeAwareJSONField = DateTimeAwareJSONField diff --git a/share/models/fuzzycount.py b/share/models/fuzzycount.py index 176e8cb6d..37057800a 100644 --- a/share/models/fuzzycount.py +++ b/share/models/fuzzycount.py @@ -15,4 +15,5 @@ def fuzzy_count(self): return int(cursor.fetchone()[0]) + FuzzyCountManager = Manager.from_queryset(FuzzyCountQuerySet) diff --git a/share/models/jobs.py b/share/models/jobs.py index 699533580..5f5994c6f 100644 --- a/share/models/jobs.py +++ b/share/models/jobs.py @@ -488,8 +488,10 @@ class IngestJob(AbstractBaseJob): transformer_version = models.PositiveIntegerField() regulator_version = models.PositiveIntegerField() - transformed_data = DateTimeAwareJSONField(null=True) - regulated_data = DateTimeAwareJSONField(null=True) + transformed_datum = DateTimeAwareJSONField(null=True) + regulated_datum = DateTimeAwareJSONField(null=True) + + ingested_normalized_data = models.ManyToManyField('NormalizedData', related_name='ingest_jobs') retries = models.IntegerField(null=True) diff --git a/share/tasks/jobs.py b/share/tasks/jobs.py index 50c47a23f..8078f4f88 100644 --- a/share/tasks/jobs.py +++ b/share/tasks/jobs.py @@ -242,8 +242,8 @@ def _consume_job(self, job, superfluous, force, apply_changes=True, index=True): data={'@graph': graph.to_jsonld()}, source=job.suid.source_config.source.user, raw=job.raw, - ingest_job=job, ) + job.ingested_normalized_data.add(datum) if apply_changes and settings.SHARE_LEGACY_PIPELINE: # TODO make this pipeline actually legacy by implementing a new one @@ -267,7 +267,7 @@ def _transform(self, job): logger.warning('Graph was empty for %s, but a normalized data already exists for it', job.raw) return None - job.log_graph('transformed_data', graph) + job.log_graph('transformed_datum', graph) return graph def _regulate(self, job, graph): @@ -276,7 +276,7 @@ def _regulate(self, job, graph): except exceptions.RegulateError as e: job.fail(e) return None - job.log_graph('regulated_data', graph) + job.log_graph('regulated_datum', graph) return graph def _apply_changes(self, job, normalized_datum): diff --git a/share/transformers/com_mendeley_data.py b/share/transformers/com_mendeley_data.py index 49e34513a..c93b5f0ce 100644 --- a/share/transformers/com_mendeley_data.py +++ b/share/transformers/com_mendeley_data.py @@ -10,6 +10,7 @@ def format_mendeley_address(ctx): country=ctx['country'] ) + RELATION_MAP = { 'related_to': 'WorkRelation', 'derived_from': 'IsDerivedFrom', diff --git a/share/util/__init__.py b/share/util/__init__.py index 05421de05..ff9b565f3 100644 --- a/share/util/__init__.py +++ b/share/util/__init__.py @@ -231,15 +231,15 @@ def chunked(iterable, size=25, fail_fast=False): iterable = iter(iterable) try: while True: - l = [] + chunk = [] for _ in range(size): - l.append(next(iterable)) - yield l + chunk.append(next(iterable)) + yield chunk except StopIteration: - yield l + yield chunk except Exception as e: - if not fail_fast and l: - yield l + if not fail_fast and chunk: + yield chunk raise e diff --git a/tests/api/test_generated_endpoints.py b/tests/api/test_generated_endpoints.py index 1cc9361c7..18a1c876c 100644 --- a/tests/api/test_generated_endpoints.py +++ b/tests/api/test_generated_endpoints.py @@ -15,6 +15,7 @@ def camelCase_to_underscore(name): s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name) return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower() + initial = [ Preprint( id=1, diff --git a/tests/share/normalize/factories.py b/tests/share/normalize/factories.py index f5c665ea6..6449f4425 100644 --- a/tests/share/normalize/factories.py +++ b/tests/share/normalize/factories.py @@ -386,6 +386,7 @@ def _params(seed=None, id=None, type=None, **kwargs): ret['seed'] = seed return ret + for model in dir(models): if not hasattr(getattr(models, model), 'VersionModel'): continue diff --git a/whitepapers/Tables.md b/whitepapers/Tables.md index 065846428..c52e0f236 100644 --- a/whitepapers/Tables.md +++ b/whitepapers/Tables.md @@ -118,9 +118,9 @@ Job entries to track the status of an ingest task | `source_config_version` | int | | | | | Version of the SUID's `SourceConfig` on the last attempted run | | `transformer_version` | int | | | | | Version of the Transformer | | `regulator_version` | int | | | | | Version of the Regulator | -| `transformed_data` | text | | ✓ | | | Serialized output from the Transformer | +| `transformed_datum` | text | | ✓ | | | Serialized output from the Transformer | | `regulator_logs` | o2m | | | | | List of RegulatorLogs for this ingestion run | -| `regulated_data` | text | | ✓ | | | Serialized output from the Regulator | +| `regulated_datum` | text | | ✓ | | | Serialized output from the Regulator | #### Other indices * `suid_id`, `latest_raw_id`, `source_config_version`, `transformer_version`, `regulator_version` (unique) diff --git a/whitepapers/tasks/Ingest.md b/whitepapers/tasks/Ingest.md index 345ac8e61..7a5e6f88c 100644 --- a/whitepapers/tasks/Ingest.md +++ b/whitepapers/tasks/Ingest.md @@ -33,13 +33,13 @@ * Load the Transformer from the SUID's SourceConfig. * Update `IngestJob.transformer_version`. * Use the Transformer to transform the raw data into a [MutableGraph](../ingest/Graph.md). - * Serialize the MutableGraph to `IngestJob.transformed_data`. + * Serialize the MutableGraph to `IngestJob.transformed_datum`. * [Regulate](../ingest/Regulator.md) * Load the Regulator. * Update `IngestJob.regulator_version`. * Use the Regulator to clean the MutableGraph. * Save list of modifications with reasons to `IngestJob.regulator_logs`. - * Serialize the cleaned MutableGraph to `IngestJob.regulated_data`. + * Serialize the cleaned MutableGraph to `IngestJob.regulated_datum`. * Use the Regulator to validate the cleaned MutableGraph. * NOT IMPLEMENTED: [Consolidate](../ingest/Consolidator.md) * Load the Consolidator.