Skip to content

Commit

Permalink
Last bit of cleanup maybe (#748)
Browse files Browse the repository at this point in the history
  • Loading branch information
aaxelb committed Feb 19, 2018
1 parent 3999150 commit 7afc7da
Show file tree
Hide file tree
Showing 21 changed files with 42 additions and 30 deletions.
8 changes: 4 additions & 4 deletions api/normalizeddata/views.py
Expand Up @@ -69,16 +69,16 @@ def create(self, request, *args, **kwargs):
ingester = Ingester(serializer.validated_data['data']).as_user(request.user).ingest(apply_changes=False)
ingester.job.reschedule(claim=True)

nm_instance = models.NormalizedData.objects.filter(
nd_id = models.NormalizedData.objects.filter(
raw=ingester.raw,
ingest_job=ingester.job
).order_by('-created_at').first()
ingest_jobs=ingester.job
).order_by('-created_at').values_list('id', flat=True).first()

async_result = ingest.delay(job_id=ingester.job.id)

# TODO Use an actual serializer
return Response({
'id': IDObfuscator.encode(nm_instance),
'id': IDObfuscator.encode_id(nd_id, models.NormalizedData),
'type': 'NormalizedData',
'attributes': {
'task': async_result.id,
Expand Down
8 changes: 4 additions & 4 deletions bots/elasticsearch/bot.py
Expand Up @@ -20,12 +20,12 @@ def chunk(iterable, size):
iterable = iter(iterable)
try:
while True:
l = []
chunk = []
for _ in range(size):
l.append(next(iterable))
yield l
chunk.append(next(iterable))
yield chunk
except StopIteration:
yield l
yield chunk


class ElasticSearchBot:
Expand Down
1 change: 1 addition & 0 deletions osf_oauth2_adapter/provider.py
Expand Up @@ -46,4 +46,5 @@ def extract_uid(self, data):
def get_default_scope(self):
return OsfOauth2AdapterConfig.default_scopes


provider_classes = [OSFProvider]
1 change: 1 addition & 0 deletions osf_oauth2_adapter/views.py
Expand Up @@ -63,6 +63,7 @@ def complete_login(self, request, app, access_token, **kwargs):
extra_data.json()
)


oauth2_login = OAuth2LoginView.adapter_view(OSFOAuth2Adapter)
oauth2_callback = OAuth2CallbackView.adapter_view(OSFOAuth2Adapter)

Expand Down
2 changes: 1 addition & 1 deletion share/admin/jobs.py
Expand Up @@ -89,7 +89,7 @@ class IngestJobAdmin(BaseJobAdmin):
list_display = ('id', 'source_config_', 'suid_', 'status_', 'date_started', 'share_version', )
list_select_related = BaseJobAdmin.list_select_related + ('suid',)
readonly_fields = BaseJobAdmin.readonly_fields + ('suid', 'raw', 'transformer_version', 'regulator_version', )
fake_readonly_fields = ('transformed_data', 'regulated_data')
fake_readonly_fields = ('transformed_datum', 'regulated_datum')
formfield_overrides = {
DateTimeAwareJSONField: {
'widget': PrettyJSONWidget(attrs={
Expand Down
1 change: 1 addition & 0 deletions share/bin/__init__.py
Expand Up @@ -25,5 +25,6 @@ def main(argv):

execute_cmd(argv[1:])


if __name__ == '__main__':
main(sys.argv)
1 change: 1 addition & 0 deletions share/bin/util.py
Expand Up @@ -80,5 +80,6 @@ def _execute_cmd(args, argv):
See '{0} <command> --help' for more information on a specific command."""
return 0


execute_cmd = Command(_execute_cmd, '')
command = execute_cmd.subcommand
1 change: 1 addition & 0 deletions share/graphql/agent.py
Expand Up @@ -56,6 +56,7 @@ def resolve_outgoing_agent_relations(self, limit=None, offset=None):
offset = (offset or 0) + limit
return self.outgoing_agent_relations.all()[offset:limit]


for klass in models.Agent.get_type_classes():
locals()[klass.__name__] = type(klass.__name__, (DjangoObjectType, ), {
'id': graphene.String(),
Expand Down
10 changes: 5 additions & 5 deletions share/migrations/0049_jobs.py
Expand Up @@ -78,8 +78,8 @@ class Migration(migrations.Migration):
('source_config_version', models.PositiveIntegerField()),
('transformer_version', models.PositiveIntegerField()),
('regulator_version', models.PositiveIntegerField()),
('transformed_data', share.models.fields.DateTimeAwareJSONField(null=True)),
('regulated_data', share.models.fields.DateTimeAwareJSONField(null=True)),
('transformed_datum', share.models.fields.DateTimeAwareJSONField(null=True)),
('regulated_datum', share.models.fields.DateTimeAwareJSONField(null=True)),
('retries', models.IntegerField(null=True)),
],
),
Expand Down Expand Up @@ -158,9 +158,9 @@ class Migration(migrations.Migration):
field=models.ForeignKey(editable=False, on_delete=django.db.models.deletion.CASCADE, related_name='ingest_jobs', to='share.SourceUniqueIdentifier'),
),
migrations.AddField(
model_name='normalizeddata',
name='ingest_job',
field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, related_name='ingested_normalized_data', to='share.IngestJob'),
model_name='ingestjob',
name='ingested_normalized_data',
field=models.ManyToManyField(related_name='ingest_jobs', to='share.NormalizedData'),
),
migrations.AlterUniqueTogether(
name='ingestjob',
Expand Down
1 change: 0 additions & 1 deletion share/models/core.py
Expand Up @@ -191,7 +191,6 @@ class NormalizedData(models.Model):
data = DateTimeAwareJSONField(validators=[JSONLDValidator(), ])
source = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
tasks = models.ManyToManyField('CeleryTaskResult')
ingest_job = models.ForeignKey('IngestJob', null=True, related_name='ingested_normalized_data', on_delete=models.CASCADE)

class JSONAPIMeta(BaseJSONAPIMeta):
pass
Expand Down
1 change: 1 addition & 0 deletions share/models/creative.py
Expand Up @@ -78,6 +78,7 @@ def defrankenize(self, *_, im_really_sure_about_this=False):
def __str__(self):
return self.title


generator = ModelGenerator(field_types={
'text': models.TextField,
'boolean': models.NullBooleanField, # Has to be nullable for types models :(
Expand Down
1 change: 1 addition & 0 deletions share/models/fields.py
Expand Up @@ -91,6 +91,7 @@ def validate(self, value, model_instance):
params={'value': value},
)


DatetimeAwareJSONField = DateTimeAwareJSONField


Expand Down
1 change: 1 addition & 0 deletions share/models/fuzzycount.py
Expand Up @@ -15,4 +15,5 @@ def fuzzy_count(self):

return int(cursor.fetchone()[0])


FuzzyCountManager = Manager.from_queryset(FuzzyCountQuerySet)
6 changes: 4 additions & 2 deletions share/models/jobs.py
Expand Up @@ -488,8 +488,10 @@ class IngestJob(AbstractBaseJob):
transformer_version = models.PositiveIntegerField()
regulator_version = models.PositiveIntegerField()

transformed_data = DateTimeAwareJSONField(null=True)
regulated_data = DateTimeAwareJSONField(null=True)
transformed_datum = DateTimeAwareJSONField(null=True)
regulated_datum = DateTimeAwareJSONField(null=True)

ingested_normalized_data = models.ManyToManyField('NormalizedData', related_name='ingest_jobs')

retries = models.IntegerField(null=True)

Expand Down
6 changes: 3 additions & 3 deletions share/tasks/jobs.py
Expand Up @@ -242,8 +242,8 @@ def _consume_job(self, job, superfluous, force, apply_changes=True, index=True):
data={'@graph': graph.to_jsonld()},
source=job.suid.source_config.source.user,
raw=job.raw,
ingest_job=job,
)
job.ingested_normalized_data.add(datum)

if apply_changes and settings.SHARE_LEGACY_PIPELINE:
# TODO make this pipeline actually legacy by implementing a new one
Expand All @@ -267,7 +267,7 @@ def _transform(self, job):
logger.warning('Graph was empty for %s, but a normalized data already exists for it', job.raw)
return None

job.log_graph('transformed_data', graph)
job.log_graph('transformed_datum', graph)
return graph

def _regulate(self, job, graph):
Expand All @@ -276,7 +276,7 @@ def _regulate(self, job, graph):
except exceptions.RegulateError as e:
job.fail(e)
return None
job.log_graph('regulated_data', graph)
job.log_graph('regulated_datum', graph)
return graph

def _apply_changes(self, job, normalized_datum):
Expand Down
1 change: 1 addition & 0 deletions share/transformers/com_mendeley_data.py
Expand Up @@ -10,6 +10,7 @@ def format_mendeley_address(ctx):
country=ctx['country']
)


RELATION_MAP = {
'related_to': 'WorkRelation',
'derived_from': 'IsDerivedFrom',
Expand Down
12 changes: 6 additions & 6 deletions share/util/__init__.py
Expand Up @@ -231,15 +231,15 @@ def chunked(iterable, size=25, fail_fast=False):
iterable = iter(iterable)
try:
while True:
l = []
chunk = []
for _ in range(size):
l.append(next(iterable))
yield l
chunk.append(next(iterable))
yield chunk
except StopIteration:
yield l
yield chunk
except Exception as e:
if not fail_fast and l:
yield l
if not fail_fast and chunk:
yield chunk
raise e


Expand Down
1 change: 1 addition & 0 deletions tests/api/test_generated_endpoints.py
Expand Up @@ -15,6 +15,7 @@ def camelCase_to_underscore(name):
s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()


initial = [
Preprint(
id=1,
Expand Down
1 change: 1 addition & 0 deletions tests/share/normalize/factories.py
Expand Up @@ -386,6 +386,7 @@ def _params(seed=None, id=None, type=None, **kwargs):
ret['seed'] = seed
return ret


for model in dir(models):
if not hasattr(getattr(models, model), 'VersionModel'):
continue
Expand Down
4 changes: 2 additions & 2 deletions whitepapers/Tables.md
Expand Up @@ -118,9 +118,9 @@ Job entries to track the status of an ingest task
| `source_config_version` | int | | | | | Version of the SUID's `SourceConfig` on the last attempted run |
| `transformer_version` | int | | | | | Version of the Transformer |
| `regulator_version` | int | | | | | Version of the Regulator |
| `transformed_data` | text | || | | Serialized output from the Transformer |
| `transformed_datum` | text | || | | Serialized output from the Transformer |
| `regulator_logs` | o2m | | | | | List of RegulatorLogs for this ingestion run |
| `regulated_data` | text | || | | Serialized output from the Regulator |
| `regulated_datum` | text | || | | Serialized output from the Regulator |

#### Other indices
* `suid_id`, `latest_raw_id`, `source_config_version`, `transformer_version`, `regulator_version` (unique)
Expand Down
4 changes: 2 additions & 2 deletions whitepapers/tasks/Ingest.md
Expand Up @@ -33,13 +33,13 @@
* Load the Transformer from the SUID's SourceConfig.
* Update `IngestJob.transformer_version`.
* Use the Transformer to transform the raw data into a [MutableGraph](../ingest/Graph.md).
* Serialize the MutableGraph to `IngestJob.transformed_data`.
* Serialize the MutableGraph to `IngestJob.transformed_datum`.
* [Regulate](../ingest/Regulator.md)
* Load the Regulator.
* Update `IngestJob.regulator_version`.
* Use the Regulator to clean the MutableGraph.
* Save list of modifications with reasons to `IngestJob.regulator_logs`.
* Serialize the cleaned MutableGraph to `IngestJob.regulated_data`.
* Serialize the cleaned MutableGraph to `IngestJob.regulated_datum`.
* Use the Regulator to validate the cleaned MutableGraph.
* NOT IMPLEMENTED: [Consolidate](../ingest/Consolidator.md)
* Load the Consolidator.
Expand Down

0 comments on commit 7afc7da

Please sign in to comment.