-
Notifications
You must be signed in to change notification settings - Fork 27
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
OpenConceptLab/ocl_issues#412 | script to import v1 collection versions and ids
- Loading branch information
1 parent
ccfcfc2
commit 9a872bd
Showing
6 changed files
with
362 additions
and
4 deletions.
There are no files selected for viewing
76 changes: 76 additions & 0 deletions
76
core/importers/management/commands/import_v1_collection_ids.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
import json | ||
import time | ||
from pprint import pprint | ||
|
||
from django.core.management import BaseCommand | ||
from pydash import get | ||
|
||
from core.collections.models import Collection | ||
|
||
|
||
class Command(BaseCommand):
    """Copy v1 ids onto collections and collection versions that already exist.

    Reads the exported collection-id and collection-version-id files (one JSON
    document per line, each providing ``_id`` and ``uri``) and stores the
    ``$oid`` of ``_id`` as ``internal_reference_id`` on every ``Collection``
    row whose ``uri`` matches.
    """

    help = 'import v1 collection/version ids'

    # NOTE: the list attributes are defined on the class, so they are shared
    # by every instance of this command created in the same process.
    total = 0
    processed = 0
    created = []
    existed = []
    failed = []
    not_found = []
    start_time = None
    elapsed_seconds = 0

    @staticmethod
    def log(msg):
        """Print ``msg`` surrounded by asterisks so it stands out in the output."""
        print("*******{}*******".format(msg))

    @staticmethod
    def _read_lines(file_path):
        """Return every line of ``file_path``; the file is closed before returning."""
        with open(file_path, 'r') as export_file:
            return export_file.readlines()

    def handle(self, *args, **options):
        """Run the import and print a summary of updated, missing and failed records."""
        self.start_time = time.time()
        lines = self._read_lines('/code/core/importers/v1_dump/data/exported_collection_ids.json')
        lines += self._read_lines('/code/core/importers/v1_dump/data/exported_collectionversion_ids.json')

        self.log('STARTING COLLECTION/VERSION IDS IMPORT')
        self.total = len(lines)
        self.log('TOTAL: {}'.format(self.total))

        for line in lines:
            data = json.loads(line)
            original_data = data.copy()
            # Count the record before touching its keys so that a record which
            # fails below is still reflected in the "(processed/total)" progress.
            self.processed += 1
            try:
                _id = get(data.pop('_id'), '$oid')
                uri = data.pop('uri')
                # update() returns the number of matched rows; 0 means no
                # collection with this uri exists.
                updated = Collection.objects.filter(uri=uri).update(internal_reference_id=_id)
                if updated:
                    self.created.append(original_data)
                    self.log("Updated: {} ({}/{})".format(uri, self.processed, self.total))
                else:
                    self.not_found.append(original_data)
                    self.log("Not Found: {} ({}/{})".format(uri, self.processed, self.total))
            except Exception as ex:
                # Best effort: record the failure and carry on with the next line.
                self.log("Failed: ")
                self.log(ex.args)
                self.failed.append({**original_data, 'errors': ex.args})

        self.elapsed_seconds = time.time() - self.start_time

        self.log(
            "Result (in {} secs) : Total: {} | Created: {} | NotFound: {} | Failed: {}".format(
                self.elapsed_seconds, self.total, len(self.created), len(self.not_found), len(self.failed)
            )
        )

        if self.existed:
            self.log("Existed")
            pprint(self.existed)

        if self.failed:
            self.log("Failed")
            pprint(self.failed)

        if self.not_found:
            self.log("Not Found")
            pprint(self.not_found)
134 changes: 134 additions & 0 deletions
134
core/importers/management/commands/import_v1_collection_versions.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
import json | ||
from pprint import pprint | ||
|
||
from django.core.management import BaseCommand | ||
from pydash import get | ||
|
||
from core.collections.models import Collection, CollectionReference | ||
from core.collections.utils import is_concept | ||
from core.concepts.documents import ConceptDocument | ||
from core.concepts.models import Concept | ||
from core.mappings.documents import MappingDocument | ||
from core.mappings.models import Mapping | ||
from core.users.models import UserProfile | ||
|
||
|
||
class Command(BaseCommand):
    """Import v1 collection versions from the exported JSON-lines dump.

    Each line describes one collection version. The version is created as a
    ``Collection`` row that takes its owner, collection type and mnemonic from
    the already-imported parent collection (looked up by
    ``internal_reference_id``), after which its reference expressions are
    resolved to concepts and mappings.
    """

    help = 'import v1 collection versions'

    # NOTE: the containers are defined on the class, so they are shared by
    # every instance of this command created in the same process.
    total = 0
    processed = 0
    created = []
    existed = []
    failed = []
    not_found_expressions = dict()

    @staticmethod
    def log(msg):
        """Print ``msg`` surrounded by asterisks so it stands out in the output."""
        print("*******{}*******".format(msg))

    def add_in_not_found_expression(self, collection_uri, expression):
        """Remember that ``expression`` of ``collection_uri`` matched no concept or mapping."""
        if collection_uri not in self.not_found_expressions:
            self.not_found_expressions[collection_uri] = []

        self.not_found_expressions[collection_uri].append(expression)

    def _attach_references(self, collection, uri, references):
        """Resolve ``references`` and attach the results to ``collection``.

        Expressions that resolve to a concept or mapping get a saved
        ``CollectionReference``; the others are recorded under ``uri`` as not
        found. The resolved concepts and mappings are then indexed.
        """
        saved_references = []
        concepts = []
        mappings = []
        for ref in references:
            expression = ref.get('expression')
            if is_concept(expression):
                model, resolved = Concept, concepts
            else:
                model, resolved = Mapping, mappings

            instance = model.objects.filter(uri=expression).first()
            if instance is None:
                self.add_in_not_found_expression(uri, expression)
                continue
            resolved.append(instance)

            reference = CollectionReference(expression=expression)
            reference.save()
            saved_references.append(reference)

        collection.references.set(saved_references)
        collection.concepts.set(concepts)
        collection.mappings.set(mappings)
        collection.batch_index(collection.concepts, ConceptDocument)
        collection.batch_index(collection.mappings, MappingDocument)

    def handle(self, *args, **options):
        """Run the import and print which versions were created, existed or failed."""
        FILE_PATH = '/code/core/importers/v1_dump/data/exported_collectionversions.json'
        # Context manager so the dump file is closed once it has been read.
        with open(FILE_PATH, 'r') as export_file:
            lines = export_file.readlines()

        self.log('STARTING COLLECTION VERSIONS IMPORT')
        self.total = len(lines)
        self.log('TOTAL: {}'.format(self.total))

        for line in lines:
            data = json.loads(line)
            original_data = data.copy()
            self.processed += 1
            _id = data.pop('_id')
            data['internal_reference_id'] = get(_id, '$oid')
            # Drop the v1 attributes that are not passed on to Collection.objects.create().
            for attr in [
                    'active_concepts', 'active_mappings', 'last_child_update', 'last_concept_update',
                    'last_mapping_update', 'parent_version_id', 'previous_version_id',
                    'versioned_object_type_id', 'concepts', 'mappings'
            ]:
                data.pop(attr, None)

            data['snapshot'] = data.pop('collection_snapshot', None)
            data['external_id'] = data.pop('version_external_id', None)

            versioned_object_id = data.pop('versioned_object_id')
            versioned_object = Collection.objects.filter(internal_reference_id=versioned_object_id).first()
            if versioned_object is None:
                # Without the parent collection there is no owner, type or
                # mnemonic to copy; record the failure instead of aborting
                # the whole import on an attribute access on None.
                self.log("Failed: parent collection {} not found ({}/{})".format(
                    versioned_object_id, self.processed, self.total
                ))
                self.failed.append(original_data)
                continue

            version = data.pop('mnemonic')
            created_at = data.pop('created_at')
            updated_at = data.pop('updated_at')
            # Replace usernames by user objects where the user exists; one
            # query per attribute instead of exists() followed by first().
            for attr in ('created_by', 'updated_by'):
                user = UserProfile.objects.filter(username=data.get(attr)).first()
                if user is not None:
                    data[attr] = user
            data['created_at'] = get(created_at, '$date')
            data['updated_at'] = get(updated_at, '$date')
            data['organization_id'] = versioned_object.organization_id
            data['user_id'] = versioned_object.user_id
            data['collection_type'] = versioned_object.collection_type
            # A missing or null "references" entry is treated as no references.
            references = data.pop('references', None) or []

            self.log("Processing: {} ({}/{})".format(version, self.processed, self.total))
            uri = data['uri']
            if Collection.objects.filter(uri=uri).exists():
                # Versions that are already present are only reported.
                self.existed.append(original_data)
                continue

            collection = Collection.objects.create(**data, version=version, mnemonic=versioned_object.mnemonic)
            if not collection.id:
                self.failed.append(original_data)
                continue
            self.created.append(original_data)

            self._attach_references(collection, uri, references)

        self.log(
            "Result: Created: {} | Existed: {} | Failed: {}".format(
                len(self.created), len(self.existed), len(self.failed)
            )
        )
        if self.existed:
            self.log("Existed")
            pprint(self.existed)
        if self.failed:
            self.log("Failed")
            pprint(self.failed)
        if self.not_found_expressions:
            self.log('Expressions Not Added')
            pprint(self.not_found_expressions)
141 changes: 141 additions & 0 deletions
141
core/importers/management/commands/import_v1_collections.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
import json | ||
from pprint import pprint | ||
|
||
from django.core.management import BaseCommand | ||
from pydash import get | ||
|
||
from core.collections.models import Collection, CollectionReference | ||
from core.collections.utils import is_concept | ||
from core.common.constants import HEAD | ||
from core.concepts.documents import ConceptDocument | ||
from core.concepts.models import Concept | ||
from core.mappings.documents import MappingDocument | ||
from core.mappings.models import Mapping | ||
from core.orgs.models import Organization | ||
from core.users.models import UserProfile | ||
|
||
|
||
class Command(BaseCommand):
    """Import v1 collections (HEAD versions) from the exported JSON-lines dump.

    Each line describes one collection. The collection is created with
    ``version=HEAD`` under the organization or user identified by
    ``parent_id``, after which its reference expressions are resolved to
    concepts and mappings.
    """

    help = 'import v1 collections'

    # NOTE: the containers are defined on the class, so they are shared by
    # every instance of this command created in the same process.
    total = 0
    processed = 0
    created = []
    existed = []
    failed = []
    parents = dict()
    users = dict()
    references = dict()
    not_found_expressions = dict()

    @staticmethod
    def log(msg):
        """Print ``msg`` surrounded by asterisks so it stands out in the output."""
        print("*******{}*******".format(msg))

    def get_parent(self, parent_id, uri):
        """Return the owner keyword argument for a collection, cached by ``parent_id``.

        The result is ``{'organization': ...}`` when ``uri`` contains
        ``/orgs/`` and ``{'user': ...}`` otherwise; the value is ``None`` when
        no owner has ``parent_id`` as its ``internal_reference_id``.
        """
        if parent_id not in self.parents:
            if '/orgs/' in uri:
                result = dict(organization=Organization.objects.filter(internal_reference_id=parent_id).first())
            else:
                result = dict(user=UserProfile.objects.filter(internal_reference_id=parent_id).first())
            self.parents[parent_id] = result

        return self.parents[parent_id]

    def add_in_not_found_expression(self, collection_uri, expression):
        """Remember that ``expression`` of ``collection_uri`` matched no concept or mapping."""
        if collection_uri not in self.not_found_expressions:
            self.not_found_expressions[collection_uri] = []

        self.not_found_expressions[collection_uri].append(expression)

    def _attach_references(self, collection, uri, references):
        """Resolve ``references`` and attach the results to ``collection``.

        Expressions that resolve to a concept or mapping get a saved
        ``CollectionReference``; the others are recorded under ``uri`` as not
        found. The resolved concepts and mappings are then indexed.
        """
        saved_references = []
        concepts = []
        mappings = []
        for ref in references:
            expression = ref.get('expression')
            if is_concept(expression):
                model, resolved = Concept, concepts
            else:
                model, resolved = Mapping, mappings

            instance = model.objects.filter(uri=expression).first()
            if instance is None:
                self.add_in_not_found_expression(uri, expression)
                continue
            resolved.append(instance)

            reference = CollectionReference(expression=expression)
            reference.save()
            saved_references.append(reference)

        collection.references.set(saved_references)
        collection.concepts.set(concepts)
        collection.mappings.set(mappings)
        collection.batch_index(collection.concepts, ConceptDocument)
        collection.batch_index(collection.mappings, MappingDocument)

    def handle(self, *args, **options):
        """Run the import and print which collections were created, existed or failed."""
        FILE_PATH = '/code/core/importers/v1_dump/data/exported_collections.json'
        # Context manager so the dump file is closed once it has been read.
        with open(FILE_PATH, 'r') as export_file:
            lines = export_file.readlines()

        self.log('STARTING COLLECTION IMPORT')
        self.total = len(lines)
        self.log('TOTAL: {}'.format(self.total))

        for line in lines:
            data = json.loads(line)
            original_data = data.copy()
            self.processed += 1
            _id = data.pop('_id')
            # Drop the v1 attributes that are not passed on to Collection.objects.create().
            for attr in ['parent_type_id', 'concepts', 'mappings']:
                data.pop(attr, None)

            parent_id = data.pop('parent_id')
            created_at = data.pop('created_at')
            updated_at = data.pop('updated_at')
            # A missing or null "references" entry is treated as no references.
            references = data.pop('references', None) or []
            # Replace usernames by user objects where the user exists; one
            # query per attribute instead of exists() followed by first().
            for attr in ('created_by', 'updated_by'):
                user = UserProfile.objects.filter(username=data.get(attr)).first()
                if user is not None:
                    data[attr] = user
            data['internal_reference_id'] = get(_id, '$oid')
            data['created_at'] = get(created_at, '$date')
            data['updated_at'] = get(updated_at, '$date')
            mnemonic = data.get('mnemonic')
            data = {**data, **self.get_parent(parent_id, data['uri'])}

            self.log("Processing: {} ({}/{})".format(mnemonic, self.processed, self.total))
            uri = data['uri']
            if Collection.objects.filter(uri=uri).exists():
                # Collections that are already present are only reported.
                self.existed.append(original_data)
                continue

            collection = Collection.objects.create(**data, version=HEAD)
            if not collection.id:
                self.failed.append(original_data)
                continue
            self.created.append(original_data)

            self._attach_references(collection, uri, references)

        self.log(
            "Result: Created: {} | Existed: {} | Failed: {}".format(
                len(self.created), len(self.existed), len(self.failed)
            )
        )
        if self.existed:
            self.log("Existed")
            pprint(self.existed)
        if self.failed:
            self.log("Failed")
            pprint(self.failed)
        if self.not_found_expressions:
            self.log('Expressions Not Added')
            pprint(self.not_found_expressions)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
6 changes: 4 additions & 2 deletions
6
core/importers/v1_dump/scripts/export_collections_and_versions.bash
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,6 @@ | ||
#!/usr/bin/env bash | ||
# Runs export_collections_and_versions.js against the "ocl" database on
# localhost:27017, then writes the export.* collections to JSON files with
# mongoexport.
# NOTE(review): export.collections and export.collectionversions are each
# exported twice below (once to ../data/, once to the current directory); this
# listing looks like it interleaves the removed and added lines of a diff —
# confirm which set is current before running.
mongo "localhost:27017/ocl" ./export_collections_and_versions.js | ||
mongoexport --db ocl --collection export.collections -o ../data/exported_collections.json | ||
mongoexport --db ocl --collection export.collectionversions -o ../data/exported_collectionversions.json | ||
mongoexport --db ocl --collection export.collections -o exported_collections.json | ||
mongoexport --db ocl --collection export.collection_ids -o exported_collection_ids.json | ||
mongoexport --db ocl --collection export.collectionversion_ids -o exported_collectionversion_ids.json | ||
mongoexport --db ocl --collection export.collectionversions -o exported_collectionversions.json |