-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
OpenConceptLab/ocl_issues#412 | script to import v1 source/collection mongo ids and mapping versions
- Loading branch information
1 parent
fa16a47
commit ccfcfc2
Showing
6 changed files
with
240 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
145 changes: 145 additions & 0 deletions
145
core/importers/management/commands/import_v1_mapping_versions.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
import json | ||
import time | ||
from pprint import pprint | ||
|
||
from django.core.management import BaseCommand | ||
from pydash import get | ||
|
||
from core.concepts.models import Concept | ||
from core.mappings.models import Mapping | ||
from core.sources.models import Source | ||
from core.users.models import UserProfile | ||
|
||
|
||
class Command(BaseCommand):
    """Import v1 mapping versions from a line-delimited JSON export.

    Every line of the export file is parsed as one JSON document describing a
    mapping version. For each document the command looks up the versioned
    ``Mapping`` by its v1 id (``internal_reference_id``), resolves the related
    concepts, source and users, and creates a new ``Mapping`` row unless one
    with the same ``uri`` already exists. Outcomes are collected in
    ``created`` / ``existed`` / ``failed`` and summarised at the end.
    """
    help = 'import v1 mapping versions'

    # Run counters and result buckets. These are class attributes, so the
    # lists and the user cache are shared by every instance of the command.
    total = 0
    processed = 0
    created = []
    existed = []
    failed = []
    start_time = None
    elapsed_seconds = 0
    users = dict()  # username -> UserProfile, filled lazily by _get_user()

    @staticmethod
    def log(msg):
        """Print ``msg`` wrapped in asterisks so it stands out in the output."""
        print("*******{}*******".format(msg))

    def _get_user(self, username):
        """Return the UserProfile for ``username``, or None when there is none.

        Successful lookups are cached in ``self.users`` under the username
        that was looked up, so each user is fetched from the database once.
        """
        if username in self.users:
            return self.users[username]
        if not username:
            return None
        user = UserProfile.objects.filter(username=username).first()
        if user is not None:
            self.users[username] = user
        return user

    def handle(self, *args, **options):
        """Read the export file and import every mapping version in it."""
        self.start_time = time.time()
        FILE_PATH = '/code/core/importers/v1_dump/data/exported_mappingversions.json'
        # Close the file as soon as its lines are in memory.
        with open(FILE_PATH, 'r') as export_file:
            lines = export_file.readlines()

        self.log('STARTING MAPPING VERSIONS IMPORT')
        self.total = len(lines)
        self.log('TOTAL: {}'.format(self.total))

        for line in lines:
            data = json.loads(line)
            # Untouched copy of the document, used for reporting.
            original_data = data.copy()
            self.processed += 1
            created_at = data.pop('created_at')
            updated_at = data.pop('updated_at')
            created_by = data.get('created_by', None) or data.pop('version_created_by', None) or 'ocladmin'
            updated_by = data.get('updated_by') or created_by
            source_version_ids = data.pop('source_version_ids', None) or None

            # Drop the keys that are not passed on to the Mapping constructor.
            for attr in [
                    'root_version_id', 'parent_version_id', 'previous_version_id', 'version_created_by',
                    'versioned_object_type_id'
            ]:
                data.pop(attr, None)

            data['comment'] = data.pop('update_comment', None)
            _id = data.pop('_id')
            versioned_object_id = data.pop('versioned_object_id')
            versioned_object = Mapping.objects.filter(internal_reference_id=versioned_object_id).first()
            if not versioned_object:
                self.failed.append({**original_data, 'errors': ['versioned_object not found']})
                continue
            mnemonic = versioned_object.mnemonic
            # The exported 'mnemonic' is stored as the version label; the
            # new row's own mnemonic comes from the versioned object.
            data['version'] = data.pop('mnemonic')
            data['internal_reference_id'] = get(_id, '$oid')
            data['created_at'] = get(created_at, '$date')
            data['updated_at'] = get(updated_at, '$date')
            from_concept_id = get(data.pop('from_concept_id'), '$oid')
            to_concept_id = get(data.pop('to_concept_id'), '$oid')
            to_source_id = get(data.pop('to_source_id'), '$oid')
            from_concept = Concept.objects.filter(internal_reference_id=from_concept_id).first()
            to_concept = None
            to_source = None
            if to_concept_id:
                to_concept = Concept.objects.filter(internal_reference_id=to_concept_id).first()
            if to_source_id:
                to_source = Source.objects.filter(internal_reference_id=to_source_id).first()

            # Swap usernames for UserProfile objects. When no profile is
            # found the key is left exactly as it was in the document.
            for field, username in (('created_by', created_by), ('updated_by', updated_by)):
                user = self._get_user(username)
                if user is not None:
                    data[field] = user

            self.log("Processing: {} ({}/{})".format(mnemonic, self.processed, self.total))
            if Mapping.objects.filter(uri=data['uri']).exists():
                self.existed.append(original_data)
            else:
                try:
                    source = versioned_object.parent
                    data.pop('parent_id', None)
                    mapping = Mapping(
                        **data, mnemonic=mnemonic, parent=source, versioned_object_id=versioned_object.id,
                    )
                    # Prefer what was resolved from the export and fall back
                    # to the values on the versioned object.
                    mapping.to_concept_id = get(to_concept, 'id') or versioned_object.to_concept_id
                    mapping.to_concept_code = data.get('to_concept_code') or versioned_object.to_concept_code
                    mapping.to_concept_name = data.get('to_concept_name') or versioned_object.to_concept_name
                    mapping.to_source_id = get(to_source, 'id') or get(
                        to_concept, 'parent_id') or versioned_object.to_source_id
                    mapping.from_concept_id = get(from_concept, 'id') or versioned_object.from_concept_id
                    mapping.from_concept_code = get(from_concept, 'mnemonic') or versioned_object.from_concept_code
                    mapping.from_source_id = get(from_concept, 'parent_id') or versioned_object.from_source_id
                    mapping.save()

                    source_versions = [source]
                    if source_version_ids:
                        source_versions += list(Source.objects.filter(internal_reference_id__in=source_version_ids))
                    mapping.sources.set(source_versions)
                    mapping.save()

                    # other_versions = versioned_object.versions.exclude(id=mapping.id)
                    # if other_versions.exists():
                    #     other_versions.update(is_latest_version=False)

                    self.created.append(original_data)
                except Exception as ex:
                    # One bad document must not abort the whole import;
                    # record it and move on to the next line.
                    self.log("Failed: {}".format(data['uri']))
                    self.log(ex.args)
                    self.failed.append({**original_data, 'errors': ex.args})

        self.elapsed_seconds = time.time() - self.start_time

        self.log(
            "Result (in {} secs) : Total: {} | Created: {} | Existed: {} | Failed: {}".format(
                self.elapsed_seconds, self.total, len(self.created), len(self.existed), len(self.failed)
            )
        )
        if self.existed:
            self.log("Existed")
            pprint(self.existed)

        if self.failed:
            self.log("Failed")
            pprint(self.failed)
76 changes: 76 additions & 0 deletions
76
core/importers/management/commands/import_v1_source_ids.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
import json | ||
import time | ||
from pprint import pprint | ||
|
||
from django.core.management import BaseCommand | ||
from pydash import get | ||
|
||
from core.sources.models import Source | ||
|
||
|
||
class Command(BaseCommand):
    """Copy v1 ids onto existing sources and source versions.

    Reads two line-delimited JSON exports (sources, then source versions).
    Each line is parsed as a document with an ``_id`` and a ``uri``; every
    ``Source`` row whose ``uri`` matches gets its ``internal_reference_id``
    set to the ``$oid`` value of that ``_id``.
    """
    help = 'import v1 source/version ids'

    # Run counters and result buckets. These are class attributes, so the
    # lists are shared by every instance of the command.
    total = 0
    processed = 0
    created = []    # documents whose uri matched at least one row (rows are updated, not created)
    existed = []    # never appended to by this command
    failed = []
    not_found = []
    start_time = None
    elapsed_seconds = 0

    @staticmethod
    def log(msg):
        """Print ``msg`` wrapped in asterisks so it stands out in the output."""
        print("*******{}*******".format(msg))

    def handle(self, *args, **options):
        """Load both export files and apply the ids they contain."""
        self.start_time = time.time()
        lines = []
        # Sources first, then source versions; each file is closed as soon
        # as its lines are in memory.
        for FILE_PATH in (
                '/code/core/importers/v1_dump/data/exported_source_ids.json',
                '/code/core/importers/v1_dump/data/exported_sourceversion_ids.json',
        ):
            with open(FILE_PATH, 'r') as export_file:
                lines += export_file.readlines()

        self.log('STARTING SOURCE/VERSION IDS IMPORT')
        self.total = len(lines)
        self.log('TOTAL: {}'.format(self.total))

        for line in lines:
            data = json.loads(line)
            # Untouched copy of the document, used for reporting.
            original_data = data.copy()
            try:
                _id = get(data.pop('_id'), '$oid')
                uri = data.pop('uri')
                self.processed += 1
                # update() returns the number of rows matched; 0 means no
                # source with this uri exists.
                updated = Source.objects.filter(uri=uri).update(internal_reference_id=_id)
                if updated:
                    self.created.append(original_data)
                    self.log("Updated: {} ({}/{})".format(uri, self.processed, self.total))
                else:
                    self.not_found.append(original_data)
                    self.log("Not Found: {} ({}/{})".format(uri, self.processed, self.total))

            except Exception as ex:
                # One bad document must not abort the whole import;
                # record it and move on to the next line.
                self.log("Failed: ")
                self.log(ex.args)
                self.failed.append({**original_data, 'errors': ex.args})

        self.elapsed_seconds = time.time() - self.start_time

        self.log(
            "Result (in {} secs) : Total: {} | Created: {} | NotFound: {} | Failed: {}".format(
                self.elapsed_seconds, self.total, len(self.created), len(self.not_found), len(self.failed)
            )
        )

        if self.existed:
            self.log("Existed")
            pprint(self.existed)

        if self.failed:
            self.log("Failed")
            pprint(self.failed)

        if self.not_found:
            self.log("Not Found")
            pprint(self.not_found)
4 changes: 4 additions & 0 deletions
4
core/importers/v1_dump/scripts/export_collections_and_versions.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,16 @@ | ||
// Rebuilds the export.* staging collections for collections and their
// versions. Each staging collection is dropped and refilled on every run.

db.export.collections.drop();
db.export.collectionversions.drop();
db.export.collection_ids.drop();
db.export.collectionversion_ids.drop();

// Mnemonics of the organisations whose collections are exported in full.
var org_mnemonics = [
    "EthiopiaNHDD", "MSF-OCB", "MOH-DM", "IAD", "integrated-impact", "SSAS", "DSME-Test", "GFPVAN", "im",
    "Kuunika", "DSME", "DSME-CDD", "MOH", "mTOMADY", "IRDO", "ibwighane", "mw-terminology-service",
    "mw-product-master", "ICI", "mw-terminology-service-development", "mw-product-master-ocl-instance",
    "mw-product-master-ocl", "malawi-diseases-diagnosis", "TestOrg", "DWB", "CMDF", "MUDHC", "MSF", "MU",
    "MUDH", "nproto", "MSFTW", "TWABC", "kuunika-registries", "UNIMED", "SHC", "MSFOCP", "SELF",
    "OpenSandbox", "sandbox", "ATH", "Reverton"
];

// Mapper helpers shared by the queries below.
function idAsString(doc) { return doc._id.str; }
function wholeDocument(doc) { return doc; }
function idAndUri(doc) { return {_id: doc._id, uri: doc.uri}; }

// parent_id / versioned_object_id are matched against the `.str` form of
// the ids, so both id lists are collected as strings.
var org_ids = db.orgs_organization
    .find({mnemonic: {$in: org_mnemonics}}, {_id: 1})
    .map(idAsString);
var collection_ids = db.collection_collection
    .find({parent_id: {$in: org_ids}}, {_id: 1})
    .map(idAsString);

// Full documents: collections of the selected organisations and their
// non-HEAD versions.
var selected_collections = db.collection_collection
    .find({parent_id: {$in: org_ids}})
    .map(wholeDocument);
db.export.collections.insertMany(selected_collections);

var selected_versions = db.collection_collectionversion
    .find({versioned_object_id: {$in: collection_ids}, mnemonic: {$ne: 'HEAD'}})
    .map(wholeDocument);
db.export.collectionversions.insertMany(selected_versions);

// _id/uri pairs for every collection and every collection version,
// regardless of organisation.
db.export.collection_ids.insertMany(db.collection_collection.find({}).map(idAndUri));
db.export.collectionversion_ids.insertMany(db.collection_collectionversion.find({}).map(idAndUri));

print(db.export.collections.count() + " matching collection found");
print(db.export.collectionversions.count() + " matching collectionversion found");
2 changes: 2 additions & 0 deletions
2
core/importers/v1_dump/scripts/export_sources_and_versions.bash
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,6 @@ | ||
#!/usr/bin/env bash
# Fill the export.* staging collections, then dump each one to a JSON file
# in the current directory.
mongo "localhost:27017/ocl" ./export_sources_and_versions.js

# One mongoexport per staging collection: export.<name> -> exported_<name>.json
for name in source_ids sources sourceversion_ids sourceversions; do
  mongoexport --db ocl --collection "export.${name}" -o "exported_${name}.json"
done
4 changes: 4 additions & 0 deletions
4
core/importers/v1_dump/scripts/export_sources_and_versions.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,16 @@ | ||
// Rebuilds the export.* staging collections for sources and their versions.
// Each staging collection is dropped and refilled on every run.

db.export.sources.drop();
db.export.source_ids.drop();
db.export.sourceversions.drop();
db.export.sourceversion_ids.drop();

// Mnemonics of the organisations whose sources are exported in full.
var org_mnemonics = [
    "EthiopiaNHDD", "MSF-OCB", "MOH-DM", "IAD", "integrated-impact", "SSAS", "DSME-Test", "GFPVAN", "im",
    "Kuunika", "DSME", "DSME-CDD", "MOH", "mTOMADY", "IRDO", "ibwighane", "mw-terminology-service",
    "mw-product-master", "ICI", "mw-terminology-service-development", "mw-product-master-ocl-instance",
    "mw-product-master-ocl", "malawi-diseases-diagnosis", "TestOrg", "DWB", "CMDF", "MUDHC", "MSF", "MU",
    "MUDH", "nproto", "MSFTW", "TWABC", "kuunika-registries", "UNIMED", "SHC", "MSFOCP", "SELF",
    "OpenSandbox", "sandbox", "ATH", "Reverton"
];

// Mapper helpers shared by the queries below.
function idAsString(doc) { return doc._id.str; }
function wholeDocument(doc) { return doc; }
function idAndUri(doc) { return {_id: doc._id, uri: doc.uri}; }

// parent_id / versioned_object_id are matched against the `.str` form of
// the ids, so both id lists are collected as strings.
var org_ids = db.orgs_organization
    .find({mnemonic: {$in: org_mnemonics}}, {_id: 1})
    .map(idAsString);
var source_ids = db.sources_source
    .find({parent_id: {$in: org_ids}}, {_id: 1})
    .map(idAsString);

// Full documents: sources of the selected organisations and their
// non-HEAD versions.
var selected_sources = db.sources_source
    .find({parent_id: {$in: org_ids}})
    .map(wholeDocument);
db.export.sources.insertMany(selected_sources);

var selected_versions = db.sources_sourceversion
    .find({versioned_object_id: {$in: source_ids}, mnemonic: {$ne: 'HEAD'}})
    .map(wholeDocument);
db.export.sourceversions.insertMany(selected_versions);

// _id/uri pairs for every source, and for every non-HEAD source version,
// regardless of organisation.
db.export.source_ids.insertMany(db.sources_source.find({}).map(idAndUri));
db.export.sourceversion_ids.insertMany(
    db.sources_sourceversion.find({mnemonic: {$ne: 'HEAD'}}).map(idAndUri)
);

print(db.export.sources.count() + " matching source found");
print(db.export.sourceversions.count() + " matching sourceversion found");