Skip to content

Commit

Permalink
OpenConceptLab/ocl_issues#412 | script to import v1 source/collection…
Browse files Browse the repository at this point in the history
… mongo ids and mapping versions
  • Loading branch information
snyaggarwal committed Mar 15, 2021
1 parent fa16a47 commit ccfcfc2
Show file tree
Hide file tree
Showing 6 changed files with 240 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ def handle(self, *args, **options):
_id = data.pop('_id')
versioned_object_id = data.pop('versioned_object_id')
versioned_object = Concept.objects.filter(internal_reference_id=versioned_object_id).first()
if not versioned_object:
self.failed.append({**original_data, 'errors': ['versioned_object not found']})
continue
mnemonic = versioned_object.mnemonic
descriptions_data = data.pop('descriptions', [])
names_data = data.pop('names', [])
Expand Down Expand Up @@ -87,6 +90,7 @@ def handle(self, *args, **options):
self.existed.append(original_data)
else:
try:
data.pop('parent_id', None)
source = versioned_object.parent
names = self.get_locales(names_data)
descriptions = self.get_locales(descriptions_data)
Expand All @@ -101,6 +105,11 @@ def handle(self, *args, **options):
concept.sources.set(source_versions)
concept.update_mappings()
concept.save()

# other_versions = versioned_object.versions.exclude(id=concept.id)
# if other_versions.exists():
# other_versions.update(is_latest_version=False)

self.created.append(original_data)
except Exception as ex:
self.log("Failed: {}".format(data['uri']))
Expand Down
145 changes: 145 additions & 0 deletions core/importers/management/commands/import_v1_mapping_versions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
import json
import time
from pprint import pprint

from django.core.management import BaseCommand
from pydash import get

from core.concepts.models import Concept
from core.mappings.models import Mapping
from core.sources.models import Source
from core.users.models import UserProfile


class Command(BaseCommand):
    """Import v1 mapping versions from a JSON-lines dump file.

    Each line of the dump is parsed as one JSON document. For every document
    the command looks up the already-imported versioned ``Mapping`` by its
    ``internal_reference_id`` and, unless a mapping with the same ``uri``
    already exists, creates a new ``Mapping`` row attached to the versioned
    object's parent source. Outcomes are collected in ``created``,
    ``existed`` and ``failed`` and summarised at the end of the run.
    """
    help = 'import v1 mapping versions'

    # NOTE: these are class-level attributes; the lists and the dict are
    # mutated through ``self`` and are therefore shared by every instance.
    total = 0
    processed = 0
    created = []
    existed = []
    failed = []
    start_time = None
    elapsed_seconds = 0
    users = dict()  # username -> UserProfile cache, filled lazily

    @staticmethod
    def log(msg):
        """Print *msg* wrapped in a row of asterisks."""
        print("*******{}*******".format(msg))

    def _resolve_user(self, username):
        """Return the UserProfile for *username*, or None when there is none.

        Found users are memoised in ``self.users`` under their own username so
        each username is queried at most once after it has been found. Misses
        are not cached.
        """
        if not username:
            return None
        if username not in self.users:
            user = UserProfile.objects.filter(username=username).first()
            if user is None:
                return None
            self.users[username] = user
        return self.users[username]

    def handle(self, *args, **options):
        """Run the import and print a summary of created/existed/failed rows."""
        self.start_time = time.time()
        FILE_PATH = '/code/core/importers/v1_dump/data/exported_mappingversions.json'
        # Use a context manager so the dump file is closed once it has been read.
        with open(FILE_PATH, 'r') as dump_file:
            lines = dump_file.readlines()

        self.log('STARTING MAPPING VERSIONS IMPORT')
        self.total = len(lines)
        self.log('TOTAL: {}'.format(self.total))

        for line in lines:
            data = json.loads(line)
            original_data = data.copy()  # untouched copy used for reporting
            self.processed += 1
            created_at = data.pop('created_at')
            updated_at = data.pop('updated_at')
            created_by = data.get('created_by', None) or data.pop('version_created_by', None) or 'ocladmin'
            updated_by = data.get('updated_by') or created_by
            source_version_ids = data.pop('source_version_ids', None) or None

            # Drop the dump fields that are not passed on to the Mapping constructor.
            for attr in [
                    'root_version_id', 'parent_version_id', 'previous_version_id', 'version_created_by',
                    'versioned_object_type_id'
            ]:
                data.pop(attr, None)

            data['comment'] = data.pop('update_comment', None)
            _id = data.pop('_id')
            versioned_object_id = data.pop('versioned_object_id')
            versioned_object = Mapping.objects.filter(internal_reference_id=versioned_object_id).first()
            if not versioned_object:
                self.failed.append({**original_data, 'errors': ['versioned_object not found']})
                continue
            mnemonic = versioned_object.mnemonic
            # The dump's own mnemonic becomes the version label; the mapping
            # mnemonic itself is taken from the versioned object.
            data['version'] = data.pop('mnemonic')
            data['internal_reference_id'] = get(_id, '$oid')
            data['created_at'] = get(created_at, '$date')
            data['updated_at'] = get(updated_at, '$date')
            from_concept_id = get(data.pop('from_concept_id'), '$oid')
            to_concept_id = get(data.pop('to_concept_id'), '$oid')
            to_source_id = get(data.pop('to_source_id'), '$oid')
            from_concept = Concept.objects.filter(internal_reference_id=from_concept_id).first()
            to_concept = None
            to_source = None
            if to_concept_id:
                to_concept = Concept.objects.filter(internal_reference_id=to_concept_id).first()
            if to_source_id:
                to_source = Source.objects.filter(internal_reference_id=to_source_id).first()

            # Replace usernames with user objects only when the user exists;
            # otherwise whatever the dump carried is left in place.
            creator = self._resolve_user(created_by)
            if creator is not None:
                data['created_by'] = creator

            # The updater is cached under its own username (previously it was
            # stored under ``created_by``, corrupting the creator cache).
            updater = self._resolve_user(updated_by)
            if updater is not None:
                data['updated_by'] = updater

            self.log("Processing: {} ({}/{})".format(mnemonic, self.processed, self.total))
            if Mapping.objects.filter(uri=data['uri']).exists():
                self.existed.append(original_data)
            else:
                try:
                    source = versioned_object.parent
                    data.pop('parent_id', None)
                    mapping = Mapping(
                        **data, mnemonic=mnemonic, parent=source, versioned_object_id=versioned_object.id,
                    )
                    # Prefer values resolved from the dump; fall back to the versioned object.
                    mapping.to_concept_id = get(to_concept, 'id') or versioned_object.to_concept_id
                    mapping.to_concept_code = data.get('to_concept_code') or versioned_object.to_concept_code
                    mapping.to_concept_name = data.get('to_concept_name') or versioned_object.to_concept_name
                    mapping.to_source_id = get(to_source, 'id') or get(
                        to_concept, 'parent_id') or versioned_object.to_source_id
                    mapping.from_concept_id = get(from_concept, 'id') or versioned_object.from_concept_id
                    mapping.from_concept_code = get(from_concept, 'mnemonic') or versioned_object.from_concept_code
                    mapping.from_source_id = get(from_concept, 'parent_id') or versioned_object.from_source_id
                    mapping.save()

                    source_versions = [source]
                    if source_version_ids:
                        source_versions += list(Source.objects.filter(internal_reference_id__in=source_version_ids))
                    mapping.sources.set(source_versions)
                    mapping.save()

                    # other_versions = versioned_object.versions.exclude(id=mapping.id)
                    # if other_versions.exists():
                    #     other_versions.update(is_latest_version=False)

                    self.created.append(original_data)
                except Exception as ex:
                    # Record the failure and continue with the next line.
                    self.log("Failed: {}".format(data['uri']))
                    self.log(ex.args)
                    self.failed.append({**original_data, 'errors': ex.args})

        self.elapsed_seconds = time.time() - self.start_time

        self.log(
            "Result (in {} secs) : Total: {} | Created: {} | Existed: {} | Failed: {}".format(
                self.elapsed_seconds, self.total, len(self.created), len(self.existed), len(self.failed)
            )
        )
        if self.existed:
            self.log("Existed")
            pprint(self.existed)

        if self.failed:
            self.log("Failed")
            pprint(self.failed)
76 changes: 76 additions & 0 deletions core/importers/management/commands/import_v1_source_ids.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import json
import time
from pprint import pprint

from django.core.management import BaseCommand
from pydash import get

from core.sources.models import Source


class Command(BaseCommand):
    """Copy v1 ids onto existing sources and source versions, matched by uri.

    Two JSON-lines dump files are read (source ids, then source version ids).
    For each document the ``Source`` rows whose ``uri`` equals the document's
    ``uri`` get their ``internal_reference_id`` set to the document's
    ``_id.$oid``. Outcomes are collected in ``created`` (rows updated),
    ``not_found`` and ``failed`` and summarised at the end of the run.
    """
    help = 'import v1 source/version ids'

    # NOTE: these are class-level attributes; the lists are mutated through
    # ``self`` and are therefore shared by every instance.
    total = 0
    processed = 0
    created = []
    existed = []  # never appended to in this command; kept for the report below
    failed = []
    not_found = []
    start_time = None
    elapsed_seconds = 0

    @staticmethod
    def log(msg):
        """Print *msg* wrapped in a row of asterisks."""
        print("*******{}*******".format(msg))

    def handle(self, *args, **options):
        """Run the id import and print a summary plus the problem rows."""
        self.start_time = time.time()
        file_paths = [
            '/code/core/importers/v1_dump/data/exported_source_ids.json',
            '/code/core/importers/v1_dump/data/exported_sourceversion_ids.json',
        ]
        lines = []
        for file_path in file_paths:
            # Use a context manager so each dump file is closed after reading.
            with open(file_path, 'r') as dump_file:
                lines += dump_file.readlines()

        self.log('STARTING SOURCE/VERSION IDS IMPORT')
        self.total = len(lines)
        self.log('TOTAL: {}'.format(self.total))

        for line in lines:
            data = json.loads(line)
            original_data = data.copy()  # untouched copy used for reporting
            try:
                _id = get(data.pop('_id'), '$oid')
                uri = data.pop('uri')
                self.processed += 1
                # update() returns the number of matched rows; 0 means no such uri.
                updated = Source.objects.filter(uri=uri).update(internal_reference_id=_id)
                if updated:
                    self.created.append(original_data)
                    self.log("Updated: {} ({}/{})".format(uri, self.processed, self.total))
                else:
                    self.not_found.append(original_data)
                    self.log("Not Found: {} ({}/{})".format(uri, self.processed, self.total))

            except Exception as ex:
                # Record the failure and continue with the next line.
                self.log("Failed: ")
                self.log(ex.args)
                self.failed.append({**original_data, 'errors': ex.args})

        self.elapsed_seconds = time.time() - self.start_time

        self.log(
            "Result (in {} secs) : Total: {} | Created: {} | NotFound: {} | Failed: {}".format(
                self.elapsed_seconds, self.total, len(self.created), len(self.not_found), len(self.failed)
            )
        )

        if self.existed:
            self.log("Existed")
            pprint(self.existed)

        if self.failed:
            self.log("Failed")
            pprint(self.failed)

        if self.not_found:
            self.log("Not Found")
            pprint(self.not_found)
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
// Mongo shell script: stage collections, their versions and id/uri lookup
// tables in export.* collections.

// Start from empty staging collections.
db.export.collections.drop();
db.export.collectionversions.drop();
db.export.collection_ids.drop();
db.export.collectionversion_ids.drop();

// Mnemonics of the organizations whose collections are exported in full.
var orgMnemonics = [
    "EthiopiaNHDD", "MSF-OCB", "MOH-DM", "IAD", "integrated-impact", "SSAS",
    "DSME-Test", "GFPVAN", "im", "Kuunika", "DSME", "DSME-CDD", "MOH",
    "mTOMADY", "IRDO", "ibwighane", "mw-terminology-service",
    "mw-product-master", "ICI", "mw-terminology-service-development",
    "mw-product-master-ocl-instance", "mw-product-master-ocl",
    "malawi-diseases-diagnosis", "TestOrg", "DWB", "CMDF", "MUDHC", "MSF",
    "MU", "MUDH", "nproto", "MSFTW", "TWABC", "kuunika-registries", "UNIMED",
    "SHC", "MSFOCP", "SELF", "OpenSandbox", "sandbox", "ATH", "Reverton"
];

// Small projections reused by the queries below.
var idAsString = doc => doc._id.str;
var sameDoc = doc => doc;
var idAndUri = doc => ({_id: doc._id, uri: doc.uri});

// String ids of the selected organizations, then of the collections they own.
org_ids = db.orgs_organization
    .find({mnemonic: {$in: orgMnemonics}}, {_id: 1})
    .map(idAsString);
collection_ids = db.collection_collection
    .find({parent_id: {$in: org_ids}}, {_id: 1})
    .map(idAsString);

// Full documents: collections of the selected orgs and their non-HEAD versions.
var ownedCollections = db.collection_collection
    .find({parent_id: {$in: org_ids}})
    .map(sameDoc);
db.export.collections.insertMany(ownedCollections);

var ownedCollectionVersions = db.collection_collectionversion
    .find({versioned_object_id: {$in: collection_ids}, mnemonic: {$ne: 'HEAD'}})
    .map(sameDoc);
db.export.collectionversions.insertMany(ownedCollectionVersions);

// _id/uri pairs for every collection and every collection version.
db.export.collection_ids.insertMany(
    db.collection_collection.find({}).map(idAndUri)
);
db.export.collectionversion_ids.insertMany(
    db.collection_collectionversion.find({}).map(idAndUri)
);

print(db.export.collections.count() + " matching collection found");
print(db.export.collectionversions.count() + " matching collectionversion found");
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#!/usr/bin/env bash
# Build the export.* staging collections, then dump each one to its own
# JSON file in the current directory.
mongo "localhost:27017/ocl" ./export_sources_and_versions.js

# One mongoexport per staging collection, in the same order as before.
for name in source_ids sources sourceversion_ids sourceversions; do
  mongoexport --db ocl --collection "export.${name}" -o "exported_${name}.json"
done
4 changes: 4 additions & 0 deletions core/importers/v1_dump/scripts/export_sources_and_versions.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
// Mongo shell script: stage sources, their versions and id/uri lookup
// tables in export.* collections.

// Start from empty staging collections.
db.export.sources.drop();
db.export.source_ids.drop();
db.export.sourceversions.drop();
db.export.sourceversion_ids.drop();

// Mnemonics of the organizations whose sources are exported in full.
var orgMnemonics = [
    "EthiopiaNHDD", "MSF-OCB", "MOH-DM", "IAD", "integrated-impact", "SSAS",
    "DSME-Test", "GFPVAN", "im", "Kuunika", "DSME", "DSME-CDD", "MOH",
    "mTOMADY", "IRDO", "ibwighane", "mw-terminology-service",
    "mw-product-master", "ICI", "mw-terminology-service-development",
    "mw-product-master-ocl-instance", "mw-product-master-ocl",
    "malawi-diseases-diagnosis", "TestOrg", "DWB", "CMDF", "MUDHC", "MSF",
    "MU", "MUDH", "nproto", "MSFTW", "TWABC", "kuunika-registries", "UNIMED",
    "SHC", "MSFOCP", "SELF", "OpenSandbox", "sandbox", "ATH", "Reverton"
];

// Small projections reused by the queries below.
var idAsString = doc => doc._id.str;
var sameDoc = doc => doc;
var idAndUri = doc => ({_id: doc._id, uri: doc.uri});

// String ids of the selected organizations, then of the sources they own.
org_ids = db.orgs_organization
    .find({mnemonic: {$in: orgMnemonics}}, {_id: 1})
    .map(idAsString);
source_ids = db.sources_source
    .find({parent_id: {$in: org_ids}}, {_id: 1})
    .map(idAsString);

// Full documents: sources of the selected orgs and their non-HEAD versions.
var ownedSources = db.sources_source
    .find({parent_id: {$in: org_ids}})
    .map(sameDoc);
db.export.sources.insertMany(ownedSources);

var ownedSourceVersions = db.sources_sourceversion
    .find({versioned_object_id: {$in: source_ids}, mnemonic: {$ne: 'HEAD'}})
    .map(sameDoc);
db.export.sourceversions.insertMany(ownedSourceVersions);

// _id/uri pairs for every source and for every non-HEAD source version.
db.export.source_ids.insertMany(
    db.sources_source.find({}).map(idAndUri)
);
db.export.sourceversion_ids.insertMany(
    db.sources_sourceversion.find({mnemonic: {$ne: 'HEAD'}}).map(idAndUri)
);

print(db.export.sources.count() + " matching source found");
print(db.export.sourceversions.count() + " matching sourceversion found");

0 comments on commit ccfcfc2

Please sign in to comment.