In [2]:
# hide warnings
import warnings
warnings.filterwarnings('ignore')

# setup django
import os
import sys
sys.path.append('../../')
os.environ["DJANGO_SETTINGS_MODULE"] = "settings"
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings")
import django
django.setup()

from django.conf import settings
from django.db.models import *

In [40]:
from apps.document.models import DocumentField
from apps.document.models import DocumentFieldValue
from apps.users.models import User

class FieldMigration:
    def __init__(self,
                 old_field_code: str,
                 new_field_code: str, 
                 overwrite_values: bool = False):
        self.old_field_code = old_field_code
        self.new_field_code = new_field_code
        self.overwrite_values = overwrite_values
        self.new_field_by_doc = {}
        self.count_already_set = 0
        self.count_updated = 0
        
    def migrate(self):
        self.old_field = self.find_field_by_code(self.old_field_code)
        self.new_field = self.find_field_by_code(self.new_field_code)
        self.doc_type_code = self.new_field.document_type.code
        
        # get new field values
        new_values = DocumentFieldValue.objects.filter(field__pk=self.new_field.pk)
        for new_val in new_values:
            self.new_field_by_doc[new_val.document.pk] = new_val
        
        # iterate through field values
        old_values = DocumentFieldValue.objects.filter(field__pk=self.old_field.pk)
        print(f'Processing {old_values.count()} fields')
        for old_val in old_values:
            self.update_new_field_value(old_val)  
        print(f'Updated {self.count_updated} values, {len(self.new_field_by_doc)} were already set')
        if self.count_updated:
            print('Reindexing started')
            self.reindex()
            print('Reindexing completed.')
        
    def reindex(self):
        from apps.rawdb.tasks import manual_reindex
        from apps.task.tasks import call_task_func
        
        user = User.objects.all()[0]
        
        call_task_func(manual_reindex,
                      (self.doc_type_code, False),
                      user.pk)
        
    def update_new_field_value(self, old_val: DocumentFieldValue):
        new_val = self.new_field_by_doc.get(old_val.document.pk)
        if new_val:
            self.count_already_set += 1
            if not self.overwrite_values:
                return
        else:
            new_val = DocumentFieldValue()
            
            new_val.value = self.cast_value(old_val.value)
            new_val.document_id = old_val.document.id
            new_val.field_id = self.new_field.pk
            new_val.location_start = old_val.location_start
            new_val.location_end = old_val.location_end
            new_val.modified_by_id = old_val.modified_by_id
            new_val.created_by_id = old_val.created_by_id
            new_val.created_date = old_val.created_date
            new_val.location_text = old_val.location_text
            new_val.modified_date = old_val.modified_date
            new_val.extraction_hint = old_val.extraction_hint
            new_val.text_unit_id = old_val.text_unit_id
            new_val.removed_by_user = old_val.removed_by_user
            new_val.save()
            self.count_updated += 1
            
    def cast_value(self, old_val):
        if self.old_field.type == self.new_field.type:
            return old_val
        if self.old_field.type == 'int' and self.new_field.type == 'duration':
            return old_val
        raise Exception(f'Conversion from {self.old_field.type} to {self.new_field.type} is not implemented')
            
    def find_field_by_code(self, code: str) -> DocumentField:
        code_parts = code.split('.')
        doc_type = ''
        if len(code_parts) == 2:
            doc_type = code_parts[0]
            code = code_parts[1]
        fields = list(DocumentField.objects.filter(code=code))
        if not fields:
            raise Exception(f'Document field with code "{code}" was not found')
        if doc_type:
            fields = [f for f in fields if f.document_type.code == doc_type]
        if len(fields) > 1:
            msg = ', '.join([f'{f.document_type.code}.{f.code}' for f in fields])
            raise Exception(f'Found multiple fields with code "{code}":\n' + msg)
        return fields[0]

In [41]:
# I can omit "devil_doc" here because "devil_int" is a unique field code
# for all the document types
old_field = 'devil_doc.devil_int'
new_field = 'devil_duration'

# I don't want overwrite old values for "devil_duration" (values that were already set)
migration = FieldMigration(old_field, new_field, overwrite_values=False)

# Old field was "int", new field has the type "duration"
# an "int" can be easily converted to a "duration"

# If there should be some conversion logic, e.g., "String" -> "duration",
# I should have been override "def cast_value(cls, old_val)" method in derived class
# or right here, in "FieldMigration.cast_value" method body

migration.migrate()

Processing 2 fields
Updated 0 values, 3 were already set
Reindexing started


  from numpy.core.umath_tests import inner1d


Reindexing completed.
