In [7]:
help(merge_instances)

Help on function merge_instances in module utils.merge_model_objects:

merge_instances(primary_object, *alias_objects, disable_signals=True)
    Merge several model instances into one, the `primary_object`.
    Use this function to merge model objects and migrate all of the related
    fields from the alias objects the primary object.
    Usage:
        from django.contrib.auth.models import User
        primary_user = User.objects.get(email='good@example.com')
        duplicate_user = User.objects.get(email='good+duplicate@example.com')
        merge(primary_user, duplicate_user)
    Based on: https://djangosnippets.org/snippets/382/
    Based on https://djangosnippets.org/snippets/2283/



# Run merge operations

In [15]:
from django.db import models
from utils.merge_model_objects import merge_instances
import logging
from fuzzywuzzy import fuzz

logger = logging.getLogger('jup')
logger.setLevel(logging.INFO)

def sort_images(images):
    def key_func(im):
        return -im.usage, -len(im.exif_data), im.created
    return sorted(
        images.annotate(usage=models.Count('storyimage')),
        key=key_func,
    )

def merge_duplicates(qs, attrs=('id',), sort_func=None):
    for item in qs:
        kwargs = {attr: getattr(item, attr) for attr in attrs} 
        clones = qs.filter(**kwargs)
        if len(clones) > 1:
            logger.info(f'{item} merge with {len(clones) -1} clones')
            if sort_func:
                clones = sort_func(clones)
            merge_instances(*clones)
    logger.info(f'{qs.model.__qualname__} count: {qs.count()} -> {qs.all().count()}')
            
def merge_bylines():
    attrs = ['story', 'contributor', 'credit']
    qs = Byline.objects.all()
    merge_duplicates(qs, attrs)
    
def merge_images_by_md5():
    attrs = ['_md5']
    qs = ImageFile.objects.all()
    merge_duplicates(qs, attrs, sort_func=sort_images)
    
def merge_images_by_field(field='imagehash'):
    qs = ImageFile.objects.all()
    for item in qs: 
        clones = item.similar(field) | qs.filter(pk=item.pk) 
        if len(clones) > 1:
            logger.info(f'{item} merge with {len(clones) -1} clones')
            merge_instances(*sort_images(clones))
            
    logger.info(f'{qs.model.__qualname__} count: {qs.count()} -> {qs.all().count()}')
    
def _clone(*items):
    for item in items: 
        item.pk = None
        item.save()

def test_merge():
    _clone(*Byline.objects.order_by('?')[:3])
    merge_bylines()
    
    _clone(*ImageFile.objects.order_by('?')[:3])
    merge_images_by_field('md5')
    
    _clone(*ImageFile.objects.order_by('?')[:3])
    merge_images_by_field('imagehash')



In [18]:
def merge_contributors(cutoff=85):
    qs = Contributor.objects.all()
    for item in qs: 
        clones = qs.filter(email=item.email) 
        clones |= qs.filter(display_name__trigram_similar=item.display_name)
        if len(clones) > 1:
            clones = [c for c in clones if fuzz.ratio(c.display_name, item.display_name) > cutoff]
        if len(clones) > 1:
            logger.info(f'{item} merge with {len(clones) -1} clones')
            for clone in clones:
                name = clone.display_name
                print(
                    clone.email,
                    clone.display_name, 
                    fuzz.ratio(item.display_name, name)
                )
            #merge_instances(*sort_images(clones))
            
    logger.info(f'{qs.model.__qualname__} count: {qs.count()} -> {qs.all().count()}')

merge_contributors()

19:55:53 2017-10-01 [ INFO]          jup <ipython-input-18-3af575ee4e46>:9    (merge_contributors)
	Morten Oftedal Schwenke merge with 1 clones

19:55:53 2017-10-01 [ INFO]          jup <ipython-input-18-3af575ee4e46>:9    (merge_contributors)
	Morten Oftedal Schwencke merge with 1 clones

19:55:53 2017-10-01 [ INFO]          jup <ipython-input-18-3af575ee4e46>:19   (merge_contributors)
	Contributor count: 68 -> 68



morten.oftedal.schwenke@example.com Morten Oftedal Schwenke 100
morten.schwencke@gmail.com Morten Oftedal Schwencke 98
morten.oftedal.schwenke@example.com Morten Oftedal Schwenke 98
morten.schwencke@gmail.com Morten Oftedal Schwencke 100


In [9]:
test_merge()

19:39:51 2017-10-01 [ INFO]          jup <ipython-input-8-b383b23c35b0>:21   (merge_duplicates)
	@bl: Tekst: Olav Riise, På vegne av Gjestelista merge with 1 clones

19:39:51 2017-10-01 [ INFO]          jup <ipython-input-8-b383b23c35b0>:21   (merge_duplicates)
	@bl: Av: Marcus Pettersen Irgens, finanspolitisk talsperson i Liberal Liste merge with 1 clones

19:39:51 2017-10-01 [ INFO]          jup <ipython-input-8-b383b23c35b0>:21   (merge_duplicates)
	@bl: Tekst: Julie Brundtland merge with 1 clones

19:39:52 2017-10-01 [ INFO]          jup <ipython-input-8-b383b23c35b0>:25   (merge_duplicates)
	Byline count: 87 -> 84

19:39:52 2017-10-01 [ INFO]          jup <ipython-input-8-b383b23c35b0>:42   (merge_images_by_field)
	oya2017-120817-alfSimensen-5-hROumwI.jpg merge with 1 clones

19:39:52 2017-10-01 [ INFO]          jup <ipython-input-8-b383b23c35b0>:42   (merge_images_by_field)
	16-KUL-sermoni-HKH-02-jts2woT.jpg merge with 1 clones

19:39:52 2017-10-01 [ INFO]          jup <ipython-i