In [None]:
from query.models import FaceIdentity, FaceCharacterActor, Character, Actor, CharacterActor, Video, Labeler
from tqdm import tqdm

In [None]:
# Get all the movies that we have FaceIdentity mappings for.
# Only the joined video id column is fetched, de-duplicated by the database,
# instead of instantiating every FaceIdentity row just to read one attribute.
# order_by() clears any default model ordering so it cannot interfere with
# DISTINCT; set() still guarantees uniqueness on the Python side.
video_ids = sorted(set(
    FaceIdentity.objects
    .order_by()
    .values_list('face__frame__video_id', flat=True)
    .distinct()
))

print("The following videos have FaceIdentity mappings:")
videos = Video.objects.filter(id__in=video_ids).order_by('id')
for video in videos:
    print(video.name, video.year)

In [None]:
# Build, per video, the set of character names and the set of actor names seen
# in its FaceIdentity labels. A label counts as a character when its labeler's
# name contains 'role'; otherwise it counts as an actor.
# NOTE(review): F is used here but is not imported in the visible import cell;
# presumably the kernel environment provides it — confirm.
video_actor_characters = {}
for video in tqdm(videos):
    character_names = set()
    actor_names = set()
    labeled_faces = FaceIdentity.objects.filter(face__frame__video=video).annotate(
        name=F('identity__name'),
        labeler_name=F('labeler__name')
    ).all()
    for labeled_face in labeled_faces:
        bucket = character_names if 'role' in labeled_face.labeler_name else actor_names
        bucket.add(labeled_face.name)
    video_actor_characters[video.id] = {
        'characters': character_names,
        'actors': actor_names,
    }

In [None]:
# Dump the per-video character/actor name sets for inspection.
print(video_actor_characters)

In [None]:
# For every video, keep only the labeled character/actor names that have no
# row with that name in the CharacterActor table for the same video.
difficult_identities = {}
for video_id in tqdm(video_actor_characters):
    labeled_names = video_actor_characters[video_id]
    known_character_names = {
        row.name
        for row in CharacterActor.objects.filter(video_id=video_id).annotate(name=F('character__name')).all()
    }
    known_actor_names = {
        row.name
        for row in CharacterActor.objects.filter(video_id=video_id).annotate(name=F('actor__name')).all()
    }
    difficult_identities[video_id] = {
        'characters': labeled_names['characters'].difference(known_character_names),
        'actors': labeled_names['actors'].difference(known_actor_names),
    }

In [None]:
# Display the labeled names, per video, that have no matching CharacterActor row.
difficult_identities

In [None]:
# Map each face id to its video plus whatever actor / character labels it has.
# Labels from a labeler whose name contains 'role' are stored under
# 'character' / 'character_labeler'; all others under 'actor' / 'actor_labeler'.
face_ids_to_actors_and_characters = {}
for video in tqdm(videos):
    labeled_faces = FaceIdentity.objects.filter(face__frame__video=video).annotate(
        name=F('identity__name'),
        labeler_name=F('labeler__name')
    ).all()
    for labeled_face in labeled_faces:
        entry = face_ids_to_actors_and_characters.setdefault(
            labeled_face.face_id, {'video_id': video.id}
        )
        kind = 'character' if 'role' in labeled_face.labeler_name else 'actor'
        entry[kind] = labeled_face.name
        entry[kind + '_labeler'] = labeled_face.labeler_id

In [None]:
# Count the faces that are missing an actor label and, separately, the faces
# that are missing a character label.
num_without_actor_annotations = sum(
    1
    for face_mapping in face_ids_to_actors_and_characters.values()
    if 'actor' not in face_mapping
)
num_without_character_annotations = sum(
    1
    for face_mapping in face_ids_to_actors_and_characters.values()
    if 'character' not in face_mapping
)

In [None]:
# Faces lacking an actor label, then faces lacking a character label.
print(num_without_actor_annotations, num_without_character_annotations)

In [None]:
# CharacterActor rows for the videos of interest, each carrying its actor and
# character names as plain attributes, ordered by video.
characteractors_for_videos = CharacterActor.objects.filter(video_id__in=video_ids)
relevantcharacteractors = (
    characteractors_for_videos
    .annotate(
        actor_name=F('actor__name'),
        character_name=F('character__name')
    )
    .order_by('video_id')
    .all()
)

In [None]:
# Attach to every face the ids of the CharacterActor rows (same video) whose
# actor name or character name matches one of the face's labels.

# Index the CharacterActor rows by video once, so each face only scans the rows
# of its own video instead of the whole list. Relative order is preserved.
characteractors_by_video = {}
for ca in relevantcharacteractors:
    characteractors_by_video.setdefault(ca.video_id, []).append(ca)

for face_id in tqdm(face_ids_to_actors_and_characters):
    mapping = face_ids_to_actors_and_characters[face_id]
    # A face may carry only an actor label or only a character label; a
    # missing label must simply not match instead of raising KeyError.
    has_actor = 'actor' in mapping
    has_character = 'character' in mapping
    mapping['candidates'] = [
        ca.id
        for ca in characteractors_by_video.get(mapping['video_id'], [])
        if (has_actor and ca.actor_name == mapping['actor'])
        or (has_character and ca.character_name == mapping['character'])
    ]

In [None]:
# Histogram of how many candidate CharacterActor rows each face has:
# slots 0, 1 and 2 hold exact counts; slot 3 collects everything above 2.
candidate_histogram = [0, 0, 0, 0]
for face_id in tqdm(face_ids_to_actors_and_characters):
    num_candidates = len(face_ids_to_actors_and_characters[face_id]['candidates'])
    candidate_histogram[min(num_candidates, 3)] += 1
num_with_0, num_with_1, num_with_2, num_gt_2 = candidate_histogram

In [None]:
# Number of faces with 0, 1, 2, and more than 2 candidate CharacterActor rows.
print(num_with_0, num_with_1, num_with_2, num_gt_2)

In [None]:
# Print the mapping of every face for which no CharacterActor candidate was found.
for face_id, face_mapping in face_ids_to_actors_and_characters.items():
    if not face_mapping['candidates']:
        print(face_mapping)

In [None]:
# Collect the ids of all labelers that produced a character label.
# Faces that only have an actor label carry no 'character_labeler' key, so
# they are skipped rather than raising KeyError.
labeler_ids = {
    face_mapping['character_labeler']
    for face_mapping in face_ids_to_actors_and_characters.values()
    if 'character_labeler' in face_mapping
}

In [None]:
# For each character labeler, derive a new labeler name by dropping the second
# '-'-separated component of the old name, fetch or create a Labeler with that
# name, and remember old id -> new id.
new_labeler_ids = {}
for labeler_id in sorted(labeler_ids):
    name_parts = Labeler.objects.get(pk=labeler_id).name.split('-')
    new_name = '-'.join(name_parts[:1] + name_parts[2:])
    new_labeler, _ = Labeler.objects.get_or_create(name=new_name)
    new_labeler_ids[labeler_id] = new_labeler.id

In [None]:
# Display the old-labeler-id -> new-labeler-id mapping.
new_labeler_ids

In [None]:
# Build (unsaved) FaceCharacterActor rows for every face that has exactly one
# candidate CharacterActor, attributed to the new labeler derived from the
# face's character labeler.
new_face_characteractor_mappings = []
num_skipped_without_labeler = 0
for face_id in tqdm(face_ids_to_actors_and_characters):
    face_mapping = face_ids_to_actors_and_characters[face_id]
    candidates = face_mapping['candidates']
    if len(candidates) != 1:
        continue
    # A face can be unambiguous through its actor label alone and therefore
    # have no 'character_labeler'; there is then no new labeler to attribute
    # the row to, so skip it (and count it) instead of raising KeyError.
    new_labeler_id = new_labeler_ids.get(face_mapping.get('character_labeler'))
    if new_labeler_id is None:
        num_skipped_without_labeler += 1
        continue
    new_face_characteractor_mappings.append(
        FaceCharacterActor(
            face_id=face_id,
            characteractor_id=candidates[0],
            labeler_id=new_labeler_id
        )
    )

if num_skipped_without_labeler:
    print("Skipped faces without a usable character labeler:", num_skipped_without_labeler)

In [None]:
# Insert all the FaceCharacterActor rows collected in new_face_characteractor_mappings.
# NOTE(review): re-running this cell calls bulk_create again with the same rows;
# whether that duplicates rows or fails depends on the table's constraints — confirm before re-running.
FaceCharacterActor.objects.bulk_create(new_face_characteractor_mappings)