# Conversations query

In [None]:
from rekall.interval_list import IntervalList, Interval
from rekall.temporal_predicates import overlaps

## Using Identity Labels

In [None]:
def conversationsq(video_name):
    """Find conversations in a video from per-face character identity labels.

    Steps, as implemented below:

    1. Load faces that carry a character label for videos whose name contains
       ``video_name`` and merge them onto the cinematic shots they overlap.
    2. Collect the distinct character names seen in each shot.
    3. Join adjacent shots and emit one interval per ordered pair of
       different characters ``{'A': ..., 'B': ...}`` drawn from the two shots.
    4. Coalesce and merge intervals that share a character pair (in either
       order), counting how many were combined, and keep only intervals whose
       count is at least 2.

    Every interval of the result is printed before returning.

    Args:
        video_name: Substring matched against the video name.

    Returns:
        The interval collection holding the detected conversations.

    NOTE(review): ``F`` is used but not imported in this cell; presumably the
    notebook environment provides Django's ``F`` expression -- confirm.
    """
    from query.models import FaceCharacterActor, Shot, Face
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser, merge_dict_parsers, dict_payload_parser
    from rekall.merge_ops import payload_plus, merge_named_payload, payload_second
    from rekall.payload_predicates import payload_satisfies
    from rekall.spatial_predicates import scene_graph, make_region
    from esper.rekall import intrvllists_to_result_bbox
    from rekall.list_predicates import length_at_most
    from rekall.logical_predicates import and_pred, or_pred, true_pred
    from rekall.temporal_predicates import before, after, overlaps
    from rekall.bbox_predicates import height_at_least
    from esper.rekall import intrvllists_to_result, intrvllists_to_result_with_objects, add_intrvllists_to_result
    from esper.prelude import esper_widget
    from rekall.interval_list import Interval, IntervalList

    # faces are sampled every 12 frames
    ONE_FRAME = 1

    faces_with_character_actor_qs = FaceCharacterActor.objects.annotate(
        min_frame=F('face__frame__number'),
        max_frame=F('face__frame__number'),
        video_id=F('face__frame__video_id'),
        bbox_x1=F('face__bbox_x1'),
        bbox_y1=F('face__bbox_y1'),
        bbox_x2=F('face__bbox_x2'),
        bbox_y2=F('face__bbox_y2'),
        character_name=F('characteractor__character__name')
    ).filter(face__frame__video__name__contains=video_name)

    # Each face becomes a one-element list payload (bbox fields plus
    # 'character'); payload_plus combines the payloads of coalesced faces.
    faces_with_identity = VideoIntervalCollection.from_django_qs(
        faces_with_character_actor_qs,
        with_payload=in_array(merge_dict_parsers([
            bbox_payload_parser(VideoIntervalCollection.django_accessor),
            dict_payload_parser(VideoIntervalCollection.django_accessor, { 'character': 'character_name' }),
        ]))
    ).coalesce(payload_merge_op=payload_plus)

    shots_qs = Shot.objects.filter(cinematic=True)
    shots = VideoIntervalCollection.from_django_qs(shots_qs)

    def payload_unique_characters(payload1, payload2):
        """Fold payload2's character names into payload1[0]['characters'].

        payload1 is updated in place and returned, so the accumulated name
        list travels with the first face dict of the running payload.
        """
        head = payload1[0]
        if 'characters' in head:
            seen = set(head['characters'])
        else:
            seen = {p['character'] for p in payload1}
        seen.update(p['character'] for p in payload2)
        head['characters'] = list(seen)
        return payload1

    shots_with_faces = shots.merge(faces_with_identity,
                                   predicate=overlaps(),
                                   payload_merge_op=payload_second)

    shots_with_faces = shots_with_faces.coalesce(payload_merge_op=payload_unique_characters)

    def characters_in(payload):
        """Return the distinct character names recorded in a shot payload."""
        head = payload[0]
        if 'characters' in head:
            return head['characters']
        # Payloads that were never coalesced have no accumulated name list yet.
        return list({p['character'] for p in payload})

    def cross_product_faces(intrvl1, intrvl2):
        """Emit one interval per pair of different characters across two shots."""
        chrtrs1 = characters_in(intrvl1.get_payload())
        chrtrs2 = characters_in(intrvl2.get_payload())
        # The span covering both shots is the same for every emitted pair.
        start = min(intrvl1.start, intrvl2.start)
        end = max(intrvl1.end, intrvl2.end)
        return [
            Interval(start, end, {'A': a, 'B': b})
            for a in chrtrs1
            for b in chrtrs2
            if a != b
        ]

    def as_pair_list(payload):
        """Normalise a payload to a list of {'A', 'B'} pair dicts.

        Payloads take three shapes in this pipeline: a single pair dict, a
        list of pair dicts (after merge_to_list), or a {'shots', 'chrs'} dict
        (after count_shots).
        """
        if isinstance(payload, dict) and 'chrs' in payload:
            return payload['chrs']
        if isinstance(payload, list):
            return payload
        return [payload]

    def shot_count(payload):
        """Return the merged-shot count stored in a payload, defaulting to 1."""
        if isinstance(payload, dict) and 'shots' in payload:
            return payload['shots']
        return 1

    def faces_equal(payload1, payload2):
        """True if the two payloads share a character pair, in either order."""
        # Both sides are normalised to lists first, so one pairwise scan
        # covers every payload shape.
        for pair1 in as_pair_list(payload1):
            for pair2 in as_pair_list(payload2):
                if pair1['A'] == pair2['A'] and pair1['B'] == pair2['B']:
                    return True
                if pair1['A'] == pair2['B'] and pair1['B'] == pair2['A']:
                    return True
        return False

    def times_equal(intrvl1, intrvl2):
        """True if either interval lies entirely within the other."""
        return (intrvl1.start >= intrvl2.start and intrvl1.end <= intrvl2.end) or (intrvl2.start >= intrvl1.start and intrvl2.end <= intrvl1.end)

    def times_overlap(intrvl1, intrvl2):
        """True if the two intervals overlap or touch."""
        return intrvl1.start <= intrvl2.end and intrvl2.start <= intrvl1.end

    def merge_to_list(payload1, payload2):
        """Concatenate two payloads, wrapping non-list payloads in a list."""
        p1 = payload1 if isinstance(payload1, list) else [payload1]
        p2 = payload2 if isinstance(payload2, list) else [payload2]
        return p1 + p2

    def count_shots(payload1, payload2):
        """Combine two payloads, adding shot counts and concatenating pairs."""
        return {
            'shots': shot_count(payload1) + shot_count(payload2),
            'chrs': as_pair_list(payload1) + as_pair_list(payload2),
        }

    def shots_equal(payload1, payload2):
        """Combine two payloads, keeping the larger of the two shot counts.

        Only referenced by the disabled coalesce step further down.
        """
        return {
            'shots': max(shot_count(payload1), shot_count(payload2)),
            'chrs': as_pair_list(payload1) + as_pair_list(payload2),
        }

    two_shots = shots_with_faces.join(shots_with_faces, predicate=after(max_dist=ONE_FRAME, min_dist=ONE_FRAME),
                                      merge_op=cross_product_faces)

    # First gather all character pairs that cover the same span, then fold
    # together intervals that share a pair while counting them.
    convs = two_shots.coalesce(predicate=times_equal, payload_merge_op=merge_to_list)
    convs = convs.coalesce(predicate=payload_satisfies(faces_equal, arity=2), payload_merge_op=count_shots)

    adjacent_seq = convs.merge(convs, predicate=and_pred(after(max_dist=ONE_FRAME, min_dist=ONE_FRAME), payload_satisfies(faces_equal, arity=2), arity=2), payload_merge_op=count_shots)
    convs = convs.set_union(adjacent_seq)
    # convs = convs.coalesce(predicate=times_equal, payload_merge_op=shots_equal)

    def filter_fn(intvl):
        """Keep intervals that combine at least two shot pairs."""
        return shot_count(intvl.get_payload()) >= 2

    convs = convs.filter(filter_fn)
    convs = convs.coalesce(predicate=times_overlap)

    for video_id in convs.intervals.keys():
        print(video_id)
        intvllist = convs.get_intervallist(video_id)
        for intvl in intvllist.get_intervals():
            print(intvl.payload)
            print(str(intvl.start) + ':' + str(intvl.end))

    return convs

### Validation Numbers

```
Godfather Part III
Precision:  0.7562506843815017
Recall:  0.9028280099350734
Precision Per Item:  0.5555555555555556
Recall Per Item:  1.0

Apollo 13
Precision:  0.9801451458304806
Recall:  0.7144069065322621
Precision Per Item:  1.0
Recall Per Item:  0.9333333333333333

Harry Potter 2
Precision:  0.8393842579146094
Recall:  0.5495863839497955
Precision Per Item:  0.75
Recall Per Item:  0.875

Fight Club
Precision:  0.7107177395618719
Recall:  0.8310226155358899
Precision Per Item:  0.6666666666666666
Recall Per Item:  0.9285714285714286
```

## Using Face Embeddings

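Strategy: cluster the face embeddings within each shot (the number of clusters is the largest number of faces seen in any single frame of the shot), pick a representative face for each cluster, then compare those representatives across shots.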

In [None]:
def conversationsq_face_embeddings(video_name):
    """Find conversations in a video by clustering face embeddings per shot.

    Faces on regularly sampled frames are grouped by cinematic shot and
    clustered (one cluster per face in the busiest frame of the shot). Each
    cluster is represented by the face closest to the cluster mean. Adjacent
    shots are joined into pairs of differing faces, pairs that match across
    intervals are coalesced while counting shots, and only intervals with a
    shot count of at least 2 are returned.

    Args:
        video_name: Substring matched against the video name.

    Returns:
        The interval collection holding the detected conversations.
    """
    from query.models import FaceCharacterActor, Shot, Face
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser, merge_dict_parsers, dict_payload_parser
    from rekall.merge_ops import payload_plus, merge_named_payload, payload_second
    from rekall.payload_predicates import payload_satisfies
    from rekall.spatial_predicates import scene_graph, make_region
    from esper.rekall import intrvllists_to_result_bbox
    from rekall.list_predicates import length_at_most
    from rekall.logical_predicates import and_pred, or_pred, true_pred
    from rekall.temporal_predicates import before, after, overlaps, equal
    from rekall.bbox_predicates import height_at_least
    from esper.rekall import intrvllists_to_result, intrvllists_to_result_with_objects, add_intrvllists_to_result
    from esper.prelude import esper_widget
    from rekall.interval_list import Interval, IntervalList
    import esper.face_embeddings as face_embeddings

    EMBEDDING_EQUALITY_THRESHOLD = 1.
    ONE_FRAME = 1

    sampled_faces_qs = Face.objects.annotate(
        min_frame=F('frame__number'),
        max_frame=F('frame__number'),
        video_id=F('frame__video_id')
    ).filter(frame__video__name__contains=video_name, frame__regularly_sampled=True)

    # Payload per face: bbox fields plus its database id under 'face_id'.
    face_payload_parser = in_array(merge_dict_parsers([
        bbox_payload_parser(VideoIntervalCollection.django_accessor),
        dict_payload_parser(VideoIntervalCollection.django_accessor, { 'face_id': 'id' }),
    ]))
    faces_per_frame = VideoIntervalCollection.from_django_qs(
        sampled_faces_qs,
        with_payload=face_payload_parser
    ).coalesce(payload_merge_op=payload_plus)

    cinematic_shots_qs = Shot.objects.filter(cinematic=True)
    shots = VideoIntervalCollection.from_django_qs(cinematic_shots_qs)

    def wrap_frame(shot_id, faces_in_frame):
        # Keep each frame's faces as one nested list so frames stay separable.
        return [faces_in_frame]

    shots_with_faces = shots.merge(
        faces_per_frame,
        predicate=overlaps(),
        payload_merge_op=wrap_frame
    ).coalesce(payload_merge_op=payload_plus)

    def cluster_center(face_ids):
        """Return the id of the face whose embedding is closest to the mean."""
        centroid = face_embeddings.mean(face_ids)
        distances = face_embeddings.dist(face_ids, [centroid])
        _, closest_id = min(zip(distances, face_ids))
        return closest_id

    def cluster_and_compute_centers(faces_in_frame_list):
        """Cluster a shot's faces; return a (center id, member ids) per cluster."""
        num_people = max(len(frame_faces) for frame_faces in faces_in_frame_list)
        face_ids = [
            face['face_id']
            for frame_faces in faces_in_frame_list
            for face in frame_faces
        ]
        if num_people != 1:
            clusters = face_embeddings.kmeans(face_ids, num_people)
        else:
            # A single person: every face belongs to cluster 0.
            clusters = [(fid, 0) for fid in face_ids]

        centers = []
        for label in range(num_people):
            members = [
                face_id
                for face_id, cluster_id in clusters
                if cluster_id == label
            ]
            centers.append((cluster_center(members), members))
        return centers

    print("Clusters computed")

    def attach_centers(intrvl):
        return (intrvl.start, intrvl.end, cluster_and_compute_centers(intrvl.payload))

    shots_with_centers = shots_with_faces.map(attach_centers)

    def same_face(center1, center2):
        """True if two representative faces are closer than the threshold."""
        distance = face_embeddings.dist([center1], target_ids=[center2])[0]
        return distance < EMBEDDING_EQUALITY_THRESHOLD

    def cross_product_faces(intrvl1, intrvl2):
        """Pair up the differing faces of two shots over their combined span."""
        clusters1 = intrvl1.get_payload()
        clusters2 = intrvl2.get_payload()
        distinct_pairs = [
            {'A': first, 'B': second}
            for first in clusters1
            for second in clusters2
            if not same_face(first[0], second[0])
        ]
        span_start = min(intrvl1.get_start(), intrvl2.get_start())
        span_end = max(intrvl1.get_end(), intrvl2.get_end())
        return [(span_start, span_end, {'chrs': distinct_pairs, 'shots': 1})]

    two_shots = shots_with_centers.join(
        shots_with_centers,
        predicate=after(max_dist=ONE_FRAME, min_dist=ONE_FRAME),
        merge_op=cross_product_faces
    )

    print("Cross product done")

    def faces_equal(payload1, payload2):
        """True if the payloads share a face pair, in either order."""
        for pair1 in payload1['chrs']:
            a1, b1 = pair1['A'][0], pair1['B'][0]
            for pair2 in payload2['chrs']:
                a2, b2 = pair2['A'][0], pair2['B'][0]
                if same_face(a1, a2) and same_face(b1, b2):
                    return True
                if same_face(a1, b2) and same_face(b1, a2):
                    return True
        return False

    def combine_payloads(payload1, payload2):
        """Concatenate the face pairs and add up the shot counts."""
        return {
            'chrs': payload1['chrs'] + payload2['chrs'],
            'shots': payload1['shots'] + payload2['shots']
        }

    same_pair = payload_satisfies(faces_equal, arity=2)
    convs = two_shots.coalesce(
        predicate=same_pair,
        payload_merge_op=combine_payloads
    )

    print("Coalesce done")

    adjacent_seq = convs.merge(
        convs,
        predicate=and_pred(
            after(max_dist=ONE_FRAME, min_dist=ONE_FRAME),
            payload_satisfies(faces_equal, arity=2),
            arity=2),
        payload_merge_op=combine_payloads,
        working_window=1
    )
    convs = convs.set_union(adjacent_seq)

    print("Two-shot adjacencies done")

    def filter_fn(intvl):
        """Keep intervals whose payload records at least two shots."""
        payload = intvl.get_payload()
        if type(payload) is not dict or 'shots' not in payload:
            return False
        return payload['shots'] >= 2

    convs = convs.filter(filter_fn)
    convs = convs.coalesce()

    print("Final filter done")

    return convs

In [None]:
# Run the face-embedding conversation query on videos whose name contains 'apollo 13'.
convs = conversationsq_face_embeddings('apollo 13')

In [None]:
# Size of the result's interval list for video id 15 (the id the Apollo 13 validation cell uses).
convs.get_intervallist(15).size()

In [None]:
# Returns precision, recall, precision_per_item, recall_per_item
def compute_statistics(query_intrvllists, ground_truth_intrvllists):
    """Score query intervals against ground truth, by time and by segment.

    Both arguments map a video key to an interval list. Time-based scores use
    the total time of the coalesced lists; per-item scores count the segments
    that overlap anything on the other side. Only videos present in both
    mappings contribute to the overlap totals.

    Precision values are 1.0 when the query covers no time; recall values are
    1.0 when the ground truth covers no time.

    Returns:
        (precision, recall, precision_per_item, recall_per_item)
    """
    def totals(intrvllists):
        # Sum covered time and segment count over every video in the mapping.
        duration = 0
        segments = 0
        for video in intrvllists:
            intrvllist = intrvllists[video]
            duration += intrvllist.coalesce().get_total_time()
            segments += intrvllist.size()
        return duration, segments

    query_time, query_segments = totals(query_intrvllists)
    truth_time, truth_segments = totals(ground_truth_intrvllists)

    shared_time = 0
    matched_query_segments = 0
    matched_truth_segments = 0
    for video in query_intrvllists:
        if video not in ground_truth_intrvllists:
            continue
        query_list = query_intrvllists[video]
        gt_list = ground_truth_intrvllists[video]

        shared_time += query_list.overlaps(gt_list).coalesce().get_total_time()
        matched_query_segments += query_list.filter_against(gt_list, predicate=overlaps()).size()
        matched_truth_segments += gt_list.filter_against(query_list, predicate=overlaps()).size()

    if query_time != 0:
        precision = shared_time / query_time
        precision_per_item = matched_query_segments / query_segments
    else:
        precision = precision_per_item = 1.0

    if truth_time != 0:
        recall = shared_time / truth_time
        recall_per_item = matched_truth_segments / truth_segments
    else:
        recall = recall_per_item = 1.0

    return precision, recall, precision_per_item, recall_per_item

def print_statistics(query_intrvllists, ground_truth_intrvllists):
    """Compute the four precision/recall scores and print one per line."""
    scores = compute_statistics(query_intrvllists, ground_truth_intrvllists)
    labels = (
        "Precision: ",
        "Recall: ",
        "Precision Per Item: ",
        "Recall Per Item: ",
    )
    # compute_statistics returns its scores in exactly this label order.
    for label, score in zip(labels, scores):
        print(label, score)

In [None]:
# Validate the face-embedding query on Apollo 13 against ground-truth conversations.
apollo_convs = conversationsq_face_embeddings("apollo 13")
# Ground-truth conversation spans as (start, end) pairs (presumably frame numbers -- confirm).
apollo_data = [
    (2578, 4100), (4244, 4826), (5098, 5828), (7757, 9546),
    (9602, 10300), (12393, 12943), (13088, 13884), (14146, 15212),
    (15427, 16116), (18040, 19198), (20801, 23368), (24572, 26185),
    (26735, 28753), (29462, 30873), (31768, 34618)]
# Keyed by video id 15, the same id used to pull the query result below.
apollo_gt = {15: IntervalList([Interval(start, end, payload=None) for (start,end) in apollo_data])}
# Only query intervals starting before 34618 (the end of the last ground-truth span) are scored.
print_statistics({15: apollo_convs.filter(lambda intrvl: intrvl.start < 34618).get_intervallist(15)}, apollo_gt)

In [None]:
# Validate the face-embedding query on The Godfather Part III against ground-truth conversations.
godfather_convs = conversationsq_face_embeddings("the godfather part iii")
# Ground-truth conversation spans as (start, end) pairs (presumably frame numbers -- confirm).
godfather_data = [(12481, 13454), (13673, 14729), (16888, 17299), (21101, 27196),
    (27602, 29032), (29033, 33204), (34071, 41293), (41512, 43103)]
# Keyed by video id 216, the same id used to pull the query result below.
godfather_gt = {216: IntervalList([Interval(start, end, payload=None) for (start,end) in godfather_data])}
# Only query intervals starting before 43103 (the end of the last ground-truth span) are scored.
print_statistics({216: godfather_convs.filter(lambda intrvl: intrvl.start < 43103).get_intervallist(216)}, 
                 godfather_gt)

In [None]:
# Validate the face-embedding query on Harry Potter and the Chamber of Secrets.
hp2_convs = conversationsq_face_embeddings('harry potter and the chamber of secrets')
# Only query intervals starting before 20308 (the end of the last ground-truth span) are scored.
hp2_query = hp2_convs.filter(lambda inv: inv.start < 20308)
# NOTE(review): the dict key is the string '374' while the interval list is fetched
# with the integer 374. Query and ground truth use the same string key, so they
# still line up in compute_statistics; the other validation cells use integer keys.
hp2_query = {'374': hp2_query.get_intervallist(374)}
# Ground-truth conversation spans as (start, end) pairs (presumably frame numbers -- confirm).
hp2_data = [(2155, 4338), (4687, 6188), (6440, 10134), (12921, 13151), (16795, 17370),
            (17766, 18021), (18102, 19495), (19622, 20308)]
hp2_gt = {'374': IntervalList([Interval(start, end, payload=None) for (start,end) in hp2_data])}
print_statistics(hp2_query, hp2_gt)

In [None]:
# Validate the face-embedding query on Fight Club.
fc_query = conversationsq_face_embeddings('fight club')
# Only query intervals starting before 58258 (the end of the last ground-truth span) are scored.
fc_query = fc_query.filter(lambda inv: inv.start < 58258)
# NOTE(review): string key '61' here versus integer 61 for get_intervallist; query and
# ground truth share the same string key, so compute_statistics still matches them.
fc_query = {'61': fc_query.get_intervallist(61)}
# Ground-truth conversation spans as (start, end) pairs (presumably frame numbers -- confirm).
fc_data = [(4698, 5602), (6493, 6865), (8670, 9156), (9517, 10908), (11087, 13538), (22039, 24188),
           (25603, 27656), (31844, 32812), (32918, 33451), (33698, 35363), (42072, 45143),
           (45272, 46685), (49162, 50618), (56830, 58258)]
fc_gt = {'61': IntervalList([Interval(start, end, payload=None) for (start,end) in fc_data])}
print_statistics(fc_query, fc_gt)

Results with a threshold of 0.9:
```
Precision:  0.9390051766824218
Recall:  0.8327760866310694
Precision Per Item:  0.9411764705882353
Recall Per Item:  1.0
Precision:  0.7085401799565622
Recall:  0.7960695455139658
Precision Per Item:  0.6
Recall Per Item:  0.875
Precision:  0.7528127623845507
Recall:  0.8525244841684891
Precision Per Item:  0.5625
Recall Per Item:  1.0
Precision:  0.6229706390328152
Recall:  0.7093411996066863
Precision Per Item:  0.6451612903225806
Recall Per Item:  0.9285714285714286

Average precision: 75.6
Average recall: 79.8
```

Results with a threshold of 1.0:
```
Precision:  0.9040439021791251
Recall:  0.8467488397624632
Precision Per Item:  0.8888888888888888
Recall Per Item:  1.0
Precision:  0.6720145787179304
Recall:  0.8195128328031722
Precision Per Item:  0.5238095238095238
Recall Per Item:  1.0
Precision:  0.7255309325946445
Recall:  0.8965484453741561
Precision Per Item:  0.5625
Recall Per Item:  1.0
Precision:  0.5912671438282219
Recall:  0.7269911504424779
Precision Per Item:  0.5555555555555556
Recall Per Item:  0.9285714285714286

Average precision: 72.3
Average recall: 82.2
```

## Face Embeddings Algorithm on Identities

In [None]:
def conversationsq_face_embeddings_with_identities(video_name):
    """Run the shot-pair conversation algorithm on character identity labels.

    Same pipeline shape as the face-embedding query, except that a shot is
    summarised by the set of character names seen in it and two "faces" are
    the same when their character names are equal.

    Steps, as implemented below:

    1. Load faces that carry a character label for videos whose name contains
       ``video_name`` and group them by the cinematic shot they overlap.
    2. Reduce each shot to the set of character names seen in it.
    3. Join adjacent shots into intervals listing every pair of different
       characters ``{'A': name, 'B': name}`` across the two shots.
    4. Coalesce and merge intervals that share a character pair (in either
       order), adding up shot counts, and keep intervals with a count of at
       least 2.

    Every interval of the result is printed before returning.

    Args:
        video_name: Substring matched against the video name.

    Returns:
        The interval collection holding the detected conversations.

    NOTE(review): ``F`` is used but not imported in this cell; presumably the
    notebook environment provides Django's ``F`` expression -- confirm.
    """
    from query.models import FaceCharacterActor, Shot, Face
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser, merge_dict_parsers, dict_payload_parser
    from rekall.merge_ops import payload_plus, merge_named_payload, payload_second
    from rekall.payload_predicates import payload_satisfies
    from rekall.spatial_predicates import scene_graph, make_region
    from esper.rekall import intrvllists_to_result_bbox
    from rekall.list_predicates import length_at_most
    from rekall.logical_predicates import and_pred, or_pred, true_pred
    from rekall.temporal_predicates import before, after, overlaps, equal
    from rekall.bbox_predicates import height_at_least
    from esper.rekall import intrvllists_to_result, intrvllists_to_result_with_objects, add_intrvllists_to_result
    from esper.prelude import esper_widget
    from rekall.interval_list import Interval, IntervalList
    import esper.face_embeddings as face_embeddings

    ONE_FRAME = 1

    faces_with_character_actor_qs = FaceCharacterActor.objects.annotate(
        min_frame=F('face__frame__number'),
        max_frame=F('face__frame__number'),
        video_id=F('face__frame__video_id'),
        bbox_x1=F('face__bbox_x1'),
        bbox_y1=F('face__bbox_y1'),
        bbox_x2=F('face__bbox_x2'),
        bbox_y2=F('face__bbox_y2'),
        character_name=F('characteractor__character__name')
    ).filter(face__frame__video__name__contains=video_name)

    # Payload per face: bbox fields plus the character name under 'character'.
    faces_per_frame = VideoIntervalCollection.from_django_qs(
        faces_with_character_actor_qs,
        with_payload=in_array(merge_dict_parsers([
            bbox_payload_parser(VideoIntervalCollection.django_accessor),
            dict_payload_parser(VideoIntervalCollection.django_accessor, { 'character': 'character_name' }),
        ]))
    ).coalesce(payload_merge_op=payload_plus)

    shots_qs = Shot.objects.filter(cinematic=True)
    shots = VideoIntervalCollection.from_django_qs(shots_qs)

    shots_with_faces = shots.merge(
        faces_per_frame,
        predicate=overlaps(),
        payload_merge_op=lambda shot_id, faces_in_frame: [faces_in_frame]
    ).coalesce(payload_merge_op=payload_plus)

    def cluster_and_compute_centers(faces_in_frame_list):
        """Return the set of character names seen anywhere in the shot.

        The name is kept from the embedding version; no clustering happens
        here because the identity labels already group the faces.
        """
        return {
            face['character']
            for faces_in_frame in faces_in_frame_list
            for face in faces_in_frame
        }

    print("Clusters computed")

    shots_with_centers = shots_with_faces.map(
        lambda intrvl: (intrvl.start, intrvl.end, cluster_and_compute_centers(intrvl.payload))
    )

    def same_face(center1, center2):
        """Two characters are the same when their names are equal."""
        return center1 == center2

    def cross_product_faces(intrvl1, intrvl2):
        """Pair up the differing characters of two shots over their combined span."""
        names1 = intrvl1.get_payload()
        names2 = intrvl2.get_payload()
        pairs = [
            {'A': name1, 'B': name2}
            for name1 in names1
            for name2 in names2
            if not same_face(name1, name2)
        ]
        return [Interval(
            min(intrvl1.get_start(), intrvl2.get_start()),
            max(intrvl1.get_end(), intrvl2.get_end()),
            {'chrs': pairs, 'shots': 1},
        )]

    two_shots = shots_with_centers.join(
        shots_with_centers,
        predicate=after(max_dist=ONE_FRAME, min_dist=ONE_FRAME),
        merge_op=cross_product_faces,
        working_window=ONE_FRAME
    )

    print("Cross product done")

    def faces_equal(payload1, payload2):
        """True if the payloads share a character pair, in either order."""
        for pair1 in payload1['chrs']:
            for pair2 in payload2['chrs']:
                # 'A' and 'B' hold whole character names here, so compare them
                # directly; indexing with [0] would compare first letters only.
                if (same_face(pair1['A'], pair2['A']) and
                        same_face(pair1['B'], pair2['B'])):
                    return True
                if (same_face(pair1['A'], pair2['B']) and
                        same_face(pair1['B'], pair2['A'])):
                    return True
        return False

    def merge_conversations(payload1, payload2):
        """Concatenate the character pairs and add up the shot counts."""
        return {
            'chrs': payload1['chrs'] + payload2['chrs'],
            'shots': payload1['shots'] + payload2['shots']
        }

    convs = two_shots.coalesce(
        predicate=payload_satisfies(faces_equal, arity=2),
        payload_merge_op=merge_conversations
    )

    print("Coalesce done")

    adjacent_seq = convs.merge(
        convs,
        predicate=and_pred(
            after(max_dist=ONE_FRAME, min_dist=ONE_FRAME),
            payload_satisfies(faces_equal, arity=2),
            arity=2),
        payload_merge_op=merge_conversations,
        working_window=1
    )
    convs = convs.set_union(adjacent_seq)

    print("Two-shot adjacencies done")

    def filter_fn(intvl):
        """Keep intervals whose payload records at least two shots."""
        payload = intvl.get_payload()
        if isinstance(payload, dict) and 'shots' in payload:
            return payload['shots'] >= 2
        return False

    convs = convs.filter(filter_fn)
    convs = convs.coalesce()

    print("Final filter done")

    for video_id in convs.intervals.keys():
        print(video_id)
        intvllist = convs.get_intervallist(video_id)
        for intvl in intvllist.get_intervals():
            print(intvl.payload)
            print(str(intvl.start) + ':' + str(intvl.end))

    return convs

In [None]:
# Run the identity-label variant of the query on videos whose name contains 'apollo 13'.
convs2 = conversationsq_face_embeddings_with_identities('apollo 13')

In [None]:
# Score the identity-label result (convs2) against the Apollo 13 ground truth.
# Ground-truth conversation spans as (start, end) pairs (presumably frame numbers -- confirm).
apollo_data = [
    (2578, 4100), (4244, 4826), (5098, 5828), (7757, 9546),
    (9602, 10300), (12393, 12943), (13088, 13884), (14146, 15212),
    (15427, 16116), (18040, 19198), (20801, 23368), (24572, 26185),
    (26735, 28753), (29462, 30873), (31768, 34618)]
# Keyed by video id 15, the same id used to pull the query result below.
apollo_gt = {15: IntervalList([Interval(start, end, payload=None) for (start,end) in apollo_data])}
# Only query intervals starting before 34618 (the end of the last ground-truth span) are scored.
print_statistics({15: convs2.filter(lambda intrvl: intrvl.start < 34618).get_intervallist(15)}, apollo_gt)

Results:
```
Precision:  0.9756586483390607
Recall:  0.510055391985628
Precision Per Item:  1.0
Recall Per Item:  0.9333333333333333
```

## Dan's Scratchpad

In [None]:
from query.models import FaceCharacterActor, Shot
from rekall.video_interval_collection import VideoIntervalCollection
from rekall.parsers import in_array, bbox_payload_parser, merge_dict_parsers, dict_payload_parser
from rekall.merge_ops import payload_plus
from rekall.payload_predicates import payload_satisfies
from rekall.spatial_predicates import scene_graph
from esper.rekall import intrvllists_to_result_bbox
from query.models import Face
from rekall.video_interval_collection import VideoIntervalCollection
from rekall.parsers import in_array, bbox_payload_parser
from rekall.merge_ops import payload_plus, merge_named_payload, payload_second
from esper.rekall import intrvllists_to_result_bbox
from rekall.payload_predicates import payload_satisfies
from rekall.list_predicates import length_at_most
from rekall.logical_predicates import and_pred, or_pred, true_pred
from rekall.spatial_predicates import scene_graph, make_region
from rekall.temporal_predicates import before, after, overlaps
from rekall.bbox_predicates import height_at_least
from esper.rekall import intrvllists_to_result, intrvllists_to_result_with_objects, add_intrvllists_to_result
from esper.prelude import esper_widget
from rekall.interval_list import Interval, IntervalList
import esper.face_embeddings as face_embeddings

# Scratchpad: rebuild the per-shot face lists at notebook scope so the
# intermediate values can be inspected cell by cell.

# faces are sampled every 12 frames
SAMPLING_RATE = 12
ONE_FRAME = 1

video_name='apollo 13'

# Faces on regularly sampled frames of the chosen video; unlike the query
# functions above, this also requires probability >= 0.9.
# NOTE(review): `F` is not imported in this cell; presumably the notebook
# environment provides Django's F expression -- confirm.
faces_qs = Face.objects.annotate(
    min_frame=F('frame__number'),
    max_frame=F('frame__number'),
    video_id=F('frame__video_id')
).filter(
    frame__video__name__contains=video_name,
    frame__regularly_sampled=True,
    probability__gte=0.9
)

# Payload per face: bbox fields plus its database id under 'face_id';
# payload_plus combines the payloads of coalesced faces.
faces_per_frame = VideoIntervalCollection.from_django_qs(
    faces_qs,
    with_payload=in_array(merge_dict_parsers([
        bbox_payload_parser(VideoIntervalCollection.django_accessor),
        dict_payload_parser(VideoIntervalCollection.django_accessor, { 'face_id': 'id' }),
    ]))
).coalesce(payload_merge_op=payload_plus)

shots_qs = Shot.objects.filter(cinematic=True)
shots = VideoIntervalCollection.from_django_qs(shots_qs)

# Each frame's faces are wrapped in their own list before coalescing, so the
# resulting payload is a list of per-frame face lists.
shots_with_faces = shots.merge(
    faces_per_frame, 
    predicate=overlaps(), 
    payload_merge_op=lambda shot_id, faces_in_frame: [faces_in_frame]
).coalesce(payload_merge_op=payload_plus)

def cluster_center(face_ids):
    """Return the face id that is nearest to the mean embedding of `face_ids`.

    The mean embedding of the group is computed first, then the distance from
    each face to that mean (presumably one distance per id, in the same order
    as `face_ids` -- TODO confirm against face_embeddings.dist). The id paired
    with the smallest distance is returned; on equal distances the smaller id
    wins because (distance, id) tuples are compared.
    """
    centroid = face_embeddings.mean(face_ids)
    distances = face_embeddings.dist(face_ids, [centroid])
    _, nearest_id = min(zip(distances, face_ids))
    return nearest_id

def cluster_and_compute_centers(faces_in_frame_list):
    """Cluster the faces of one shot and pick a representative per cluster.

    Args:
        faces_in_frame_list: list of per-frame face lists; every face is a
            dict carrying a 'face_id' key.

    Returns:
        A list of ``(center_face_id, member_face_ids)`` tuples ordered by
        cluster id, one per non-empty cluster. Returns ``[]`` when there are
        no faces at all.
    """
    # The number of clusters is the largest number of faces seen in any
    # single frame. default=0 keeps an empty input from raising.
    num_people = max(
        (len(faces_in_frame) for faces_in_frame in faces_in_frame_list),
        default=0,
    )
    if num_people == 0:
        return []

    face_ids = [
        face['face_id']
        for faces_in_frame in faces_in_frame_list
        for face in faces_in_frame
    ]

    if num_people == 1:
        # Only one face per frame: everything belongs to cluster 0.
        clusters = [(fid, 0) for fid in face_ids]
    else:
        clusters = face_embeddings.kmeans(face_ids, num_people)

    # Group the (face_id, cluster_id) assignments in a single pass instead of
    # rescanning all assignments twice for every cluster.
    members_by_cluster = {}
    for face_id, cluster_id in clusters:
        members_by_cluster.setdefault(cluster_id, []).append(face_id)

    # Clusters that received no member are skipped: there is no face to act
    # as their center, and cluster_center cannot handle an empty list.
    return [
        (cluster_center(members_by_cluster[i]), members_by_cluster[i])
        for i in range(num_people)
        if i in members_by_cluster
    ]

# Replace each shot's payload with the (center, members) clusters computed
# from the faces gathered for that shot; start and end are kept unchanged.
shots_with_centers = shots_with_faces.map(
    lambda intrvl: (intrvl.start, intrvl.end, cluster_and_compute_centers(intrvl.payload))
)

In [None]:
# Shots of video 15 whose payload holds more than one cluster.
# Uses shots_with_centers, the collection built in the preceding cell.
shots_with_centers.get_intervallist(15).filter(payload_satisfies(lambda p: len(p) > 1))

In [None]:
# Face ids of sample group A, compared against b_list and c_list below.
a_list = [706034, 706036, 706038, 706040, 706042, 706043, 706046, 706048]

In [None]:
# Face ids of sample group B.
b_list = [706033, 706035, 706037, 706039, 706041, 706044, 706045, 706047, 706049, 706050]

In [None]:
# Mean embedding of each sample group.
a_mean = face_embeddings.mean(a_list)
b_mean = face_embeddings.mean(b_list)

In [None]:
# Show a_mean as the cell output.
a_mean

In [None]:
# Show b_mean as the cell output.
b_mean

In [None]:
import numpy as np

In [None]:
# Euclidean distance between the two group means, scaled by 8
# (8 is len(a_list); whether that is the intended factor is not stated).
np.sqrt(sum((a-b) ** 2 for a, b in zip(a_mean, b_mean))) * 8

In [None]:
# Mean embedding of the first four ids of group A.
a_mean_small1 = face_embeddings.mean(a_list[:4])

In [None]:
# Mean embedding of the remaining four ids of group A.
a_mean_small2 = face_embeddings.mean(a_list[4:8])

In [None]:
# Euclidean distance between the means of the two halves of group A,
# scaled by 4 (the size of each half).
np.sqrt(sum((a-b) ** 2 for a, b in zip(a_mean_small1, a_mean_small2))) * 4

In [None]:
# NOTE(review): mean() is called without face ids here, unlike the earlier
# face_embeddings.mean(a_list) -- confirm this call is valid. It also
# overwrites the a_mean computed from a_list.
a_mean = face_embeddings.mean()

In [None]:
# NOTE(review): mean() is called without face ids here, unlike the earlier
# face_embeddings.mean(b_list) -- confirm this call is valid. It also
# overwrites the b_mean computed from b_list.
b_mean = face_embeddings.mean()

In [None]:
# Unscaled Euclidean distance between the current a_mean and b_mean.
np.sqrt(sum((a-b) ** 2 for a, b in zip(a_mean, b_mean)))

In [None]:
def cluster_center(face_ids):
    """Return the face id that is nearest to the mean embedding of `face_ids`.

    The mean embedding of the group is computed first, then the distance from
    each face to that mean (presumably one distance per id, in the same order
    as `face_ids` -- TODO confirm against face_embeddings.dist). The id paired
    with the smallest distance is returned; on equal distances the smaller id
    wins because (distance, id) tuples are compared.
    """
    centroid = face_embeddings.mean(face_ids)
    distances = face_embeddings.dist(face_ids, [centroid])
    _, nearest_id = min(zip(distances, face_ids))
    return nearest_id

In [None]:
# Representative face id of group A.
cluster_center(a_list)

In [None]:
# Representative face id of group B.
cluster_center(b_list)

In [None]:
# Embedding distance between the representatives of groups A and B.
face_embeddings.dist([cluster_center(a_list)], target_ids=[cluster_center(b_list)])

In [None]:
# Face ids of sample group C.
c_list = [706334, 706336, 706338, 706341, 706343, 706345, 706346, 706348, 706350, 706353, 706355, 706357, 706359, 706361, 706362, 706364, 706365, 706366, 706367, 706368, 706369]

In [None]:
# Embedding distance between the representatives of groups A and C.
face_embeddings.dist([cluster_center(a_list)], target_ids=[cluster_center(c_list)])

In [None]:
# Embedding distance between the representatives of groups B and C.
face_embeddings.dist([cluster_center(b_list)], target_ids=[cluster_center(c_list)])

# Scratchpad

In [None]:
from query.models import FaceCharacterActor, Shot
from rekall.video_interval_collection import VideoIntervalCollection
from rekall.parsers import in_array, bbox_payload_parser, merge_dict_parsers, dict_payload_parser
from rekall.merge_ops import payload_plus
from rekall.payload_predicates import payload_satisfies
from rekall.spatial_predicates import scene_graph
from esper.rekall import intrvllists_to_result_bbox
from query.models import Face
from rekall.video_interval_collection import VideoIntervalCollection
from rekall.parsers import in_array, bbox_payload_parser
from rekall.merge_ops import payload_plus, merge_named_payload, payload_second
from esper.rekall import intrvllists_to_result_bbox
from rekall.payload_predicates import payload_satisfies
from rekall.list_predicates import length_at_most
from rekall.logical_predicates import and_pred, or_pred, true_pred
from rekall.spatial_predicates import scene_graph, make_region
from rekall.temporal_predicates import before, after, overlaps
from rekall.bbox_predicates import height_at_least
from esper.rekall import intrvllists_to_result, intrvllists_to_result_with_objects, add_intrvllists_to_result
from esper.prelude import esper_widget
from rekall.interval_list import Interval, IntervalList

In [None]:
# Query thresholds (presumably fractions of the frame width/height and a
# face-count cap -- TODO confirm; none of the first four are used in the
# cells visible below).
RIGHT_HALF_MIN_X = 0.45
LEFT_HALF_MAX_X = 0.55
MIN_FACE_HEIGHT = 0.4
MAX_FACES_ON_SCREEN = 2
# faces are sampled every 12 frames
SAMPLING_RATE = 12
# NOTE(review): FOUR_SECONDS (96) and TEN_SECONDS (240) both work out to 24
# units per second, but ONE_SECOND is 1 -- confirm whether 24 was intended.
# ONE_SECOND is what the `after` predicates below use as min/max distance.
ONE_SECOND = 1
FOUR_SECONDS = 96
TEN_SECONDS = 240

In [None]:
# Annotate face rows with start and end frames and the video ID,
# plus the face bounding box and the character name.
# min_frame and max_frame are both the face's frame number.
# NOTE(review): F is not imported in the visible import cells -- presumably
# it comes from an earlier import; confirm.
faces_with_character_actor_qs = FaceCharacterActor.objects.annotate(
    min_frame=F('face__frame__number'),
    max_frame=F('face__frame__number'),
    video_id=F('face__frame__video_id'),
    bbox_x1=F('face__bbox_x1'),
    bbox_y1=F('face__bbox_y1'),
    bbox_x2=F('face__bbox_x2'),
    bbox_y2=F('face__bbox_y2'),
    character_name=F('characteractor__character__name')
)

In [None]:
# Build intervals from the annotated rows. Each payload is a one-element
# list holding a dict built from the bbox parser plus a 'character' entry
# taken from 'character_name'; the collection is then coalesced with
# payload_plus as the payload merge op.
faces_with_identity = VideoIntervalCollection.from_django_qs(
    faces_with_character_actor_qs,
    with_payload=in_array(merge_dict_parsers([
        bbox_payload_parser(VideoIntervalCollection.django_accessor),
        dict_payload_parser(VideoIntervalCollection.django_accessor, { 'character': 'character_name' }),
    ]))
).coalesce(payload_merge_op=payload_plus)

In [None]:
# Shots produced by the labeler named 'shot-hsvhist-face'.
# NOTE(review): Labeler is not imported in the visible import cells --
# presumably it comes from an earlier import; confirm.
shots_qs = Shot.objects.filter(
    labeler=Labeler.objects.get(name='shot-hsvhist-face'))
shots = VideoIntervalCollection.from_django_qs(shots_qs)

In [None]:
def payload_unique_characters(payload1, payload2):
    """Accumulate the distinct character names of two payloads.

    Both payloads are lists of dicts with a 'character' key. The union of
    names is stored as a list under 'characters' on the first dict of
    `payload1`, which is modified in place. If that dict already has a
    'characters' entry (from an earlier merge), it is used in place of the
    individual 'character' values of `payload1`.

    Returns `payload1` itself; the entries of `payload2` are not appended.
    """
    head = payload1[0]
    if 'characters' in head:
        # Already accumulated once: extend the stored names with payload2's.
        names = set(entry['character'] for entry in payload2)
        names.update(head['characters'])
    else:
        names = set(entry['character'] for entry in payload1)
        names.update(entry['character'] for entry in payload2)
    head['characters'] = list(names)
    return payload1
    
    

In [None]:
# Merge shots with the identity-labelled faces that overlap them, keeping
# the second (face) payload for each merged interval.
shots_with_faces = shots.merge(faces_with_identity, 
                               predicate=overlaps(), 
                               payload_merge_op=payload_second)

In [None]:
# Coalesce the merged intervals, accumulating the distinct character names
# on the first payload entry via payload_unique_characters.
shots_with_faces = shots_with_faces.coalesce(payload_merge_op=payload_unique_characters)


In [None]:
def cross_product_faces(intrvl1, intrvl2):
    """Pair up the distinct characters of two intervals.

    For every character of `intrvl1` and every different character of
    `intrvl2`, one interval is produced that spans both inputs (earliest
    start to latest end) and carries the payload ``{'A': ..., 'B': ...}``.

    Args:
        intrvl1, intrvl2: intervals whose payload is a non-empty list of
            dicts. Character names come from the 'characters' list on the
            first dict when present, otherwise from the distinct
            'character' values of all dicts.

    Returns:
        A list of Interval objects; empty when no two characters differ.
    """
    def _characters(payload):
        # Prefer the accumulated 'characters' list when the payload has one.
        first = payload[0]
        if 'characters' in first:
            return first['characters']
        return list(set([p['character'] for p in payload]))

    chrtrs1 = _characters(intrvl1.get_payload())
    chrtrs2 = _characters(intrvl2.get_payload())

    # The span is the same for every pair, so compute it once.
    start = min(intrvl1.start, intrvl2.start)
    end = max(intrvl1.end, intrvl2.end)

    # Iterate character names on both sides. The previous version looped
    # over the raw face dicts of the first payload, so the inequality test
    # compared a dict with a string and never filtered anything out.
    return [
        Interval(start, end, {'A': first, 'B': second})
        for first in chrtrs1
        for second in chrtrs2
        if first != second
    ]

In [None]:
def faces_equal(payload1, payload2):
    """Return True when two {'A', 'B'} payloads hold the same pair.

    The pair is unordered: A/B of one payload may match A/B or B/A of the
    other.
    """
    if payload1['A'] == payload2['A'] and payload1['B'] == payload2['B']:
        return True
    # Otherwise accept the swapped assignment.
    return payload1['A'] == payload2['B'] and payload1['B'] == payload2['A']

In [None]:
def faces_equal(payload1, payload2):
    """Tell whether two payloads name the same character pair.

    Each payload is either a dict with 'A' and 'B' keys or a list of such
    dicts.

    * dict vs dict: True when the pairs match in either order (A/B may be
      swapped).
    * a list on either side: True when some entry of one side has the same
      'A' and the same 'B' as some entry of the other side (no swapping).

    Returns:
        bool
    """
    # Inspect each argument's own type. Checking payload1 twice made mixed
    # dict/list inputs fall into the wrong branch and raise TypeError.
    first_is_list = isinstance(payload1, list)
    second_is_list = isinstance(payload2, list)

    if not first_is_list and not second_is_list:
        return (
            (payload1['A'] == payload2['A'] and payload1['B'] == payload2['B'])
            or (payload1['A'] == payload2['B'] and payload1['B'] == payload2['A'])
        )

    # Treat a bare dict as a one-element list so all list cases share a path.
    firsts = payload1 if first_is_list else [payload1]
    seconds = payload2 if second_is_list else [payload2]
    return any(
        i['A'] == j['A'] and i['B'] == j['B']
        for i in firsts
        for j in seconds
    )

def times_equal(intrvl1, intrvl2):
    """Return True when both intervals have the same start and the same end."""
    # Use the parameter names; the body previously referenced the undefined
    # names `intrvl` and `intervl2` and raised NameError on every call.
    return intrvl1.start == intrvl2.start and intrvl1.end == intrvl2.end

def merge_to_list(payload1, payload2):
    """Concatenate two payloads into a new list.

    A payload that is already a list contributes its elements; any other
    payload contributes itself as a single element.
    """
    left = [payload1] if type(payload1) is not list else payload1
    right = [payload2] if type(payload2) is not list else payload2
    return [*left, *right]

In [None]:
# Self-join shots_with_faces under the `after` predicate with both distance
# bounds set to ONE_SECOND; every matching pair of shots is expanded into
# character-pair intervals by cross_product_faces.
two_shots = shots_with_faces.join(shots_with_faces, predicate=after(max_dist=ONE_SECOND, min_dist=ONE_SECOND), 
                                merge_op=cross_product_faces)


In [None]:
# Print the number of intervals per video in two_shots, then the total.
num_intervals = 0
for video_id in two_shots.intervals.keys():
    intvllist = two_shots.get_intervallist(video_id)
    s = intvllist.size()
    print(s)
    num_intervals += s
print(num_intervals)

In [None]:
# Coalesce two_shots using faces_equal as a two-payload predicate, so
# merging is conditioned on the payloads naming the same character pair.
conversations = two_shots.coalesce(predicate=payload_satisfies(faces_equal, arity=2))

In [None]:
# Print the number of intervals per video in conversations, then the total.
num_intervals = 0
for video_id in conversations.intervals.keys():
    intvllist = conversations.get_intervallist(video_id)
    s = intvllist.size()
    print(s)
    num_intervals += s
print(num_intervals)

In [None]:
# Self-merge three_shot where the second interval follows the first under
# the `after` bounds AND the payload pair satisfies check_B_intersects,
# then coalesce the result.
# NOTE(review): three_shot and check_B_intersects are not defined in the
# visible cells -- confirm they exist before running this cell.
scene = three_shot.merge(three_shot, predicate=and_pred(after(max_dist=ONE_SECOND, min_dist=ONE_SECOND), 
                                              payload_satisfies(check_B_intersects, arity=2), arity=2)).coalesce()#, payload_merge_op=updateA))

In [None]:
# Show all conversation intervals in the esper widget; the payload callback
# returns an empty list for every interval.
esper_widget(intrvllists_to_result_with_objects(
            conversations.get_allintervals(), lambda payload, video: []),
            crop_bboxes=False,
            disable_playback=False,
            jupyter_keybindings=False)

In [None]:
# Video ids that have conversation intervals (`.keys()`, not `.key()`).
conversations.get_allintervals().keys()

In [None]:
# Same lookup through the `intervals` attribute used by the counting cells.
conversations.intervals.keys()

In [None]:
# Show the conversations collection as the cell output.
conversations

In [None]:
# Returns precision, recall, precision_per_item, recall_per_item
def compute_statistics(query_intrvllists, ground_truth_intrvllists):
    """Score query intervals against ground-truth intervals.

    Both arguments map a video key to an interval list. Two flavours of
    precision and recall are computed:

    * time based: coalesced overlap time divided by the coalesced total
      time of the query (precision) or of the ground truth (recall);
    * per item: number of segments overlapping the other side divided by
      the number of segments on that side.

    Only video keys present in both mappings contribute to the overlap. When
    the query's total time is 0 both precision values are 1.0; when the
    ground truth's total time is 0 both recall values are 1.0.

    Returns:
        (precision, recall, precision_per_item, recall_per_item)
    """
    def _time_and_count(intrvllists):
        # Total coalesced time and raw segment count over all videos.
        duration = 0
        count = 0
        for key in intrvllists:
            duration += intrvllists[key].coalesce().get_total_time()
            count += intrvllists[key].size()
        return duration, count

    total_query_time, total_query_segments = _time_and_count(query_intrvllists)
    total_ground_truth_time, total_ground_truth_segments = _time_and_count(
        ground_truth_intrvllists)

    total_overlap_time = 0
    overlapping_query_segments = 0
    overlapping_ground_truth_segments = 0
    for video in query_intrvllists:
        if video not in ground_truth_intrvllists:
            continue
        query_list = query_intrvllists[video]
        gt_list = ground_truth_intrvllists[video]

        total_overlap_time += (
            query_list.overlaps(gt_list).coalesce().get_total_time())
        overlapping_query_segments += query_list.filter_against(
            gt_list, predicate=overlaps()).size()
        overlapping_ground_truth_segments += gt_list.filter_against(
            query_list, predicate=overlaps()).size()

    if total_query_time != 0:
        precision = total_overlap_time / total_query_time
        precision_per_item = overlapping_query_segments / total_query_segments
    else:
        precision = precision_per_item = 1.0

    if total_ground_truth_time != 0:
        recall = total_overlap_time / total_ground_truth_time
        recall_per_item = (
            overlapping_ground_truth_segments / total_ground_truth_segments)
    else:
        recall = recall_per_item = 1.0

    return precision, recall, precision_per_item, recall_per_item

def print_statistics(query_intrvllists, ground_truth_intrvllists):
    """Compute the four statistics and print each on its own labelled line."""
    precision, recall, precision_per_item, recall_per_item = compute_statistics(
        query_intrvllists, ground_truth_intrvllists)

    labelled = (
        ("Precision: ", precision),
        ("Recall: ", recall),
        ("Precision Per Item: ", precision_per_item),
        ("Recall Per Item: ", recall_per_item),
    )
    for label, value in labelled:
        print(label, value)

In [None]:
# Run the conversations query for the video, print the video ids it
# returned, keep intervals starting before frame 43103 (the end of the last
# ground-truth interval below), and keep only video 216's interval list.
godfather_query = conversationsq('the godfather part iii')
for k in godfather_query.intervals.keys():
    print(k)
godfather_query = godfather_query.filter(lambda inv: inv.start < 43103)
godfather_query = {'216': godfather_query.get_intervallist(216)}

# Ground-truth (start, end) pairs, stored under the same '216' key so that
# compute_statistics matches the two dicts.
data = [(12481, 13454), (13673, 14729), (16888, 17299), (21101, 27196), (27602, 29032), (29033, 33204), (34071, 41293), (41512, 43103)]
godfather_gt = {'216': IntervalList([Interval(start, end, payload=None) for (start,end) in data])}
print_statistics(godfather_query, godfather_gt)


In [None]:
# Conversations query for 'apollo 13', limited to intervals starting before
# frame 34618 and reduced to video 15's interval list under key '15'.
apollo_query = conversationsq('apollo 13')
apollo_query = apollo_query.filter(lambda inv: inv.start < 34618)
apollo_query = {'15': apollo_query.get_intervallist(15)}


In [None]:
# Ground-truth (start, end) pairs for video 15, scored against apollo_query.
data = [(2578, 4100), (4244, 4826), (5098, 5828), (7757, 9546), (9602, 10300), (12393, 12943), (13088, 13884), (14146, 15212), (15427, 16116), (18040, 19198), (20801, 23368), (24572, 26185), (26735, 28753), (29462, 30873), (31768, 34618)]
apollo_gt = {'15': IntervalList([Interval(start, end, payload=None) for (start,end) in data])}
print_statistics(apollo_query, apollo_gt)

In [None]:
# Caption metadata for video 15; iterated in a later cell.
# NOTE(review): caption_metadata_for_video is not imported in the visible
# cells -- confirm where it comes from.
invllist = caption_metadata_for_video(15)

In [None]:
# Conversations query for 'harry potter and the chamber', limited to
# intervals starting before frame 20308 and reduced to video 374's list.
hp2_query = conversationsq('harry potter and the chamber')
hp2_query = hp2_query.filter(lambda inv: inv.start < 20308)
hp2_query = {'374': hp2_query.get_intervallist(374)}

In [None]:
# Ground-truth (start, end) pairs for video 374, scored against hp2_query.
data = [(2155, 4338), (4687, 6188), (6440, 10134), (12921, 13151), (16795, 17370), (17766, 18021), (18102, 19495), (19622, 20308)]
hp2_gt = {'374': IntervalList([Interval(start, end, payload=None) for (start,end) in data])}
print_statistics(hp2_query, hp2_gt)

In [None]:
# Conversations query for 'fight club', limited to intervals starting
# before frame 58258 and reduced to video 61's interval list.
fc_query = conversationsq('fight club')
fc_query = fc_query.filter(lambda inv: inv.start < 58258)
fc_query = {'61': fc_query.get_intervallist(61)}

In [None]:
# Ground-truth (start, end) pairs for video 61, scored against fc_query.
data = [(4698, 5602), (6493, 6865), (8670, 9156), (9517, 10908), (11087, 13538), (22039, 24188), (25603, 27656), (31844, 32812), (32918, 33451), (33698, 35363), (42072, 45143), (45272, 46685), (49162, 50618), (56830, 58258)]
fc_gt = {'61': IntervalList([Interval(start, end, payload=None) for (start,end) in data])}
print_statistics(fc_query, fc_gt)

In [None]:
# Print the payload of every caption interval that contains 'speaker'.
for intvl in invllist.get_intervals():
    if 'speaker' in intvl.payload:
        print(intvl.payload)

In [None]:
Apollo 13
1:48 --> 2:50 V
2:57 --> 3:20 V
5:24 --> 7:44 V (5:24 - 6:24) (6:45 - 7:18) (shot broken up because of shots of the moon)
8:26 --> 9:02; 8:36 - 9:02 
10:00 --> 10:33; 9:41 - 10:33 - long shot times w multiple people present
10:33 --> 11:11; 10:44 - 11:12 - skipped the daughter
12:40 --> 13:17; 12:33 - 13:21 (shot is a bit over extended)
                [14:28 - 14:51] - reaction sequence; not dialogue
17:03 --> 18:02;  18:15 ; V - over-extended; catches him in the next scene
20:29 --> 21:27 [DID NOT CATCH]
22:04 --> 23:56 V
27:04 --> 27:30 [caught and
27:34 --> 27:47 combined in unexpected ways]
                 
Godfather

                 
                 
                 
# Hand-entered (start, end) pairs shown in the widget below.
data = [
    (8757,9049),
    (12750,13463),
    (13683,14227),
    (21357,22236),
    (22294,22758),
    (23147,25854),
    (26007,26942),
    (27620,28172),
    (28382,28623),
    (28785,29036),
    (29904,31014),
    (33936,35339),
    (35421,36248),
    (39388,40062),
    (41675,42689),
    (51246,52118),
    (53117,54776),
    (54895,55762),
    (56819,59963),
    (60253,61875),
    (66533,67846),
    (68729,69040),
    (69421,70153),
    (70285,71102)]
intrvllist = IntervalList([Interval(start, end, payload=None) for (start,end) in data])
# Keyed by the integer video id 216 (the statistics cells use the string
# key '216' instead).
shot_reverse_shot_labelled = {216: intrvllist}
# Display the labelled intervals with captions disabled; the payload
# callback returns an empty list for every interval.
esper_widget(intrvllists_to_result_with_objects(shot_reverse_shot_labelled, lambda payload, video: []), disable_captions=True)