In [None]:
from esper.prelude import *
from query.models import Shot
from django.db.models import Value
from django.db.models.fields import IntegerField
from rekall.video_interval_collection import VideoIntervalCollection

def convert_to_1d_collection(collection):
    """Rebuild a 3D interval collection as a time-only ``VideoIntervalCollection``.

    For every video key in ``collection.get_allintervals()``, each interval is
    re-created as a rekall ``Interval`` from its two ``t`` bounds and its
    payload; no other interval attributes are carried over.
    """
    from rekall.interval_list import Interval
    per_video = collection.get_allintervals()
    flattened = {}
    for vid in per_video:
        flattened[vid] = [
            Interval(intrvl.t[0], intrvl.t[1], intrvl.payload)
            for intrvl in per_video[vid].get_intervals()
        ]
    return VideoIntervalCollection(flattened)


def display_result(collection_1d, display_payload=False):
    """Build an esper widget showing the intervals of ``collection_1d``.

    Args:
        collection_1d: collection whose ``get_allintervals()`` mapping is
            passed to ``intrvllists_to_result_with_objects``.
        display_payload: when true, each interval's payload is passed through
            as the objects to draw; otherwise an empty object list is used.

    Returns:
        Whatever ``esper_widget`` returns for the converted results.
    """
    from esper.rekall import intrvllists_to_result_bbox
    from esper.rekall import intrvllists_to_result_with_objects

    # The mapper receives (payload, video); only the payload is ever used.
    if display_payload:
        payload_to_objects = lambda p, v: p
    else:
        payload_to_objects = lambda p, v: []

    results = intrvllists_to_result_with_objects(
        collection_1d.get_allintervals(), payload_to_objects, limit=1000, stride=1)
    return esper_widget(
        results,
        crop_bboxes=False,
        show_middle_frame=False,
        disable_captions=True,
        results_per_page=25,
        jupyter_keybindings=True)

def get_set(vid, collection):
    """Return the interval set stored for ``vid``, or an empty ``IntervalSet3D``.

    Looks ``vid`` up in ``collection.get_allintervals()``; a missing key
    yields a freshly built empty set instead of raising.
    """
    from rekall.interval_set_3d import IntervalSet3D
    intervals_by_video = collection.get_allintervals()
    fallback = IntervalSet3D([])
    return intervals_by_video.get(vid, fallback)

# Hermione in the Middle

In [None]:
def hermione_in_the_middle():
    """Find frames where Hermione's face is horizontally between Harry's and Ron's.

    Character-labelled faces are loaded for every video whose name contains
    "harry potter", grouped per frame, and each frame's face set is matched
    against a pattern requiring:

    * one face per character, each passing ``height_at_least(MIN_FACE_HEIGHT)``;
    * every pair of the three faces agreeing on ``y1`` and on height within
      ``EPSILON``;
    * along X, the order harry -> hermione -> ron or its reverse.

    Returns:
        The per-frame collection filtered to frames matching the pattern;
        each frame's payload is the set of faces in that frame.
    """
    from rekall.video_interval_collection_3d import VideoIntervalCollection3D
    from rekall.interval_set_3d_utils import T,P,XY,or_preds,X
    from rekall.temporal_predicates import before, overlaps_before
    from rekall.bbox_predicates import height_at_least, same_value, same_height, left_of
    from query.models import FaceCharacterActor

    MIN_FACE_HEIGHT = 0.25
    EPSILON = 0.15
    RON, HERMIONE, HARRY = 'ron weasley', 'hermione granger', 'harry potter'

    # One row per labelled face; a face occupies the single frame it was seen in.
    labelled_faces_qs = FaceCharacterActor.objects.annotate(
        min_frame=F('face__frame__number'),
        max_frame=F('face__frame__number'),
        video_id=F('face__frame__video_id'),
        bbox_x1=F('face__bbox_x1'),
        bbox_y1=F('face__bbox_y1'),
        bbox_x2=F('face__bbox_x2'),
        bbox_y2=F('face__bbox_y2'),
        character_name=F('characteractor__character__name')
    ).filter(face__frame__video__name__contains="harry potter")
    face_count = labelled_faces_qs.count()

    # Default bbox schema, with the character name carried as the payload.
    bbox_schema = VideoIntervalCollection3D.django_bbox_default_schema()
    bbox_schema['payload'] = 'character_name'
    labelled_faces = VideoIntervalCollection3D.from_django_qs(
        labelled_faces_qs, bbox_schema, progress=True, total=face_count)
    faces_by_frame = labelled_faces.group_by_time()

    def tall_face_named(character):
        # Constraints on a single face: tall enough and labelled `character`.
        return [XY(height_at_least(MIN_FACE_HEIGHT)),
                lambda face: face.payload == character]

    def similarly_placed():
        # Constraints on a pair of faces: same top edge and same height.
        return [XY(same_value('y1', epsilon=EPSILON)),
                XY(same_height(epsilon=EPSILON))]

    def starts_before(first, second):
        return or_preds(overlaps_before(), before())(first, second)

    def left_to_right(a, b, c):
        return starts_before(a, b) and starts_before(b, c)

    def right_to_left(*faces):
        return left_to_right(*faces[::-1])

    trio_pattern = [
        (["harry"], tall_face_named(HARRY)),
        (["ron"], tall_face_named(RON)),
        (["hermione"], tall_face_named(HERMIONE)),
        (["harry", "ron"], similarly_placed()),
        (["harry", "hermione"], similarly_placed()),
        (["ron", "hermione"], similarly_placed()),
        (["harry", "hermione", "ron"], [X(or_preds(left_to_right, right_to_left))]),
    ]

    def has_trio(faces_in_frame):
        # Exact matching is requested (second positional argument).
        return len(faces_in_frame.match(trio_pattern, True)) > 0

    # Frame_IS<Face_IS<character>>
    return faces_by_frame.filter(P(has_trio))

def payload_to_vgrid_objects(faces):
    """Turn a set of character-labelled faces into a list of VGrid bbox dicts.

    Each face becomes a ``'bbox'`` object carrying its x/y bounds and the id
    of the ``Character`` row whose name equals the face's payload (one
    database lookup per face).
    """
    from query.models import Character

    def as_bbox(face):
        return {
            'type': 'bbox',
            'bbox_x1': face.x[0], 'bbox_x2': face.x[1],
            'bbox_y1': face.y[0], 'bbox_y2': face.y[1],
            'character_id': Character.objects.get(name=face.payload).id
        }

    return faces.fold(lambda objects, face: objects + [as_bbox(face)], [])
    
# Run the query, convert each frame's faces into VGrid bbox objects, flatten the
# result to a time-only collection, and show it; the widget is the cell's output.
answer = hermione_in_the_middle()
display_result(convert_to_1d_collection(answer.map_payload(payload_to_vgrid_objects)), display_payload=True)

# Kissing

In [None]:
def kissing():
    """Find shot segments in which two large, facing, overlapping faces appear without speech.

    Pipeline (thresholds are the constants defined just below):

    1. Load regularly sampled faces with probability >= MIN_FACE_CONFIDENCE and
       bbox height >= MIN_FACE_HEIGHT from videos with id <= 25, grouped per frame.
    2. Keep frames containing a left/right face pair matching ``overlap_faces_pattern``.
    3. Attach landmarks to those faces and keep frames matching
       ``opposing_face_pattern`` (opposite signed face angles beyond
       MIN_FACE_ANGLE, mouths within MAX_MOUTH_DIFF).
    4. Collect the surviving frames into cinematic shots and move each shot's
       start to its first collected frame.
    5. Clip each shot to the last frame that has exactly two faces, both at
       least MIN_FACE_HEIGHT tall, then apply ``filter_size(min_size=12)``.
    6. Subtract caption word intervals, coalesce, truncate bounds to ints and
       apply ``filter_size(min_size=12)`` again.

    Returns:
        The final collection produced by step 6.
    """
    from query.models import Face
    from rekall.interval_set_3d import Interval3D, IntervalSet3D
    from rekall.video_interval_collection_3d import VideoIntervalCollection3D
    from rekall.interval_set_3d_utils import T,P,XY,or_preds,X,Y
    from rekall.merge_ops import payload_plus
    from rekall.payload_predicates import payload_satisfies
    from rekall.spatial_predicates import scene_graph
    from rekall.temporal_predicates import overlaps, overlaps_before, before
    from rekall.face_landmark_predicates import looking_left, looking_right
    from rekall.bbox_predicates import height_at_least, same_height
    import esper.face_landmarks_wrapper as flw
    from esper.captions import get_all_segments
    from tqdm import tqdm_notebook as tqdm
    
    MAX_MOUTH_DIFF = 0.12
    MIN_FACE_CONFIDENCE = 0.8
    MIN_FACE_HEIGHT = 0.4
    MAX_FACE_HEIGHT_DIFF = 0.1
    MIN_FACE_OVERLAP_X = 0.05
    MIN_FACE_OVERLAP_Y = 0.2
    MAX_FACE_OVERLAP_X_FRACTION = 0.7
    MIN_FACE_ANGLE = 0.2
    
    # Replace each face's payload (a face id) with {'id', 'landmarks'}, loading
    # the landmarks for all faces of the set in one call.
    # NOTE(review): the original remark below questions whether this can be
    # parallelized, yet the call site passes parallel=True — confirm.
    # Cannot be parallelized??
    def get_landmarks(faces):
        ids = [face.payload for face in faces.get_intervals()]
        landmarks = flw.get_from_face_ids(ids)
        id_to_lm = {idx: lm for idx, lm in zip(ids, landmarks)}
        return faces.map_payload(lambda idx : {
            'id': idx,
            'landmarks': id_to_lm[idx]
        })
    
    # True when the mean positions of the selected lip landmarks of the two
    # faces are at most MAX_MOUTH_DIFF apart.
    def mouths_are_close(lm1, lm2):
        select_outer=[2,3,4,8,9,10]
        select_inner=[1,2,3,5,6,7]
        mouth1 = np.concatenate((lm1.outer_lips()[select_outer], lm1.inner_lips()[select_inner]))
        mouth2 = np.concatenate((lm2.outer_lips()[select_outer], lm2.inner_lips()[select_inner]))
        mean1 = np.mean(mouth1, axis=0)
        mean2 = np.mean(mouth2, axis=0)
        return np.linalg.norm(mean1-mean2) <= MAX_MOUTH_DIFF
    
    # Orthogonal projection of pt onto the line.
    # Line is ax+by+c=0
    def project_point_to_line(pt, a, b, c):
        x0,y0=pt[0], pt[1]
        d=a*a+b*b
        x=(b*(b*x0-a*y0)-a*c)/d
        y=(a*(-b*x0+a*y0)-b*c)/d
        return np.array([x,y])
    
    # Fits both y-on-x and x-on-y and keeps whichever has the smaller squared
    # error, so near-vertical point sets are handled as well.
    # Returns (a,b,c) which defines ax+by+c=0
    def find_best_line_fit(xs, ys):
        fit1 = np.polyfit(xs, ys, 1)
        error1 = np.sum((np.poly1d(fit1)(xs)-ys)**2)
        fit2 = np.polyfit(ys, xs, 1)
        error2 = np.sum((np.poly1d(fit2)(ys)-xs)**2)
        if error1 < error2:
            # fit1[0]x+fit1[1]=y
            return fit1[0], -1, fit1[1]
        # fit2[0]y+fit2[1]=x
        return -1, fit2[0], fit2[1]
    
    # Cross product of two unit vectors: A->B along the line fitted through the
    # center-line landmarks, and A->C towards the mean of two nose-bridge points.
    # Positive if facing left
    def signed_face_angle(lm):
        center_line_indices = [27,28, 32, 33,34, 51,62,66,57]
        data = lm.landmarks[center_line_indices]
        a, b, c = find_best_line_fit(data[:,0], data[:,1])
        A = project_point_to_line(lm.landmarks[center_line_indices[0]], a, b, c)
        B = project_point_to_line(lm.landmarks[center_line_indices[-1]], a, b, c)
        AB = B-A
        AB = AB / np.linalg.norm(AB)
        C = np.mean(lm.nose_bridge()[2:4], axis=0)
        AC = C-A
        AC = AC / np.linalg.norm(AC)
        return np.cross(AB, AC)

    # Annotate face rows with start and end frames and the video ID
    faces_qs = Face.objects.filter(
        frame__regularly_sampled=True,
        probability__gte=MIN_FACE_CONFIDENCE).annotate(
        min_frame=F('frame__number'),
        max_frame=F('frame__number'),
        height = F('bbox_y2')-F('bbox_y1'),
        video_id=F('frame__video_id')).filter(height__gte=MIN_FACE_HEIGHT, video_id__lte=25)
    
    total = faces_qs.count()
    
    # Frame_IS<Face_IS<face_id>>
    frames_with_faces = VideoIntervalCollection3D.from_django_qs(
        faces_qs, VideoIntervalCollection3D.django_bbox_default_schema(),
        progress=True, total=total
    ).group_by_time(profile=True)
    
    # Geometric constraints on a (left, right) pair of face boxes.
    overlap_faces_pattern = [
        (["left","right"], [
            X(or_preds(before(), overlaps_before())), # Left face on the left
            X(lambda f1,f2: f1.end - f2.start > MIN_FACE_OVERLAP_X), # Faces overlap
            Y(lambda f1,f2: min(f1.end, f2.end)-max(f1.start, f2.start) > MIN_FACE_OVERLAP_Y), # No face is entirely above another
            XY(same_height(MAX_FACE_HEIGHT_DIFF)),
            X(lambda f1, f2: (f1.end-f2.start)/max(f1.length(), f2.length()) < MAX_FACE_OVERLAP_X_FRACTION),
        ])
    ]
    
    # Builds a predicate over an interval set: true when the set has at least
    # one match for `pattern`.
    def matches_pattern(pattern, exact):
        def pred(intervals):
            return len(intervals.match(pattern, exact))>0
        return pred
    
    # Frame_IS<Face_IS<face_id>>
    frames_with_overlapped_faces = frames_with_faces.filter(P(matches_pattern(overlap_faces_pattern, exact=True)), profile=True)
    
    # Disabled alternative: load all landmarks up front into one map instead of
    # per frame via get_landmarks.
#     def get_landmark_map(frames_with_faces):
#         def update(fids, frame):
#             faces = frame.payload
#             return fids + [face.payload for face in faces.get_intervals()]
#         vid_to_fids = frames_with_faces.fold(update, [])
#         fids = [fid for fids in vid_to_fids.values() for fid in fids]
#         print("Getting landmarks for {0} faces".format(len(fids)))
#         landmarks = flw.get_from_face_ids(fids)
#         print("Landmarks loaded".format(len(fids)))
#         return {idx: lm for idx, lm in zip(fids, landmarks)}
    
#     landmark_map = get_landmark_map(frames_with_overlapped_faces)
    
#     def fid_to_face_meta(lm_map):
#         def map_fn(fid):
#             return {
#                 'id': fid,
#                 'landmarks': lm_map[fid]
#             }
#         return map_fn       
    
#     # Frame_IS<Face_IS<FaceMeta>>
#     frames_with_landmarks = frames_with_overlapped_faces.map_payload(
#         lambda faces: faces.map_payload(fid_to_face_meta(landmark_map)))
    
    # Frame_IS<Face_IS<FaceMeta>>
    frames_with_landmarks = frames_with_overlapped_faces.map_payload(get_landmarks, profile=True, parallel=True)
    # 'left' must have a signed angle below -MIN_FACE_ANGLE and 'right' one
    # above +MIN_FACE_ANGLE, and the two mouths must be close.
    opposing_face_pattern = [
        (['left'], [P(lambda f: signed_face_angle(f['landmarks']) < -MIN_FACE_ANGLE)]),
        (['right'], [P(lambda f: signed_face_angle(f['landmarks']) > MIN_FACE_ANGLE)]),
        (['left','right'], [P(lambda l, r: mouths_are_close(l['landmarks'], r['landmarks']))])
    ]
    
    # Frame_IS<Face_IS<FaceMeta>>
    frames_with_opposing_faces = frames_with_landmarks.filter(P(matches_pattern(opposing_face_pattern, exact=True)), profile=True)
    
    # Only videos that still have candidate frames are queried from here on.
    vids = frames_with_opposing_faces.get_allintervals().keys()
    
    # Merge with shots
    shots_qs = Shot.objects.filter(
        video_id__in = vids,
        cinematic = True,
    )
    total = shots_qs.count()
    # Shot_IS<>
    shots = VideoIntervalCollection3D.from_django_qs(
        shots_qs,
        progress=True, total=total
    )
    # Shot_IS<Frame_IS<Face_IS<FaceMeta>>>
    kissing_shots = shots.collect_by_interval(
        frames_with_opposing_faces,
        T(overlaps()),
        time_window=1, profile=True
    ).map_payload(lambda p:p[1]).map(
        # Take the start of the kissing as the start of the shot
        lambda shot: Interval3D((shot.payload.get_intervals()[0].t[0], shot.t[1]), payload=shot.payload))
    
    # Get faces in shots
    faces_qs2 = Face.objects.filter(
         frame__regularly_sampled=True,
         frame__video_id__in=vids,probability__gte=MIN_FACE_CONFIDENCE).annotate(
             min_frame=F('frame__number'),
             max_frame=F('frame__number'),
             video_id=F('frame__video_id')       
    )
    total = faces_qs2.count()
    # Frame_IS<Face_IS>
    frames_with_faces2 = VideoIntervalCollection3D.from_django_qs(
        faces_qs2, VideoIntervalCollection3D.django_bbox_default_schema(),
        progress=True, total=total
    ).group_by_time(profile=True)
    
    # True when no face in the set is shorter than MIN_FACE_HEIGHT.
    def both_faces_are_high(faces):
        def update(result, face):
            if face.height() < MIN_FACE_HEIGHT:
                return False
            return result
        return faces.fold(update, True)
    
    # Frame_IS<Face_IS>
    frames_with_two_faces = frames_with_faces2.filter(
        P(lambda faces: faces.size()==2)).filter(P(both_faces_are_high), profile=True)
    
    # Collect frames with two faces into kissing shots, and clips the shot to the last frame with two faces
    def clip_to_last_frame(intrvl):
        # payload is (kissing frames, frames with two faces); index 1 is the latter.
        frames = intrvl.payload[1]
        if frames.empty():
            return intrvl.copy()
        return Interval3D((intrvl.t[0], frames.get_intervals()[-1].t[1]), payload=intrvl.payload)
    
    # Shot_IS<(Frame_IS<Face_IS>, Frame_IS<Face_IS>)>
    clipped_kissing_shots = kissing_shots.collect_by_interval(
        frames_with_two_faces, T(overlaps()), time_window=1, filter_empty=False, profile=True
    ).map(clip_to_last_frame).filter_size(min_size=12)
    
    
    # Caption segments per video; word bounds are multiplied by the video's fps
    # to express them in frames.
    results = get_all_segments(vids)
    fps_map = dict((i, Video.objects.get(id=i).fps) for i in vids)
    # Word_IS<>
    caption_results = VideoIntervalCollection3D({
        video_id: IntervalSet3D([Interval3D((
            word[0] * fps_map[video_id], # start frame
            word[1] * fps_map[video_id]))
            for word in words])
        for video_id, words in tqdm(results)
    })
    
    # Remove the captioned spans, merge what is left, and truncate the
    # resulting bounds to integers.
    kissing_without_words = clipped_kissing_shots.minus(caption_results, profile=True)
    kissing_final = kissing_without_words.temporal_coalesce(epsilon=0.5, profile=True).map(
        lambda i: Interval3D((int(i.t[0]), int(i.t[1])), payload=i.payload)
    ).filter_size(min_size=12)
    
    return kissing_final

def payload_to_vgrid_objects(payload):
    """Convert a kissing-shot payload into a flat list of VGrid objects.

    ``payload`` is a pair whose first element is the set of frames with
    opposing, overlapping faces; the second element is ignored. Every face in
    every such frame contributes a ``'bbox'`` dict followed by the dict form of
    its landmarks.
    """
    # Frame_IS<Face_IS<FaceMeta>>
    kissing_frames, _ = payload

    def describe_face(face):
        from esper.stdlib import face_landmarks_to_dict
        bbox = {
            'type': 'bbox',
            'bbox_x1': face.x[0], 'bbox_x2': face.x[1],
            'bbox_y1': face.y[0], 'bbox_y2': face.y[1]
        }
        return [bbox, face_landmarks_to_dict(face.payload['landmarks'])]

    def add_frame(objects, frame):
        frame_objects = frame.payload.fold(
            lambda found, face: found + describe_face(face), [])
        return objects + frame_objects

    return kissing_frames.fold(add_frame, [])

# Run the kissing query, then convert each result's payload into VGrid objects
# and show the flattened collection; the widget is the cell's output.
answer = kissing()
print("Query finished. Preparing VGrid.")
display_result(convert_to_1d_collection(answer.map_payload(payload_to_vgrid_objects)), display_payload=True)

# Action Shots

In [None]:
def action_shots():
    """Find sequences of consecutive short shots that are bright and have little speech.

    Pipeline:

    1. Load cinematic shots of non-ignored films whose duration (frame span
       divided by the video's fps) lies strictly between 0 and
       MAX_SHOT_DURATION.
    2. Attach to each shot the brightness of its regularly sampled frames.
    3. Chain shots that meet each other into runs of NUM_SHOTS shots.
    4. Keep runs in which at least one shot has a mean frame brightness above
       BRIGHTNESS_THRESHOLD.
    5. Keep runs with at most MAX_NUM_WORDS_PER_SECOND caption words per second.
    6. Return the coalesced runs that overlap a run surviving steps 4 and 5.

    Returns:
        A collection of coalesced shot sequences (Seq_IS<Shot_IS<Frame_IS>>).
    """
    from query.models import Shot
    from rekall.video_interval_collection_3d import VideoIntervalCollection3D
    from rekall.interval_set_3d import IntervalSet3D, Interval3D
    from rekall.temporal_predicates import meets_before, overlaps, equal
    from rekall.interval_set_3d_utils import T,P,XY,or_preds,X,Y
    from django.db.models import ExpressionWrapper, FloatField
    from esper.captions import get_all_segments
    from rekall.merge_ops import payload_first, payload_plus, merge_named_payload
    import numpy as np

    NUM_SHOTS=5
    MAX_SHOT_DURATION=0.8
    BRIGHTNESS_THRESHOLD = 20.0
    MAX_NUM_WORDS_PER_SECOND = 1.0
    
    shots_qs = Shot.objects.annotate(
        duration = ExpressionWrapper((F('max_frame') - F('min_frame')) / F('video__fps'), output_field=FloatField())
    ).filter(
        duration__lt=MAX_SHOT_DURATION,
        duration__gt=0.,
        cinematic=True,
        video__ignore_film=False
    )
    total=shots_qs.count()
    
    # Shot_IS<>
    short_shots = VideoIntervalCollection3D.from_django_qs(shots_qs, progress=True, total=total)
    
    # Per video, list every frame number covered by a short shot (inclusive).
    def get_all_frames(short_shots):
        def update(frames, shot):
            return frames + list(range(shot.t[0], shot.t[1]+1))
        return short_shots.fold(update, [])
    
    video_to_frame_numbers = get_all_frames(short_shots)
    all_frames = VideoIntervalCollection3D({})
    for video, frames in tqdm(video_to_frame_numbers.items()):
        # Getting brightness
        qs = Frame.objects.filter(video_id=video, number__in=frames, regularly_sampled=True).order_by('number')
        all_frames = all_frames.union(VideoIntervalCollection3D.from_django_qs(
            qs, schema={'t1':'number', 't2': 'number', 'payload': 'brightness'}), profile=False)
    
    def select_second(p):
        return p[1]
    
    # Shot_IS<Frame_IS>
    shots_with_brightness = short_shots.collect_by_interval(
        all_frames, T(overlaps()), time_window=0, filter_empty=False
    ).map_payload(select_second)
    
    # Sequence_IS<Shot_IS<Frame_IS>>
    one_shots = shots_with_brightness.collect_by_interval(
        shots_with_brightness, T(equal()), time_window=0).map_payload(select_second)
    n_shots = one_shots
    for n in range(2, NUM_SHOTS+1):
        print("Constructing {} consecutive short shots".format(n))
        n_shots = n_shots.merge(
            one_shots,
            T(meets_before(epsilon=1)),
            payload_merge_op = IntervalSet3D.union,            
            time_window=1, profile=False)
        print('There are {} videos with {} consecutive short shots'.format(
                 len(n_shots.get_allintervals()), n))
        
    def merge_shots(seq1, seq2):
        return seq1.union(seq2.minus(seq1))
    coalesced_n_shots = n_shots.temporal_coalesce(payload_merge_op=merge_shots)
    
    def bright_enough(shots):
        # Check if any shot is above the mean brightness threshold
        def compute_avg_brightness(frames):
            ret = frames.fold(lambda acc, f: acc+f.payload, 0)
            if not frames.empty():
                ret = ret / frames.size()
            return ret
        averaged = shots.map_payload(compute_avg_brightness)
        # The previous fold had no initial value, so its accumulator started
        # as the first interval object (truthy) and `acc or ...` accepted every
        # sequence. Test each shot's mean brightness explicitly instead.
        return any(shot.payload > BRIGHTNESS_THRESHOLD
                   for shot in averaged.get_intervals())
            
    n_bright_shots = n_shots.filter(P(bright_enough))
    
    vids = n_bright_shots.get_allintervals().keys()
    results = get_all_segments(vids)
    fps_map = dict((i, Video.objects.get(id=i).fps) for i in vids)
    
    # Word bounds are multiplied by fps to express them in frames; the fps is
    # kept as payload so has_few_words can convert a length back to seconds.
    # Word_IS<fps>
    caption_results = VideoIntervalCollection3D({
        video_id: IntervalSet3D([Interval3D(
            (word[0] * fps_map[video_id], word[1] * fps_map[video_id]),
            payload = fps_map[video_id])
            for word in words])
        for video_id, words in results
    })
    
    def has_few_words(seq):
        _, words = seq.payload
        n_words = words.size()
        if n_words == 0:
            return True
        # Every word of a video carries that video's fps as payload.
        time = seq.length() / words.get_intervals()[0].payload
        return n_words / time <= MAX_NUM_WORDS_PER_SECOND
    
    # Seq_IS<(Shot_IS<Frame_IS>, Word_IS)>
    n_bright_shots_with_few_words = n_bright_shots.collect_by_interval(
        caption_results,
        T(overlaps()),
        time_window=0,
        filter_empty=False).filter(has_few_words)
    
    # Seq_IS<Shot_IS<Frame_IS>>
    action_shots = coalesced_n_shots.filter_against(
        n_bright_shots_with_few_words,
        T(overlaps()),
        time_window=0)
    
    return action_shots

# Run the action-shot query and show the flattened result without payload
# objects; the widget is the cell's output.
answer = action_shots()
display_result(convert_to_1d_collection(answer))

# Conversations with Identity Labels

In [None]:
def coalesce(self, predicate, payload_merge_op):
    """Merge intervals that satisfy ``predicate`` until the set sizes stop changing.

    Each pass folds over the intervals, keeping a list of "open" intervals
    that may still merge with later ones. An incoming interval is merged
    (via ``Interval.merge`` with ``payload_merge_op``) into the first open
    interval for which ``predicate(open_interval, interval)`` holds; otherwise
    it becomes a new open interval. Open intervals that end before the
    incoming interval starts are retired. This retirement test assumes the
    fold visits intervals in order of start time.

    Args:
        self: object exposing ``size(profile=...)`` and
            ``fold_to_set(reducer, init, acc_to_set=..., profile=...)``.
        predicate: called as ``predicate(open_interval, interval)``.
        payload_merge_op: forwarded to ``Interval.merge``.

    Returns:
        The coalesced set or collection produced by the final pass.
    """
    from rekall.interval_set_3d import IntervalSet3D

    # The fold state is (finished, open). The initial state is an immutable
    # empty tuple rather than ([], []): the same init object is supplied to
    # every fold, so mutable lists inside it would be shared between folds and
    # intervals retired in one fold would show up in the next.
    def update(state, interval):
        finished, open_intervals = state if state else ([], [])
        still_open = []
        for cur in open_intervals:
            # No more intervals will overlap with cur
            if cur.t[1] < interval.t[0]:
                finished.append(cur)
            else:
                still_open.append(cur)
        for i, cur in enumerate(still_open):
            if predicate(cur, interval):
                still_open[i] = cur.merge(interval, payload_merge_op)
                break
        else:
            # Nothing matched: the interval starts a new open group.
            still_open.append(interval)
        return finished, still_open

    def state_to_set(state):
        # An untouched (empty) state means the fold saw no intervals.
        return IntervalSet3D(state[0] + state[1] if state else [])

    current = self
    converged = False
    while not converged:
        old = current.size(profile=False)
        current = current.fold_to_set(update, (), acc_to_set=state_to_set,
                                      profile=False)
        converged = old == current.size(profile=False)
    return current

In [None]:
def conversationsq(vids, progress=True):
    """Find conversation-like sequences of shots among identified characters.

    Steps:

    1. Load character-labelled faces for ``vids`` and the cinematic shots of
       the videos that actually have such faces.
    2. Attach faces to shots and group each shot's faces by character.
    3. Pair each shot with a following shot (``before(max_dist=1)``) and record
       every (earlier character, later character) pair of distinct characters.
    4. Coalesce pairs of shots that share a character pair in either order.
    5. Keep coalesced candidates that cover at least three shots.

    Args:
        vids: video ids to search.
        progress: forwarded as ``progress`` to the Django loaders.

    Returns:
        Conv_IS<Shot_IS<Char_IS<(char, Face_IS)>>>, or an empty collection when
        no labelled faces exist for ``vids``.
    """
    from query.models import FaceCharacterActor, Shot
    from rekall.video_interval_collection_3d import VideoIntervalCollection3D
    from rekall.interval_set_3d import IntervalSet3D, Interval3D
    from rekall.temporal_predicates import before, overlaps, equal
    from rekall.interval_set_3d_utils import T,P,XY,or_preds,X,Y

    def second_of(pair):
        return pair[1]

    # One row per labelled face; a face occupies the single frame it was seen in.
    labelled_faces_qs = FaceCharacterActor.objects.annotate(
        min_frame=F('face__frame__number'),
        max_frame=F('face__frame__number'),
        video_id=F('face__frame__video_id'),
        bbox_x1=F('face__bbox_x1'),
        bbox_y1=F('face__bbox_y1'),
        bbox_x2=F('face__bbox_x2'),
        bbox_y2=F('face__bbox_y2'),
        character_name=F('characteractor__character__name')
    ).filter(face__frame__video_id__in=vids)
    face_count = labelled_faces_qs.count()

    face_schema = VideoIntervalCollection3D.django_bbox_default_schema()
    face_schema['payload'] = 'character_name'
    labelled_faces = VideoIntervalCollection3D.from_django_qs(
        labelled_faces_qs, face_schema, progress=progress, total=face_count)

    # Continue only with the videos that have labelled faces.
    videos_with_faces = labelled_faces.get_allintervals().keys()
    if len(videos_with_faces) == 0:
        return VideoIntervalCollection3D({})

    cinematic_shots_qs = Shot.objects.filter(
        video_id__in = videos_with_faces,
        cinematic = True,
    )
    shot_count = cinematic_shots_qs.count()
    cinematic_shots = VideoIntervalCollection3D.from_django_qs(
        cinematic_shots_qs,
        progress=progress, total=shot_count
    )

    # Shot_IS<Face_IS>
    shots_with_faces = cinematic_shots.collect_by_interval(
        labelled_faces, T(overlaps()), time_window=0, filter_empty=True
    ).map_payload(second_of)

    def faces_by_character(shot_faces):
        def combine(char, char_faces):
            # One interval per character, spanning all of that character's faces.
            span = char_faces.fold(Interval3D.merge)
            span.payload = (char, char_faces)
            return span
        return shot_faces.group_by(lambda face: face.payload, combine)

    # Shot_IS<Char_IS<(char, Face_IS)>>
    shots_with_chars = shots_with_faces.map_payload(faces_by_character)

    def character_pairs(earlier_chars, later_chars):
        def names_in(chars):
            return chars.fold(lambda names, char: names + [char.payload[0]], [])
        earlier_names = names_in(earlier_chars)
        later_names = names_in(later_chars)
        return [(first, second)
                for first in earlier_names
                for second in later_names
                if first != second]

    # Seq_IS<[(char, char)]>
    shot_pairs = shots_with_chars.merge(
        shots_with_chars,
        T(before(max_dist=1)),
        payload_merge_op=character_pairs,
        time_window=1
    )

    def share_character_pair(pairs_a, pairs_b):
        # A pair matches regardless of which character came first.
        return any(
            (a1 == a2 and b1 == b2) or (a1 == b2 and b1 == a2)
            for a1, b1 in pairs_a
            for a2, b2 in pairs_b)

    def concat_pairs(pairs_a, pairs_b):
        return pairs_a + pairs_b

    candidates = coalesce(shot_pairs, P(share_character_pair), concat_pairs)

    def has_at_least_three_shots(shots_in_candidate):
        return shots_in_candidate.size() >= 3

    # Conv_IS<Shot_IS<Char_IS<(char, Face_IS)>>>
    candidates_with_shots = candidates.collect_by_interval(
        shots_with_chars, T(overlaps()), time_window=0, filter_empty=True
    ).map_payload(second_of)
    return candidates_with_shots.filter(P(has_at_least_three_shots))

#answer = conversationsq([380])
#display_result(convert_to_1d_collection(answer))

# ShotScale

In [None]:
def shot_scale_q(vids, progress=True):
    """Label every cinematic shot of ``vids`` with its most common frame scale.

    Steps:

    1. Load cinematic shots, regularly sampled faces and regularly sampled
       poses for ``vids``.
    2. Derive a scale for each face from its bbox height and for each pose
       from its keypoints (both via ``esper.shot_scale``).
    3. For every frame having a face or pose, take the maximum scale over the
       faces and poses in that frame.
    4. For every shot, take the mode of its frames' scales. A shot without any
       frames is labelled ``ShotScale.UNKNOWN``.

    Args:
        vids: video ids to process.
        progress: forwarded as ``progress``/``profile`` to the rekall calls.

    Returns:
        Shot_IS<(Scale, Frame_IS<(Scale, Face_IS, Pose_IS)>)>
    """
    from query.models import Face, Pose, Shot
    from rekall.video_interval_collection_3d import VideoIntervalCollection3D
    from rekall.interval_set_3d import IntervalSet3D, Interval3D
    from rekall.temporal_predicates import before, overlaps, equal
    from rekall.interval_set_3d_utils import T,P,XY,or_preds,X,Y
    from esper import pose_wrapper as pw
    from esper import shot_scale
    
    shots_qs = Shot.objects.filter(video_id__in=vids, cinematic=True)
    total = shots_qs.count()
    # Shot_IS
    shots = VideoIntervalCollection3D.from_django_qs(shots_qs, progress=progress, total=total)
    
    faces_qs= Face.objects.filter(frame__video_id__in=vids, frame__regularly_sampled=True
                                 ).annotate(video_id=F('frame__video__id'),
                                            number=F('frame__number'))
    # NOTE(review): PoseMeta is not imported in this function; presumably it is
    # provided by `from esper.prelude import *` — verify.
    poses_qs = PoseMeta.objects.filter(frame__video_id__in=vids, frame__regularly_sampled=True
                                 ).annotate(video_id=F('frame__video__id'),
                                            number=F('frame__number'))
    
    total = faces_qs.count()
    all_faces = VideoIntervalCollection3D.from_django_qs(faces_qs, schema={
        't1':'number', 't2':'number', 'x1':'bbox_x1', 'x2':'bbox_x2', 'y1':'bbox_y1', 'y2':'bbox_y2'
    }, progress=progress, total=total)
    
    total = poses_qs.count()
    all_poses = VideoIntervalCollection3D.from_django_qs(poses_qs, schema={
        't1':'number', 't2':'number', 'payload':'id'
    }, progress=progress, total=total)
    
    # print("Loading {0} Poses".format(total))
    
    # Load the pose objects for the queryset, keyed by their id.
    def get_pose_map(poses_qs):
        poses = pw.get(poses_qs)
        # print("Poses loaded")
        return {pose.id: pose for pose in poses}
    
    def get_pose(pose_map):
        def map_fn(payload):
            return pose_map[payload]
        return map_fn
    
    # Swap each pose interval's payload (an id) for the loaded pose object.
    all_poses = all_poses.map_payload(get_pose(get_pose_map(poses_qs)), profile=progress)
    
    # Sets the payload on the face interval itself and returns that same interval.
    def add_scale_to_face(face):
        face.payload = shot_scale.face_height_to_shot_scale(face.height())
        return face
    
    # Face_IS<Scale>
    all_faces_with_scale = all_faces.map(add_scale_to_face, profile=progress)
    
    def add_scale_to_pose(pose):
        return {
            'pose': pose,
            'scale': shot_scale.pose_keypoints_to_shot_scale(pose.pose_keypoints())
        }
    # Pose_IS<{'pose','scale'}>
    all_poses_with_scale = all_poses.map_payload(add_scale_to_pose, profile=progress)
    
    # Frame_IS
    all_frames_with_signal = all_faces.union(all_poses).group_by_time(profile=progress)
    
    def select_second(p):
        return p[1]
    
    # Frame_IS<(Face_IS, Pose_IS)>
    frames_with_faces_and_poses = all_frames_with_signal.collect_by_interval(
        all_faces_with_scale,
        T(overlaps()),
        filter_empty=False,
        time_window=0, profile=progress,
    ).map_payload(select_second).collect_by_interval(
        all_poses_with_scale,
        T(overlaps()),
        filter_empty=False,
        time_window=0, profile=progress
    )
    
    # The frame's scale is the maximum over its face and pose scales.
    def add_scale_to_frame(payload):
        faces, poses = payload
        def take_max_face(acc, face):
            return max(acc, face.payload)
        def take_max_pose(acc, pose):
            return max(acc, pose.payload['scale'])
        max_face_scale = faces.fold(take_max_face, shot_scale.ShotScale.UNKNOWN)
        max_pose_scale = poses.fold(take_max_pose, shot_scale.ShotScale.UNKNOWN)
        return (max(max_face_scale, max_pose_scale), faces, poses)
    
    # Frame_IS<(Scale, Face_IS, Pose_IS)>
    frames_with_scale = frames_with_faces_and_poses.map_payload(add_scale_to_frame, profile=progress)
    
    # Shot_IS<Frame_IS<(Scale, Face_IS, Pose_IS)>>
    shots_with_frames = shots.collect_by_interval(
        frames_with_scale,
        T(overlaps()),
        filter_empty=False,
        time_window=0, profile=progress,
    ).map_payload(select_second)
    
    # Most frequent scale; ties go to the scale listed later in ShotScale.
    def get_mode(scales):
        count = {s: 0 for s in shot_scale.ShotScale}
        for s in scales:
            count[s] += 1
        best_count = 0
        best_scale = shot_scale.ShotScale.UNKNOWN
        for s in shot_scale.ShotScale:
            # Only scales that were actually observed may win. Without the
            # `> 0` guard a zero count passes `>= 0`, so an empty `scales`
            # list would yield the last ShotScale member instead of UNKNOWN.
            if count[s] > 0 and count[s] >= best_count:
                best_count = count[s]
                best_scale = s
        return best_scale
    
    def add_scale_to_shot(frames):
        def update(acc, frame):
            acc.append(frame.payload[0])
            return acc
        scales = frames.fold(update, [])
        mode = get_mode(scales)
        return mode, frames
    
    # Shot_IS<(Scale, Frame_IS<(Scale, Face_IS<Scale>, Pose_IS<(Scale, PoseKeypoints)>)>)>
    shots_with_scale = shots_with_frames.map_payload(add_scale_to_shot, profile=progress)
    return shots_with_scale

def payload_to_vgrid_objects(payload):
    """Convert a shot-scale payload into VGrid objects for the deciding faces/poses.

    ``payload`` is ``(scale, frames)``. Only frames whose own scale equals the
    shot's ``scale`` are used, and within them only faces and poses with that
    same scale. Faces become ``'bbox'`` dicts; poses are converted with
    ``esper.stdlib.pose_to_dict``.
    """
    from rekall.interval_set_3d_utils import P
    # Frame_IS<(Scale, Face_IS, Pose_IS)>
    scale, frames = payload

    def bbox_of(face):
        return {
            'type': 'bbox',
            'bbox_x1': face.x[0], 'bbox_x2': face.x[1],
            'bbox_y1': face.y[0], 'bbox_y2': face.y[1]
        }

    def skeleton_of(pose):
        from esper.stdlib import pose_to_dict
        return pose_to_dict(pose.payload['pose'])

    def add_frame_objects(objects, frame):
        _, faces, poses = frame.payload
        face_objects = faces.filter(
            P(lambda face_scale: face_scale == scale)
        ).fold(lambda found, face: found + [bbox_of(face)], [])
        pose_objects = poses.filter(
            P(lambda meta: meta['scale'] == scale)
        ).fold(lambda found, pose: found + [skeleton_of(pose)], [])
        return objects + face_objects + pose_objects

    frames_at_scale = frames.filter(P(lambda frame_payload: frame_payload[0] == scale))
    return frames_at_scale.fold(add_frame_objects, [])

#answer = shot_scale_q([1])
#display_result(convert_to_1d_collection(answer.map_payload(payload_to_vgrid_objects)), display_payload=True)

# Multiprocess

In [None]:
def dummy_func(vid):
    """Return a set holding one interval that spans frames 0..num_frames-1 of ``vid``.

    Prints a progress line, then looks the video up by id to read its frame count.
    """
    from rekall.interval_set_3d import IntervalSet3D, Interval3D
    from query.models import Video

    print("Processing", vid)
    video = Video.objects.get(id=vid)
    last_frame = video.num_frames - 1
    whole_video = Interval3D((0, last_frame))
    return IntervalSet3D([whole_video])

def run_dummy():
    """Run ``dummy_func`` over the ids of all videos through ``par_do``.

    ``par_do`` is invoked with ``profile=True, parallel=True, fork=True``; its
    return value is passed back unchanged.
    """
    from esper.rekall_parallel import par_do
    from query.models import Video

    all_video_ids = []
    for video in Video.objects.all():
        all_video_ids.append(video.id)
    return par_do(dummy_func, all_video_ids, profile=True, parallel=True, fork=True)

# Run the parallel dummy job and show its flattened result; the widget is the cell's output.
display_result(convert_to_1d_collection(run_dummy()))

## Hermione in the middle

In [None]:
def hermione_in_the_middle_for_vid(vid):
    """Find frames of one video where Hermione appears between Harry and Ron.

    Labelled faces of the video are grouped by frame; a frame is kept when it
    contains faces of all three characters that are tall enough, roughly
    aligned in top edge and height, and ordered Harry-Hermione-Ron (or the
    reverse) along the x axis.

    Args:
        vid: id of the video to query.

    Returns:
        The frames that match, shaped Frame_IS<Face_IS<character>>. The
        result is empty when the video has no labelled faces.
    """
    from rekall.video_interval_collection_3d import VideoIntervalCollection3D
    from rekall.interval_set_3d_utils import P,XY,or_preds,X
    from rekall.temporal_predicates import before, overlaps_before
    from rekall.bbox_predicates import height_at_least, same_value, same_height
    from query.models import FaceCharacterActor

    MIN_FACE_HEIGHT = 0.25
    EPSILON = 0.15
    NAMES = [ 'ron weasley', 'hermione granger', 'harry potter' ]

    # Each face row spans a single frame: min_frame == max_frame.
    faces_with_character_actor_qs = FaceCharacterActor.objects.annotate(
        min_frame=F('face__frame__number'),
        max_frame=F('face__frame__number'),
        video_id=F('face__frame__video_id'),
        bbox_x1=F('face__bbox_x1'),
        bbox_y1=F('face__bbox_y1'),
        bbox_x2=F('face__bbox_x2'),
        bbox_y2=F('face__bbox_y2'),
        character_name=F('characteractor__character__name')
    ).filter(face__frame__video_id=vid)

    schema = VideoIntervalCollection3D.django_bbox_default_schema()
    schema['payload'] = 'character_name'
    # get_set falls back to an empty set when the video has no labelled faces;
    # indexing get_allintervals() with [vid] would raise KeyError instead.
    all_faces = get_set(vid, VideoIntervalCollection3D.from_django_qs(
        faces_with_character_actor_qs, schema))
    frames_with_faces = all_faces.group_by_time()

    def name_is(name):
        return lambda f: f.payload == name
    def start_before():
        return or_preds(overlaps_before(), before())
    def in_order():
        return lambda a,b,c: start_before()(a,b) and start_before()(b,c)
    def rev_order():
        # Same check with the three arguments reversed.
        return lambda *args: in_order()(*args[::-1])

    pattern = [
        (["harry"], [XY(height_at_least(MIN_FACE_HEIGHT)), name_is(NAMES[2])]),
        (["ron"], [XY(height_at_least(MIN_FACE_HEIGHT)), name_is(NAMES[0])]),
        (["hermione"], [XY(height_at_least(MIN_FACE_HEIGHT)), name_is(NAMES[1])]),
        (["harry", "ron"], [XY(same_value('y1', epsilon=EPSILON)), XY(same_height(epsilon=EPSILON))]),
        (["harry", "hermione"], [XY(same_value('y1', epsilon=EPSILON)), XY(same_height(epsilon=EPSILON))]),
        (["ron", "hermione"], [XY(same_value('y1', epsilon=EPSILON)), XY(same_height(epsilon=EPSILON))]),
        (["harry","hermione", "ron"], [X(or_preds(in_order(), rev_order()))])
    ]

    def matches_pattern(pattern, exact):
        def pred(intervals):
            return len(intervals.match(pattern, exact))>0
        return pred

    # Frame_IS<Face_IS<character>>
    final = frames_with_faces.filter(P(matches_pattern(pattern, exact=True)))
    return final

def run_hp():
    """Run the Hermione query on every video whose name contains "harry potter" (8 workers)."""
    from esper.rekall_parallel import get_runtime_for_jupyter
    from rekall.runtime import wrap_interval_set
    from query.models import Video

    hp_videos = Video.objects.filter(name__contains="harry potter")
    video_ids = [video.id for video in hp_videos]
    runtime = get_runtime_for_jupyter(num_workers=8)
    per_video_query = wrap_interval_set(hermione_in_the_middle_for_vid)
    return runtime.run(per_video_query, video_ids, profile=True, progress=True)

def payload_to_vgrid_objects(faces):
    """Convert a frame's faces into VGrid bbox dicts tagged with a character id.

    Each face's payload is a character name; it is resolved to the matching
    ``Character`` row's id with one lookup per face.
    """
    from query.models import Character

    def as_bbox(face):
        obj = dict(
            type='bbox',
            bbox_x1=face.x[0], bbox_x2=face.x[1],
            bbox_y1=face.y[0], bbox_y2=face.y[1])
        obj['character_id'] = Character.objects.get(name=face.payload).id
        return obj

    return faces.fold(lambda objs, face: objs + [as_bbox(face)], [])
    
# Run the Harry Potter query, then turn each matching frame's faces into
# VGrid objects and display them.
answer = run_hp()
print("Answer computed. Preparing for VGrid.")
display_result(convert_to_1d_collection(answer.map_payload(payload_to_vgrid_objects)), display_payload=True)

## Kissing

In [None]:
def kissing_for_vid(vid):
    """Find candidate kissing segments in one video.

    Steps, in order:
      1. Load confident, tall faces on regularly sampled frames and group
         them by frame.
      2. Keep frames with a left/right pair of faces that overlap
         horizontally (but not too much), overlap vertically and have similar
         heights.
      3. Attach face landmarks and keep frames where the two faces are turned
         in opposite directions and their mouths are close.
      4. Attach those frames to cinematic shots; a shot starts at its first
         such frame and is clipped to its last frame that has exactly two
         tall faces.
      5. Remove the parts that overlap caption words, coalesce what is left
         and drop short results.

    Args:
        vid: id of the video to query.

    Returns:
        The interval set of kissing segments for the video.
    """
    # Every model used below is imported here so the function does not depend
    # on names that merely happen to exist in the kernel's global namespace.
    from query.models import Face, Shot, Video
    from rekall.interval_set_3d import Interval3D, IntervalSet3D
    from rekall.video_interval_collection_3d import VideoIntervalCollection3D
    from rekall.interval_set_3d_utils import T,P,XY,or_preds,X,Y
    from rekall.temporal_predicates import overlaps, overlaps_before, before
    from rekall.bbox_predicates import same_height
    import esper.face_landmarks_wrapper as flw
    from esper.captions import get_all_segments
    import numpy as np

    MAX_MOUTH_DIFF = 0.12
    MIN_FACE_CONFIDENCE = 0.8
    MIN_FACE_HEIGHT = 0.4
    MAX_FACE_HEIGHT_DIFF = 0.1
    MIN_FACE_OVERLAP_X = 0.05
    MIN_FACE_OVERLAP_Y = 0.2
    MAX_FACE_OVERLAP_X_FRACTION = 0.7
    MIN_FACE_ANGLE = 0.2

    def get_landmarks(faces):
        """Replace each face-id payload with {'id': ..., 'landmarks': ...}."""
        ids = [face.payload for face in faces.get_intervals()]
        # One batched lookup for all faces in the set.
        landmarks = flw.get_from_face_ids(ids)
        id_to_lm = {idx: lm for idx, lm in zip(ids, landmarks)}
        return faces.map_payload(lambda idx : {
            'id': idx,
            'landmarks': id_to_lm[idx]
        })

    def mouths_are_close(lm1, lm2):
        """True when the mean points of the selected lip landmarks are within MAX_MOUTH_DIFF."""
        select_outer=[2,3,4,8,9,10]
        select_inner=[1,2,3,5,6,7]
        mouth1 = np.concatenate((lm1.outer_lips()[select_outer], lm1.inner_lips()[select_inner]))
        mouth2 = np.concatenate((lm2.outer_lips()[select_outer], lm2.inner_lips()[select_inner]))
        mean1 = np.mean(mouth1, axis=0)
        mean2 = np.mean(mouth2, axis=0)
        return np.linalg.norm(mean1-mean2) <= MAX_MOUTH_DIFF

    # Line is ax+by+c=0
    def project_point_to_line(pt, a, b, c):
        x0,y0=pt[0], pt[1]
        d=a*a+b*b
        x=(b*(b*x0-a*y0)-a*c)/d
        y=(a*(-b*x0+a*y0)-b*c)/d
        return np.array([x,y])

    # Returns (a,b,c) which defines ax+by+c=0
    def find_best_line_fit(xs, ys):
        # Fit both y=f(x) and x=f(y) and keep the one with the smaller squared
        # error, so near-vertical lines are handled as well.
        fit1 = np.polyfit(xs, ys, 1)
        error1 = np.sum((np.poly1d(fit1)(xs)-ys)**2)
        fit2 = np.polyfit(ys, xs, 1)
        error2 = np.sum((np.poly1d(fit2)(ys)-xs)**2)
        if error1 < error2:
            # fit1[0]x+fit1[1]=y
            return fit1[0], -1, fit1[1]
        # fit2[0]y+fit2[1]=x
        return -1, fit2[0], fit2[1]

    # Positive if facing left
    def signed_face_angle(lm):
        center_line_indices = [27,28, 32, 33,34, 51,62,66,57]
        data = lm.landmarks[center_line_indices]
        a, b, c = find_best_line_fit(data[:,0], data[:,1])
        A = project_point_to_line(lm.landmarks[center_line_indices[0]], a, b, c)
        B = project_point_to_line(lm.landmarks[center_line_indices[-1]], a, b, c)
        AB = B-A
        AB = AB / np.linalg.norm(AB)
        C = np.mean(lm.nose_bridge()[2:4], axis=0)
        AC = C-A
        AC = AC / np.linalg.norm(AC)
        # z-component of the 2-D cross product, written out because np.cross
        # on 2-element vectors is deprecated in NumPy 2.0.
        return AB[0]*AC[1] - AB[1]*AC[0]

    # Annotate face rows with start and end frames and the video ID
    faces_qs = Face.objects.filter(
        frame__regularly_sampled=True,
        probability__gte=MIN_FACE_CONFIDENCE).annotate(
        min_frame=F('frame__number'),
        max_frame=F('frame__number'),
        height = F('bbox_y2')-F('bbox_y1'),
        video_id=F('frame__video_id')).filter(height__gte=MIN_FACE_HEIGHT, video_id=vid)

    # Frame_IS<Face_IS<face_id>>
    frames_with_faces = get_set(vid, VideoIntervalCollection3D.from_django_qs(
        faces_qs, VideoIntervalCollection3D.django_bbox_default_schema(),
    )).group_by_time()

    overlap_faces_pattern = [
        (["left","right"], [
            X(or_preds(before(), overlaps_before())), # Left face on the left
            X(lambda f1,f2: f1.end - f2.start > MIN_FACE_OVERLAP_X), # Faces overlap
            Y(lambda f1,f2: min(f1.end, f2.end)-max(f1.start, f2.start) > MIN_FACE_OVERLAP_Y), # No face is entirely above another
            XY(same_height(MAX_FACE_HEIGHT_DIFF)),
            # Horizontal overlap is only a fraction of the wider face.
            X(lambda f1, f2: (f1.end-f2.start)/max(f1.length(), f2.length()) < MAX_FACE_OVERLAP_X_FRACTION),
        ])
    ]

    def matches_pattern(pattern, exact):
        def pred(intervals):
            return len(intervals.match(pattern, exact))>0
        return pred

    # Frame_IS<Face_IS<face_id>>
    frames_with_overlapped_faces = frames_with_faces.filter(P(matches_pattern(overlap_faces_pattern, exact=True)))

    # Frame_IS<Face_IS<FaceMeta>>
    frames_with_landmarks = frames_with_overlapped_faces.map_payload(get_landmarks)
    opposing_face_pattern = [
        (['left'], [P(lambda f: signed_face_angle(f['landmarks']) < -MIN_FACE_ANGLE)]),
        (['right'], [P(lambda f: signed_face_angle(f['landmarks']) > MIN_FACE_ANGLE)]),
        (['left','right'], [P(lambda l, r: mouths_are_close(l['landmarks'], r['landmarks']))])
    ]

    # Frame_IS<Face_IS<FaceMeta>>
    frames_with_opposing_faces = frames_with_landmarks.filter(P(matches_pattern(opposing_face_pattern, exact=True)))

    # Merge with shots
    shots_qs = Shot.objects.filter(
        video_id = vid,
        cinematic = True,
    )
    # Shot_IS<>
    shots = get_set(vid, VideoIntervalCollection3D.from_django_qs(
        shots_qs,
    ))
    # Shot_IS<Frame_IS<Face_IS<FaceMeta>>>
    kissing_shots = shots.collect_by_interval(
        frames_with_opposing_faces,
        T(overlaps()),
        time_window=1,
    ).map_payload(lambda p:p[1]).map(
        # Take the start of the kissing as the start of the shot
        lambda shot: Interval3D((shot.payload.get_intervals()[0].t[0], shot.t[1]), payload=shot.payload))

    # Get faces in shots
    faces_qs2 = Face.objects.filter(
         frame__regularly_sampled=True,
         frame__video_id=vid,
         probability__gte=MIN_FACE_CONFIDENCE).annotate(
             min_frame=F('frame__number'),
             max_frame=F('frame__number'),
             video_id=F('frame__video_id')
    )
    # Frame_IS<Face_IS>
    frames_with_faces2 = get_set(vid, VideoIntervalCollection3D.from_django_qs(
        faces_qs2, VideoIntervalCollection3D.django_bbox_default_schema(),
    )).group_by_time()

    def both_faces_are_high(faces):
        """True unless some face in the set is shorter than MIN_FACE_HEIGHT."""
        def update(result, face):
            if face.height() < MIN_FACE_HEIGHT:
                return False
            return result
        return faces.fold(update, True)

    # Frame_IS<Face_IS>
    frames_with_two_faces = frames_with_faces2.filter(
        P(lambda faces: faces.size()==2)).filter(P(both_faces_are_high))

    # Collect frames with two faces into kissing shots, and clips the shot to the last frame with two faces
    def clip_to_last_frame(intrvl):
        frames = intrvl.payload[1]
        if frames.empty():
            return intrvl.copy()
        return Interval3D((intrvl.t[0], frames.get_intervals()[-1].t[1]), payload=intrvl.payload)

    # Shot_IS<(Frame_IS<Face_IS>, Frame_IS<Face_IS>)>
    clipped_kissing_shots = kissing_shots.collect_by_interval(
        frames_with_two_faces, T(overlaps()), time_window=1, filter_empty=False
    ).map(clip_to_last_frame).filter_size(min_size=12)


    _, words = next(get_all_segments([vid]))
    fps = Video.objects.get(id=vid).fps
    # Word_IS<>
    caption_results = IntervalSet3D([Interval3D((
            word[0] * fps, # start frame
            word[1] * fps))
            for word in words])

    kissing_without_words = clipped_kissing_shots.minus(caption_results)
    # Subtracting word intervals can leave fractional frame bounds; truncate
    # them back to integers after coalescing.
    kissing_final = kissing_without_words.temporal_coalesce(epsilon=0.5).map(
        lambda i: Interval3D((int(i.t[0]), int(i.t[1])), payload=i.payload)
    ).filter_size(min_size=12)

    return kissing_final

def payload_to_vgrid_objects(payload):
    """Flatten a kissing-shot payload into VGrid objects.

    ``payload`` is a pair; only its first element, shaped
    Frame_IS<Face_IS<FaceMeta>>, is used. Every face contributes a bbox dict
    followed by its landmarks dict.
    """
    opposing_frames, _ = payload

    def objects_for(face):
        # Imported lazily, only once a face actually has to be converted.
        from esper.stdlib import face_landmarks_to_dict
        bbox = dict(
            type='bbox',
            bbox_x1=face.x[0], bbox_x2=face.x[1],
            bbox_y1=face.y[0], bbox_y2=face.y[1])
        return [bbox, face_landmarks_to_dict(face.payload['landmarks'])]

    def add_frame(objs, frame):
        frame_objs = frame.payload.fold(
            lambda so_far, face: so_far + objects_for(face), [])
        return objs + frame_objs

    return opposing_frames.fold(add_frame, [])

def run_kissing():
    """Run ``kissing_for_vid`` over all videos with a storage-backed pool of 10 workers.

    The storage factory is given the output directory ``/app/data/kissing``;
    videos are dispatched in chunks of 5.
    """
    from esper.rekall_parallel import get_worker_pool_factory_for_jupyter, WorkerPoolWithStorageFactory
    from rekall.runtime import Runtime, wrap_interval_set
    from query.models import Video

    video_ids = [video.id for video in Video.objects.all()]
    pool_factory = get_worker_pool_factory_for_jupyter(num_workers=10)
    runtime = Runtime(WorkerPoolWithStorageFactory("/app/data/kissing", pool_factory))
    return runtime.run(
        wrap_interval_set(kissing_for_vid),
        video_ids,
        profile=True,
        progress=True,
        chunksize=5,
    )

# Run the kissing query over all videos, then convert payloads to VGrid
# objects (bboxes and landmarks) and display them.
answer = run_kissing()
print("Query finished. Preparing VGrid.")
display_result(convert_to_1d_collection(answer.map_payload(payload_to_vgrid_objects)), display_payload=True)

## Action Shots

In [None]:
def action_shots_for_vid(vid):
    """Find action sequences in one video: runs of short, bright, mostly wordless shots.

    Steps, in order:
      1. Load cinematic shots shorter than MAX_SHOT_DURATION seconds and
         attach the brightness of their regularly sampled frames.
      2. Chain NUM_SHOTS consecutive short shots into sequences.
      3. Keep sequences in which at least one shot's mean brightness exceeds
         BRIGHTNESS_THRESHOLD and whose caption rate is at most
         MAX_NUM_WORDS_PER_SECOND.
      4. Return the coalesced sequences that overlap a surviving sequence.

    Args:
        vid: id of the video to query.

    Returns:
        The interval set of action sequences, shaped Seq_IS<Shot_IS<Frame_IS>>.
    """
    # Frame and Video are imported here so the function does not depend on
    # names that merely happen to exist in the kernel's global namespace.
    from query.models import Frame, Shot, Video
    from rekall.video_interval_collection_3d import VideoIntervalCollection3D
    from rekall.interval_set_3d import IntervalSet3D, Interval3D
    from rekall.temporal_predicates import meets_before, overlaps, equal
    from rekall.interval_set_3d_utils import T,P
    from django.db.models import ExpressionWrapper, FloatField
    from esper.captions import get_all_segments

    NUM_SHOTS=5
    MAX_SHOT_DURATION=0.8
    BRIGHTNESS_THRESHOLD = 20.0
    MAX_NUM_WORDS_PER_SECOND = 1.0

    # duration = frame span / fps
    shots_qs = Shot.objects.annotate(
        duration = ExpressionWrapper((F('max_frame') - F('min_frame')) / F('video__fps'), output_field=FloatField())
    ).filter(
        duration__lt=MAX_SHOT_DURATION,
        duration__gt=0.,
        cinematic=True,
        video_id=vid,
    )

    # Shot_IS<>
    short_shots = get_set(vid, VideoIntervalCollection3D.from_django_qs(shots_qs))

    def get_all_frames(short_shots):
        """List every frame number covered by the short shots (bounds inclusive)."""
        def update(frames, shot):
            # Extend in place; concatenating would copy the list for every shot.
            frames.extend(range(shot.t[0], shot.t[1]+1))
            return frames
        return short_shots.fold(update, [])

    frame_numbers = get_all_frames(short_shots)
    frames_qs = Frame.objects.filter(video_id=vid, number__in=frame_numbers, regularly_sampled=True).order_by('number')
    all_frames = get_set(vid, VideoIntervalCollection3D.from_django_qs(
            frames_qs, schema={'t1':'number', 't2': 'number', 'payload': 'brightness'}))

    def select_second(p):
        return p[1]

    # Shot_IS<Frame_IS>
    shots_with_brightness = short_shots.collect_by_interval(
        all_frames, T(overlaps()), time_window=0, filter_empty=False
    ).map_payload(select_second)

    # Sequence_IS<Shot_IS<Frame_IS>>
    one_shots = shots_with_brightness.collect_by_interval(
        shots_with_brightness, T(equal()), time_window=0).map_payload(select_second)
    n_shots = one_shots
    # Each pass appends one more adjacent short shot to every sequence.
    for _ in range(2, NUM_SHOTS+1):
        n_shots = n_shots.merge(
            one_shots,
            T(meets_before(epsilon=1)),
            payload_merge_op = IntervalSet3D.union,
            time_window=1)

    def merge_shots(seq1, seq2):
        # Union of the two sequences without repeating shots already in seq1.
        return seq1.union(seq2.minus(seq1))
    coalesced_n_shots = n_shots.temporal_coalesce(payload_merge_op=merge_shots)

    def bright_enough(shots):
        # Check whether any shot's mean brightness is above the threshold
        def compute_avg_brightness(frames):
            ret = frames.fold(lambda acc, f: acc+f.payload, 0)
            if not frames.empty():
                ret = ret / frames.size()
            return ret
        # Start from an explicit False, as every other fold in this notebook
        # passes its initial accumulator, instead of relying on the default.
        return shots.map_payload(compute_avg_brightness).fold(
            lambda acc, shot: acc or shot.payload > BRIGHTNESS_THRESHOLD, False)

    n_bright_shots = n_shots.filter(P(bright_enough))

    _, words = next(get_all_segments([vid]))
    fps = Video.objects.get(id=vid).fps

    # Word_IS<>
    caption_results = IntervalSet3D([Interval3D(
            (word[0] * fps, word[1] * fps))
            for word in words])

    def has_few_words(seq):
        _, words = seq.payload
        n_words = words.size()
        if n_words == 0:
            return True
        # Sequence length in frames divided by fps.
        time = seq.length() / fps
        return n_words / time <= MAX_NUM_WORDS_PER_SECOND

    # Seq_IS<(Shot_IS<Frame_IS>, Word_IS)>
    n_bright_shots_with_few_words = n_bright_shots.collect_by_interval(
        caption_results,
        T(overlaps()),
        time_window=0,
        filter_empty=False).filter(has_few_words)

    # Seq_IS<Shot_IS<Frame_IS>>
    action_shots = coalesced_n_shots.filter_against(
        n_bright_shots_with_few_words,
        T(overlaps()),
        time_window=0)

    return action_shots

def run_action_shots():
    """Run ``action_shots_for_vid`` over every video with 8 workers."""
    from esper.rekall_parallel import get_runtime_for_jupyter
    from rekall.runtime import wrap_interval_set
    from query.models import Video

    video_ids = [video.id for video in Video.objects.all()]
    runtime = get_runtime_for_jupyter(num_workers=8)
    per_video_query = wrap_interval_set(action_shots_for_vid)
    return runtime.run(per_video_query, video_ids, profile=True, progress=True)

# Run the action-shot query over all videos and display the resulting intervals (no payload objects drawn).
answer = run_action_shots()
print("Query finished. Preparing VGrid.")
display_result(convert_to_1d_collection(answer))

## Conversations with Identity

In [None]:
def run_conversations():
    """Run ``conversationsq`` over all videos, 10 video ids per task, with the default runtime."""
    from esper.rekall_parallel import get_runtime_for_jupyter
    from query.models import Video, FaceCharacterActor

    video_ids = [video.id for video in Video.objects.all()]
    runtime = get_runtime_for_jupyter()

    def query(vids):
        # The runtime already reports progress, so the query's own is switched off.
        return conversationsq(vids, progress=False)

    return runtime.run(
        query,
        video_ids,
        profile=True,
        progress=True,
        chunksize=10,
    )

# Run the conversations query over all videos and display the resulting intervals.
answer = run_conversations()
print("Query finished. Preparing VGrid.")
display_result(convert_to_1d_collection(answer))

## ShotScale

In [None]:
def run_shot_scale():
    """Run ``shot_scale_q`` on videos with id >= 600, one video per task, with the default runtime."""
    from esper.rekall_parallel import get_runtime_for_jupyter
    from query.models import Video

    video_ids = [video.id for video in Video.objects.filter(id__gte=600)]
    runtime = get_runtime_for_jupyter()

    def query(vids):
        # The runtime already reports progress, so the query's own is switched off.
        return shot_scale_q(vids, progress=False)

    return runtime.run(
        query,
        video_ids,
        profile=True,
        progress=True,
        chunksize=1,
    )

def payload_to_vgrid_objects(payload):
    """Flatten a shot-scale payload into the list of objects to draw in VGrid.

    ``payload`` unpacks as ``(scale, frames)`` where ``frames`` is shaped
    Frame_IS<(Scale, Face_IS, Pose_IS)>. Only frames, faces and poses whose
    scale equals the shot's scale contribute to the result.
    """
    from rekall.interval_set_3d_utils import P

    shot_scale, frame_set = payload

    def bbox_of(face):
        return dict(
            type='bbox',
            bbox_x1=face.x[0], bbox_x2=face.x[1],
            bbox_y1=face.y[0], bbox_y2=face.y[1])

    def pose_dict_of(pose):
        # Imported lazily, only once a pose actually has to be converted.
        from esper.stdlib import pose_to_dict
        return pose_to_dict(pose.payload['pose'])

    def matching_faces(faces):
        same_scale = faces.filter(P(lambda face_scale: face_scale == shot_scale))
        return same_scale.fold(lambda objs, face: objs + [bbox_of(face)], [])

    def matching_poses(poses):
        same_scale = poses.filter(P(lambda meta: meta['scale'] == shot_scale))
        return same_scale.fold(lambda objs, pose: objs + [pose_dict_of(pose)], [])

    def collect(objs, frame):
        _, faces, poses = frame.payload
        return objs + matching_faces(faces) + matching_poses(poses)

    on_scale_frames = frame_set.filter(P(lambda frame_meta: frame_meta[0] == shot_scale))
    return on_scale_frames.fold(collect, [])

# Run the shot-scale query, then convert payloads to VGrid objects (face bboxes and poses) and display them.
answer = run_shot_scale()
print("Query finished. Preparing VGrid.")
display_result(convert_to_1d_collection(answer.map_payload(payload_to_vgrid_objects)), display_payload=True)


# Scratchpad

In [None]:
# Scratch: keep intervals starting at frame 120401, take the first one under key 380,
# and map each entry of its payload to the list of first payload elements of its members.
answer.filter(lambda i:i.t[0]==120401).get_allintervals()[380].get_intervals()[0].payload.map_payload(lambda char: char.fold(lambda acc, c: acc+[c.payload[0]], []))

In [None]:
# Scratch: under key 32, drill into the first interval -> first payload element -> its first interval's payload intervals.
answer.get_allintervals()[32].get_intervals()[0].payload[0].get_intervals()[0].payload.get_intervals()

In [None]:
def payload_to_vgrid_objects(payload):
    """Flatten a shot-scale payload into the list of objects to draw in VGrid.

    ``payload`` unpacks as ``(scale, frames)`` where ``frames`` is shaped
    Frame_IS<(Scale, Face_IS, Pose_IS)>. Only frames, faces and poses whose
    scale equals the shot's scale contribute to the result.
    """
    from rekall.interval_set_3d_utils import P

    shot_scale, frame_set = payload

    def bbox_of(face):
        return dict(
            type='bbox',
            bbox_x1=face.x[0], bbox_x2=face.x[1],
            bbox_y1=face.y[0], bbox_y2=face.y[1])

    def pose_dict_of(pose):
        # Imported lazily, only once a pose actually has to be converted.
        from esper.stdlib import pose_to_dict
        return pose_to_dict(pose.payload['pose'])

    def matching_faces(faces):
        same_scale = faces.filter(P(lambda face_scale: face_scale == shot_scale))
        return same_scale.fold(lambda objs, face: objs + [bbox_of(face)], [])

    def matching_poses(poses):
        same_scale = poses.filter(P(lambda meta: meta['scale'] == shot_scale))
        return same_scale.fold(lambda objs, pose: objs + [pose_dict_of(pose)], [])

    def collect(objs, frame):
        _, faces, poses = frame.payload
        return objs + matching_faces(faces) + matching_poses(poses)

    on_scale_frames = frame_set.filter(P(lambda frame_meta: frame_meta[0] == shot_scale))
    return on_scale_frames.fold(collect, [])

# Scratch: redisplay the current `answer` with the payload converter defined in this cell.
display_result(convert_to_1d_collection(answer.map_payload(payload_to_vgrid_objects)), display_payload=True)

In [None]:
# Scratch: show the ids of the first three Face rows.
qs=Face.objects.all()[:3]
[q.id for q in qs]

In [None]:
import esper.face_landmarks_wrapper as flw

In [None]:
# Scratch: query the face-landmarks wrapper for face ids 3, 5 and 6.
flw.get_from_face_ids([3,5,6])

In [None]:
from django.db.models import Count
from query.models import Face
# Scratch: count Face rows per video, ordered from fewest to most.
Face.objects.values("frame__video_id").annotate(num_faces=Count('id')).order_by('num_faces')

In [None]:
from query.models import Frame, Video
# Scratch: look up the Frame row numbered num_frames - 1 for video 1.
v = Video.objects.get(id=1)
Frame.objects.filter(video=v, number=v.num_frames-1)

In [None]:
from esper.shot_scale import ShotScale as ShotScaleEnum
from rekall.interval_set_3d_utils import P
# Scratch: display only the intervals of `answer` that pass the payload == ShotScale.EXTREME_LONG filter.
display_result(convert_to_1d_collection(answer.filter(P(lambda p: p==ShotScaleEnum.EXTREME_LONG))))

In [None]:
# Scratch: same display, but with map_payload called with profile=True and parallel=True.
display_result(convert_to_1d_collection(answer.map_payload(payload_to_vgrid_objects, profile=True, parallel=True)), display_payload=True)