In [None]:
from esper.prelude import *
from esper.rekall import intrvllists_to_result_with_objects, bbox_to_result_object
from esper.stdlib import face_landmarks_to_dict
def two_faces_up_close():
    """Find caption-free segments in which two large, overlapping faces oppose each other.

    Steps, using the thresholds defined at the top of the body:

    1. Faces of videos 0..72 with probability >= MIN_FACE_CONFIDENCE and
       bounding-box height >= MIN_FACE_HEIGHT are grouped per frame.
    2. Frames are kept when their faces satisfy ``graph`` (a left/right pair
       that overlaps horizontally and vertically and has similar heights).
       Landmarks are then attached and ``graph2`` must hold as well (signed
       face angles of opposite sign, mouths within MAX_MOUTH_DIFF).
    3. Every surviving frame is stretched to the end of the
       'shot-hsvhist-face' shot it overlaps, then clipped to the last listed
       frame holding exactly two confident faces that are both at least
       MIN_FACE_HEIGHT tall; ``filter_length(min_length=12)`` is applied.
    4. Caption word intervals are subtracted, bounds are cast to int and
       ``filter_length(min_length=12)`` is applied again.

    Returns:
        The resulting interval collection. Element 0 of each payload is the
        list of matched face dicts (bbox fields, 'id' and 'landmarks').
    """
    from query.models import Face
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.parsers import in_array, bbox_payload_parser, merge_dict_parsers, dict_payload_parser
    from rekall.merge_ops import payload_plus
    from rekall.payload_predicates import payload_satisfies
    from rekall.spatial_predicates import scene_graph
    from rekall.temporal_predicates import overlaps
    from rekall.face_landmark_predicates import looking_left, looking_right
    from rekall.bbox_predicates import height_at_least, same_height
    import esper.face_landmarks_wrapper as flw
    from esper.captions import get_all_segments

    MAX_MOUTH_DIFF = 0.12
    MIN_FACE_CONFIDENCE = 0.8
    MIN_FACE_HEIGHT = 0.4
    MAX_FACE_HEIGHT_DIFF = 0.1
    MIN_FACE_OVERLAP_X = 0.05
    MIN_FACE_OVERLAP_Y = 0.2
    MAX_FACE_OVERLAP_X_FRACTION = 0.7
    MIN_FACE_ANGLE = 0.2

    def map_payload(func):
        """Lift ``func`` (payload -> payload) into an interval -> interval mapper."""
        def map_fn(intvl):
            intvl.payload = func(intvl.payload)
            return intvl
        return map_fn

    def get_landmarks(faces):
        """Attach a 'landmarks' entry to every face dict and return the list."""
        ids = [face['id'] for face in faces]
        # NOTE(review): faces and landmarks are paired positionally; this assumes
        # flw.get returns landmarks in the same order as `ids` -- confirm.
        landmarks = flw.get(Face.objects.filter(id__in=ids))
        for face, landmark in zip(faces, landmarks):
            face['landmarks'] = landmark
        return faces

    # Annotate face rows with start and end frames and the video ID
    faces_qs = Face.objects.filter(probability__gte=MIN_FACE_CONFIDENCE, frame__video_id__gte=0, frame__video_id__lte=72).annotate(
        min_frame=F('frame__number'),
        max_frame=F('frame__number'),
        height = F('bbox_y2')-F('bbox_y1'),
        video_id=F('frame__video_id')).filter(height__gte=MIN_FACE_HEIGHT)

    # One interval per frame whose payload is the list of face dicts in that frame.
    faces = VideoIntervalCollection.from_django_qs(
        faces_qs,
        with_payload=in_array(merge_dict_parsers([
            bbox_payload_parser(VideoIntervalCollection.django_accessor),
            dict_payload_parser(VideoIntervalCollection.django_accessor, {'id': 'id'})
        ]))
    ).coalesce(payload_merge_op=payload_plus)

    # Spatial constraints between the two bounding boxes.
    graph = {
        'nodes': [
            { 'name': 'face_left', 'predicates': [] },
            { 'name': 'face_right', 'predicates': [] },
        ],
        'edges': [
            {'start': 'face_left', 'end':'face_right', 'predicates': [
                lambda f1, f2: f1['x2'] < f2['x2'] and f1['x1']<f2['x1'], # Left face on the left
                lambda f1, f2: f1['x2'] - f2['x1'] > MIN_FACE_OVERLAP_X, # Faces overlap
                # Vertical overlap: lowest bottom edge minus the higher of the two
                # top edges (the original compared f1['y1'] with itself).
                lambda f1, f2: min(f1['y2'], f2['y2'])-max(f1['y1'], f2['y1']) > MIN_FACE_OVERLAP_Y,
                lambda f1, f2: f1['y2'] > f2['y1'] and f1['y1'] < f2['y2'],  # No face is entirely above another
                same_height(MAX_FACE_HEIGHT_DIFF),
                lambda f1, f2: (f1['x2']-f2['x1'])/max(f1['x2']-f1['x1'], f2['x2']-f2['x1']) < MAX_FACE_OVERLAP_X_FRACTION
            ]},
        ]
    }

    def mouths_are_close(lm1, lm2):
        """True when the mean positions of the selected lip points are within MAX_MOUTH_DIFF."""
        select_outer=[2,3,4,8,9,10]
        select_inner=[1,2,3,5,6,7]
        mouth1 = np.concatenate((lm1.outer_lips()[select_outer], lm1.inner_lips()[select_inner]))
        mouth2 = np.concatenate((lm2.outer_lips()[select_outer], lm2.inner_lips()[select_inner]))
        mean1 = np.mean(mouth1, axis=0)
        mean2 = np.mean(mouth2, axis=0)
        return np.linalg.norm(mean1-mean2) <= MAX_MOUTH_DIFF

    # Face is profile if both eyes are on the same side of the nose bridge horizontally.
    # (Both profile helpers are currently referenced only from commented-out predicates.)
    def is_left_profile(f):
        lm = f['landmarks']
        nose_x = min(lm.nose_bridge()[:,0])
        left = np.all(lm.left_eye()[:,0] >= nose_x)
        right = np.all(lm.right_eye()[:,0] >= nose_x)
        return left and right
    def is_right_profile(f):
        lm = f['landmarks']
        nose_x = max(lm.nose_bridge()[:,0])
        left = np.all(lm.left_eye()[:,0] <= nose_x)
        right = np.all(lm.right_eye()[:,0] <= nose_x)
        return left and right

    # Line is ax+by+c=0
    def project_point_to_line(pt, a, b, c):
        """Return the orthogonal projection of ``pt`` onto the line ax+by+c=0."""
        x0,y0=pt[0], pt[1]
        d=a*a+b*b
        x=(b*(b*x0-a*y0)-a*c)/d
        y=(a*(-b*x0+a*y0)-b*c)/d
        return np.array([x,y])

    # Positive if facing right
    def signed_face_angle(lm):
        """Cross product of the unit centre-line direction and the unit vector to the nose bridge."""
        center_line_indices = [27,28,32,33,34,51,62,66,57]
        data = lm.landmarks[center_line_indices]
        fit = np.polyfit(data[:,0], data[:,1], 1)
        # y = ax+b
        a,b = fit[0], fit[1]
        # A and B are the first and last centre-line landmarks projected on the fitted line.
        A = project_point_to_line(lm.landmarks[center_line_indices[0]], a,-1,b)
        B = project_point_to_line(lm.landmarks[center_line_indices[-1]], a,-1,b)
        AB = B-A
        AB = AB / np.linalg.norm(AB)
        C = np.mean(lm.nose_bridge()[2:4], axis=0)
        AC = C-A
        AC = AC / np.linalg.norm(AC)
        return np.cross(AB, AC)


    # Landmark constraints: signed angles of opposite sign and mouths close together.
    graph2 = {
        'nodes': [
            {'name': 'left', 'predicates': [
                lambda f: signed_face_angle(f['landmarks']) > MIN_FACE_ANGLE
#                 is_right_profile
            ]},
            {'name': 'right', 'predicates': [
                lambda f: signed_face_angle(f['landmarks']) < -MIN_FACE_ANGLE
#                 is_left_profile
            ]},
        ],
        'edges': [
            {'start': 'left', 'end':'right', 'predicates':[
                lambda l, r: mouths_are_close(l['landmarks'], r['landmarks']),
            ]}
        ]
    }

    # Landmarks are fetched only for frames that already passed the bbox graph.
    mf_up_close = faces.filter(payload_satisfies(
        scene_graph(graph, exact=True))).map(map_payload(get_landmarks)).filter(
        payload_satisfies(scene_graph(graph2, exact=True)))
    vids = mf_up_close.get_allintervals().keys()
    # Merge with shots
    shots_qs = Shot.objects.filter(
        video_id__in = vids,
        labeler=Labeler.objects.get(name='shot-hsvhist-face')
    ).all()
    total = shots_qs.count()
    print("Total shots:", total)
    # use empty list as payload
    shots = VideoIntervalCollection.from_django_qs(
        shots_qs,
        with_payload=lambda row:[],
        progress=True,
        total=total
    )
    # Each matching frame becomes an interval from that frame to the end of its shot.
    kissing_shots = mf_up_close.join(
      shots,
      lambda kiss, shot: [(kiss.get_start(), shot.get_end(), kiss.get_payload())],
      predicate=overlaps(),
      working_window=1
    ).coalesce()

    # Getting faces in the shot
    def wrap_in_list(intvl):
        """Wrap the per-frame face list in an outer list so payload_plus concatenates frames."""
        intvl.payload = [intvl.payload]
        return intvl

    print("Getting faces...")
    faces_qs2 = Face.objects.filter(frame__video_id__in=vids,probability__gte=MIN_FACE_CONFIDENCE)
    total = faces_qs2.count()
    faces2 = VideoIntervalCollection.from_django_qs(
        faces_qs2.annotate(
            min_frame=F('frame__number'),
            max_frame=F('frame__number'),
            video_id=F('frame__video_id')
        ),
        with_payload=in_array(merge_dict_parsers([
            bbox_payload_parser(VideoIntervalCollection.django_accessor),
            dict_payload_parser(VideoIntervalCollection.django_accessor, {'frame': 'min_frame'})
        ])),
        progress=True,
        total = total
    ).coalesce(payload_merge_op=payload_plus).map(wrap_in_list)

    def clip_to_last_frame_with_two(intvl):
        """End the interval at the last listed frame with exactly two faces, both tall enough."""
        faces = intvl.get_payload()[1]
        two_faces = [(f[0], f[1]) for f in faces if len(f)==2]
        two_high_faces = [(a, b) for a, b in two_faces if min(a['y2']-a['y1'],b['y2']-b['y1'])>=MIN_FACE_HEIGHT]
        frame = [a['frame'] for a,b in two_high_faces]

        # The interval is left untouched when no frame qualifies.
        if len(frame) > 0:
            intvl.end = frame[-1]
        return intvl

    # Payload becomes (kiss payload, list of per-frame face lists inside the shot).
    clipped_kissing_shots = kissing_shots.merge(
        faces2,
        payload_merge_op = lambda p1, p2: (p1, p2),
        predicate=overlaps(),
        working_window=1
    ).coalesce(payload_merge_op=lambda p1, p2: (p1[0], p1[1]+p2[1])).map(
        clip_to_last_frame_with_two).filter_length(min_length=12)

    # Caption words are converted from (start, end) values to frame numbers via each video's fps.
    results = get_all_segments(vids)
    fps_map = dict((i, Video.objects.get(id=i).fps) for i in vids)
    caption_results = VideoIntervalCollection({
        video_id: [(
            word[0] * fps_map[video_id], # start frame
            word[1] * fps_map[video_id], # end frame
            word[2]) # payload is the word
            for word in words]
        for video_id, words in results
    })
    kissing_without_words = clipped_kissing_shots.minus(
            caption_results)
    # Subtracting captions can leave fractional bounds, so cast back to int frames.
    kissing_final = kissing_without_words.map(
            lambda intvl: (int(intvl.start),
                int(intvl.end), intvl.payload)
            ).coalesce().filter_length(min_length=12)

    return kissing_final

def payload_to_objects(p, video_id):
    """Turn a query payload into widget objects.

    ``p[0]`` is the list of face dicts; the result lists one landmark object
    per face followed by one bounding-box object per face.
    """
    landmark_objs = [face_landmarks_to_dict(face['landmarks']) for face in p[0]]
    bbox_objs = [bbox_to_result_object(face, video_id) for face in p[0]]
    return landmark_objs + bbox_objs

# Run the query once and browse every matching interval with its faces drawn.
intervals = two_faces_up_close()
esper_widget(intrvllists_to_result_with_objects(
    intervals.get_allintervals(),
    payload_to_objects,
    stride=1))

# Validation

In [None]:
# Hand-labelled ground truth: video_id -> list of (start_frame, end_frame)
# ranges that I would consider a kiss.
ground_truth_dict = {
    1: [(11820, 11897)],
    9: [(54540, 54636)],
    24: [(97476, 97596), (142176, 142224)],
    28: [(205020, 205073)],
    32: [(133080, 133155)],
    33: [(19128, 19200), (19296, 19440)],
    43: [(138948, 139026)],
    47: [(3204, 3324), (55824, 55860)],
    55: [(112776, 113064), (155604, 155669)],
    59: [(43572, 43680), (93384, 93408), (126672, 126732), (192984, 193056)],
    62: [(18492, 18516), (18851, 18936), (19392, 19447)],
    65: [(121331, 121652)],
    66: [(171408, 171672)],
    70: [(140604, 140640)],
    72: [(115428, 115716), (135436, 135600), (138780, 138793)],
}

from esper.prelude import *
from rekall.interval_list import IntervalList, Interval
from rekall.video_interval_collection import VideoIntervalCollection
import esper.face_landmarks_wrapper as flw
import esper.pose_wrapper as pw
from esper.rekall import intrvllists_to_result_with_objects, bbox_to_result_object
from rekall.temporal_predicates import overlaps
from esper.stdlib import pose_to_dict, face_landmarks_to_dict

def find_faces_and_poses(vid, frames):
    """Collect faces and poses of video ``vid`` for the given frame numbers.

    Returns a dict keyed by frame number. Every value has a 'face' list of
    dicts (x1, x2, y1, y2, landmarks) and a 'pose' list. Frames with neither
    a face nor a pose row do not appear in the result.
    """
    face_rows = Face.objects.select_related('frame').filter(frame__video_id=vid, frame__number__in=frames)
    pose_rows = PoseMeta.objects.select_related('frame').filter(frame__video_id=vid, frame__number__in=frames)
    per_frame = {}

    if face_rows.count() > 0:
        face_landmarks = flw.get(face_rows)
        for row, marks in zip(face_rows, face_landmarks):
            entry = per_frame.setdefault(row.frame.number, {})
            entry.setdefault('face', []).append({
                'x1': row.bbox_x1,
                'x2': row.bbox_x2,
                'y1': row.bbox_y1,
                'y2': row.bbox_y2,
                'landmarks': marks
            })

    if pose_rows.count() > 0:
        pose_objs = pw.get(pose_rows)
        for pose_obj, meta in zip(pose_objs, pose_rows):
            entry = per_frame.setdefault(meta.frame.number, {})
            entry.setdefault('pose', []).append(pose_obj)

    # Make sure both keys exist for every frame that has at least one of them.
    for entry in per_frame.values():
        if 'face' not in entry:
            entry['face'] = []
        if 'pose' not in entry:
            entry['pose'] = []
    return per_frame

def find_all_faces_and_poses(intvl):
    """Replace the interval's payload (a video id) with its per-frame faces and poses.

    Frames ``intvl.start`` up to, but not including, ``intvl.end`` are looked up.
    The interval is modified in place and returned.
    """
    video_id = intvl.payload
    frame_numbers = [number for number in range(intvl.start, intvl.end)]
    intvl.payload = find_faces_and_poses(video_id, frame_numbers)
    return intvl

    
def add_face_and_pose(intvl):
    """Replace the payload (a video id) with the faces and poses at ``intvl.start``.

    The new payload is ``{'face': [...], 'pose': [...]}``, or ``None`` when the
    frame has neither. The interval is modified in place and returned.
    """
    video_id = intvl.payload
    annotations = {}
    intvl.payload = annotations
    face_rows = Face.objects.filter(frame__video_id=video_id, frame__number=intvl.start)
    pose_rows = PoseMeta.objects.filter(frame__video_id=video_id, frame__number=intvl.start)

    if face_rows.count() > 0:
        face_landmarks = flw.get(face_rows)
        annotations['face'] = [
            {
                'x1': row.bbox_x1,
                'x2': row.bbox_x2,
                'y1': row.bbox_y1,
                'y2': row.bbox_y2,
                'landmarks': marks
            }
            for row, marks in zip(face_rows, face_landmarks)
        ]
    else:
        annotations['face'] = []

    annotations['pose'] = pw.get(pose_rows) if pose_rows.count() > 0 else []

    # A frame with nothing detected is flagged with a None payload.
    if len(annotations['face']) == 0 and len(annotations['pose']) == 0:
        intvl.payload = None
    return intvl

def get_per_frame_ground_truth_shots(lax=6):
    """Build per-frame intervals for the shots that overlap the ground truth.

    Shots labelled 'shot-hsvhist-face' that overlap a ground-truth range are
    coalesced and annotated with faces and poses. Each one is then expanded
    into single-frame intervals covering ``start - lax`` .. ``end + lax``.
    Frames inside the shot are emitted only when they have an annotation entry;
    frames in the ``lax`` margin are always emitted, with empty 'face' and
    'pose' lists when no entry exists.
    """
    labeler = Labeler.objects.get(name='shot-hsvhist-face')
    shots_qs = Shot.objects.filter(
        video_id__in=ground_truth_dict.keys(),
        labeler=labeler)

    all_shots = VideoIntervalCollection.from_django_qs(shots_qs, with_payload=lambda row: row.video_id)
    labelled = VideoIntervalCollection({
        vid: IntervalList([(t[0], t[1], vid) for t in ts])
        for vid, ts in ground_truth_dict.items()
    })
    annotated_shots = all_shots.merge(
        labelled, predicate=overlaps(), working_window=1
    ).coalesce().map(find_all_faces_and_poses)

    per_video = {}
    for vid, intvls in annotated_shots.get_allintervals().items():
        frames = []
        for intvl in intvls.intrvls:
            for frame in range(intvl.start-lax, intvl.end+lax+1):
                in_margin = frame < intvl.start or frame > intvl.end
                if in_margin or frame in intvl.payload:
                    annotation = intvl.payload.get(frame, {'face':[], 'pose':[]})
                    frames.append((frame, frame, annotation))
        per_video[vid] = IntervalList(frames)
    return VideoIntervalCollection(per_video)
    
    
# ground_truth_per_frame = VideoIntervalCollection(dict((vid,
#                     IntervalList([(time, time, vid) for t in ts for time in range(
#                         t[0], t[1]+1)])) for vid, ts in ground_truth_dict.items())).map(
#     add_face_and_pose).filter(lambda intvl: intvl.payload is not None)

# Ground-truth ranges as a collection; each interval's payload is its video id.
ground_truth = VideoIntervalCollection({
    vid: IntervalList([(start, end, vid) for start, end in ranges])
    for vid, ranges in ground_truth_dict.items()
})

# Browse the ground-truth shots frame by frame with landmarks, boxes and poses drawn.
esper_widget(intrvllists_to_result_with_objects(
    get_per_frame_ground_truth_shots().get_allintervals(),
    payload_to_objs=lambda p, v: (
        [face_landmarks_to_dict(face['landmarks']) for face in p['face']]
        + [bbox_to_result_object(face, v) for face in p['face']]
        + [pose_to_dict(pose) for pose in p['pose']]
    )))

In [None]:
def compute_statistics(query_intrvllists, ground_truth_intrvllists):
    """Score query intervals against ground-truth intervals.

    Both arguments map a video id to an interval list.

    Returns:
        ``(precision, recall, precision_per_item, recall_per_item)``.
        Precision and recall are ratios of overlapping time; the per-item
        variants are ratios of segments that overlap the other side. Each
        pair is 1.0 when the corresponding total time is 0.
    """
    from rekall.temporal_predicates import overlaps

    def totals(intrvllists):
        # Total coalesced time and raw segment count over all videos.
        time, segments = 0, 0
        for video in intrvllists:
            time += intrvllists[video].coalesce().get_total_time()
            segments += intrvllists[video].size()
        return time, segments

    query_time, query_segments = totals(query_intrvllists)
    truth_time, truth_segments = totals(ground_truth_intrvllists)

    overlap_time = 0
    matched_query_segments = 0
    matched_truth_segments = 0
    # Only videos present on both sides can contribute overlap.
    for video in query_intrvllists:
        if video not in ground_truth_intrvllists:
            continue
        query_list = query_intrvllists[video]
        truth_list = ground_truth_intrvllists[video]
        overlap_time += query_list.overlaps(truth_list).coalesce().get_total_time()
        matched_query_segments += query_list.filter_against(truth_list, predicate=overlaps()).size()
        matched_truth_segments += truth_list.filter_against(query_list, predicate=overlaps()).size()

    precision, precision_per_item = 1.0, 1.0
    if query_time != 0:
        precision = overlap_time / query_time
        precision_per_item = matched_query_segments / query_segments

    recall, recall_per_item = 1.0, 1.0
    if truth_time != 0:
        recall = overlap_time / truth_time
        recall_per_item = matched_truth_segments / truth_segments

    return precision, recall, precision_per_item, recall_per_item

# Score the face-landmark query against the ground truth and print
# (precision, recall, precision_per_item, recall_per_item) followed by F1.
result = compute_statistics(intervals.get_allintervals(), ground_truth.get_allintervals())
# F1 is the harmonic mean of precision and recall; report 0.0 when either is 0
# instead of raising ZeroDivisionError.
print(result, 2/(1/result[0]+1/result[1]) if result[0] and result[1] else 0.0)

# Shots with overlapping faces

In [None]:
def shots_with_overlapping_faces():
    """Find shots that contain a frame with two large, overlapping faces.

    Relies on the notebook-level imports of ``VideoIntervalCollection`` and
    ``overlaps``.

    Returns:
        An interval collection of shots whose payloads are dicts with:

        * ``faces``: list of ``{'frame': frame_number, 'faces': [...]}``
        * ``poses``: list of ``{'frame': frame_number, 'poses': [...]}``
        * ``vid``: the video id
        * ``overlap``: list of ``(left_face_id, right_face_id, frame_number)``
          tuples, one per frame with overlapping faces
    """
    MIN_FACE_CONFIDENCE = 0.8
    MIN_FACE_HEIGHT = 0.4
    MIN_FACE_OVERLAP_X = 0.05
    MIN_FACE_OVERLAP_Y = 0.2
    MAX_FACE_HEIGHT_DIFF=0.1
    MAX_FACE_OVERLAP_X_FRACTION = 0.7

    from rekall.parsers import in_array, merge_dict_parsers, bbox_payload_parser, dict_payload_parser
    from rekall.merge_ops import payload_first, payload_plus, merge_named_payload
    from rekall.payload_predicates import payload_satisfies
    from rekall.spatial_predicates import scene_graph

    # Annotate face rows with start and end frames and the video ID
    faces_qs = Face.objects.filter(
        frame__regularly_sampled=True,
        probability__gte=MIN_FACE_CONFIDENCE, frame__video_id__gte=0, frame__video_id__lte=72
    ).annotate(
        min_frame=F('frame__number'),
        max_frame=F('frame__number'),
        height = F('bbox_y2') - F('bbox_y1'),
        video_id=F('frame__video_id')
    )

    # payload: list of faces
    faces = VideoIntervalCollection.from_django_qs(
        faces_qs.filter(height__gte=MIN_FACE_HEIGHT),
        with_payload=in_array(merge_dict_parsers([
            bbox_payload_parser(VideoIntervalCollection.django_accessor),
            dict_payload_parser(VideoIntervalCollection.django_accessor, {'id': 'id'})
        ]))
    ).coalesce(payload_merge_op=payload_plus)

    graph = {
        'nodes': [
            { 'name': 'face_left', 'predicates': [] },
            { 'name': 'face_right', 'predicates': [] },
        ],
        'edges': [
            {'start': 'face_left', 'end':'face_right', 'predicates': [
                lambda f1, f2: f1['x2'] < f2['x2'] and f1['x1']<f2['x1'], # Left face on the left
                lambda f1, f2: f1['x2'] - f2['x1'] > MIN_FACE_OVERLAP_X, # Faces overlap
                lambda f1, f2: (f1['x2'] - f2['x1']) / max(f1['x2']-f1['x1'], f2['x2']-f2['x1']) < MAX_FACE_OVERLAP_X_FRACTION,
                # Vertical overlap: lowest bottom edge minus the higher of the two
                # top edges (the original compared f1['y1'] with itself).
                lambda f1, f2: min(f1['y2'], f2['y2'])-max(f1['y1'], f2['y1']) > MIN_FACE_OVERLAP_Y,
                lambda f1, f2: abs(f2['y2']-f2['y1']-f1['y2']+f1['y1']) <= MAX_FACE_HEIGHT_DIFF,
                lambda f1, f2: f1['x1'] < 0.5 and f2['x2'] > 0.5, # boxes should not all be on one side of the screen.
            ]},
        ]
    }

    def get_two_face_ids(intvl):
        """Replace the two-face payload with (left_id, right_id, frame)."""
        faces = intvl.payload
        assert(len(faces) == 2)
        f1, f2 = faces[0], faces[1]
        # Order the pair so that f1 is the face whose right edge is further left.
        if f1['x2'] >= f2['x2']:
            f1, f2 = f2, f1
        intvl.payload = (f1['id'], f2['id'], intvl.start)
        return intvl

    # payload: list of overlapping face_ids, frame_number tuples
    overlapping_faces = faces.filter(payload_satisfies(scene_graph(graph, exact=True))).map(get_two_face_ids)
    vids = overlapping_faces.get_allintervals().keys()
    # Merge with shots
    shots_qs = Shot.objects.filter(
        video_id__in = vids,
        labeler=Labeler.objects.get(name='shot-hsvhist-face')
    ).all()
    total = shots_qs.count()
    print("Total shots:", total)
    # payload: vid
    shots = VideoIntervalCollection.from_django_qs(
        shots_qs,
        with_payload=lambda row:row.video_id,
        progress=True,
        total=total
    )

    # Get shots with overlapping faces
    # payload: vid and overlap
    overlapped_shots = overlapping_faces.merge(
      shots,
      payload_merge_op = lambda face_id_and_frame, vid: {
          'vid': vid,
          'overlap': [face_id_and_frame]
      },
      predicate=overlaps(),
      working_window=1
    ).coalesce(payload_merge_op=merge_named_payload({
        'vid': payload_first,
        'overlap': payload_plus,
    }))

    # Get all faces
    print("Adding all faces")
    def add_frame_number(intvl):
        """Wrap the face list in a dict that also records the frame number."""
        faces = intvl.payload
        intvl.payload = {
            "frame": intvl.start,
            "faces": faces
        }
        return intvl
    # payload: frame, list of faces
    # (no height filter this time: every confident face in the matched videos)
    faces = VideoIntervalCollection.from_django_qs(
        faces_qs.filter(frame__video_id__in=vids),
        with_payload=in_array(merge_dict_parsers([
            bbox_payload_parser(VideoIntervalCollection.django_accessor),
            dict_payload_parser(VideoIntervalCollection.django_accessor, {'id': 'id'})
        ]))
    ).coalesce(payload_merge_op=payload_plus).map(add_frame_number)

    # payload: vid, overlap, faces: list of "frame number and faces", poses: []
    overlapped_shots_with_faces = overlapped_shots.merge(
        faces,
        payload_merge_op = lambda vid_and_overlap, frame_and_faces: {
            'vid': vid_and_overlap['vid'],
            'overlap': vid_and_overlap['overlap'],
            'faces': [frame_and_faces],
            'poses': [],
        },
        predicate=overlaps(),
        working_window=1
    ).coalesce(payload_merge_op=merge_named_payload({
        'vid': payload_first,
        'overlap': payload_first,
        'faces': payload_plus,
        'poses': payload_plus
    }))

    print("Adding poses")
    poses_qs = PoseMeta.objects.filter(
        frame__regularly_sampled=True,
        frame__video_id__in=vids).annotate(
        min_frame=F("frame__number"),
        max_frame=F('frame__number'),
        video_id=F('frame__video_id'))
    def add_frame_number_for_pose(intvl):
        """Wrap the pose list in a dict that also records the frame number."""
        poses = intvl.payload
        intvl.payload = {
            "frame": intvl.start,
            "poses": poses
        }
        return intvl
    # payload: frame, list of poses
    poses = VideoIntervalCollection.from_django_qs(
        poses_qs,
        with_payload=lambda row: [row]
    ).coalesce(payload_merge_op=payload_plus).map(add_frame_number_for_pose)

    def merge_poses_into_dict(d, poses):
        """Return a shallow copy of ``d`` whose 'poses' is a one-element list."""
        ret = d.copy()
        ret['poses'] = [poses]
        return ret
    # payload: vid, overlap, frames: list of "frame number and faces and poses"
    # The union with overlapped_shots_with_faces keeps shots that have no pose
    # rows at all; when both versions exist the longer 'poses' list wins.
    overlapped_shots_with_faces_and_poses = overlapped_shots_with_faces.merge(
        poses,
        payload_merge_op = merge_poses_into_dict,
        predicate=overlaps(),
        working_window=1
    ).coalesce(payload_merge_op=merge_named_payload({
        'vid': payload_first,
        'overlap': payload_first,
        'faces': payload_first,
        'poses': payload_plus
    })).set_union(overlapped_shots_with_faces).coalesce(payload_merge_op=merge_named_payload({
        'vid': payload_first,
        'overlap': payload_first,
        'faces': payload_first,
        'poses': lambda p1, p2: p1 if len(p1)>len(p2) else p2
    }))


    return overlapped_shots_with_faces_and_poses

def payload_to_objs_for_shots_with_overlapping_faces(payload, vid):
    """Return bounding-box objects for every face in the first overlap frame.

    The frame number is taken from the first entry of ``payload['overlap']``;
    all faces recorded for that frame in ``payload['faces']`` are converted.
    """
    _, _, overlap_frame = payload['overlap'][0]
    return [
        bbox_to_result_object(face, vid)
        for entry in payload['faces']
        if entry['frame'] == overlap_frame
        for face in entry['faces']
    ]

# esper_widget(intrvllists_to_result_with_objects(shots_with_overlapping_faces().get_allintervals(),
#                     payload_to_objs_for_shots_with_overlapping_faces),
#              show_middle_frame=False, disable_captions=True)
# Run the overlapping-faces query and score it against the ground truth.
collection = shots_with_overlapping_faces()
result = compute_statistics(collection.get_allintervals(), ground_truth.get_allintervals())
# F1 is the harmonic mean of precision and recall; report 0.0 when either is 0
# instead of raising ZeroDivisionError.
print(result, 2/(1/result[0]+1/result[1]) if result[0] and result[1] else 0.0)

In [None]:
def display_non_recalled(collection):
    """Return a widget showing the ground truth minus ``collection``, with no drawn objects."""
    missed = ground_truth.minus(collection)
    widget_input = intrvllists_to_result_with_objects(
        missed.get_allintervals(),
        lambda p, v: [])
    return esper_widget(widget_input, show_middle_frame=False, disable_captions=True)
# Show the ground-truth intervals that the overlapping-faces query left uncovered.
display_non_recalled(collection)

In [None]:
def display_vid(collection, vid):
    """Return a widget showing only the intervals of video ``vid``, with no drawn objects."""
    single_video = {vid: collection.get_intervallist(vid)}
    widget_input = intrvllists_to_result_with_objects(single_video, lambda p, v: [])
    return esper_widget(widget_input, show_middle_frame=False, disable_captions=True)

# Shots with overlapping faces and opposing faces

In [None]:
def filter_collection_with_opposing_faces(collection):
    """Keep intervals that have at least one frame with opposing faces or opposing poses.

    Each payload is read through its 'faces', 'overlap' and 'poses' keys and is
    extended in place with 'face_pairs', 'opposing_face_frames', 'pose_pairs'
    and 'opposing_pose_frames' before the filter is applied.

    Returns the mapped and filtered collection.
    """
    import esper.face_landmarks_wrapper as flw
    import esper.pose_wrapper as pw
    from rekall.payload_predicates import payload_satisfies
    
    MIN_FACE_ANGLE = 0.2
    MIN_FACE_HEIGHT = 0.3
    MIN_FACE_OVERLAP_Y = 0.2
    # With 0, the right face's left edge must lie left of the left face's right edge.
    MAX_FACE_DIST_X = 0

    # Line is ax+by+c=0
    def project_point_to_line(pt, a, b, c):
        """Return the orthogonal projection of ``pt`` onto the line ax+by+c=0."""
        x0,y0=pt[0], pt[1]
        d=a*a+b*b
        x=(b*(b*x0-a*y0)-a*c)/d
        y=(a*(-b*x0+a*y0)-b*c)/d
        return np.array([x,y])
    
    # Returns (a,b,c) which defines ax+by+c=0
    def find_best_line_fit(xs, ys):
        """Fit y=f(x) and x=f(y) and return the line with the smaller squared error."""
        fit1 = np.polyfit(xs, ys, 1)
        error1 = np.sum((np.poly1d(fit1)(xs)-ys)**2)
        fit2 = np.polyfit(ys, xs, 1)
        error2 = np.sum((np.poly1d(fit2)(ys)-xs)**2)
        if error1 < error2:
            # fit1[0]x+fit1[1]=y
            return fit1[0], -1, fit1[1]
        # fit2[0]y+fit2[1]=x
        return -1, fit2[0], fit2[1]
    
    # Positive if facing left
    def signed_face_angle(lm):
        """Cross product of the unit centre-line direction and the unit vector to the nose bridge."""
        center_line_indices = [27,28, 32, 33,34, 51,62,66,57]
        data = lm.landmarks[center_line_indices]
        a, b, c = find_best_line_fit(data[:,0], data[:,1])
        # A and B are the first and last centre-line landmarks projected on the fitted line.
        A = project_point_to_line(lm.landmarks[center_line_indices[0]], a, b, c)
        B = project_point_to_line(lm.landmarks[center_line_indices[-1]], a, b, c)
        AB = B-A
        AB = AB / np.linalg.norm(AB)
        C = np.mean(lm.nose_bridge()[2:4], axis=0)
        AC = C-A
        AC = AC / np.linalg.norm(AC)
        return np.cross(AB, AC)
    
    def map_payload(func):
        """Lift ``func`` (payload -> payload) into an interval -> interval mapper."""
        def fn(intvl):
            intvl.payload = func(intvl.payload)
            return intvl
        return fn        
    
    # face_pair: a list of (left_face, right_face, frame).
    # opposing_face_frames: a list of frames
    def add_face_pairs_to_payload(p):
        """Add 'face_pairs' and 'opposing_face_frames' to payload ``p`` and return it."""
        # return left_face, right_face
        def get_face_pair(fs):
            """Return (left, right) when ``fs`` holds exactly two tall, close faces, else (None, None)."""
            def height(f):
                return f['y2']-f['y1']
            fs = [f for f in fs if height(f) > MIN_FACE_HEIGHT]
            if len(fs) != 2:
                return None, None
            f1, f2 = fs[0], fs[1]
            # Put the face that is further left first.
            if f2['x2']<f1['x2'] and f2['x1'] < f1['x1']:
                f1, f2 = f2, f1
            if f1['x2']<f2['x2'] and f1['x1']<f2['x1'] and min(
                f1['y2'],f2['y2'])-max(f1['y1'],f2['y1']) > MIN_FACE_OVERLAP_Y and (
                f2['x1']-f1['x2']<MAX_FACE_DIST_X):
                return f1, f2
            return None, None
                   
        faces = p['faces']
        p['face_pairs'] = []
        p['opposing_face_frames'] = []
        # frame number -> (left_face_id, right_face_id) for frames with overlapping faces
        overlapped_frames = dict((val[2], (val[0], val[1])) for val in p['overlap'])
        fids = []
        for frame_and_faces in faces:
            frame = frame_and_faces['frame']
            if frame in overlapped_frames:
                # Only consider the two faces already known to overlap in this frame.
                ids = overlapped_frames[frame]
                fs = [face for face in frame_and_faces['faces'] if face['id'] in ids]
            else:
                fs = frame_and_faces['faces']
            left, right = get_face_pair(fs)
            if left is not None and right is not None:
                p['face_pairs'].append((left, right, frame))
                fids += [left['id'], right['id']]
        # Landmarks are fetched in one call; the loop below assumes they come back
        # in the order of `fids` (left, right, left, right, ...) -- TODO confirm.
        landmarks = flw.get_from_face_ids(fids)
        index = 0
        for left, right, frame in p['face_pairs']:
            left['landmarks'] = landmarks[index]
            right['landmarks'] = landmarks[index+1]
            # Opposing: the two signed angles have opposite signs and exceed the threshold.
            if signed_face_angle(left['landmarks'])  < -MIN_FACE_ANGLE and (
               signed_face_angle(right['landmarks']) > MIN_FACE_ANGLE):
                p['opposing_face_frames'].append(frame)
            index += 2
       
        return p
    
    def oppose_pose(p1, p2):
        """True when both poses have both shoulders and their shoulder vectors point opposite ways."""
        # First use shoulder vectors
        # if shoulder vectors are opposite, then they are opposing
        from esper.pose_wrapper import PoseWrapper
        def get_vector(pts, l, r):
            # Column 2 must be positive for both keypoints (presumably a detection score).
            if np.all(pts[[l,r],2]>0):
                return pts[r, :2] - pts[l, :2]
            return None
        pts1 = p1.pose_keypoints()
        pts2 = p2.pose_keypoints()
        l, r = PoseWrapper.LShoulder, PoseWrapper.RShoulder
        v1 = get_vector(pts1, l, r)
        v2 = get_vector(pts2, l, r)
        if v1 is not None and v2 is not None and np.dot(v1, v2) < 0:
            return True
        return False
    
    # pose_pairs: list of (pose1, pose2, frame)
    # opposing_pose_frames: list of frames
    def add_pose_pairs_to_payload(p):
        """Add 'pose_pairs' and 'opposing_pose_frames' to payload ``p`` and return it."""
        poses = p['poses']
        pids = []
        pose_frames = []
        p['opposing_pose_frames'] = []
        for frame_and_poses in poses:
            frame = frame_and_poses['frame']
            ps = frame_and_poses['poses']
            # Only frames with exactly two poses are considered.
            if len(ps) == 2:
                pose_frames.append(frame)
                pids += [ps[0], ps[1]]
        # Poses are loaded in one call; consecutive results are paired per frame below.
        pws = pw.get(pids)
        index = 0
        p['pose_pairs'] = []
        for frame in pose_frames:
            p['pose_pairs'].append((pws[index], pws[index+1], frame))
            if oppose_pose(pws[index], pws[index+1]):
                p['opposing_pose_frames'].append(frame)
            index += 2
        return p      
    
    def has_opposing_face(p):
        return len(p['opposing_face_frames']) > 0
    
    def has_opposing_pose(p):
        return len(p['opposing_pose_frames'])>0
    
    # payload: vid, overlap, faces, poses, face_pairs
    return collection.map(map_payload(add_face_pairs_to_payload)).map(
        map_payload(add_pose_pairs_to_payload)).filter(payload_satisfies(
        lambda p: has_opposing_face(p) or has_opposing_pose(p)))

# Apply the opposing-face/pose filter and score the result against the ground truth.
collection_with_opposing_faces = filter_collection_with_opposing_faces(collection)
result = compute_statistics(collection_with_opposing_faces.get_allintervals(), ground_truth.get_allintervals())
# F1 is the harmonic mean of precision and recall; report 0.0 when either is 0
# instead of raising ZeroDivisionError.
print(result, 2/(1/result[0]+1/result[1]) if result[0] and result[1] else 0.0)
        

In [None]:
# Show the ground-truth intervals that the opposing-face/pose filter left uncovered.
display_non_recalled(collection_with_opposing_faces)

In [None]:
def payload_to_opposing_landmarks_objs(p, v):
    """Return landmark and bbox objects for the pairs in the first opposing-face frame.

    Returns an empty list when the payload has no opposing-face frame. The
    video id is taken from ``p['vid']``; ``v`` is not used.
    """
    from esper.stdlib import face_landmarks_to_dict
    from esper.rekall import bbox_to_result_object
    opposing_frames = p['opposing_face_frames']
    if len(opposing_frames) == 0:
        return []
    shown_frame = opposing_frames[0]
    objs = []
    for left, right, frame in p['face_pairs']:
        if shown_frame != frame:
            continue
        pair = (left, right)
        # Landmarks of both faces first, then both bounding boxes.
        objs.extend([face_landmarks_to_dict(face['landmarks']) for face in pair])
        objs.extend([bbox_to_result_object(face, p['vid']) for face in pair])
    return objs

def payload_to_overlapping_face_objs(p, v):
    """Return bbox objects, tagged with ``gender_id = 1``, for the first overlapping pair.

    The pair's face ids and frame come from the first entry of ``p['overlap']``.
    """
    first_id, second_id, overlap_frame = p['overlap'][0]
    pair_ids = [first_id, second_id]
    objs = []
    for entry in p['faces']:
        if entry['frame'] != overlap_frame:
            continue
        for face in entry['faces']:
            if face['id'] not in pair_ids:
                continue
            obj = bbox_to_result_object(face, v)
            obj['gender_id'] = 1
            objs.append(obj)
    return objs

def payload_to_opposing_pose_objs(p,v):
    """Return pose objects for the pairs in the first opposing-pose frame.

    Returns an empty list when the payload has no opposing-pose frame.
    ``v`` is not used.
    """
    from esper.stdlib import pose_to_dict
    opposing_frames = p['opposing_pose_frames']
    if len(opposing_frames) == 0:
        return []
    shown_frame = opposing_frames[0]
    objs = []
    for first_pose, second_pose, frame in p['pose_pairs']:
        if shown_frame != frame:
            continue
        objs.append(pose_to_dict(first_pose))
        objs.append(pose_to_dict(second_pose))
    return objs

def payload_to_objects_for_collection_with_opposing_people(p,v):
    """Concatenate the opposing-landmark, opposing-pose and overlapping-face objects, in that order."""
    landmark_objs = payload_to_opposing_landmarks_objs(p, v)
    pose_objs = payload_to_opposing_pose_objs(p, v)
    overlap_objs = payload_to_overlapping_face_objs(p, v)
    return landmark_objs + pose_objs + overlap_objs

# Browse the shots that passed the opposing-face/pose filter, drawing the opposing
# landmarks, opposing poses and the overlapping face boxes for each one.
esper_widget(intrvllists_to_result_with_objects(collection_with_opposing_faces.get_allintervals(),
        payload_to_objects_for_collection_with_opposing_people), show_middle_frame=False, disable_captions=True)

# Shots with opposing people close to overlapped faces

In [None]:
def filter_collection_with_people_close_to_overlap(collection):
    """Keep intervals whose opposing face/pose pair lies close to an overlapping face pair.

    Every interval payload is read as a dict. The original author lists its
    keys as ``overlap``, ``faces``, ``poses``, ``vid``, ``face_pairs``,
    ``pose_pairs``, ``opposing_face_frames`` and ``opposing_pose_frames``
    (``poses`` is not read here). As used by this function:

    * ``overlap`` holds ``(face_id_1, face_id_2, frame)`` triples,
    * ``faces`` holds dicts with a ``frame`` and a ``faces`` list of face
      dicts (``id``, ``x1``, ``y1``, ``x2``, ``y2``),
    * ``face_pairs`` / ``pose_pairs`` hold ``(first, second, frame)`` triples.

    Each payload is annotated IN PLACE with two lists of
    ``(overlap_frame, opposing_frame)`` tuples: ``close_face_frame_pair`` and
    ``close_face_pose_frame_pair``.

    Args:
        collection: object exposing ``map``, ``filter`` and ``filter_length``
            (a rekall interval collection in this notebook).

    Returns:
        The collection produced by mapping the annotation over ``collection``
        and then keeping intervals that (1) have at least one close pair,
        (2) pass ``filter_length(min_length=24)`` and (3) do not belong to a
        video whose genre is ``'animation'``.

    Raises:
        IndexError: if an overlap triple or an opposing frame has no matching
            entry in ``faces`` / ``face_pairs`` / ``pose_pairs``.
    """
    from rekall.payload_predicates import payload_satisfies

    MAX_FRAME_DIFF = 12          # max |opposing frame - overlap frame|
    MAX_FACE_DIST = 0.05         # max distance between bbox centres (bbox coordinate units)
    MAX_FACE_SHOULDER_DEV = 0.4  # max offset from shoulder midpoint, in half shoulder widths
    MIN_INTERVAL_LENGTH = 24     # forwarded to filter_length(min_length=...)

    def get_bbox_center(box):
        return np.array([(box['x1']+box['x2'])/2, (box['y1']+box['y2'])/2])

    def check_face_dist(f1, f2, f3, f4):
        # True when (f1, f2) can be matched one-to-one with (f3, f4), in either
        # order, with both centre distances within MAX_FACE_DIST.
        c1, c2, c3, c4 = (get_bbox_center(f) for f in (f1, f2, f3, f4))
        d13 = np.linalg.norm(c1-c3)
        d23 = np.linalg.norm(c2-c3)
        d14 = np.linalg.norm(c1-c4)
        d24 = np.linalg.norm(c2-c4)
        return (d13 <= MAX_FACE_DIST and d24 <= MAX_FACE_DIST) or (
                d14 <= MAX_FACE_DIST and d23 <= MAX_FACE_DIST)

    # Line is ax+by+c=0
    def project_point_to_line(pt, a, b, c):
        x0,y0=pt[0], pt[1]
        d=a*a+b*b
        x=(b*(b*x0-a*y0)-a*c)/d
        y=(a*(-b*x0+a*y0)-b*c)/d
        return np.array([x,y])

    # line parameterized by ax+by+c=0
    def get_line(p1, p2):
        if p1[0]==p2[0]:
            return (1, 0, -p1[0])
        return p2[1]-p1[1], p1[0]-p2[0], p1[1]*(p2[0]-p1[0])+p1[0]*(p1[1]-p2[1])

    def get_face_projection_dist_on_shoulder(f, p):
        # Project the face centre onto the shoulder line and measure how far
        # the projection is from the shoulder midpoint, in half shoulder widths.
        # NOTE(review): keypoint confidences are not checked here; presumably
        # the poses were filtered upstream — confirm.
        cf = get_bbox_center(f)
        pts = p.pose_keypoints()
        left, right = pts[p.LShoulder,:2], pts[p.RShoulder,:2]
        a,b,c = get_line(left, right)
        proj = project_point_to_line(cf, a,b,c)
        mid = np.mean(pts[[p.LShoulder, p.RShoulder],:2], axis=0)
        half_width = np.linalg.norm(right-mid)
        if half_width == 0:
            # Coincident shoulders: the ratio is undefined. Report "infinitely
            # far" so the <= comparison fails, as the former inf/nan result did,
            # but without a divide-by-zero warning.
            return np.inf
        return np.linalg.norm(proj-mid)/half_width

    def check_face_pose_dist(f1, f2, p3, p4):
        d13 = get_face_projection_dist_on_shoulder(f1, p3)
        d14 = get_face_projection_dist_on_shoulder(f1, p4)
        d23 = get_face_projection_dist_on_shoulder(f2, p3)
        d24 = get_face_projection_dist_on_shoulder(f2, p4)
        return (d13 <= MAX_FACE_SHOULDER_DEV and d24 <= MAX_FACE_SHOULDER_DEV) or (
                d14 <= MAX_FACE_SHOULDER_DEV and d23 <= MAX_FACE_SHOULDER_DEV)

    def find_face(faces, i, frame):
        face_list = [fs for fs in faces if fs['frame']==frame][0]['faces']
        return [f for f in face_list if f['id']==i][0]

    def find_pair(pairs, frame):
        return [(left, right) for left, right, f in pairs if f==frame][0]

    def close_frame_pairs(p, opposing_frames, pairs, is_close):
        # Shared search used for both the face pairs and the pose pairs:
        # collect (overlap_frame, opposing_frame) for every combination that is
        # near in time and for which `is_close` accepts the four objects.
        matches = []
        for frame in opposing_frames:
            for fid1, fid2, overlap_frame in p['overlap']:
                if abs(frame-overlap_frame) > MAX_FRAME_DIFF:
                    continue
                overlapped1 = find_face(p['faces'], fid1, overlap_frame)
                overlapped2 = find_face(p['faces'], fid2, overlap_frame)
                first, second = find_pair(pairs, frame)
                if is_close(overlapped1, overlapped2, first, second):
                    matches.append((overlap_frame, frame))
        return matches

    # add payload 'close_face_frame_pair', 'close_face_pose_frame_pair'
    def compute_close_to_overlap(intvl):
        p = intvl.payload
        p['close_face_frame_pair'] = close_frame_pairs(
            p, p['opposing_face_frames'], p['face_pairs'], check_face_dist)
        p['close_face_pose_frame_pair'] = close_frame_pairs(
            p, p['opposing_pose_frames'], p['pose_pairs'], check_face_pose_dist)
        return intvl

    # A set gives constant-time membership tests in the final filter.
    animation_vids = {v.id for v in Video.objects.filter(genres__name='animation')}
    return collection.map(compute_close_to_overlap).filter(payload_satisfies(
        lambda p: len(p['close_face_frame_pair']) > 0 or len(p['close_face_pose_frame_pair']) > 0)).filter_length(
        min_length=MIN_INTERVAL_LENGTH).filter(payload_satisfies(
        lambda p: p['vid'] not in animation_vids))
# Apply the proximity filter to the opposing-faces collection and score the
# surviving intervals against the ground truth.
collection_with_opposing_faces_close_to_overlap = filter_collection_with_people_close_to_overlap(
    collection_with_opposing_faces)
result = compute_statistics(
    collection_with_opposing_faces_close_to_overlap.get_allintervals(), ground_truth.get_allintervals())
# The second printed value is the harmonic mean of result[0] and result[1]
# (presumably precision and recall, i.e. an F1 score — TODO confirm against
# compute_statistics).
# NOTE(review): this divides by zero when either value is 0.
print(result, 2/(1/result[0]+1/result[1]))

In [None]:
display_non_recalled(collection_with_opposing_faces_close_to_overlap)

In [None]:
def payload_to_objects_for_collection_with_opposing_faces_close_to_overlap(p,v):
    """Build widget objects for the first "close to overlap" match of a payload.

    The face match (``close_face_frame_pair``) takes precedence; the pose
    match (``close_face_pose_frame_pair``) is only used when there is no face
    match. In both cases the boxes of the overlapping faces on the matched
    overlap frame are appended, tagged with ``gender_id = 1``.

    Args:
        p: interval payload dict; reads ``close_face_frame_pair``,
           ``close_face_pose_frame_pair``, ``face_pairs``, ``pose_pairs``,
           ``overlap``, ``faces`` and ``vid``.
        v: video id used for the overlapping-face boxes.

    Returns:
        List of widget objects; empty when neither match list has an entry.
    """
    def frame_to_face_objs(p, f):
        objs = []
        for left, right, frame in p['face_pairs']:
            if f == frame:
                objs += [face_landmarks_to_dict(left['landmarks']), face_landmarks_to_dict(right['landmarks'])]
                objs += [bbox_to_result_object(left, p['vid']), bbox_to_result_object(right, p['vid'])]
        return objs

    def frame_to_pose_objs(p, f):
        # Imported locally: elsewhere in this notebook pose_to_dict is only
        # imported inside a function, so it cannot be relied on as a global.
        from esper.stdlib import pose_to_dict
        objs = []
        for p1, p2, frame in p['pose_pairs']:
            if f == frame:
                objs += [pose_to_dict(p1), pose_to_dict(p2)]
        return objs

    def frame_to_overlap_face(p, frame):
        def add_color(o):
            o['gender_id'] = 1
            return o
        # First overlap triple recorded on this frame.
        i1, i2 = [(i1, i2) for i1, i2, f in p['overlap'] if f==frame][0]
        objs = []
        for faces in p['faces']:
            if faces['frame'] == frame:
                for f in faces['faces']:
                    if f['id'] in [i1, i2]:
                        objs.append(add_color(bbox_to_result_object(f, v)))
        return objs

    if len(p['close_face_frame_pair'])>0:
        overlap_frame, landmark_frame = p['close_face_frame_pair'][0]
        return frame_to_face_objs(p, landmark_frame) + frame_to_overlap_face(p, overlap_frame)

    if len(p['close_face_pose_frame_pair'])>0:
        overlap_frame, pose_frame = p['close_face_pose_frame_pair'][0]
        return frame_to_pose_objs(p, pose_frame) + frame_to_overlap_face(p, overlap_frame)

    return []

esper_widget(intrvllists_to_result_with_objects(collection_with_opposing_faces_close_to_overlap.get_allintervals(),
        payload_to_objects_for_collection_with_opposing_faces_close_to_overlap), show_middle_frame=False, disable_captions=True)

# Scratchpad

In [None]:
esper_widget(intrvllists_to_result_with_objects(collection.filter(lambda intvl: intvl.start==132855).get_allintervals(), lambda p,v:[]))

In [None]:
esper_widget(intrvllists_to_result_with_objects({65:collection.get_intervallist(65)}, lambda p,v:[]))

In [None]:
collection.get_intervallist(1).intrvls[0].payload['overlap']

In [None]:
faces = Face.objects.filter(probability__gte=0.8, frame__video_id=404, frame__number=10452).annotate(
        min_frame=F('frame__number'),
        max_frame=F('frame__number'),
        height = F('bbox_y2')-F('bbox_y1'),
        video_id=F('frame__video_id'),
        face_probability=F('probability')).filter(height__gte=0.5)
faces[1].labeler

In [None]:
Face.objects.select_related('frame').distinct('frame').count()

In [None]:
Face.objects.filter(frame__number=74952, frame__video__id=245)

In [None]:
import esper.face_landmarks_wrapper as flw

In [None]:
v=flw.get(Face.objects.filter(frame__number=149532, frame__video__id=156))
ls=[]
for l in v:
    ls.append(l)
    print(l)

In [None]:
ls[0].left_eye(), ls[0].right_eye(), ls[0].nose_bridge()

In [None]:
ls[1].left_eye(), ls[1].right_eye(), ls[1].nose_bridge()

In [None]:
ls[1].left_eyebrow(), ls[1].right_eyebrow()

In [None]:
for vid, l in intervals.get_allintervals().items():
    print(vid, len(l.intrvls))
    for l_ in l.intrvls:
        print(l_.start, l_.end)

In [None]:
faces= intervals.get_intervallist(1).intrvls[0].payload[1]
faces

In [None]:
lm1=intervals.get_intervallist(32).intrvls[1].payload[0][0]['landmarks']
lm2=intervals.get_intervallist(32).intrvls[1].payload[0][1]['landmarks']

In [None]:
lm1.nose_bridge(), lm2.nose_bridge()

In [None]:
lm1.left_eyebrow(), lm1.right_eyebrow()

In [None]:
lm2.left_eyebrow(), lm2.right_eyebrow()

In [None]:
from matplotlib import pyplot as plt

In [None]:
plt.plot(lm2.left_eyebrow()[:,0], lm2.left_eyebrow()[:,1], label="lm2 left eyebrow")
plt.plot(lm2.right_eyebrow()[:,0], lm2.right_eyebrow()[:,1], label="lm2 right eyebrow")
plt.plot(lm2.right_eye()[:,0], lm2.right_eye()[:,1], label="lm2 right eye")
plt.plot(lm2.left_eye()[:,0], lm2.left_eye()[:,1], label="lm2 left eye")
plt.plot(lm1.left_eyebrow()[:,0], lm1.left_eyebrow()[:,1], label="lm1 left eyebrow")
plt.plot(lm1.right_eyebrow()[:,0], lm1.right_eyebrow()[:,1], label="lm1 right eyebrow")
plt.plot(lm1.right_eye()[:,0], lm1.right_eye()[:,1], label="lm1 right eye")
plt.plot(lm1.left_eye()[:,0], lm1.left_eye()[:,1], label="lm1 left eye")
plt.legend()
plt.gca().invert_yaxis()

In [None]:
# Total length of all `kissing_final` intervals, converted from frames to
# minutes (divides by 24 and then 60, i.e. presumably assumes 24 fps).
# The accumulator is deliberately not called `sum`: that would shadow the
# builtin for every cell run afterwards.
# NOTE(review): `kissing_final` is defined in a later cell of this scratchpad.
total_frames = 0
for ints in kissing_final.get_allintervals().values():
    for i in ints.intrvls:
        total_frames += i.end-i.start
total_frames/24/60

In [None]:
# Show intervals of at least 240 frames with face landmarks and bounding boxes
# drawn from the first payload element.
# NOTE(review): every other call in this notebook passes
# `<collection>.get_allintervals()` to intrvllists_to_result_with_objects; here
# the collection itself is passed — confirm this is accepted.
esper_widget(intrvllists_to_result_with_objects(intervals.filter_length(min_length=240),
                lambda p, video_id:  [face_landmarks_to_dict(face['landmarks']) for face in p[0]] + [
                   bbox_to_result_object(face, video_id) for face in p[0]], stride=1))

In [None]:
intervals.filter_length(min_length=240).get_intervallist(118).intrvls[0].payload[1]

In [None]:
# Line is ax+by+c=0
def project_point_to_line(pt, a, b, c):
    """Return the orthogonal projection of ``pt`` onto the line ``a*x + b*y + c = 0``.

    Args:
        pt: point whose first two elements are ``x`` and ``y``.
        a, b, c: line coefficients; ``a`` and ``b`` must not both be zero.

    Returns:
        ``np.ndarray`` ``[x, y]`` of the projected point. An array (rather
        than a list) is returned so that callers can subtract two projections
        directly.
    """
    x0, y0 = pt[0], pt[1]
    d = a*a + b*b
    x = (b*(b*x0 - a*y0) - a*c) / d
    y = (a*(-b*x0 + a*y0) - b*c) / d
    return np.array([x, y])

In [None]:
# Positive if facing right
def signed_face_angle(f):
    """Signed measure of how far a face is turned, from its landmarks.

    A straight line ``y = a*x + b`` is fitted through the landmarks at
    ``center_line_indices``. ``A`` and ``B`` are the projections of the first
    and last of those landmarks onto the line, and ``C`` is the mean of
    ``nose_bridge()[2:4]``. The result is the 2-D cross product of the unit
    vectors ``AB`` and ``AC`` (the sine of the angle between them); per the
    original author it is positive when the face looks right.

    NOTE(review): fitting ``y`` as a function of ``x`` is ill-conditioned when
    the centre line is close to vertical; a later cell in this notebook
    redefines this function with a fit that handles both orientations.

    Args:
        f: face dict whose ``'landmarks'`` entry exposes a ``landmarks`` array
           indexable by landmark id and a ``nose_bridge()`` method.

    Returns:
        The scalar cross product ``AB x AC``.
    """
    lm = f['landmarks']
    center_line_indices = [27,28,32,33,34,51,62,66,57]
    data = lm.landmarks[center_line_indices]
    # Column 0 is x, column 1 is y (`data[:1]` would select the first ROW).
    # y = ax+b
    a, b = np.polyfit(data[:,0], data[:,1], 1)
    # np.asarray keeps the subtraction below valid even if the helper returns
    # a plain list.
    A = np.asarray(project_point_to_line(lm.landmarks[center_line_indices[0]], a,-1,b))
    B = np.asarray(project_point_to_line(lm.landmarks[center_line_indices[-1]], a,-1,b))
    AB = B-A
    AB = AB / np.linalg.norm(AB)
    C = np.mean(lm.nose_bridge()[2:4], axis=0)
    AC = C-A
    AC = AC / np.linalg.norm(AC)
    # Written out by hand: np.cross on 2-element vectors is deprecated in
    # recent NumPy releases.
    return AB[0]*AC[1] - AB[1]*AC[0]

In [None]:
# Subtract caption (dialogue) intervals from `intervals`.
# NOTE(review): `intervals` is not defined in this cell; it must already exist
# in the kernel.
print("Removing dialogs")
from esper.captions import get_all_segments
from rekall.video_interval_collection import VideoIntervalCollection
# Only look up captions for videos that actually have candidate intervals.
vids = list(intervals.get_allintervals().keys())
print(len(vids))
results = get_all_segments(vids)
# One database query per video to fetch its frame rate.
fps_map = dict((i, Video.objects.get(id=i).fps) for i in vids)
# word[0] and word[1] are multiplied by fps to obtain frame numbers, so they
# are presumably times in seconds — TODO confirm against get_all_segments.
caption_results = VideoIntervalCollection({
    video_id: [(
        word[0] * fps_map[video_id], # start frame
        word[1] * fps_map[video_id], # end frame
        word[2][0]) # payload is the word (string)
        for word in words]
    for video_id, words in results
})
# Keep only the parts of each interval that no caption covers.
kissing_without_words = intervals.minus(caption_results)

In [None]:
kissing_final = kissing_without_words.map(lambda intvl: (int(intvl.start), int(intvl.end), intvl.payload)).coalesce().filter_length(min_length=12)

In [None]:
esper_widget(intrvllists_to_result_with_objects(kissing_final.get_allintervals(),
                lambda p, video_id:  [face_landmarks_to_dict(face['landmarks']) for face in p[0]] + [
                   bbox_to_result_object(face, video_id) for face in p[0]], stride=1))

In [None]:
kissing_final.get_intervallist(1)

In [None]:
vids

In [None]:
from esper.captions import INDEX, LEXICON, DOCUMENTS

In [None]:
# Print the lexicon token for every entry returned by INDEX.tokens(244) and
# flag the entries that cannot be resolved or printed.
for w in INDEX.tokens(244):
    try:
        t = LEXICON[w].token
        print(t)
    except Exception:
        # `except Exception` (not a bare `except`) so that interrupting the
        # kernel still stops the loop.
        print("ERROR!", w)

In [None]:
Video.objects.get(id=86)

In [None]:
DOCUMENTS[86].name

In [None]:
np.log(16777215+1)/np.log(2)

In [None]:
def load_srt(doc_path: str):
    """Open a subtitle file with pysrt, retrying with ISO-8859-1 on failure.

    Args:
        doc_path: path to the ``.srt`` file.

    Returns:
        The object returned by ``pysrt.open``.

    Raises:
        Exception: ``'Cannot parse <doc_path>'`` when both attempts fail. The
            error from the second attempt is attached as ``__cause__``.
    """
    import pysrt
    try:
        return pysrt.open(doc_path)
    except Exception:
        # First attempt failed (for whatever reason); retry once with an
        # explicit ISO-8859-1 encoding before giving up.
        try:
            return pysrt.open(doc_path, encoding='iso-8859-1')
        except Exception as err:
            raise Exception('Cannot parse {}'.format(doc_path)) from err


def get_doc_words(doc_path: str):
    """Count the tokens of a subtitle file.

    Each subtitle's text is tokenised with the module-level ``TOKENIZER``;
    tokens longer than ``MAX_WORD_LEN`` characters are ignored.

    Args:
        doc_path: path handed to ``load_srt``.

    Returns:
        ``collections.Counter`` mapping token to occurrence count. If the file
        cannot be loaded the error is printed and an empty counter is returned.
    """
    from collections import Counter

    counts = Counter()
    try:
        subtitles = load_srt(doc_path)
    except Exception as e:
        print(e)
        return counts

    for subtitle in subtitles:
        for token in TOKENIZER.tokens(subtitle.text):
            if len(token) <= MAX_WORD_LEN:
                counts[token] += 1
    return counts

In [None]:
def _sanitize(t):
    """Drop every character not in ``string.printable`` and strip surrounding whitespace."""
    import string
    kept = [ch for ch in t if ch in string.printable]
    return ''.join(kept).strip()


class SpacyTokenizer(object):
    """Tokeniser backed by a spaCy pipeline with tagger, parser and NER disabled."""

    def __init__(self):
        # Lazy import
        import spacy
        self._tokenizer = spacy.load('en', disable=['tagger', 'parser', 'ner'])

    def tokens(self, text: str):
        """Return the sanitised, non-empty token strings of ``text`` in order."""
        cleaned = []
        for token in self._tokenizer(text):
            piece = _sanitize(token.text)
            if piece:
                cleaned.append(piece)
        return cleaned



In [None]:
TOKENIZER=SpacyTokenizer()
MAX_WORD_LEN=10000

In [None]:
c=get_doc_words("../data/subs/aligned/86.srt")

In [None]:
c['Byzantium']

In [None]:
import spacy
spacy.load('en', disable=['tagger', 'parser', 'ner'])

In [None]:
def all_captions(video_ids=(17,), min_start=11300, max_start=11500):
    """Return caption intervals (in frames) whose start lies in a frame window.

    The defaults reproduce the original debugging query: video 17, start
    frame between 11300 and 11500 inclusive.

    Args:
        video_ids: iterable of video ids to fetch captions for.
        min_start: smallest interval start (frame number) to keep.
        max_start: largest interval start (frame number) to keep.

    Returns:
        A ``VideoIntervalCollection`` whose intervals run from
        ``word[0] * fps`` to ``word[1] * fps`` with ``word[2]`` as payload.
    """
    from esper.captions import get_all_segments
    from rekall.video_interval_collection import VideoIntervalCollection

    video_ids = list(video_ids)

    # Only aligned captions are in the caption index
    results = get_all_segments(video_ids)
    fps_map = dict((i, Video.objects.get(id=i).fps) for i in video_ids)
    caption_results = VideoIntervalCollection({
        video_id: [(
            word[0] * fps_map[video_id], # start frame
            word[1] * fps_map[video_id], # end frame
            word[2]) # payload is the third element of each segment
            for word in words]
        for video_id, words in results
    }).filter(lambda intvl: intvl.start >= min_start and intvl.start <= max_start)

    return caption_results

debug = all_captions()

In [None]:
[intvl.payload for intvl in debug.get_allintervals()[17].intrvls]

In [None]:
11/27

In [None]:
6/14

In [None]:
600/50

In [None]:
11*12

In [None]:
14*12

In [None]:
10/24

In [None]:
isinstance((1,2), tuple)

In [None]:
import esper.pose_wrapper as pw
ps = pw.get(PoseMeta.objects.filter(frame__video_id=184, frame__number=109332))

In [None]:
ps[0].hand_keypoints()

In [None]:
Face.objects.filter(frame__video_id=72, frame__number=135528)

In [None]:
t=collection_with_opposing_faces.filter(lambda intvl: intvl.start==81555).get_intervallist(1).intrvls[0].payload['face_pairs'][1]
t

In [None]:
lm1 = t[0]['landmarks']
lm2 = t[1]['landmarks']

In [None]:
# Line is ax+by+c=0
def project_point_to_line(pt, a, b, c):
    """Orthogonally project ``pt`` onto the line ``a*x + b*y + c = 0``.

    Only the first two elements of ``pt`` are read. Returns the projected
    point as ``np.array([x, y])``.
    """
    px, py = pt[0], pt[1]
    norm_sq = a*a + b*b
    foot_x = (b*(b*px - a*py) - a*c) / norm_sq
    foot_y = (a*(-b*px + a*py) - b*c) / norm_sq
    return np.array([foot_x, foot_y])

# Returns (a,b,c) which defines ax+by+c=0
def find_best_line_fit(xs, ys):
    """Fit a straight line through the points, choosing the better regression direction.

    Two degree-1 least-squares fits are computed, ``y`` on ``x`` and ``x`` on
    ``y``, and the one with the smaller sum of squared residuals wins (ties go
    to the ``x``-on-``y`` fit).

    Returns:
        ``(a, b, c)`` describing the line ``a*x + b*y + c = 0``:
        ``(slope, -1, intercept)`` for the ``y``-on-``x`` fit, or
        ``(-1, slope, intercept)`` for the ``x``-on-``y`` fit.
    """
    def fit_with_error(inputs, targets):
        coeffs = np.polyfit(inputs, targets, 1)
        squared_error = np.sum((np.poly1d(coeffs)(inputs) - targets)**2)
        return coeffs, squared_error

    y_on_x, err_y_on_x = fit_with_error(xs, ys)
    x_on_y, err_x_on_y = fit_with_error(ys, xs)

    if err_y_on_x < err_x_on_y:
        # slope*x + intercept = y
        slope, intercept = y_on_x[0], y_on_x[1]
        return slope, -1, intercept
    # slope*y + intercept = x
    return -1, x_on_y[0], x_on_y[1]

def plot_face_fit(lm, data, A, B, C):
    """Debug plot of a face's landmarks together with the fitted centre line.

    Scatters all landmarks, then the points in ``data``, then the three points
    ``A``, ``B`` and ``C``, and draws arrows from ``A`` to ``B`` and from ``A``
    to ``C``. The y axis is inverted and the axes use equal scaling.
    """
    import matplotlib.pyplot as plt

    axes = plt.gca()
    axes.invert_yaxis()
    axes.axis('equal')

    anchors = np.array([A, B, C])
    for cloud in (lm.landmarks, data, anchors):
        axes.scatter(cloud[:,0], cloud[:,1])

    for tip in (B, C):
        delta = tip - A
        axes.arrow(A[0], A[1], delta[0], delta[1])

    plt.show()
    

# Positive if facing right
def signed_face_angle(lm):
    """Signed turn measure of a face, computed from its landmarks (and plotted).

    A line is fitted through the landmarks at ``center_line_indices``; ``A``
    and ``B`` are the projections of the first and last of them onto that
    line and ``C`` is the mean of ``nose_bridge()[2:4]``. Returns
    ``np.cross`` of the unit vectors ``AB`` and ``AC``; per the original
    author the value is positive when the face looks right. As a side effect
    the fit is drawn with ``plot_face_fit``.
    """
    def unit(vec):
        return vec / np.linalg.norm(vec)

    center_line_indices = [27,28, 32, 33,34, 51,62,66,57]
    data = lm.landmarks[center_line_indices]
    a, b, c = find_best_line_fit(data[:,0], data[:,1])

    first_idx, last_idx = center_line_indices[0], center_line_indices[-1]
    A = project_point_to_line(lm.landmarks[first_idx], a, b, c)
    B = project_point_to_line(lm.landmarks[last_idx], a, b, c)
    C = np.mean(lm.nose_bridge()[2:4], axis=0)

    AB = unit(B - A)
    AC = unit(C - A)
    plot_face_fit(lm, data, A, B, C)
    return np.cross(AB, AC)

In [None]:
signed_face_angle(lm1)

In [None]:
lm1.nose_bridge()

In [None]:
lm1.landmarks[57]

In [None]:
# Scatter the landmarks of `lm2`, highlight the points in `data` and overlay a
# degree-1 least-squares fit of y on x through those points.
# NOTE(review): `data` is only assigned in a later cell of this scratchpad, so
# this cell fails on a fresh kernel when run in order.
ax = plt.gca()
ax.axis('equal')
ax.invert_yaxis()
ax.scatter(lm2.landmarks[:,0], lm2.landmarks[:,1])
plt.scatter(data[:,0], data[:,1])
ax.plot(np.unique(data[:,0]), np.poly1d(np.polyfit(data[:,0], data[:,1], 1))(np.unique(data[:,0])))
plt.show()

In [None]:
import matplotlib.pyplot as plt
plt.gca().invert_yaxis()
plt.axis('equal')
plt.scatter(lm2.landmarks[:,0], lm2.landmarks[:,1])
center_line_indices = [27,28,32, 33,34, 51,62,66,57]
data = lm2.landmarks[center_line_indices]
plt.scatter(data[:,0], data[:,1])
#plt.scatter(lm2.landmarks[[27,28,32,33,34,51,62,66,57],0], lm2.landmarks[[27,28,32,33,34,51,62,66,57],1])
plt.scatter([0.506,0.56, 0.517],[0.380,0.403,0.354])
plt.scatter(lm2.landmarks[27,0], lm2.landmarks[27,1])

#plt.scatter(lm1.nose_bridge()[:,0], lm1.nose_bridge()[:,1])

In [None]:
#[i.start for i in collection_with_opposing_faces.get_intervallist(59).intrvls]
collection.filter(lambda i:i.start==115501).get_intervallist(72).intrvls[0].payload

In [None]:
from esper.pose_wrapper import PoseWrapper
print(p1.pose_keypoints()[[p1.LShoulder, p1.RShoulder]])
print(p2.pose_keypoints()[[p1.LShoulder, p1.RShoulder]])

In [None]:
def get_face_pair(fs, min_height=0.3, min_overlap_y=0.2, max_gap_x=0.05):
    """Pick the two large faces of a frame if they sit side by side.

    Args:
        fs: list of face dicts with ``x1``, ``y1``, ``x2``, ``y2``.
        min_height: a face counts as large when ``y2 - y1`` exceeds this.
        min_overlap_y: the vertical extents of the two faces must overlap by
            more than this.
        max_gap_x: the horizontal gap between the left face's ``x2`` and the
            right face's ``x1`` must be smaller than this.

    Returns:
        ``(left_face, right_face)`` when exactly two large faces exist and
        they satisfy the constraints above; ``(None, None)`` otherwise. The
        reason is printed in the failing cases.
    """
    def height(f):
        return f['y2']-f['y1']

    large = [f for f in fs if height(f) > min_height]
    if len(large) != 2:
        # Covers both too few and too many large faces.
        print("expected exactly 2 large faces, found {}".format(len(large)))
        return None, None

    f1, f2 = large
    # Order the pair so that f1 is the face further to the left.
    if f2['x2']<f1['x2'] and f2['x1'] < f1['x1']:
        f1, f2 = f2, f1

    strictly_ordered = f1['x2']<f2['x2'] and f1['x1']<f2['x1']
    overlap_y = min(f1['y2'],f2['y2'])-max(f1['y1'],f2['y1'])
    gap_x = f2['x1']-f1['x2']
    if strictly_ordered and overlap_y > min_overlap_y and gap_x < max_gap_x:
        return f1, f2

    print(f1, f2)
    return None, None

In [None]:
get_face_pair([f['faces'] for f in faces if f['frame']==132960][0])