<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc" style="margin-top: 1em;"><ul class="toc-item"><li><ul class="toc-item"><li><span><a href="#ScratchPad-From-This-Point-On" data-toc-modified-id="ScratchPad-From-This-Point-On-0.1"><span class="toc-item-num">0.1&nbsp;&nbsp;</span>ScratchPad From This Point On</a></span></li></ul></li></ul></div>

In [139]:
from esper.prelude import *
import esper.stdlib as stdlib
from django.db.models import ExpressionWrapper, F
import rekall as rk
import rekall.parsers
import rekall.payload_predicates
import esper.rekall
from rekall.video_interval_collection import VideoIntervalCollection
from query.models import *
import numpy as np

In [229]:
# Defining the shot scale in terms of labels: face height percentage.
# Using https://filmanalysis.coursepress.yale.edu/cinematography/ as reference.
from enum import IntEnum
class ShotScale(IntEnum):
    """Shot-scale labels as ordered integers.

    Abbreviations: L=Long CU=Close-up X=Extreme M=Medium.
    Values grow from the widest framing (XL) to the tightest (XCU), so two
    labels can be compared or combined with max(); UNK (0) sorts below all
    real labels.
    """
    UNK = 0  # fallback label used when no scale can be derived
    XL = 1   # extreme long shot
    L = 2    # long shot
    ML = 3   # medium long shot
    M = 4    # medium shot
    CU = 5   # close-up
    XCU = 6  # extreme close-up

# Limitations:
#   Extreme close-ups are difficult to get since the face detector does not work on those frames.
#   Long shots and extreme long shots are difficult to get since the faces are too small for the face detector.
def face_height_to_shot_scale(face_height):
    """Map a face height to a ShotScale label.

    The height is checked against descending lower bounds and the first bound
    it meets or exceeds decides the label. Anything below the smallest bound
    yields ShotScale.UNK.
    (assumes face_height is a fraction of the frame height - TODO confirm)
    """
    lower_bounds = (
        (0.95, ShotScale.XCU),
        (0.5, ShotScale.CU),
        (0.25, ShotScale.M),
        (0.12, ShotScale.ML),
    )
    for bound, scale in lower_bounds:
        if face_height >= bound:
            return scale
    # Disabled on purpose: `>= 0.05 -> ShotScale.L`, otherwise `ShotScale.XL`.
    # Small faces are reported as UNK instead of being forced into a long-shot label.
    return ShotScale.UNK

def pose_keypoints_to_shot_scale(keypoints):
    """Estimate a ShotScale label from one set of body-pose keypoints.

    `keypoints` is reshaped into rows of three values. Column 1 is read as the
    y coordinate and column 2 as a score; a score > 0 marks the keypoint as
    detected. The label depends on which body-part groups (head, shoulders,
    hips, knees, ankles) have a detected keypoint, and on the vertical extent
    between the head group and the lowest usable group.
    (assumes y is normalised to the frame height - TODO confirm)
    """
    joints = np.array(keypoints).reshape((-1, 3))

    def detected(group):
        # True when at least one keypoint of the group has a positive score.
        return np.any(joints[group, 2] > 0)

    def detected_ys(group):
        # y values of the group's keypoints, restricted to detected ones.
        picked = joints[group, :]
        return picked[picked[:, 2] > 0, 1]

    def vertical_extent(top_group, bottom_group):
        # Largest y of the bottom group minus smallest y of the top group.
        return max(detected_ys(bottom_group)) - min(detected_ys(top_group))

    def pick(extent, ladder, fallback):
        # First (bound, label) pair whose bound the extent reaches wins.
        for bound, scale in ladder:
            if extent >= bound:
                return scale
        return fallback

    ankles = [Pose.RAnkle, Pose.LAnkle]
    knees = [Pose.RKnee, Pose.LKnee]
    hips = [Pose.RHip, Pose.LHip]
    shoulders = [Pose.RShoulder, Pose.LShoulder, Pose.Neck]
    head = [Pose.Nose, Pose.LEye, Pose.REye, Pose.REar, Pose.LEar]

    has_ankle = detected(ankles)
    has_knee = detected(knees)
    has_hip = detected(hips)
    has_shoulder = detected(shoulders)
    has_head = detected(head)

    # Each guard handles the case where the body is cut off at that group.
    if not has_head:
        return ShotScale.UNK
    if not has_shoulder:
        # Head only: the extent is the spread of the head keypoints themselves.
        return pick(vertical_extent(head, head),
                    [(0.25, ShotScale.XCU), (0.1, ShotScale.CU)],
                    ShotScale.UNK)
    if not has_hip:
        return pick(vertical_extent(head, shoulders),
                    [(0.4, ShotScale.CU), (0.15, ShotScale.M)],
                    ShotScale.UNK)
    if not has_knee:
        return pick(vertical_extent(head, hips),
                    [(0.75, ShotScale.M), (0.5, ShotScale.ML), (0.2, ShotScale.L)],
                    ShotScale.XL)
    if not has_ankle:
        return pick(vertical_extent(head, knees + hips),
                    [(0.75, ShotScale.ML), (0.4, ShotScale.L)],
                    ShotScale.XL)
    # Every group has a detected keypoint.
    return pick(vertical_extent(head, ankles + knees + hips),
                [(0.5, ShotScale.L)],
                ShotScale.XL)
    
    

In [141]:
# Helper functions for operating on QuerySets
def annotate_face_height(qs):
    """Return `qs` annotated with a `face_height` field built from Face.height_expr()."""
    height_expression = Face.height_expr()
    return qs.annotate(face_height=height_expression)

def filter_faces_by_face_height(qs, min_height=0.0, max_height=1.0):
    """Keep rows whose `face_height` lies in the half-open range [min_height, max_height).

    `qs` must already expose a `face_height` field to filter on.
    """
    height_window = {
        'face_height__gte': min_height,
        'face_height__lt': max_height,
    }
    return qs.filter(**height_window)

def annotate_frame_and_video(qs):
    """Return `qs` annotated with `frame_number` and `video_id`.

    Both fields are F() references that follow the row's `frame` relation
    (`frame__number` and `frame__video__id`).
    """
    frame_number_ref = F('frame__number')
    video_id_ref = F('frame__video__id')
    return qs.annotate(frame_number=frame_number_ref, video_id=video_id_ref)

def to_simple_display(frames):
    """Turn rows into a stdlib.simple_result of type 'Video'.

    Every row in `frames` must expose `video_id` and `frame_number`
    attributes; each row becomes one entry with an empty object list.
    """
    entries = []
    for row in frames:
        entries.append({
            'video': row.video_id,
            'min_frame': row.frame_number,
            'objects': [],
        })
    return stdlib.simple_result(entries, 'Video')

def bbox_payload_to_object(bbox, video):
    """Convert a bbox payload into a displayable 'bbox' object dict.

    `bbox` must provide the keys 'x1', 'x2', 'y1' and 'y2'; they are copied
    to 'bbox_x1' ... 'bbox_y2'. The object's 'id' is set to `video`.
    """
    obj = {'id': video, 'type': 'bbox'}
    for corner in ('x1', 'x2', 'y1', 'y2'):
        obj['bbox_' + corner] = bbox[corner]
    return obj

def pose_payload_to_object(pose, video):
    """Wrap a pose payload in a displayable 'pose' object dict.

    The payload is stored unchanged under 'keypoints', the labeler is always
    reported as 'UNKNOWN', and 'id' is set to `video`.
    """
    return dict(id=video, type='pose', labeler='UNKNOWN', keypoints=pose)

def payload_to_objects(payload, video_id):
    """Convert a named payload into a flat list of displayable objects.

    Entries under 'bbox' come first, followed by entries under 'pose';
    a missing key contributes nothing.
    """
    boxes = [bbox_payload_to_object(item, video_id) for item in payload.get('bbox', [])]
    poses = [pose_payload_to_object(item, video_id) for item in payload.get('pose', [])]
    return boxes + poses
    

def pose_payload_parser():
    """Return a parser that turns a pose row into a plain-dict payload.

    The returned function reads the row's hand, body and face keypoints and
    converts each with `.tolist()`, producing the keys 'hand_left',
    'hand_right', 'pose' and 'face'.
    """
    def get_pose(row):
        # Fetch the hand keypoints once; index 0 is stored as the left hand
        # and index 1 as the right hand.
        hands = row.hand_keypoints()
        return {
            'hand_left': hands[0].tolist(),
            'hand_right': hands[1].tolist(),
            'pose': row.pose_keypoints().tolist(),
            'face': row.face_keypoints().tolist()
        }
    return get_pose

# Convert named payloads to objects to send to vgrid
def intrvllists_to_result_general(intrvllists, limit=None, stride=1):
    """Build a 'Video' result dict from per-video interval lists.

    The objects of each result come from payload_to_objects(), so interval
    payloads may carry 'bbox' and/or 'pose' entries.

    Args:
        intrvllists: mapping from video id to an object whose get_intervals()
            returns a sliceable sequence of intervals exposing get_start(),
            get_end() and get_payload().
        limit: maximum number of results to return; None keeps all of them.
        stride: keep every `stride`-th interval of each video.

    Returns:
        {'result': groups, 'count': n, 'type': 'Video'} where each group is a
        flat group holding one result and `count` is the number of videos in
        `intrvllists` (not the number of results).
    """
    materialized_results = []
    for video in intrvllists:
        # Stop before touching the next video once the cap is reached, so no
        # video is materialised only to be discarded by the slice below.
        if limit is not None and len(materialized_results) >= limit:
            break
        intrvllist = intrvllists[video].get_intervals()
        if len(intrvllist) == 0:
            continue
        for intrvl in intrvllist[::stride]:
            materialized_results.append({
                'video': video,
                # Midpoint of the interval.
                # NOTE(review): true division yields a float frame number - confirm
                # the consumer accepts that.
                'min_frame': (intrvl.get_start() + intrvl.get_end()) / 2,
                'objects': payload_to_objects(intrvl.get_payload(), video)
            })

    # The last video processed may have pushed the list past the cap.
    if limit is not None:
        materialized_results = materialized_results[:limit]

    groups = [{'type': 'flat', 'label': '', 'elements': [r]}
              for r in materialized_results]

    return {'result': groups, 'count': len(list(intrvllists.keys())), 'type': 'Video'}

# Put the output of `parser` under the key `name`
def with_named_payload(name, parser):
    """Wrap `parser` so that its result is returned as a one-entry dict keyed by `name`."""
    def keyed_parser(obj):
        parsed = parser(obj)
        return {name: parsed}
    return keyed_parser

def with_bbox():
    """Return a payload parser that stores the row's bounding box under 'bbox'.

    The bbox is read with rekall's bbox_payload_parser through the Django
    accessor and passed through rk.parsers.in_array
    (presumably wrapping it in a list so payloads can be concatenated - TODO confirm).
    """
    bbox_parser = rk.parsers.bbox_payload_parser(VideoIntervalCollection.django_accessor)
    listed_bbox_parser = rk.parsers.in_array(bbox_parser)
    return with_named_payload('bbox', listed_bbox_parser)

def with_pose():
    """Return a payload parser that stores the row's pose payload under 'pose'.

    The pose payload comes from pose_payload_parser() and is passed through
    rk.parsers.in_array
    (presumably wrapping it in a list so payloads can be concatenated - TODO confirm).
    """
    listed_pose_parser = rk.parsers.in_array(pose_payload_parser())
    return with_named_payload('pose', listed_pose_parser)

def with_attr(attr):
    """Return a payload parser that stores the object's `attr` attribute under the key `attr`."""
    def read_attribute(obj):
        return getattr(obj, attr)
    return with_named_payload(attr, read_attribute)

def shot_scale_payload_merge(p1, p2):
    """Merge two payloads belonging to the same frame.

    'bbox' and 'pose' lists are concatenated (p1 first); for 'shot_scale' and
    'face_height' the larger value wins, with 0 standing in for a missing
    side. A key appears in the result only if at least one input has it.
    This assumes that the intervals to be merged are single-frame intervals.
    """
    merged = {}
    for key in ('bbox', 'pose'):
        if key not in p1 and key not in p2:
            continue
        merged[key] = p1.get(key, []) + p2.get(key, [])
    for key in ('shot_scale', 'face_height'):
        if key not in p1 and key not in p2:
            continue
        merged[key] = max(p1.get(key, 0), p2.get(key, 0))
    return merged

def has_entire_body(p):
    """Tell whether any pose in payload `p` has every body keypoint detected.

    Each entry of p['pose'] carries its body keypoints under 'pose'; they are
    reshaped into rows of three values and a keypoint counts as detected when
    its third value is > 0. Returns False when `p` has no 'pose' key.
    """
    def fully_detected(entry):
        body = np.array(entry['pose']).reshape((-1, 3))  # Get the body pose
        return np.all(body[:, 2] > 0)

    if 'pose' in p:
        return any(fully_detected(entry) for entry in p['pose'])
    return False

def get_shot_scale_fn(s):
    """Return a payload predicate that is true when payload['shot_scale'] equals `s`."""
    return lambda p: p['shot_scale'] == s

# Derive from face_height or pose
def add_shot_scale_payload(p):
    """Add a 'shot_scale' entry to payload `p` in place and return `p`.

    The label is the largest of: ShotScale.UNK, the label derived from
    p['face_height'] (if present), and the label derived from the body
    keypoints of every entry in p['pose'] (if present).
    """
    candidates = [ShotScale.UNK]
    if 'face_height' in p:
        candidates.append(face_height_to_shot_scale(p['face_height']))
    if 'pose' in p:
        candidates.extend(
            pose_keypoints_to_shot_scale(entry['pose']) for entry in p['pose'])
    p['shot_scale'] = max(candidates)
    return p

def map_payload(payload_fn):
    """Lift a payload -> payload function to an interval -> interval function.

    The returned function replaces `interval.payload` with
    `payload_fn(interval.payload)` in place and returns the same interval.
    """
    def apply_to_interval(interval):
        new_payload = payload_fn(interval.payload)
        interval.payload = new_payload
        return interval
    return apply_to_interval
        
def get_video_interval_collection(qs, with_payload=None):
    """Build a VideoIntervalCollection of single-frame intervals from `qs`.

    Both the start and the end of every interval are taken from the row's
    `frame_number` field; `with_payload` is forwarded to from_django_qs.
    """
    single_frame_schema = {"start": "frame_number", "end": "frame_number"}
    return VideoIntervalCollection.from_django_qs(
        qs, schema=single_frame_schema, with_payload=with_payload)

def get_all_frames_with_shot_scale(video_id, scale):
    """Collect the frames of one video whose shot scale equals `scale`.

    Steps:
      1. Faces of the video become single-frame intervals carrying
         'face_height' and 'bbox'.
      2. Poses of the video become single-frame intervals carrying 'pose'.
      3. Every interval gets a 'shot_scale' from its own payload.
      4. Intervals are coalesced with shot_scale_payload_merge, which keeps
         the largest 'shot_scale'.
      5. Only intervals whose merged 'shot_scale' equals `scale` are kept.
    """
    face_rows = annotate_frame_and_video(
        annotate_face_height(Face.objects)).filter(video_id=video_id)
    face_parser = rk.parsers.merge_dict_parsers([with_attr('face_height'), with_bbox()])
    faces = get_video_interval_collection(face_rows, face_parser)

    pose_rows = annotate_frame_and_video(Pose.objects).filter(video_id=video_id)
    poses = get_video_interval_collection(pose_rows, with_pose())

    labelled = faces.set_union(poses).map(map_payload(add_shot_scale_payload))
    per_frame = labelled.coalesce(shot_scale_payload_merge)
    wanted = rk.payload_predicates.payload_satisfies(get_shot_scale_fn(scale))
    return per_frame.filter(wanted)

In [230]:
# Display the frames of video 214 ("the godfather" in the listing printed further down)
# whose merged shot scale is ShotScale.UNK, capped at 1000 results with no striding.
esper_widget(
    intrvllists_to_result_general(
        get_all_frames_with_shot_scale(214, ShotScale.UNK).get_allintervals(),
        limit=1000, stride=1))

VGridWidget(jsglobals={'bucket': 'esper', 'queries': [['All faces', 'def all_faces():\n    from query.models i…

## ScratchPad From This Point On

In [149]:
# Scratch: earlier one-off lookups, kept commented out for reference.
#list(Video.objects.filter(name="the godfather"))
#POSE_ID=5010
#print(Pose.objects.filter(pk=POSE_ID)[0].pose_keypoints())
#esper_widget(stdlib.qs_to_result(Pose.objects.filter(pk=POSE_ID)))
# Print the name and id of every video that has pose rows: one Pose row is
# kept per distinct video_id and its frame's video is printed.
for f in Pose.objects.annotate(video_id=F("frame__video__id")).distinct('video_id'):
    print(f.frame.video.name, f.frame.video.id)

mr mrs smith 123
star wars episode i the phantom menace 184
star wars episode ii attack of the clones 185
star wars episode iii revenge of the sith 186
star wars episode iv a new hope 187
star wars episode v the empire strikes back 188
star wars episode vi return of the jedi 189
the godfather 214
the godfather part ii 215
the godfather part iii 216
brooklyn 313
harry potter and the chamber of secrets 374
harry potter and the deathly hallows part 1 375
harry potter and the deathly hallows part 2 376
harry potter and the goblet of fire 377
harry potter and the halfblood prince 378
harry potter and the order of the phoenix 379
harry potter and the prisoner of azkaban 380
harry potter and the sorcerers stone 381
mr mrs smith 445
pillow talk 467
star wars the force awakens 519


In [233]:
# Debug one frame: for each pose stored on frame 119268 of video 214, print the
# raw body keypoints followed by the shot scale derived from them.
for p in Pose.objects.filter(frame__video__id=214, frame__number=119268):
    print(p.pose_keypoints())
    print(pose_keypoints_to_shot_scale(p.pose_keypoints()))

[[ 0.32388258  0.54875612  0.62992316]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]
 [ 0.27171788  0.45053458  0.63950366]
 [ 0.36837217  0.4532536   0.56719869]
 [ 0.16590875  0.52145916  0.45909595]
 [ 0.42359021  0.49964789  0.09720651]]
ShotScale.UNK
