In [1]:
from query.models import Face, Shot
from rekall.video_interval_collection import VideoIntervalCollection
from rekall.parsers import named_payload, in_array, bbox_payload_parser
from rekall.parsers import merge_dict_parsers, dict_payload_parser
from rekall.merge_ops import payload_plus, payload_first, merge_named_payload
from rekall.payload_predicates import payload_satisfies, on_name
from rekall.spatial_predicates import scene_graph
from rekall.logical_predicates import and_pred
from rekall.temporal_predicates import overlaps
from rekall.bbox_predicates import height_at_least, left_of, same_value
from esper.rekall import intrvllists_to_result_with_objects, bbox_to_result_object

In [2]:
from esper.prelude import esper_widget
import time
from IPython.display import display

# Query

In [10]:
def query(video_id, NUM_SHOTS, MAX_SHOT_DURATION, stride):
    # We're going to look for frames that would be good "hero shot" frames --
    #   potentially good frames to show in a Netflix preview, for instance.
    # We're going to look for frames where there's exactly one face of a
    #   certain height, and the frame has certain minimum brightness,
    #   sharpness, and contrast properties.
#     MIN_FACE_HEIGHT = 0.2
#     MIN_BRIGHTNESS = 50

    # Annotate face rows with start and end frames, video ID, and frame image
    #   information
    from query.models import Shot
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.temporal_predicates import meets_before, overlaps
    from esper.rekall import intrvllists_to_result_with_objects
    from django.db.models import ExpressionWrapper, FloatField
    from esper.captions import get_all_segments
    from rekall.merge_ops import payload_first, payload_plus, merge_named_payload
    from tqdm import tqdm
    import numpy as np

#     NUM_SHOTS=3
#     MAX_SHOT_DURATION=1.5

    # payload: shot_id, vid
    short_shots = VideoIntervalCollection.from_django_qs(Shot.objects.annotate(
        duration = ExpressionWrapper((F('max_frame') - F('min_frame')) / F('video__fps'), output_field=FloatField())
    ).filter(
        duration__lt=MAX_SHOT_DURATION,
        duration__gt=0.,
        cinematic=True,
        video_id=video_id
    ).exclude(
        video__genres__name="animation"
    ), with_payload = lambda shot: {'shot_id': shot.id, 'vid': shot.video_id})
    
    # Slow
    def get_frame_brightness(intvl):
        vid = intvl.payload['vid']
        start = intvl.start
        end = intvl.end
        qs = Frame.objects.filter(video_id=vid, number__gte=start, number__lte=end).order_by('number')
        intvl.payload['brightness'] = [f.brightness for f in qs]
        return intvl
    
    def payload_in_array(intvl):
        return (intvl.start, intvl.end, [intvl.payload])
    
    def add_brightness_array(intvl):
        if 'brightness' not in intvl.payload:
            intvl.payload['brightness'] = []
        return intvl       
    
    def bright_enough(intvl):
        BRIGHTNESS_THRESHOLD = 20.0
        average_above_threshold = np.array(
            [np.mean(np.array(p['brightness'])) if len(p['brightness'])>0 else 0 for p in intvl.payload]
        )> BRIGHTNESS_THRESHOLD
        return np.any(average_above_threshold)
    
    def get_all_frame_numbers(intervallist):
        frame = []
        for intvl in intervallist.intrvls:
            frame += list(range(intvl.start, intvl.end+1))
        return frame
    
    print("Getting brightness for {0} intervals".format(sum(
        [len(l.intrvls) for l in short_shots.get_allintervals().values()])))
    
    # Get brightnesses for each frame in payload
    for vid, intervals in tqdm(short_shots.get_allintervals().items()):
        frames = get_all_frame_numbers(intervals)
        qs = Frame.objects.filter(video_id=vid, number__in=frames, regularly_sampled=True).order_by('number').values()
        interval_index = 0
        for frame in qs:
            while frame['number'] > intervals.intrvls[interval_index].end:
                interval_index += 1
            if frame['number'] >= intervals.intrvls[interval_index].start:
                intvl = intervals.intrvls[interval_index]
                if 'brightness' in intvl.payload:
                    intvl.payload['brightness'].append(frame['brightness'])
                else:
                    intvl.payload['brightness'] = [frame['brightness']] 
    short_shots = short_shots.map(add_brightness_array)
    n_shots = short_shots.map(payload_in_array)
    for n in range(2, NUM_SHOTS + 1):
        print('Constructing {} consecutive short shots'.format(n))
        
        n_shots = n_shots.merge(
            short_shots, predicate=meets_before(epsilon=1), working_window=1,
            payload_merge_op = lambda arr, el: arr+[el]
        )

        print('There are {} videos with {} consecutive short shots'.format(
            len(n_shots.get_allintervals().keys()), n)
    )
    n_shots = n_shots.coalesce().filter(bright_enough).filter_length(min_length=1)
    
    # Filter out any intervals with too many spoken lines
    print("Getting captions")
    vids = n_shots.get_allintervals().keys()
    caption_results = get_all_segments(vids)
    fps_map = dict((i, Video.objects.get(id=i).fps) for i in vids)
    captioned_intervals = VideoIntervalCollection({
        video_id: [(
            word[0] * fps_map[video_id], # start frame
            word[1] * fps_map[video_id], # end frame
            {'shots': [],
             'words': [word[2]]
            }
            ) # payload is the word
            for word in words]
        for video_id, words in caption_results
    })
    
    def organize_payload(intvl):
        p = intvl.payload
        new_payload = {
            'shots': p,
            'words': []
        }
        return (intvl.start, intvl.end, new_payload)
    
    # payload:
    # shots: list of shots
    # words: list of word intervals
    n_shots = n_shots.map(organize_payload)
    n_shots = n_shots.join(
        captioned_intervals,
        lambda shot_interval, word_interval:
            [(shot_interval.start, shot_interval.end, {
                'shots': shot_interval.payload['shots'],
                'words': word_interval.payload['words']
            })],
        predicate=overlaps(),
        working_window=1
    ).coalesce(payload_merge_op=merge_named_payload({
        'shots': payload_first,
        'words': payload_plus,
    })).set_union(n_shots).coalesce(
        payload_merge_op=merge_named_payload({
            'shots': payload_first,
            'words': payload_plus
    }))
    
    def has_few_words(intvl):
        MAX_NUM_WORDS_PER_SECOND = 1.0
        
        p = intvl.payload
        n_words = len(p['words'])
        time = (intvl.end-intvl.start) / fps_map[p['shots'][0]['vid']]
        return n_words / time <= MAX_NUM_WORDS_PER_SECOND
    
    action_shots = n_shots.filter(has_few_words)
    
    return intrvllists_to_result_with_objects(
        action_shots.get_allintervals(), 
        lambda a, b: [],
#         lambda payload, video_id: [
#             bbox_to_result_object(bbox, video_id) for bbox in payload['faces']],
        stride=stride)

In [8]:
def show_query(video_id, num_shots, max_duration, stride):
    result = query(video_id, num_shots, max_duration, stride)
    
    widget = esper_widget(result, jupyter_keybindings=True, crop_bboxes=False)
    display(widget)
    
    return widget, result

In [52]:
def convert_segments(segments):
    output_segments = []
    for seg in segments:
        for res in result['result']:
            if res['elements'][0]['min_frame'] == seg[0]:
                output_segments.append((res['elements'][0]['min_frame'], res['elements'][0]['max_frame']))
    print(output_segments)

# Braveheart

In [55]:
widget, result = show_query(28, 3, 1.5, 1)

  0%|                                                                                                                                                                                                                  | 0/1 [00:00<?, ?it/s]

Getting brightness for 818 intervals


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.81s/it]


Constructing 2 consecutive short shots
There are 1 videos with 2 consecutive short shots
Constructing 3 consecutive short shots
There are 1 videos with 3 consecutive short shots
Getting captions


VGridWidget(jsglobals={'schema': [['Identity', ['id', 'name']], ['Genre', ['id', 'name']], ['Director', ['id',…

In [16]:
selected_segments_braveheart = [
    (result['result'][i]['elements'][0]['min_frame'], result['result'][i]['elements'][0]['min_frame'])
    for i in widget.selected
]
print(selected_segments_braveheart)

[(34611, 34611), (10111, 10111), (70956, 70956), (71890, 71890), (72133, 72133), (72650, 72650), (102362, 102362), (127941, 127941), (124902, 124902), (174768, 174768), (175704, 175704), (175851, 175851), (176572, 176572), (183086, 183086), (129528, 129528), (130398, 130398), (120090, 120090), (142940, 142940), (176009, 176009), (176284, 176284)]


In [56]:
convert_segments(selected_segments_braveheart)

[(34611, 34711), (10111, 10206), (70956, 71284), (71890, 71958), (72133, 72274), (72650, 72770), (102362, 102528), (127941, 128049), (124902, 125095), (174768, 174935), (175704, 175798), (175851, 175968), (176572, 176638), (183086, 183162), (129528, 129625), (130398, 130448), (120090, 120175), (142940, 143043), (176009, 176087), (176284, 176409)]


# Revenge of the Sith

In [18]:
start = time.time()

In [53]:
widget, result = show_query(186, 3, 2, 1)

  0%|                                                                                                                                                                                                                  | 0/1 [00:00<?, ?it/s]

Getting brightness for 725 intervals


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.08s/it]


Constructing 2 consecutive short shots
There are 1 videos with 2 consecutive short shots
Constructing 3 consecutive short shots
There are 1 videos with 3 consecutive short shots
Getting captions


VGridWidget(jsglobals={'schema': [['Identity', ['id', 'name']], ['Genre', ['id', 'name']], ['Director', ['id',…

In [21]:
selected_segments_rots = [
    (result['result'][i]['elements'][0]['min_frame'], result['result'][i]['elements'][0]['min_frame'])
    for i in widget.selected
]
print(selected_segments_rots)

[(5229, 5229), (5713, 5713), (8477, 8477), (10228, 10228), (11301, 11301), (18859, 18859), (19341, 19341), (19832, 19832), (22999, 22999), (28097, 28097), (72188, 72188), (29818, 29818), (83163, 83163), (86697, 86697), (94645, 94645), (102915, 102915), (104151, 104151), (155578, 155578), (162400, 162400), (162632, 162632), (171350, 171350)]


In [54]:
convert_segments(selected_segments_rots)

[(5229, 5307), (5713, 5810), (8477, 8584), (10228, 10369), (11301, 11428), (18859, 19022), (19341, 19437), (19832, 20155), (22999, 23098), (28097, 28180), (72188, 72259), (29818, 29892), (83163, 83269), (86697, 86934), (94645, 94766), (102915, 103031), (104151, 104267), (155578, 155706), (162400, 162473), (162632, 162706), (171350, 171452)]


In [22]:
end = time.time()

In [23]:
print("Seconds to label: ", end - start)

Seconds to label:  852.9286942481995


# Steve Jobs

In [24]:
start = time.time()

In [57]:
widget, result = show_query(520, 3, 4, 1)

  0%|                                                                                                                                                                                                                  | 0/1 [00:00<?, ?it/s]

Getting brightness for 1025 intervals


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.91s/it]


Constructing 2 consecutive short shots
There are 1 videos with 2 consecutive short shots
Constructing 3 consecutive short shots
There are 1 videos with 3 consecutive short shots
Getting captions


VGridWidget(jsglobals={'schema': [['Identity', ['id', 'name']], ['Genre', ['id', 'name']], ['Director', ['id',…

In [27]:
selected_segments_jobs = [
    (result['result'][i]['elements'][0]['min_frame'], result['result'][i]['elements'][0]['min_frame'])
    for i in widget.selected
]
print(selected_segments_jobs)

[(26103, 26103), (35885, 35885), (41726, 41726), (64245, 64245), (102774, 102774), (105988, 105988), (132567, 132567), (169275, 169275)]


In [58]:
convert_segments(selected_segments_jobs)

[(26103, 26216), (35885, 36118), (41726, 41838), (64245, 64389), (102774, 103008), (105988, 106145), (132567, 132684), (169275, 169448)]


In [28]:
end = time.time()

In [29]:
print("Seconds to label: ", end - start)

Seconds to label:  250.78352189064026


# Guardians of the Galaxy

In [30]:
start = time.time()

In [59]:
widget, result = show_query(74, 3, 2, 1)

  0%|                                                                                                                                                                                                                  | 0/1 [00:00<?, ?it/s]

Getting brightness for 846 intervals


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.90s/it]


Constructing 2 consecutive short shots
There are 1 videos with 2 consecutive short shots
Constructing 3 consecutive short shots
There are 1 videos with 3 consecutive short shots
Getting captions


VGridWidget(jsglobals={'schema': [['Identity', ['id', 'name']], ['Genre', ['id', 'name']], ['Director', ['id',…

In [35]:
selected_segments_gotg = [
    (result['result'][i]['elements'][0]['min_frame'], result['result'][i]['elements'][0]['min_frame'])
    for i in widget.selected
]

print(selected_segments_gotg)

[(13276, 13276), (15410, 15410), (26541, 26541), (28356, 28356), (56012, 56012), (52560, 52560), (85015, 85015), (89149, 89149), (89491, 89491), (91984, 91984), (103407, 103407), (122103, 122103), (124120, 124120), (124919, 124919), (125610, 125610), (131503, 131503), (129493, 129493), (132114, 132114), (135673, 135673), (147868, 147868)]


In [60]:
convert_segments(selected_segments_gotg)

[(13276, 13382), (15410, 15684), (26541, 26679), (28356, 28503), (56012, 56208), (52560, 52666), (85015, 85127), (89149, 89223), (89491, 89606), (91984, 92036), (103407, 103540), (122103, 122177), (124120, 124193), (124919, 125013), (125610, 125722), (131503, 131629), (129493, 129749), (132114, 132217), (135673, 135828), (147868, 147938)]


In [36]:
end = time.time()

In [37]:
print("Seconds to label: ", end - start)

Seconds to label:  523.8553323745728


# Daddy's Home

In [38]:
start = time.time()

In [61]:
widget, result = show_query(334, 3, 3, 1)

  0%|                                                                                                                                                                                                                  | 0/1 [00:00<?, ?it/s]

Getting brightness for 1274 intervals


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.23s/it]


Constructing 2 consecutive short shots
There are 1 videos with 2 consecutive short shots
Constructing 3 consecutive short shots
There are 1 videos with 3 consecutive short shots
Getting captions


VGridWidget(jsglobals={'schema': [['Identity', ['id', 'name']], ['Genre', ['id', 'name']], ['Director', ['id',…

In [41]:
selected_segments_daddy = [
    (result['result'][i]['elements'][0]['min_frame'], result['result'][i]['elements'][0]['min_frame'])
    for i in widget.selected
]
print(selected_segments_daddy)

[(10173, 10173), (16441, 16441), (35905, 35905), (49783, 49783), (50483, 50483), (54680, 54680), (99226, 99226), (99773, 99773), (108821, 108821), (130270, 130270)]


In [62]:
convert_segments(selected_segments_daddy)

[(10173, 10419), (16441, 17023), (35905, 36286), (49783, 50033), (50483, 50578), (54680, 54836), (99226, 99595), (99773, 99901), (108821, 108904), (130270, 130438)]


In [42]:
end = time.time()

In [43]:
print("Seconds to label: ", end - start)

Seconds to label:  214.2823235988617


# Batman v Superman

In [44]:
start = time.time()

In [63]:
widget, result = show_query(299, 3, 2, 1)

  0%|                                                                                                                                                                                                                  | 0/1 [00:00<?, ?it/s]

Getting brightness for 606 intervals


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.30s/it]
No document for video id: 299


Constructing 2 consecutive short shots
There are 1 videos with 2 consecutive short shots
Constructing 3 consecutive short shots
There are 1 videos with 3 consecutive short shots
Getting captions


VGridWidget(jsglobals={'schema': [['Identity', ['id', 'name']], ['Genre', ['id', 'name']], ['Director', ['id',…

In [49]:
selected_segments_bvs = [
    (result['result'][i]['elements'][0]['min_frame'], result['result'][i]['elements'][0]['min_frame'])
    for i in widget.selected
]
print(selected_segments_bvs)

[(13207, 13207), (20728, 20728), (38251, 38251), (63535, 63535), (96489, 96489), (98687, 98687), (112839, 112839), (114901, 114901), (130444, 130444), (179427, 179427), (181749, 181749), (182978, 182978), (190543, 190543), (198495, 198495), (205006, 205006), (209383, 209383), (216314, 216314), (219301, 219301), (220373, 220373), (220560, 220560), (223711, 223711)]


In [64]:
convert_segments(selected_segments_bvs)

[(13207, 13440), (20728, 20846), (38251, 38365), (63535, 63616), (96489, 96613), (98687, 98818), (112839, 112942), (114901, 115000), (130444, 130534), (179427, 179618), (181749, 181858), (182978, 183048), (190543, 190656), (198495, 198648), (205006, 205187), (209383, 209576), (216314, 216452), (219301, 219405), (220373, 220464), (220560, 220647), (223711, 223847)]


In [50]:
end = time.time()

In [51]:
print("Seconds to label: ", end - start)

Seconds to label:  545.7174379825592
