# Supercut Template
A notebook to copy to make supercuts from. Subject to change based on experience.

In [9]:
from esper.supercuts import *
from query.models import Shot
from rekall.temporal_predicates import overlaps

In [2]:
def query():
    '''
    Write your query here. Below is a query for Harry, Ron, and Hermione in a row.
    This function should return a rekall VideoIntervalCollection.
    '''
    from query.models import Shot
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.temporal_predicates import meets_before, overlaps
    from esper.rekall import intrvllists_to_result_with_objects
    from django.db.models import ExpressionWrapper, FloatField
    from esper.captions import get_all_segments
    from rekall.merge_ops import payload_first, payload_plus, merge_named_payload

    NUM_SHOTS=5
    MAX_SHOT_DURATION=0.8

    # payload: shot_id, vid
    short_shots = VideoIntervalCollection.from_django_qs(Shot.objects.annotate(
        duration = ExpressionWrapper((F('max_frame') - F('min_frame')) / F('video__fps'), output_field=FloatField())
    ).filter(
        duration__lt=MAX_SHOT_DURATION,
        duration__gt=0.,
        cinematic=True,
        video__ignore_film=False
    ).exclude(
        video__genres__name="animation"
    ), with_payload = lambda shot: {'shot_id': shot.id, 'vid': shot.video_id})
    
    # Slow
    def get_frame_brightness(intvl):
        vid = intvl.payload['vid']
        start = intvl.start
        end = intvl.end
        qs = Frame.objects.filter(video_id=vid, number__gte=start, number__lte=end).order_by('number')
        intvl.payload['brightness'] = [f.brightness for f in qs]
        return intvl
    
    def payload_in_array(intvl):
        return (intvl.start, intvl.end, [intvl.payload])
    
    def add_brightness_array(intvl):
        if 'brightness' not in intvl.payload:
            intvl.payload['brightness'] = []
        return intvl       
    
    def bright_enough(intvl):
        BRIGHTNESS_THRESHOLD = 20.0
        average_above_threshold = np.array(
            [np.mean(np.array(p['brightness'])) if len(p['brightness'])>0 else 0 for p in intvl.payload]
        )> BRIGHTNESS_THRESHOLD
        return np.any(average_above_threshold)
    
    def get_all_frame_numbers(intervallist):
        frame = []
        for intvl in intervallist.intrvls:
            frame += list(range(intvl.start, intvl.end+1))
        return frame
    
    print("Getting brightness for {0} intervals".format(sum(
        [len(l.intrvls) for l in short_shots.get_allintervals().values()])))
    
    # Get brightnesses for each frame in payload
    for vid, intervals in tqdm(short_shots.get_allintervals().items()):
        frames = get_all_frame_numbers(intervals)
        qs = Frame.objects.filter(video_id=vid, number__in=frames, regularly_sampled=True).order_by('number').values()
        interval_index = 0
        for frame in qs:
            while frame['number'] > intervals.intrvls[interval_index].end:
                interval_index += 1
            if frame['number'] >= intervals.intrvls[interval_index].start:
                intvl = intervals.intrvls[interval_index]
                if 'brightness' in intvl.payload:
                    intvl.payload['brightness'].append(frame['brightness'])
                else:
                    intvl.payload['brightness'] = [frame['brightness']] 
    short_shots = short_shots.map(add_brightness_array)
    n_shots = short_shots.map(payload_in_array)
    for n in range(2, NUM_SHOTS + 1):
        print('Constructing {} consecutive short shots'.format(n))
        
        n_shots = n_shots.merge(
            short_shots, predicate=meets_before(epsilon=1), working_window=1,
            payload_merge_op = lambda arr, el: arr+[el]
        )

        print('There are {} videos with {} consecutive short shots'.format(
            len(n_shots.get_allintervals().keys()), n)
    )
    n_shots = n_shots.coalesce().filter(bright_enough).filter_length(min_length=1)
    
    # Filter out any intervals with too many spoken lines
    print("Getting captions")
    vids = n_shots.get_allintervals().keys()
    caption_results = get_all_segments(vids)
    fps_map = dict((i, Video.objects.get(id=i).fps) for i in vids)
    captioned_intervals = VideoIntervalCollection({
        video_id: [(
            word[0] * fps_map[video_id], # start frame
            word[1] * fps_map[video_id], # end frame
            {'shots': [],
             'words': [word[2]]
            }
            ) # payload is the word
            for word in words]
        for video_id, words in caption_results
    })
    
    def organize_payload(intvl):
        p = intvl.payload
        new_payload = {
            'shots': p,
            'words': []
        }
        return (intvl.start, intvl.end, new_payload)
    
    # payload:
    # shots: list of shots
    # words: list of word intervals
    n_shots = n_shots.map(organize_payload)
    n_shots = n_shots.join(
        captioned_intervals,
        lambda shot_interval, word_interval:
            [(shot_interval.start, shot_interval.end, {
                'shots': shot_interval.payload['shots'],
                'words': word_interval.payload['words']
            })],
        predicate=overlaps(),
        working_window=1
    ).coalesce(payload_merge_op=merge_named_payload({
        'shots': payload_first,
        'words': payload_plus,
    })).set_union(n_shots).coalesce(
        payload_merge_op=merge_named_payload({
            'shots': payload_first,
            'words': payload_plus
    }))
    
    def has_few_words(intvl):
        MAX_NUM_WORDS_PER_SECOND = 1.0
        
        p = intvl.payload
        n_words = len(p['words'])
        time = (intvl.end-intvl.start) / fps_map[p['shots'][0]['vid']]
        return n_words / time <= MAX_NUM_WORDS_PER_SECOND
    
    action_shots = n_shots.filter(has_few_words)

    return action_shots

In [3]:
def generate_supercut_intervals(
    interval_collection,
    dilation=0,
    snap_to_cinematic_shots=False,
    limit=None,
    stride=1
):
    '''
    Generates supercut intervals for the supercut pipeline.
    Dilates interval_collection by dilation.
    If dilation > 0, snaps the dilations to cinematic shot boundaries.
    '''
    if dilation > 0:
        if snap_to_cinematic_shots:
            shots = VideoIntervalCollection.from_django_qs(
                Shot.objects.filter(
                    video_id__in=list(interval_collection.get_allintervals().keys()),
                    labeler__name="shot-hsvhist-face"
                )
            ).filter_against(interval_collection, predicate=overlaps())
        interval_collection = interval_collection.dilate(dilation).coalesce()
        if snap_to_cinematic_shots:
            interval_collection = interval_collection.overlaps(shots)
    
    supercut_intervals = []
    for video_id in sorted(list(interval_collection.get_allintervals().keys())):
        intervallist = interval_collection.get_intervallist(video_id)
        for intrvl in intervallist.get_intervals():
            supercut_intervals.append((video_id, intrvl.get_start(), intrvl.get_end()))
            
    if limit is not None:
        return supercut_intervals[:limit * stride:stride]
    
    return supercut_intervals[::stride]

In [5]:
# output path
OUTPUT_PATH = '/app/result/supercut/acion_shots.mp4'

In [8]:
action_shots = query()

  1%|██▏                                                                                                                                                                                                     | 6/559 [00:00<00:10, 51.64it/s]

Getting brightness for 38570 intervals


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 559/559 [00:17<00:00, 32.58it/s]


Constructing 2 consecutive short shots
There are 530 videos with 2 consecutive short shots
Constructing 3 consecutive short shots
There are 370 videos with 3 consecutive short shots
Constructing 4 consecutive short shots
There are 224 videos with 4 consecutive short shots
Constructing 5 consecutive short shots
There are 103 videos with 5 consecutive short shots
Getting captions


No document for video id: 299
No document for video id: 321
No document for video id: 69
No document for video id: 65


In [10]:
stitch_video_temporal(generate_supercut_intervals(query(), limit=50), out_path=OUTPUT_PATH, width=1920, height=1080)

  1%|█▊                                                                                                                                                                                                      | 5/559 [00:00<00:12, 43.83it/s]

Getting brightness for 38570 intervals


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 559/559 [00:18<00:00, 30.28it/s]


Constructing 2 consecutive short shots
There are 530 videos with 2 consecutive short shots
Constructing 3 consecutive short shots
There are 370 videos with 3 consecutive short shots
Constructing 4 consecutive short shots
There are 224 videos with 4 consecutive short shots
Constructing 5 consecutive short shots
There are 103 videos with 5 consecutive short shots
Getting captions


No document for video id: 299
No document for video id: 321
No document for video id: 69
No document for video id: 65


HBox(children=(IntProgress(value=0, max=50), HTML(value='')))


