# Supercut Template
Copy this notebook as a starting point when making a new supercut. The template is subject to change as we gain experience.

In [84]:
from esper.supercuts import *
from query.models import Shot
from rekall.temporal_predicates import overlaps

In [63]:
from rekall.video_interval_collection import VideoIntervalCollection
from rekall.merge_ops import payload_plus
from rekall.payload_predicates import payload_satisfies
from rekall.temporal_predicates import overlaps
from rekall.parsers import in_array, merge_dict_parsers, bbox_payload_parser, dict_payload_parser
from esper.caption_metadata import caption_metadata_for_video
from esper.captions import get_all_segments
videos = Video.objects.filter(id__in=[20]).all()

# Load the script metadata for each selected video, keeping only intervals
# that have a speaker and whose speaker is not one of the excluded names.
metadata = VideoIntervalCollection({
    video.id: caption_metadata_for_video(video.id) for video in videos
}).filter(
    lambda meta_interval: not (
        meta_interval.payload['speaker'] is None
        or "man's voice" in meta_interval.payload['speaker']
        or meta_interval.payload['speaker'].strip() == "gene krantz"
    )
)

# Annotate each face/character row with the fields read below: min_frame and
# max_frame are both the face's frame number, plus the video ID and the
# character name.
faces_with_character_actor_qs = (
    FaceCharacterActor.objects
    .annotate(
        min_frame=F('face__frame__number'),
        max_frame=F('face__frame__number'),
        video_id=F('face__frame__video_id'),
        character_name=F('characteractor__character__name'),
    )
    .filter(video_id__in=[video.id for video in videos])
)

# Build intervals whose payload is a one-element list [{'character': name}],
# then coalesce them, combining payloads with payload_plus.
frames_with_identity = VideoIntervalCollection.from_django_qs(
    faces_with_character_actor_qs,
    with_payload=in_array(dict_payload_parser(
        VideoIntervalCollection.django_accessor,
        {'character': 'character_name'},
    )),
).coalesce(payload_merge_op=payload_plus)

In [52]:
# Sanity check: display the name of the video with id 20.
Video.objects.get(id=20).name

'batman returns'

In [64]:
# Inspect the character-identity intervals that were built for video 20.
frames_with_identity.get_intervallist(20)

[<Interval start:1416 end:1416 payload:[{'character': 'max shreck'}]>, <Interval start:1428 end:1428 payload:[{'character': 'max shreck'}]>, <Interval start:1440 end:1440 payload:[{'character': 'max shreck'}]>, <Interval start:1452 end:1452 payload:[{'character': 'max shreck'}]>, <Interval start:10764 end:10764 payload:[{'character': 'catwoman / selina'}]>, <Interval start:10776 end:10776 payload:[{'character': 'catwoman / selina'}]>, <Interval start:10788 end:10788 payload:[{'character': 'catwoman / selina'}]>, <Interval start:10800 end:10800 payload:[{'character': 'catwoman / selina'}]>, <Interval start:10812 end:10812 payload:[{'character': 'catwoman / selina'}]>, <Interval start:10824 end:10824 payload:[{'character': 'catwoman / selina'}]>, <Interval start:10836 end:10836 payload:[{'character': 'catwoman / selina'}]>, <Interval start:10848 end:10848 payload:[{'character': 'catwoman / selina'}]>, <Interval start:10860 end:10860 payload:[{'character': 'catwoman / selina'}]>, <Interva

In [85]:
# IDs of the videos to search; query() reads this module-level name when it
# selects its Video rows.
video_ids = [15]
# video_ids = [20]

In [86]:
def query():
    '''
    Write your query here. This function should return a rekall
    VideoIntervalCollection.

    The example below finds "reaction shots" in the videos listed in the
    module-level ``video_ids``: caption words whose script speaker is NOT among
    the characters identified in an overlapping medium-close-up, close-up or
    extreme-close-up shot.

    Returns:
        A VideoIntervalCollection whose interval bounds are
        int(time * fps) values and whose payloads are lists of
        (word, speaker) tuples.
    '''
    from collections import Counter

    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.merge_ops import payload_plus
    from rekall.payload_predicates import payload_satisfies
    from rekall.temporal_predicates import overlaps
    from rekall.parsers import in_array, dict_payload_parser
    from esper.caption_metadata import caption_metadata_for_video
    from esper.captions import get_all_segments

    videos = Video.objects.filter(id__in=video_ids).all()
    # Built once and reused by every per-video filter below.
    selected_ids = [v.id for v in videos]

    # Load script data, keeping only intervals that have a speaker and whose
    # speaker is not one of the excluded names.
    metadata = VideoIntervalCollection({
        video.id: caption_metadata_for_video(video.id)
        for video in videos
    }).filter(
        lambda meta_interval: (meta_interval.payload['speaker'] is not None and
                                "man's voice" not in meta_interval.payload['speaker'] and
                                meta_interval.payload['speaker'].strip() != "gene krantz")
    )

    all_segments = get_all_segments(selected_ids)

    captions_interval_collection = VideoIntervalCollection({
        video: intervals
        for video, intervals in all_segments
    })

    # Pair each caption with the speaker of the overlapping script entry that
    # is flagged 'aligned'. The resulting payload is (word[0], speaker).
    captions_with_speaker_id = captions_interval_collection.overlaps(
        metadata.filter(payload_satisfies(lambda p: p['aligned'])),
        payload_merge_op=lambda word, script_meta: (word[0], script_meta['speaker'])
    )

    # Annotate face rows with start and end frames and the video ID
    faces_with_character_actor_qs = FaceCharacterActor.objects.annotate(
        min_frame=F('face__frame__number'),
        max_frame=F('face__frame__number'),
        video_id=F('face__frame__video_id'),
        character_name=F('characteractor__character__name')
    ).filter(video_id__in=selected_ids)

    frames_with_identity = VideoIntervalCollection.from_django_qs(
        faces_with_character_actor_qs,
        with_payload=in_array(
            dict_payload_parser(VideoIntervalCollection.django_accessor, {'character': 'character_name'}),
        )
    ).coalesce(payload_merge_op=payload_plus)

    # Cinematic shots of the selected videos, with the video fps as payload.
    shots_qs = Shot.objects.filter(
        cinematic=True, video_id__in=selected_ids
    ).annotate(fps=F('video__fps'))
    shots = VideoIntervalCollection.from_django_qs(shots_qs, with_payload=lambda shot: shot.fps)

    # Annotate shots with mode shot scale
    frames_with_shot_scale_qs = Frame.objects.filter(
        regularly_sampled=True,
        video_id__in=selected_ids
    ).annotate(
        min_frame=F('number'),
        max_frame=F('number'),
        shot_scale_name=F('shot_scale__name')
    ).all()
    frames_with_shot_scale = VideoIntervalCollection.from_django_qs(
        frames_with_shot_scale_qs,
        with_payload=lambda f: f.shot_scale_name
    )

    def get_mode(items):
        # Most frequent item. Counter preserves first-seen order, so ties are
        # resolved deterministically (first-seen value wins) instead of
        # depending on set iteration order, and each item is counted once.
        counts = Counter(items)
        return max(counts, key=counts.get)

    shots_with_scale = shots.merge(
        frames_with_shot_scale,
        predicate=overlaps(),
        payload_merge_op=lambda shot_fps, shot_scale: [(shot_fps, shot_scale)]
    ).coalesce(
        payload_merge_op=payload_plus
    ).map(
        lambda intrvl: (intrvl.start, intrvl.end, {
            # Every (fps, scale) pair comes from a Shot's fps payload; the
            # first one is used.
            'fps': intrvl.payload[0][0],
            'shot_scale': get_mode([p[1] for p in intrvl.payload])
        })
    )

    # Attach the identified characters to each shot and divide the bounds by
    # fps (presumably converting frames to seconds to match the caption
    # intervals -- TODO confirm). Each character name is split on '/' and
    # reduced to the first word of each part.
    shots_with_people_in_them = shots_with_scale.overlaps(
        frames_with_identity,
        payload_merge_op=lambda shot_payload, identities: (shot_payload, identities),
        working_window=1
    ).coalesce(payload_merge_op=lambda p1, p2: (p1[0], p1[1] + p2[1])).map(
        lambda intrvl: (intrvl.start / intrvl.payload[0]['fps'], intrvl.end / intrvl.payload[0]['fps'], {
            'fps': intrvl.payload[0]['fps'],
            'shot_scale': intrvl.payload[0]['shot_scale'],
            'characters': {
                name.strip().split(' ')[0].strip()
                for d in intrvl.payload[1]
                for name in d['character'].split('/')
                if len(name.strip()) > 0
            }
        })
    )

    # Keep caption/shot overlaps where the first word of the speaker name is
    # not among the shot's characters, restricted to the close shot scales.
    # Bounds are multiplied back by fps and truncated to int.
    # NOTE(review): dilate(12) -> coalesce -> dilate(-12) looks intended to
    # merge intervals separated by small gaps before dropping those shorter
    # than min_length=12 -- confirm against rekall's dilate/filter_length docs.
    reaction_shots = captions_with_speaker_id.overlaps(
        shots_with_people_in_them.filter(
            payload_satisfies(lambda p: p['shot_scale'] in ['medium_close_up', 'close_up', 'extreme_close_up'])
        ),
        predicate=lambda captions, shots: captions.payload[1].strip().split(' ')[0] not in shots.payload['characters'],
        payload_merge_op=lambda word_and_speaker, fps_and_characters: (fps_and_characters['fps'], word_and_speaker)
    ).map(
        lambda intrvl: (
            int(intrvl.start * intrvl.payload[0]),
            int(intrvl.end * intrvl.payload[0]),
            [intrvl.payload[1]]
        )
    ).dilate(12).coalesce(payload_merge_op=payload_plus).dilate(-12).filter_length(min_length=12)

    return reaction_shots

In [87]:
def generate_supercut_intervals(
    interval_collection,
    dilation=0,
    snap_to_cinematic_shots=False,
    limit=None,
    stride=1
):
    '''
    Flattens interval_collection into a list of (video_id, start, end) tuples
    for the supercut pipeline.

    When dilation > 0 the intervals are dilated by that amount and coalesced.
    If snap_to_cinematic_shots is also set, the dilated intervals are then
    overlapped with the "shot-hsvhist-face" shots that overlap the original
    (undilated) intervals. With dilation <= 0 the collection is used as is and
    snap_to_cinematic_shots has no effect.

    Tuples are ordered by sorted video ID. Every stride-th tuple is returned,
    at most limit of them when limit is not None.
    '''
    if dilation > 0:
        if snap_to_cinematic_shots:
            # The shots must be selected against the undilated collection, so
            # this lookup happens before interval_collection is rebound.
            shot_rows = Shot.objects.filter(
                video_id__in=list(interval_collection.get_allintervals().keys()),
                labeler__name="shot-hsvhist-face"
            )
            shots = VideoIntervalCollection.from_django_qs(shot_rows).filter_against(
                interval_collection, predicate=overlaps()
            )
            interval_collection = (
                interval_collection.dilate(dilation).coalesce().overlaps(shots)
            )
        else:
            interval_collection = interval_collection.dilate(dilation).coalesce()

    ordered_video_ids = sorted(interval_collection.get_allintervals().keys())
    supercut_intervals = [
        (video_id, intrvl.get_start(), intrvl.get_end())
        for video_id in ordered_video_ids
        for intrvl in interval_collection.get_intervallist(video_id).get_intervals()
    ]

    # A stop of None slices to the end, which is the "no limit" case.
    stop = None if limit is None else limit * stride
    return supercut_intervals[:stop:stride]

In [88]:
# Output path: passed as out_path to stitch_video_temporal in the cell below.
OUTPUT_PATH = '/app/result/supercut/reaction_shots_apollo_13.mp4'

In [89]:
# Run the query, flatten its result into (video_id, start, end) tuples using
# the default options (no dilation, no limit, stride 1), and hand them to
# stitch_video_temporal with OUTPUT_PATH and a 1920x1080 output size.
stitch_video_temporal(generate_supercut_intervals(query()), out_path=OUTPUT_PATH, width=1920, height=1080)

HBox(children=(IntProgress(value=0, max=119), HTML(value='')))