In [None]:
from esper.prelude import *

def get_fps_map(vids):
    """Return a dict mapping each video id in `vids` to that video's `fps`.

    Ids that have no matching `Video` row are simply absent from the result.
    """
    from query.models import Video
    fps_by_video = {}
    for video in Video.objects.filter(id__in=vids):
        fps_by_video[video.id] = video.fps
    return fps_by_video

def frame_second_conversion(c, mode='f2s'):
    """Convert the time bounds of every interval in `c` between frames and seconds.

    Args:
        c: a collection exposing `get_grouped_intervals()`, keyed by video id.
        mode: 'f2s' (frames -> seconds, the default) or 's2f' (seconds -> frames).

    Returns:
        A new DomainIntervalCollection with the same keys and converted times.

    Raises:
        ValueError: if `mode` is neither 'f2s' nor 's2f'.
    """
    # Fail fast: previously an unknown mode left `fn` unbound and only blew up
    # (with UnboundLocalError) after the fps lookup had already hit the database.
    if mode not in ('f2s', 's2f'):
        raise ValueError("mode must be 'f2s' or 's2f', got {0!r}".format(mode))

    from rekall.domain_interval_collection import DomainIntervalCollection

    grouped = c.get_grouped_intervals()
    fps_map = get_fps_map(set(grouped.keys()))

    def second_to_frame(fps):
        def map_fn(intrvl):
            i2 = intrvl.copy()
            t1,t2 = intrvl.t
            # int() truncates toward zero, so results are whole frame numbers.
            i2.t = (int(t1*fps), int(t2*fps))
            return i2
        return map_fn

    def frame_to_second(fps):
        def map_fn(intrvl):
            i2 = intrvl.copy()
            t1,t2 = intrvl.t
            # int() truncates to whole seconds; sub-second precision is dropped.
            i2.t = (int(t1/fps), int(t2/fps))
            return i2
        return map_fn

    fn = frame_to_second if mode == 'f2s' else second_to_frame
    output = {vid: intervals.map(fn(fps_map[vid]))
              for vid, intervals in grouped.items()}
    return DomainIntervalCollection(output)

def frame_to_second_collection(c):
    """Shorthand for `frame_second_conversion` in frames-to-seconds mode."""
    converted = frame_second_conversion(c, mode='f2s')
    return converted

def second_to_frame_collection(c):
    """Shorthand for `frame_second_conversion` in seconds-to-frames mode."""
    converted = frame_second_conversion(c, mode='s2f')
    return converted

def convert_to_1d_collection(collection):
    """Keep only the time bounds of each interval, as a VideoIntervalCollection.

    Every interval becomes `Interval(t_start, t_end, None)`, so any other
    interval attributes and the payload are not carried over.
    """
    from rekall.interval_list import Interval
    from rekall.video_interval_collection import VideoIntervalCollection
    grouped = collection.get_grouped_intervals()
    time_only = {}
    for vid in grouped:
        time_only[vid] = [
            Interval(intrvl.t[0], intrvl.t[1], None)
            for intrvl in grouped[vid].get_intervals()
        ]
    return VideoIntervalCollection(time_only)

def display_result(collection_1d):
    """Build and return an esper widget for a 1D interval collection.

    The widget is returned, not displayed, so it should be the last
    expression of a notebook cell.
    """
    from esper.rekall import intrvllists_to_result
    per_video_lists = collection_1d.get_allintervals()
    widget_input = intrvllists_to_result(per_video_lists)
    return esper_widget(
        widget_input,
        crop_bboxes=False,
        show_middle_frame=False,
        disable_captions=False,
        results_per_page=25,
        jupyter_keybindings=True,
    )

def topN(gen,n=25):
    """Consume collections from `gen` until at least `n` domain keys are gathered.

    Each collection's contribution is the number of keys in its
    `get_grouped_intervals()` dict. Collections are merged with
    `disjoint_domain_combiner`. Returns None if `gen` yields nothing.
    """
    from tqdm import tqdm_notebook as tqdm
    from rekall.runtime import disjoint_domain_combiner
    merged = None
    gathered = 0
    with tqdm(total=n) as progress:
        for batch in gen:
            num_domains = len(batch.get_grouped_intervals())
            progress.update(num_domains)
            gathered += num_domains
            merged = batch if merged is None else disjoint_domain_combiner(merged, batch)
            if gathered >= n:
                # Stop pulling from the generator once enough results are in.
                break
    return merged

# time dimension is in seconds by default; pass in_seconds=False to skip the conversion
def get_commercial_intervals_in_vids(vids, in_seconds=True):
    """Load `Commercial` rows for the given video ids as a DomainIntervalCollection.

    With `in_seconds=True` the result is passed through
    `frame_to_second_collection`; otherwise it is returned exactly as built by
    `from_django_qs` (presumably frame-based -- confirm against the rekall
    default schema).
    """
    from query.models import Commercial
    from rekall.domain_interval_collection import DomainIntervalCollection

    commercial_rows = Commercial.objects.filter(video_id__in=vids)
    commercials = DomainIntervalCollection.from_django_qs(commercial_rows)
    if not in_seconds:
        return commercials
    return frame_to_second_collection(commercials)

# Interviews

In [None]:
# Guests whose interviews we search for; names are lower-cased.
# NOTE(review): 'Martin O’Malley' uses a typographic apostrophe (U+2019) --
# confirm it matches the spelling stored in the Labeler/Identity tables.
GUEST_LIST = [guest_name.lower() for guest_name in [
'Barack Obama', 'Donald Trump', 'Ted Cruz', 'John Kasich', 'Marco Rubio', 'Ben Carson', 'Jeb Bush',
'Jim Gilmore', 'Chris Christie', 'Carly Fiorina', 'Rick Santorum', 'Rand Paul', 'Mike Huckabee',
'Hillary Clinton', 'Bernie Sanders', 'Lincoln Chafee', 'Martin O’Malley', 'Jim Webb',
'Sarah Palin', 'John Boehner', 'Paul Ryan', 'Newt Gingrich','Nancy Pelosi','Elizabeth Warren', 'Mitch McConnell',
'Chuck Schumer','Harry Reid','Joe Biden', 'Kevin McCarthy', 'Steve Scalise', 'Bobby Jindal', 'John Cornyn',
'Dick Durbin','Orrin Hatch', 'Lindsey Graham', 'Mitt Romney', 'Michelle Obama' ,'Bill Clinton',
'George W Bush', 'Tim Kaine',
]]
# Distinct host names across every CanonicalShow that has at least one host.
HOST_LIST = list({
    host.name
    for show in CanonicalShow.objects.exclude(hosts=None)
    for host in show.hosts.all()
})
# Sorted ids of the videos whose show has at least one host.
VIDEOS = sorted(video.id for video in Video.objects.exclude(show__hosts=None))

def get_name_to_labeler_id(names):
    """Map each name to the ids of its face-identity labelers.

    For a name N, the labelers looked up are 'face-identity:N',
    'face-identity-converted:N' and 'face-identity-uncommon:N'. Names with no
    matching labeler map to an empty list.
    """
    from tqdm import tqdm

    def labeler_ids_for(person):
        from query.models import Labeler
        candidates = [
            'face-identity:'+person,
            'face-identity-converted:'+person,
            'face-identity-uncommon:'+person,
        ]
        matches = Labeler.objects.filter(name__in=candidates)
        return [labeler.id for labeler in matches]

    return {person: labeler_ids_for(person) for person in tqdm(names)}

# Labeler ids for every guest and host, looked up once at cell-execution time;
# get_person_intervals_in_vids reads this table.
NAME_TO_LABELER_ID = get_name_to_labeler_id(GUEST_LIST+HOST_LIST)

def name_to_id(name):
    """Return the id of the `Identity` row whose name equals `name`.

    Uses `objects.get`, so the lookup raises unless exactly one row matches.
    """
    from query.models import Identity
    identity = Identity.objects.get(name=name)
    return identity.id

# Identity ids for each guest / host name, in list order.
# name_to_id uses objects.get, so a name without exactly one Identity row raises here.
GUEST_IDS=[name_to_id(n) for n in GUEST_LIST]
HOST_IDS=[name_to_id(n) for n in HOST_LIST]

# time dimension in seconds
# Outputs a dictionary from name to video interval collection
def get_person_intervals_in_vids(person_names, vids, probability=0.7, min_height=None):
    """Collect, per person, coalesced face-identity intervals in the given videos.

    Args:
        person_names: names to look up; each must be a key of NAME_TO_LABELER_ID.
        vids: video ids to search.
        probability: minimum FaceIdentity probability (inclusive).
        min_height: if not None, minimum bbox height (y2 - y1, inclusive).

    Returns:
        dict mapping each name to a DomainIntervalCollection converted with
        frame_to_second_collection.
    """
    from query.models import FaceIdentity
    from django.db.models import F,Q
    from rekall.domain_interval_collection import DomainIntervalCollection
    from rekall.interval_set_3d import Interval3D
    from rekall.interval_set_3d_utils import P

    SAMPLE_RATE = 3 # Every 3s

    labeler_ids = [
        labeler_id
        for person in person_names
        for labeler_id in NAME_TO_LABELER_ID[person]
    ]

    identities = FaceIdentity.objects.filter(
        probability__gte=probability,
        face__frame__video_id__in=vids,
        face__frame__shot_boundary=False,
        labeler_id__in=labeler_ids,
    ).annotate(
        height=F('face__bbox_y2')-F('face__bbox_y1'),
        labeler_name=F('labeler__name'),
        video_id=F('face__frame__video_id'),
        frame_number=F('face__frame__number'),
        x1=F('face__bbox_x1'),
        x2=F('face__bbox_x2'),
        y1=F('face__bbox_y1'),
        y2=F('face__bbox_y2'),
    )
    if min_height is not None:
        identities = identities.filter(height__gte=min_height)

    # Each detection starts as a zero-length interval at its frame number.
    schema = {
        't1':'frame_number',
        't2':'frame_number',
        'x1':'x1','x2':'x2','y1':'y1','y2':'y2',
    }

    def person_name_of(row):
        # Labeler names look like '<prefix>:<person name>'.
        return row.labeler_name.split(':')[1]

    faces = DomainIntervalCollection.from_django_qs(
        identities, schema, with_payload=person_name_of, progress=False)

    fps_map = get_fps_map(set(faces.get_grouped_intervals().keys()))
    names_to_collection = {}
    for person in person_names:
        person_faces = faces.filter(P(lambda p: p==person))
        coalesced = {}
        for vid, intervals in person_faces.get_grouped_intervals().items():
            # Merge detections that are at most one sampling period apart (in frames).
            gap_in_frames = round(fps_map[vid] * SAMPLE_RATE)
            coalesced[vid] = intervals.temporal_coalesce(epsilon=gap_in_frames)
        names_to_collection[person] = frame_to_second_collection(
            DomainIntervalCollection(coalesced))
    return names_to_collection

# Returns interview_IS<person_only_IS<>, host_only_IS<>, person_with_host_IS<>>
def interview_query(guest, hosts, commercials):
    """Find interview segments between one guest and the hosts.

    Args:
        guest: interval collection of the guest's on-screen appearances.
        hosts: interval collection of all hosts' on-screen appearances.
        commercials: interval collection of commercial breaks, subtracted
            from the candidate segments.

    Returns:
        The surviving interview intervals, each with the payload
        (guest_only, hosts_only, guest_with_host).

    The caller in this notebook passes second-based collections, so the time
    constants below are in seconds.
    """
    from rekall.interval_set_3d import Interval3D
    from rekall.interval_set_3d_utils import T, P, or_preds, overlap_bound
    from rekall.temporal_predicates import overlaps, before, after
    
    SEGMENT_LENGTH=30  # not referenced anywhere in this function
    OVERLAP_LAX=60  # max host/guest distance for the fuzzy overlap, and the merge time window
    HOST_GUEST_GAP=120  # epsilon used when coalescing candidate segments
    MIN_LENGTH=240  # minimum segment size, applied before and after removing commercials
    SMALL_FACE_THRESHOLD=0.3  # max Y size for a guest face to count as "small"
    MIN_GUEST_TIME_RATIO=0.35  # guest time / segment length must exceed this
    MAX_SMALL_GUEST_RATIO=0.7  # small-guest time / guest time must stay below this
    
    # Host and guest match if they overlap or lie within OVERLAP_LAX of each other.
    fuzzy_overlap = or_preds(overlaps(), before(max_dist=OVERLAP_LAX), after(max_dist=OVERLAP_LAX))
    
    interview_candidates = hosts.merge(guest, T(fuzzy_overlap), time_window=OVERLAP_LAX).temporal_coalesce()
    # The size filter runs twice because subtracting commercials can shorten segments.
    interviews = interview_candidates.temporal_coalesce(
        epsilon=HOST_GUEST_GAP
    ).filter_size(min_size=MIN_LENGTH
    ).minus(commercials
    ).filter_size(min_size=MIN_LENGTH)

    def select_second(p):
        # Keep only the second element of the payload pair.
        return p[1]
    
    # Interview<Guest<height>>
    interview_with_guest = interviews.collect_by_interval(
        guest,
        T(overlaps()),
        filter_empty=True,
        time_window=0,
    ).map_payload(
        select_second)

    def total_time(intervals):
        # Sum of the lengths of all intervals in the set.
        return intervals.fold(lambda s, i: s+i.length(), 0)
    
    def filter_time(interview):
        guest = interview.payload
        small_guest = guest.filter_size(max_size=SMALL_FACE_THRESHOLD, axis='Y')
        small_guest_time = total_time(small_guest)
        total_guest_time = total_time(guest)
        segment_time = interview.length()
        # `and` short-circuits: when total_guest_time is 0 the first test is
        # False, so the second division never divides by zero.
        return (total_guest_time / segment_time > MIN_GUEST_TIME_RATIO and
                small_guest_time / total_guest_time < MAX_SMALL_GUEST_RATIO)
    # Interview<Guest<height>>
    interviews = interview_with_guest.filter(filter_time)

    # Guest<height>
    guest_in_interviews = guest.filter_against(interviews, T(overlaps()), time_window=0)
    # HostAndGuest<(Host, Guest)>
    guest_with_host = guest_in_interviews.join(
        hosts,
        T(overlaps()),
        lambda guest, host: [Interval3D(overlap_bound(guest.t, host.t), payload=(guest, host))],
        time_window=0)
    guest_only = guest_in_interviews.minus(guest_with_host)

    hosts_in_interviews = hosts.filter_against(interviews, T(overlaps()), time_window=0)
    hosts_only = hosts_in_interviews.minus(guest_with_host)
    
    # Three collect passes nest the payload as ((guest_only, hosts_only), guest_with_host);
    # the final map_payload flattens it into a 3-tuple.
    interview_with_metadata = interviews.collect_by_interval(
        guest_only,
        T(overlaps()),
        filter_empty=False,
        time_window=0
    ).map_payload(select_second).collect_by_interval(
        hosts_only,
        T(overlaps()),
        filter_empty=False,
        time_window=0
    ).collect_by_interval(
        guest_with_host,
        T(overlaps()),
        filter_empty=False,
        time_window=0
    ).map_payload(lambda p: (p[0][0],p[0][1],p[1]))
    
    return interview_with_metadata

def get_interviews_for_vids(vids):
    """Run `interview_query` for every guest in GUEST_LIST over the given videos.

    Face intervals for all hosts and guests are fetched in a single call
    (probability >= 0.7, bbox height >= 0.2). The hosts are unioned into one
    collection and the per-guest results are unioned into the return value.
    """
    from rekall.domain_interval_collection import DomainIntervalCollection
    from tqdm import tqdm

    people_to_intervals = get_person_intervals_in_vids(
        HOST_LIST + GUEST_LIST, vids, 0.7, 0.2)

    all_hosts = DomainIntervalCollection({})
    for host_name in HOST_LIST:
        all_hosts = all_hosts.union(people_to_intervals[host_name])

    commercials = get_commercial_intervals_in_vids(vids)

    all_interviews = DomainIntervalCollection({})
    for guest_name in tqdm(GUEST_LIST):
        guest_interviews = interview_query(
            people_to_intervals[guest_name], all_hosts, commercials)
        all_interviews = all_interviews.union(guest_interviews)
    return all_interviews

## Run on a few videos

In [None]:
# Smoke test: take every 10000th id from VIDEOS and run the interview query inline.
vids = VIDEOS[::10000]
answer = get_interviews_for_vids(vids)
# Results are in seconds; convert back to frames before building the widget.
display_result(convert_to_1d_collection(second_to_frame_collection(answer)))

## Run On All of TVNews

In [None]:
import ipyparallel as ipp
from esper.rekall_parallel import get_runtime_for_ipython_cluster
import pickle
# Connect to the ipyparallel cluster running under the 'local' profile.
c = ipp.Client(profile='local')
rt = get_runtime_for_ipython_cluster(c)

# First 10000 video ids, processed in chunks of 20 without shuffling.
vids = VIDEOS[:10000]
answer,_ = rt.run(get_interviews_for_vids, vids, randomize=False, chunksize=20, progress=True)
# Optional: uncomment to persist the result, named after the first and last video id.
# pickle.dump(answer, open('../data/interviews/interviews{0}-{1}.pkl'.format(vids[0],vids[-1]), 'wb'))

## Run with Streaming

In [None]:
import ipyparallel as ipp
from esper.rekall_parallel import get_runtime_for_ipython_cluster
import pickle
# Connect to the ipyparallel cluster running under the 'local' profile.
c = ipp.Client(profile='local')
rt = get_runtime_for_ipython_cluster(c)

# Stream results over all of VIDEOS in random order; topN stops after 25 domain keys.
answer = topN(rt.get_result_iterator(get_interviews_for_vids, VIDEOS, randomize=True),n=25)
display_result(convert_to_1d_collection(second_to_frame_collection(answer)))

# Faces in a Row

In [None]:
# NOTE: this rebinds the global VIDEOS (previously only videos whose show has hosts)
# to ALL video ids. Interview cells re-run after this one will see the larger list.
VIDEOS = sorted([v.id for v in Video.objects.all()])

def get_faces_in_a_row_for_vids(vids):
    """Find frames in which many faces are lined up horizontally.

    A frame group qualifies when it holds at least MIN_NUM_FACES faces (each
    at least MIN_HEIGHT tall) that can be ordered so each is `left_of` the
    next, with matching y1 and y2 within EPSILON. Faces overlapping
    commercials are removed first.

    Args:
        vids: video ids to search.

    Returns:
        The time-grouped face collection, filtered to the qualifying groups.
        Times come from frame numbers (min_frame/max_frame), not seconds.
    """
    # Imported locally, like every other query function in this notebook, so the
    # function does not depend on the star-import globals when it is handed to
    # rt.run on the ipyparallel workers.
    from django.db.models import F
    from query.models import Face
    from rekall.domain_interval_collection import DomainIntervalCollection
    from rekall.interval_set_3d_utils import P, XY, and_preds
    from rekall.bbox_predicates import left_of, same_value

    MIN_NUM_FACES = 10
    MIN_HEIGHT = 0.1
    EPSILON = 0.05

    qs = Face.objects.filter(frame__video_id__in=vids).annotate(
        video_id=F("frame__video_id"),
        min_frame=F("frame__number"),
        max_frame=F("frame__number") + 1,
        height=F('bbox_y2')-F('bbox_y1'),
    ).filter(height__gte=MIN_HEIGHT)
    faces = DomainIntervalCollection.from_django_qs(qs, DomainIntervalCollection.django_bbox_default_schema(),
                                            progress=True)

    def has_enough_faces(n):
        def pred(faces):
            return faces.size() >= n
        return pred

    def get_pattern(n):
        # One constraint per adjacent pair (i, i+1): left-to-right, same top and bottom.
        assert(n>1)
        constraints = []
        for i in range(n-1):
            name1 = str(i)
            name2 = str(i+1)
            constraints.append(([name1, name2],[XY(
                and_preds(
                    left_of(),
                    same_value('y1', epsilon=EPSILON),
                    same_value('y2', epsilon=EPSILON)))]))
        return constraints

    def faces_aligned():
        def pred(faces):
            pattern = get_pattern(faces.size())
            return len(faces.match(pattern, exact=True)) > 0
        return pred

    # Keep commercials un-converted so they share the faces' time unit.
    commercials = get_commercial_intervals_in_vids(vids, in_seconds=False)

    # has_enough_faces comes first; get_pattern asserts n > 1, so this relies on
    # and_preds evaluating in order (presumably short-circuiting -- confirm).
    aligned_faces_in_frames = faces.minus(commercials).group_by_time().filter(P(and_preds(
        has_enough_faces(MIN_NUM_FACES),
        faces_aligned())))

    return aligned_faces_in_frames

## Run on a few videos

In [None]:
# Smoke test: take every 10003rd id from VIDEOS and run the query inline.
vids = VIDEOS[::10003]
answer = get_faces_in_a_row_for_vids(vids)
# No second_to_frame conversion here: this query never converts to seconds.
display_result(convert_to_1d_collection(answer))

## Run On All of TVNews

In [None]:
import ipyparallel as ipp
from esper.rekall_parallel import get_runtime_for_ipython_cluster
import pickle
# Connect to the ipyparallel cluster running under the 'local' profile.
c = ipp.Client(profile='local')
rt = get_runtime_for_ipython_cluster(c)

# Every 100th video id starting at index 4, processed in chunks of 15 without shuffling.
vids = VIDEOS[4::100]
answer,_ = rt.run(get_faces_in_a_row_for_vids, vids, randomize=False, chunksize=15, progress=True)
# answer.size() is summed over its values() to get a grand total.
print("Total results:", sum(answer.size().values()))

In [None]:
def filter_vids(c, vids=None):
    """Restrict collection `c` to the given video ids.

    Args:
        c: a collection exposing `get_grouped_intervals()`.
        vids: ids to keep. None or an empty sequence means "no filtering".

    Returns:
        `c` itself when no filtering is requested. Otherwise a new
        DomainIntervalCollection holding only the requested ids that are
        present in `c`; unknown ids are silently skipped.
    """
    if vids is None or len(vids)==0:
        return c
    # Use the same collection type and accessor as the rest of the notebook:
    # convert_to_1d_collection calls get_grouped_intervals() on this result.
    from rekall.domain_interval_collection import DomainIntervalCollection
    grouped = c.get_grouped_intervals()
    return DomainIntervalCollection({v: grouped[v] for v in vids if v in grouped})

# An empty id list disables filtering, so this shows every result in `answer`.
display_result(convert_to_1d_collection(filter_vids(answer,[])))

# Donald Trump on All Channels

In [None]:
# Labeler ids for 'donald trump'; read as a global by get_donald_faces_on_dates.
TRUMP_FACE_LABELER_IDS = get_name_to_labeler_id(['donald trump'])['donald trump']
def get_video_ids_for_dates(dates):
    """Return ids of non-duplicate, non-corrupted videos whose `time` falls on any given date.

    Each date `d` selects the half-open range [d, d + 1 day). `dates` must be
    non-empty; this is enforced with an assert.
    """
    assert(len(dates)>0)
    import datetime as dt
    from django.db.models import Q
    from query.models import Video

    one_day = dt.timedelta(days=1)
    day_terms = [Q(time__gte=day) & Q(time__lt=day+one_day) for day in dates]
    # OR the per-day terms together, left to right.
    any_day = day_terms[0]
    for term in day_terms[1:]:
        any_day = any_day | term
    usable_videos = Video.objects.filter(duplicate=False, corrupted=False)
    return [video.id for video in usable_videos.filter(any_day)]
    
def get_donald_faces_on_dates(dates, probability=0.7, min_height=None):
    """Load face intervals, in seconds, for the TRUMP_FACE_LABELER_IDS labelers on the given dates.

    Args:
        dates: dates passed to get_video_ids_for_dates (must be non-empty).
        probability: minimum FaceIdentity probability (inclusive).
        min_height: if not None, minimum bbox height (y2 - y1, inclusive).

    Returns:
        A DomainIntervalCollection; empty when no video matches the dates.
        Each detection spans [frame / fps, frame / fps + SAMPLING_RATE].
    """
    SAMPLING_RATE = 3.0
    
    from query.models import FaceIdentity
    from django.db.models import F, FloatField
    from django.db.models.functions import Cast
    from rekall.runtime import Runtime
    from rekall.domain_interval_collection import DomainIntervalCollection

    vids = get_video_ids_for_dates(dates)
    print("{0} videos found".format(len(vids)))
    
    def get_faces_for_vid(vs):
        # Query one batch of video ids; start/end are computed in the database.
        face_id_qs = FaceIdentity.objects.filter(
            probability__gte=probability,
            face__frame__video_id__in=vs,
            face__frame__shot_boundary=False,
            labeler_id__in=TRUMP_FACE_LABELER_IDS,
        ).annotate(
            height=F('face__bbox_y2')-F('face__bbox_y1'),
            video_id=F('face__frame__video_id'),
            start=Cast(F('face__frame__number') / F('face__frame__video__fps'),
                     FloatField()),
            end=Cast(F('face__frame__number') / F('face__frame__video__fps') + SAMPLING_RATE,
                     FloatField()),
            x1=F('face__bbox_x1'),
            x2=F('face__bbox_x2'),
            y1=F('face__bbox_y1'),
            y2=F('face__bbox_y2')
        )
        if min_height is not None:
            face_id_qs = face_id_qs.filter(height__gte=min_height)
        
        faces = DomainIntervalCollection.from_django_qs(face_id_qs, {
            't1':'start',
            't2':'end',
            'x1':'x1','x2':'x2','y1':'y1','y2':'y2',
            }, progress=False)
        return faces
    # Nothing to query: return an empty collection instead of running the runtime.
    if len(vids) == 0:
        return DomainIntervalCollection({})
    # Read from Django in small batches, otherwise it gets stuck
    faces,_ = Runtime.inline().run(get_faces_for_vid, vids, chunksize=10, progress=True)
    return faces

# Time dimension will be unix timestamp
# Outputs one IntervalSet3D<VideoID, ChannelName>
def convert_to_absolute_time_and_add_channel(collection):
    """Flatten a per-video collection into one interval set on an absolute timeline.

    Each interval's time bounds are shifted by its video's start time
    (`video.time.timestamp()`, seconds since the Unix epoch), and its payload
    is replaced with `(video_id, channel_name)`.

    Args:
        collection: a collection keyed by video id, with times relative to
            the start of each video.

    Returns:
        The flattened interval set with converted intervals.
    """
    from query.models import Video
    from rekall.interval_set_3d import Interval3D

    vids = list(collection.get_grouped_intervals().keys())
    # select_related fetches each video's channel in the same query, instead of
    # issuing one extra query per video when `v.channel.name` is read.
    start_time_map = {}
    channel_map = {}
    for v in Video.objects.filter(id__in=vids).select_related('channel'):
        # Seconds since Unix Epoch
        start_time_map[v.id] = v.time.timestamp()
        channel_map[v.id] = v.channel.name

    faces = collection.add_domain_to_payload().get_flattened_intervalset()

    # Interval<VideoID, ChannelName>
    def convert(interval):
        # After add_domain_to_payload the video id is read from payload[1].
        vid = interval.payload[1]
        start = start_time_map[vid]
        channel = channel_map[vid]
        return Interval3D((interval.t[0]+start, interval.t[1]+start),
                          interval.x, interval.y, payload=(vid, channel))
    return faces.map(convert)

# intervals: IntervalSet<(VideoID,...)>, in absolute time
# Outputs a collection grouped by video id, in relative time (seconds)
def group_by_video_and_use_relative_time(intervals):
    """Regroup absolute-time intervals by video and shift them back to video-relative time.

    Args:
        intervals: interval set whose payloads carry the video id at index 0
            and whose times are Unix timestamps.

    Returns:
        A DomainIntervalCollection keyed by video id, with each interval's
        time bounds reduced by that video's `time.timestamp()`.
    """
    # Video is imported locally, like in the sibling helpers, so this function
    # does not depend on the notebook's star-import globals when it runs on
    # cluster workers via donald_on_all_channels.
    from query.models import Video
    from rekall.domain_interval_collection import DomainIntervalCollection

    by_vids = DomainIntervalCollection.from_intervalset(intervals, lambda i: i.payload[0])
    vids = by_vids.get_grouped_intervals().keys()
    start_time_map = {v.id: v.time.timestamp() for v in Video.objects.filter(id__in=vids)}

    def convert_time(i):
        vid = i.payload[0]
        start = start_time_map[vid]
        j = i.copy()
        j.t = i.t[0]-start, i.t[1]-start
        return j

    return by_vids.map(convert_time)

# Returns Interval<Faces>
def donald_on_all_channels(dates):
    """Find time spans where the tracked face is on screen on every channel at once.

    Faces are loaded for `dates`, moved onto an absolute timeline, and grouped
    by channel. Each channel's faces are then joined against the running
    result on temporal overlap, so only spans shared by all channels survive.
    If fewer than MIN_NUM_CHANNELS channels have any faces, the result is an
    empty IntervalSet3D.

    Each surviving interval's payload is the list of contributing faces,
    regrouped by video in video-relative seconds.
    """
    MIN_FACE_PROB = 0.7
    MIN_FACE_HEIGHT = 0.3
    MIN_NUM_CHANNELS = 3

    from rekall.temporal_predicates import overlaps
    from rekall.interval_set_3d import Interval3D, IntervalSet3D
    from rekall.interval_set_3d_utils import T, overlap_bound
    from rekall.domain_interval_collection import DomainIntervalCollection

    trump_faces = get_donald_faces_on_dates(
        dates,
        probability=MIN_FACE_PROB,
        min_height=MIN_FACE_HEIGHT)
    print("got faces")

    # Face<VideoID, Channel>
    faces_with_channel = convert_to_absolute_time_and_add_channel(trump_faces)
    print("converted to absolute time")

    def channel_of(face):
        return face.payload[1]

    faces_per_channel = DomainIntervalCollection.from_intervalset(faces_with_channel, channel_of)
    per_channel_sets = faces_per_channel.get_grouped_intervals()

    if len(per_channel_sets) < MIN_NUM_CHANNELS:
        return IntervalSet3D([])

    def start_chain(face):
        # First channel: wrap each face in a one-element payload list.
        return Interval3D(face.t, payload=[face])

    def extend_chain(chain, face):
        # Later channels: keep the shared time span and append the new face.
        return [Interval3D(overlap_bound(chain.t, face.t), payload=chain.payload + [face])]

    output = None
    for channel, channel_faces in per_channel_sets.items():
        if output is None:
            output = channel_faces.map(start_chain)
        else:
            output = output.join(
                channel_faces,
                T(overlaps()),
                extend_chain,
                time_window=0,
            )
        print("{0} intervals found".format(output.size()))
    if output is None:
        return IntervalSet3D([])

    def regroup(face_list):
        return group_by_video_and_use_relative_time(IntervalSet3D(face_list))

    return output.map_payload(regroup)



## Run on a few dates

In [None]:
import datetime
# NUM_DATES consecutive days starting 2017-01-20.
NUM_DATES=1
dates = [datetime.date(2017,1,20)+ i*datetime.timedelta(days=1) for i in range(NUM_DATES)]
answer = donald_on_all_channels(dates)

# Shows only the first result's payload; get_intervals()[0] raises IndexError
# when `answer` is empty (e.g. fewer than 3 channels had matching faces).
display_result(convert_to_1d_collection(second_to_frame_collection(answer.get_intervals()[0].payload)))

## Run on one Year

In [None]:
# Imported here so the cell does not rely on an earlier cell having imported datetime.
import datetime

# Every calendar day of YEAR.
YEAR = 2015
start = datetime.date(YEAR,1,1)
delta = datetime.timedelta(days=1)
# Let the calendar decide the year length instead of hand-rolled leap-year arithmetic.
days_in_year = (datetime.date(YEAR+1,1,1) - start).days
is_leap = days_in_year == 366
dates = [start + delta * i for i in range(days_in_year)]

import ipyparallel as ipp
from esper.rekall_parallel import get_runtime_for_ipython_cluster
import pickle
# Connect to the ipyparallel cluster running under the 'local' profile.
c = ipp.Client(profile='local')
rt = get_runtime_for_ipython_cluster(c)

# Each task receives a chunk of 10 dates, in calendar order.
answer,_ = rt.run(donald_on_all_channels, dates, randomize=False, chunksize=10, progress=True)
print("Total results:", answer.size())

## Run on entire dataset

In [None]:
# Imported here so the cell does not rely on an earlier cell having imported datetime.
import datetime

# Every calendar day from 2009-01-01 through 2018-12-31.
YEARS = range(2009,2019)
dates = []
for y in YEARS:
    start = datetime.date(y,1,1)
    delta = datetime.timedelta(days=1)
    # Let the calendar decide the year length instead of hand-rolled leap-year arithmetic.
    days_in_year = (datetime.date(y+1,1,1) - start).days
    is_leap = days_in_year == 366
    dates.extend(start + delta * i for i in range(days_in_year))

import ipyparallel as ipp
from esper.rekall_parallel import get_runtime_for_ipython_cluster
import pickle
# Connect to the ipyparallel cluster running under the 'local' profile.
c = ipp.Client(profile='local')
rt = get_runtime_for_ipython_cluster(c)

# Each task receives a chunk of 5 dates, in calendar order.
answer,_ = rt.run(donald_on_all_channels, dates, randomize=False, chunksize=5, progress=True)
print("Total results:", answer.size())

# Scratchpad

In [None]:
# Hand-picked video ids for debugging. The list contains repeats (e.g. 50164, 59398).
vids = [763, 3769, 5281, 8220, 9901, 12837, 13141, 26386, 33004, 33004, 34642, 38275, 42756, 50164, 50164, 50164, 50164, 50164, 50164, 50164, 52075, 52945, 54377, 54377, 59122, 59122, 59398, 59398, 59398, 59398]

# Here `answer` is a dict from name to collection, not a single collection.
answer = get_person_intervals_in_vids(HOST_LIST + GUEST_LIST, vids, 0.7,0.2)
display_result(convert_to_1d_collection(second_to_frame_collection(answer['bernie sanders'])))

In [None]:
# Requires `answer` to be the name -> collection dict from get_person_intervals_in_vids.
display_result(convert_to_1d_collection(second_to_frame_collection(answer['jake tapper'])))

In [None]:
# Rebinds `answer` from the per-person dict to the interview collection for `vids`.
answer = get_interviews_for_vids(vids)

In [None]:
# Show the interview collection; times are converted from seconds back to frames first.
display_result(convert_to_1d_collection(second_to_frame_collection(answer)))

In [None]:
# Names of all labelers that carry one of the three face-identity prefixes.
ls=[
    labeler.name
    for labeler in Labeler.objects.all()
    if labeler.name.startswith(
        ('face-identity:', 'face-identity-converted:', 'face-identity-uncommon:'))
]

In [None]:
# Strip the labeler prefix, keeping the part after the first ':'.
# NOTE: rebinds `ls` in place, so running this cell twice raises IndexError
# (the stripped names presumably contain no ':').
ls=[l.split(':')[1] for l in ls]

In [None]:
# Print every guest that has no face-identity labeler.
for g in GUEST_LIST:
    if g not in ls:
        print(g)

In [None]:
# Print every host that has no face-identity labeler.
for h in HOST_LIST:
    if h not in ls:
        print(h)

In [None]:
# Inspect the stripped labeler names.
ls

In [None]:
# Host names in alphabetical order.
sorted(HOST_LIST)

In [None]:
# Hand-labeled interviews with guest1 "bernie sanders" and original=True;
# min_frame/max_frame are fps * start and fps * end.
interviews = LabeledInterview.objects \
        .annotate(fps=F('video__fps')) \
        .annotate(min_frame=F('fps') * F('start')) \
        .annotate(max_frame=F('fps') * F('end')) \
        .filter(guest1="bernie sanders", original=True)
# Ids of the videos those labeled interviews belong to.
print([i.video.id for i in interviews])

In [None]:
# Number of entries in `vids` (repeats included).
len(vids)

In [None]:
# Inspect the entry stored under key 10; needs an `answer` that exposes get_allintervals().
answer.get_allintervals()[10]

In [None]:
# Inspect the current `answer` object.
answer

In [None]:
# Number of ids in VIDEOS; depends on which cell last defined VIDEOS.
len(VIDEOS)

In [None]:
# First 100 video ids.
VIDEOS[:100]

In [None]:
# Total number of Video rows in the database.
Video.objects.count()

In [None]:
# Union of all hosts' face intervals in video 188346.
# DomainIntervalCollection is the type get_person_intervals_in_vids returns and
# the type whose get_grouped_intervals() second_to_frame_collection calls, so
# the accumulator uses it too (same pattern as get_interviews_for_vids).
from rekall.domain_interval_collection import DomainIntervalCollection
vids = [188346]
people_to_intervals = get_person_intervals_in_vids(HOST_LIST + GUEST_LIST, vids, 0.7,0.2)
hosts = DomainIntervalCollection({})
for host_name in HOST_LIST:
    hosts = hosts.union(people_to_intervals[host_name])

In [None]:
# Show the combined host intervals, converted from seconds back to frames.
display_result(convert_to_1d_collection(second_to_frame_collection(hosts)))

In [None]:
import pickle
# Load previously saved interview results. The file is opened in a `with`
# block so the handle is closed even if unpickling fails.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('../data/interviews/paper/interview_10y-all.pkl', 'rb') as pkl_file:
    a = pickle.load(pkl_file)

In [None]:
# Saved results for 'John Kasich' in video 188346 (the pickle is keyed by capitalised name).
a['John Kasich'][188346]

In [None]:
# Position of video 188346 in VIDEOS; raises ValueError if it is not in the list.
VIDEOS.index(188346)

In [None]:
# Freshly computed intervals for 'john kasich' in video 188346, to compare with the pickle.
people_to_intervals['john kasich'].get_allintervals()[188346]

In [None]:
# fps * 3 for this video, i.e. the unrounded fps * SAMPLE_RATE coalescing gap in frames.
Video.objects.get(id=188346).fps * 3

In [None]:
def get_person_intervals_in_vids_frames(person_names, vids, probability=0.7, min_height=None):
    """Debug variant of get_person_intervals_in_vids that keeps times in frames.

    Differences from get_person_intervals_in_vids, as visible in the code:
    it uses VideoIntervalCollection3D / get_allintervals(), shows a progress
    bar sized by an explicit count(), does not round the coalescing epsilon,
    and does not convert the result to seconds.

    Returns:
        dict mapping each name to a VideoIntervalCollection3D of coalesced intervals.
    """
    from query.models import FaceIdentity
    from django.db.models import F,Q
    from rekall.video_interval_collection_3d import VideoIntervalCollection3D
    from rekall.interval_set_3d import Interval3D
    from rekall.interval_set_3d_utils import P
    
    SAMPLE_RATE = 3 # Every 3s

    # Labeler ids for all requested people, flattened into one list.
    lids = []
    for n in person_names:
        lids.extend(NAME_TO_LABELER_ID[n])
    
    face_id_qs = FaceIdentity.objects.filter(
        probability__gte=probability,
        face__frame__video_id__in=vids,
        face__frame__shot_boundary=False,
        labeler_id__in=lids,
    ).annotate(
        height=F('face__bbox_y2')-F('face__bbox_y1'),
        labeler_name=F('labeler__name'),
        video_id=F('face__frame__video_id'),
        frame_number=F('face__frame__number'),
        x1=F('face__bbox_x1'),
        x2=F('face__bbox_x2'),
        y1=F('face__bbox_y1'),
        y2=F('face__bbox_y2'),
    )
    if min_height is not None:
        face_id_qs = face_id_qs.filter(height__gte=min_height)
    
    # Extra COUNT query; its result is passed as `total` alongside progress=True.
    total = face_id_qs.count()
    faces = VideoIntervalCollection3D.from_django_qs(face_id_qs, {
        't1':'frame_number',
        't2':'frame_number',
        'x1':'x1','x2':'x2','y1':'y1','y2':'y2',
    }, with_payload=lambda row: row.labeler_name.split(':')[1], progress=True, total=total)

    fps_map = get_fps_map(set(faces.get_allintervals().keys()))
    names_to_collection = {}
    for n in person_names:
        faces_one_person = faces.filter(P(lambda p: p==n))
        output = {}
        for vid, intervals in faces_one_person.get_allintervals().items():
            fps = fps_map[vid]
            # Unrounded, unlike get_person_intervals_in_vids.
            eps = fps * SAMPLE_RATE
            output[vid] = intervals.temporal_coalesce(epsilon=eps)
        names_to_collection[n] = VideoIntervalCollection3D(output)
    return names_to_collection
# Frame-based intervals for 'john kasich' in video 188346 (probability >= 0.7, height >= 0.2).
jk = get_person_intervals_in_vids_frames(['john kasich'], [188346], probability=0.7, min_height=0.2)
jk['john kasich'].get_allintervals()


In [None]:
# Number of keys in `answer`; needs an `answer` that exposes get_allintervals().
len(answer.get_allintervals())

In [None]:
# Intervals for key 42341 with payloads blanked out for a compact display.
answer.get_allintervals()[42341].map_payload(lambda _:None)

In [None]:
# Split the intervals for key 42341 using the third payload element.
answer.get_allintervals()[42341].split(lambda i:i.payload[2])

In [None]:
# Number of entries in the current `vids`.
len(vids)

In [None]:
# The original second line ended in a dangling attribute access ("[k].)") and
# had no loop over `keys`, so the cell was a SyntaxError. Completed here as a
# per-key size listing, using the size() method seen elsewhere in the notebook.
# NOTE(review): confirm size() is the figure that was wanted.
keys = answer.get_allintervals().keys()
[(k, answer.get_allintervals()[k].size()) for k in keys]

In [None]:
def discard(p):
    """Payload mapper that drops the payload; always returns None."""
    return None
# Second-based intervals for key 128907, with payloads removed for a compact display.
frame_to_second_collection(answer).get_allintervals()[128907].map_payload(discard)

In [None]:
# Start time of video 1 as a Unix timestamp.
Video.objects.get(id=1).time.timestamp()

In [None]:
import datetime
# Count the non-duplicate, non-corrupted videos whose time falls on 2016-12-16.
date = datetime.date(2016,12,16)
vs=Video.objects.filter(time__gte=date, time__lt=date+datetime.timedelta(days=1), duplicate=False, corrupted=False)
vs.count()

In [None]:
# Start times, in order, of that day's videos with channel_id 1.
[v.time for v in vs.filter(channel_id=1).order_by('time')]

In [None]:
# All labelers whose name contains 'donald trump'.
Labeler.objects.filter(name__contains='donald trump')

In [None]:
# The notebook defines the plural TRUMP_FACE_LABELER_IDS (a list);
# the singular name is never defined and raised NameError.
TRUMP_FACE_LABELER_IDS

In [None]:
# The original call was a SyntaxError (a bare `face__frame__video_id` after a
# keyword argument) and referenced the undefined singular TRUMP_FACE_LABELER_ID.
# NOTE(review): the unfinished video-id filter was dropped; add
# `face__frame__video_id=<id>` if a single video was intended.
FaceIdentity.objects.filter(labeler_id__in=TRUMP_FACE_LABELER_IDS)

In [None]:
# Cast and FloatField are otherwise imported only inside get_donald_faces_on_dates,
# so they are imported here to let the cell run on its own.
from django.db.models import F, FloatField
from django.db.models.functions import Cast

# Debug query: faces for labeler 419 in video ids 0-9, at least 0.4 tall.
vids = list(range(10))
probability=0.7
face_id_qs = FaceIdentity.objects.filter(
    probability__gte=probability,
    face__frame__video_id__in=vids,
    face__frame__shot_boundary=False,
    labeler_id__in=[419],
).annotate(
    height=F('face__bbox_y2')-F('face__bbox_y1'),
    labeler_name=F('labeler__name'),
    video_id=F('face__frame__video_id'),
    start=Cast(F('face__frame__number') / F('face__frame__video__fps'),
                 FloatField()),
    end=F('start') + 3.0,
    x1=F('face__bbox_x1'),
    x2=F('face__bbox_x2'),
    y1=F('face__bbox_y1'),
    y2=F('face__bbox_y2'),
    time=F('face__frame__video__time')
).filter(height__gte=0.4)
# Video of the first match; indexing raises IndexError when nothing matches.
face_id_qs[0].face.frame.video

In [None]:
# Inspect the current `date` value.
date

In [None]:
# First interval of `answer`; needs an `answer` that exposes get_intervals().
answer.get_intervals()[0]

In [None]:
# Payload of the first interval of `answer`.
answer.get_intervals()[0].payload

In [None]:
# Sorted keys of `answer`; needs an `answer` that exposes get_allintervals().
sorted(answer.get_allintervals().keys())

In [None]:
# Entry stored under the string key '2009-10-28'; needs an `answer` keyed by date strings.
answer.get_allintervals()['2009-10-28']

In [None]:
# `sort_by_video` is not defined anywhere in this notebook and raised NameError.
# donald_on_all_channels already regroups each payload by video, so the payload
# is passed straight through, as in the "Run on a few dates" cell.
# Shows the second result; get_intervals()[1] raises IndexError with fewer than two.
display_result(convert_to_1d_collection(second_to_frame_collection(answer.get_intervals()[1].payload)))