<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc" style="margin-top: 1em;"><ul class="toc-item"><li><span><a href="#Shot-Boundary-Eval" data-toc-modified-id="Shot-Boundary-Eval-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Shot Boundary Eval</a></span><ul class="toc-item"><li><span><a href="#Human-(Dan-and-David)-Annotated-Clips" data-toc-modified-id="Human-(Dan-and-David)-Annotated-Clips-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Human (Dan and David) Annotated Clips</a></span></li><li><span><a href="#Microshots" data-toc-modified-id="Microshots-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Microshots</a></span><ul class="toc-item"><li><span><a href="#Shot-Boundaries" data-toc-modified-id="Shot-Boundaries-1.2.1"><span class="toc-item-num">1.2.1&nbsp;&nbsp;</span>Shot Boundaries</a></span></li><li><span><a href="#Incorporating-Motion-Flow" data-toc-modified-id="Incorporating-Motion-Flow-1.2.2"><span class="toc-item-num">1.2.2&nbsp;&nbsp;</span>Incorporating Motion Flow</a></span></li><li><span><a href="#Compute-which-shots-are-accurate/inaccurate" data-toc-modified-id="Compute-which-shots-are-accurate/inaccurate-1.2.3"><span class="toc-item-num">1.2.3&nbsp;&nbsp;</span>Compute which shots are accurate/inaccurate</a></span></li><li><span><a href="#Precision/Recall-across-the-three-clips" data-toc-modified-id="Precision/Recall-across-the-three-clips-1.2.4"><span class="toc-item-num">1.2.4&nbsp;&nbsp;</span>Precision/Recall across the three clips</a></span></li><li><span><a href="#Visualizing-Mistakes-and-Missed-Shot-Boundaries" data-toc-modified-id="Visualizing-Mistakes-and-Missed-Shot-Boundaries-1.2.5"><span class="toc-item-num">1.2.5&nbsp;&nbsp;</span>Visualizing Mistakes and Missed Shot Boundaries</a></span></li><li><span><a href="#Frames-at-Mistakes-and-Missed-Boundaries" data-toc-modified-id="Frames-at-Mistakes-and-Missed-Boundaries-1.2.6"><span class="toc-item-num">1.2.6&nbsp;&nbsp;</span>Frames at Mistakes and Missed Boundaries</a></span><ul 
class="toc-item"><li><span><a href="#Clip-1" data-toc-modified-id="Clip-1-1.2.6.1"><span class="toc-item-num">1.2.6.1&nbsp;&nbsp;</span>Clip 1</a></span></li><li><span><a href="#Clip-2" data-toc-modified-id="Clip-2-1.2.6.2"><span class="toc-item-num">1.2.6.2&nbsp;&nbsp;</span>Clip 2</a></span></li><li><span><a href="#Clip-3" data-toc-modified-id="Clip-3-1.2.6.3"><span class="toc-item-num">1.2.6.3&nbsp;&nbsp;</span>Clip 3</a></span></li></ul></li></ul></li></ul></li></ul></div>

In [None]:
import scannerpy
import scannertools as st
import numpy as np
from scipy.spatial import distance
from esper.prelude import *
from rekall.interval_list import IntervalList
from rekall.video_interval_collection import VideoIntervalCollection
from rekall.logical_predicates import *
from rekall.temporal_predicates import *
from rekall.payload_predicates import *
from rekall.list_predicates import *
from rekall.bbox_predicates import *
from rekall.spatial_predicates import *
from rekall.merge_ops import *

# Shot Boundary Eval
In this notebook we evaluate shot boundaries against human annotations.

In [None]:
# Database id of the video being evaluated; reused below to select the
# human-annotated shots and to render frames in the visualization widgets.
video_id = 123

## Human (Dan and David) Annotated Clips
We manually annotated shot boundaries in three five-minute clips in the movie Mr. and Mrs. Smith (2005). The first clip is minutes 10-15, the second clip is minutes 60-65, and the third clip is minutes 90-95.

In [None]:
# Load the annotated shots for this video as (min_frame, max_frame, shot id) intervals.
# NOTE(review): labeler_id=12 is presumably the labeler used for the manual
# (human) annotations — confirm and consider naming the constant.
human_shots = IntervalList(
    [(shot.min_frame, shot.max_frame, shot.id)
     for shot in Shot.objects.filter(video_id=video_id, labeler_id=12).all()]
)

In [None]:
# Group the annotated shots into clips. Presumably dilate(1) grows every shot by
# one frame so back-to-back shots touch, coalesce() merges each touching run into
# a single interval, and dilate(-1) shrinks the result back — TODO confirm
# against the rekall documentation.
clips = human_shots.dilate(1).coalesce().dilate(-1)

In [None]:
# Report the frame range covered by each annotated clip (clips are numbered from 1).
for clip_number, clip in enumerate(clips.get_intervals(), start=1):
    first_frame = clip.get_start()
    last_frame = clip.get_end()
    print('Clip {} starts at frame {} and ends at frame {}'.format(
        clip_number, first_frame, last_frame))

## Microshots
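We use RGB color histograms to generate candidate microshots: we compute the difference between the color histograms of consecutive frames and treat outliers in that difference as candidate shot boundaries. Then we take out any shot boundaries that are fewer than ten frames after another shot boundary. Finally, we discard boundaries where the high-confidence faces detected just before and just after the boundary are in similar positions, since such a transition is unlikely to be a real cut.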

In [None]:
# Scanner database handle; passed to the scannertools histogram and
# face-detection calls below.
db = scannerpy.Database()

In [None]:
# Look up the video under evaluation through the shared `video_id` rather than a
# second hard-coded literal, so this cell cannot drift from the query that loads
# the human annotations.
videos = Video.objects.filter(id=video_id).all()

# Per-frame color histograms for every selected video, computed by scannertools.
hists = st.histograms.compute_histograms(
    db,
    videos=[video.for_scannertools() for video in videos]
)

# Do simple outlier detection to find boundaries between shots
WINDOW_SIZE=500
hists_list = list(hists[0].load())

# For each pair of consecutive frames, average the Chebyshev distance between
# their histograms over the first three histogram entries of each frame.
diffs = np.array([
    np.mean([
        distance.chebyshev(previous_hist[channel], current_hist[channel])
        for channel in range(3)
    ])
    for previous_hist, current_hist in zip(hists_list, hists_list[1:])
])
# Frame 0 has no predecessor; pad with 0 so that diffs[k] belongs to frame k.
diffs = np.insert(diffs, 0, 0)
n = len(diffs)

# A frame is a candidate boundary when its difference exceeds the mean of the
# surrounding window (up to WINDOW_SIZE frames each side) by more than 2.5
# standard deviations of that window.
boundaries = []
for frame_idx in range(1, n):
    neighborhood = diffs[max(frame_idx - WINDOW_SIZE, 0):min(frame_idx + WINDOW_SIZE, n)]
    excess = diffs[frame_idx] - np.mean(neighborhood)
    if excess > 2.5 * np.std(neighborhood):
        boundaries.append(frame_idx)
        
video = videos[0]

# Frames to run face detection on: a regular sample every round(fps) / 2 frames
# (roughly twice per second), plus every candidate boundary frame and the frame
# immediately before it.
sample_stride = int(round(video.fps) / 2)
frames_set = set(range(0, video.num_frames, sample_stride))
frames_set.update(boundaries)
frames_set.update(boundary - 1 for boundary in boundaries if boundary > 0)
frames = sorted(frames_set)

# Run face detection on the selected frames of the video.
faces = st.face_detection.detect_faces(
    db,
    videos=[video.for_scannertools()],
    frames=[frames]
)

# One single-frame interval per processed frame, carrying that frame's face list
# as its payload.
faces_per_frames = IntervalList([
    (frame, frame, facelist)
    for frame, facelist in zip(frames, faces[0].load())
])

# Each candidate boundary becomes the two-frame interval (boundary - 1, boundary),
# i.e. the frame just before the cut and the frame at the cut.
transitions = IntervalList([(boundary - 1, boundary, 0) for boundary in boundaries])

# Keep the per-frame face intervals that overlap a transition and whose payload
# passes length_at_least(1) (presumably: at least one detected face — confirm).
faces_at_boundaries = faces_per_frames.filter_against(
    transitions,
    predicate=overlaps()
).filter(payload_satisfies(length_at_least(1)))

# Get all transitions where there are faces before and after the transition
boundaries_with_faces = transitions.filter_against(
    faces_at_boundaries, predicate=starts_inv()
).filter_against(
    transitions.filter_against(faces_at_boundaries, predicate=finishes_inv()),
    predicate=equal()
)

# Attach the face payload matched via starts_inv() to each such transition
# (presumably the faces on the frame before the cut — confirm rekall semantics).
boundaries_starting_faces = boundaries_with_faces.merge(
    faces_at_boundaries, predicate = starts_inv(),
    payload_merge_op = payload_second
)

# Same, but for the face payload matched via finishes_inv()
# (presumably the faces on the frame at the cut — confirm rekall semantics).
boundaries_ending_faces = boundaries_with_faces.merge(
    faces_at_boundaries, predicate = finishes_inv(),
    payload_merge_op = payload_second
)

# Combine both face lists into one payload per transition, keyed by
# 'starts' and 'finishes'.
boundaries_transition_faces = boundaries_starting_faces.merge(
    boundaries_ending_faces, predicate=equal(),
    payload_merge_op = lambda starts_payload, finishes_payload: { 'starts': starts_payload, 'finishes': finishes_payload }
)

def similar_face_lists(faces):
    """Payload predicate comparing the faces on either side of a transition.

    Builds a scene graph with one node per face in ``faces['starts']`` whose
    score is above 0.9. Each node requires a face at the same position
    (``position(..., epsilon=.05)``) that also scores above 0.9. The graph is
    then evaluated, with ``exact=True``, against the faces in
    ``faces['finishes']`` converted to plain dicts.

    Args:
        faces: dict with keys ``'starts'`` and ``'finishes'``, each an iterable
            of face objects exposing ``x1``, ``y1``, ``x2``, ``y2`` and ``score``.

    Returns:
        Whatever the ``scene_graph`` predicate returns for the finishing faces
        (presumably truthy when the two face layouts match — confirm against
        rekall).
    """
    nodes = []
    # Enumerate before filtering so node names keep the index in the full list.
    for idx, start_face in enumerate(faces['starts']):
        if not start_face.score > 0.9:
            continue
        nodes.append({
            'name': 'face{}'.format(idx),
            'predicates': [
                position(start_face.x1, start_face.y1,
                         start_face.x2, start_face.y2, epsilon=.05),
                lambda candidate: candidate['score'] > 0.9,
            ],
        })

    matcher = scene_graph({'nodes': nodes, 'edges': []}, exact=True)
    return matcher([
        {
            'x1': finish_face.x1,
            'y1': finish_face.y1,
            'x2': finish_face.x2,
            'y2': finish_face.y2,
            'score': finish_face.score,
        }
        for finish_face in faces['finishes']
    ])
    
# Transitions whose before/after face lists satisfy similar_face_lists; these are
# treated as bad boundaries and removed from the shots in a later cell.
bad_boundaries = boundaries_transition_faces.filter(
    payload_satisfies(similar_face_lists)
)

In [None]:
def boundaries_to_shots_fold(acc, frame):
    """Fold step that turns a sequence of boundary intervals into shots.

    The first boundary seeds the accumulator with a copy of itself. For every
    later boundary, the most recent shot is closed one frame before the new
    boundary. If that leaves it with a positive ``length()``, a copy of the
    boundary is appended as the start of a new shot. Otherwise the most recent
    shot is stretched to end at the boundary and no new shot is opened.

    Args:
        acc: list of shot intervals built so far (mutated in place).
        frame: the next boundary interval; needs ``start``, ``copy()``.

    Returns:
        The accumulator list, or a new one-element list when ``acc`` is empty.
    """
    if acc == []:
        return [frame.copy()]

    previous_shot = acc[-1]
    previous_shot.end = frame.start - 1
    if not previous_shot.length() > 0:
        # Closing the shot left it without positive length: absorb this boundary.
        previous_shot.end = frame.start
        return acc

    acc.append(frame.copy())
    return acc

def boundaries_to_shots(boundaries):
    """Convert a list of boundary frame numbers into shots.

    Frame 0 is prepended as an implicit first boundary. Every boundary becomes
    a single-frame interval with payload 0, and the intervals are folded with
    ``boundaries_to_shots_fold``.

    Args:
        boundaries: list of frame numbers at which a new shot starts.

    Returns:
        The result of ``IntervalList.fold_list`` over the boundary intervals.
    """
    boundary_points = [(frame, frame, 0) for frame in [0] + boundaries]
    return IntervalList(boundary_points).fold_list(boundaries_to_shots_fold, [])

In [None]:
# Turn the candidate boundaries into microshots.
microshots = boundaries_to_shots(boundaries)
# Microshots selected by filter_length(max_length=10), i.e. the short ones
# (exact length convention is rekall's — confirm).
short_microshots = microshots.filter_length(max_length=10)
# Extend each short microshot by one frame at its end, coalesce them, then union
# with all microshots and coalesce again. Presumably this merges each run of short
# microshots with the shot that follows it — TODO confirm rekall semantics.
shots = microshots.set_union(
    short_microshots.map(lambda i: (i.start, i.end + 1, i.payload)).coalesce()
).coalesce()

In [None]:
# Remove the bad boundaries we identified earlier
# bad_boundaries are (boundary - 1, boundary) intervals; shifting the start by one
# collapses each to the single boundary frame before matching it against shots.
# Presumably starts_inv() then selects the shots that begin on such a frame —
# confirm against rekall.
bad_shots = shots.filter_against(
    bad_boundaries.map(lambda i: (i.start+1, i.end, i.payload)),
    predicate=starts_inv()
)
# Reduce every shot to a zero-length interval at its first frame.
shot_boundaries = shots.map(lambda i: (i.start, i.start, i.payload))
# Drop the boundary points covered by bad shots, then rebuild shots from what is left.
shot_boundaries_without_bad_shots = shot_boundaries.minus(bad_shots)
# NOTE: this reassigns `shots`, so re-running only this cell operates on the
# already-filtered shots rather than on the output of the previous cell.
shots = shot_boundaries_without_bad_shots.fold_list(boundaries_to_shots_fold, [])

### Shot Boundaries
For most of our precision/recall analysis, we're interested in shot boundaries, so we convert the `shots` and `human_shots` interval lists to boundaries (the first frame of each shot).

In [None]:
# Reduce each shot to a zero-length interval at its first frame, for both the
# detected shots and the human-annotated shots.
shot_boundaries = shots.map(lambda i: (i.start, i.start, i.payload))
human_shot_boundaries = human_shots.map(lambda i: (i.start, i.start, i.payload))

### Incorporating Motion Flow
If you want to incorporate motion flow into the shot detector, uncomment and run the three cells below.

In [None]:
# Uncomment and run this cell to get our best go using motion flow histograms too
# import scannertools as st
# import scannerpy
# video = Video.objects.get(id=video_id)
# frames = [list(range(0, video.num_frames))]

# db = scannerpy.Database()

# flow_histograms = st.histograms.compute_flow_histograms(
#     db,
#     videos=[video.for_scannertools()],
#     frames=frames)

# avg_magnitudes = [
#     np.sum([i * bin_num for bin_num, i in enumerate(hist[0])]) /
#     np.sum(hist[0])
#     for hist in flow_histograms[0].load()
# ]

# avg_magnitudes_list = IntervalList([(frame, frame, mag) for frame, mag in enumerate(avg_magnitudes)])

# def window(intervallist, n):
#     from rekall.logical_predicates import or_pred
#     from rekall.temporal_predicates import before, after
#     from rekall.merge_ops import payload_plus
    
#     def my_merge_op(interval1, interval2):
#         return [(interval1.start, interval1.end, [{
#             'interval1': interval1.payload,
#             'interval2': interval2.payload
#         }])]
    
#     return intervallist.join(
#         intervallist, merge_op=my_merge_op,
#         predicate=or_pred(before(max_dist=n), after(max_dist=n), arity=2),
#         working_window=n
#     ).coalesce(payload_merge_op=payload_plus)

# avg_magnitudes_windows = window(avg_magnitudes_list, 5)

# window_means_stds = avg_magnitudes_windows.map(
#     lambda interval: (interval.start, interval.end, {
#         'mean_mag': np.mean([p['interval2'] for p in interval.payload]),
#         'std_mag': np.std([p['interval2'] for p in interval.payload]),
#         'my_mag': interval.payload[0]['interval1']
#     }))

In [None]:
# Generate microshot boundary candidates
# flow_microshot_boundaries = window_means_stds.filter(
#     payload_satisfies(lambda payload: 
#                       (payload['my_mag'] - payload['mean_mag'] > 2 * payload['std_mag']))).map(
#     lambda intrvl: (intrvl.start + 1, intrvl.end + 1, intrvl.payload)
# )
# heavy_flow_microshot_boundaries = window_means_stds.filter(
#     payload_satisfies(lambda payload: payload['my_mag'] - payload['mean_mag'] > 3 * payload['std_mag'])).map(
#     lambda intrvl: (intrvl.start + 1, intrvl.end + 1, intrvl.payload)
# )

# def boundaries_to_shots_fold(acc, frame):
#     if acc == []:
#         return [frame.copy()]
#     top = acc[-1]
#     top.end = frame.start - 1
#     if top.length() > 0:
#         acc.append(frame.copy())
#     else:
#         top.end = frame.start
#     return acc

# # Generate new shots

# # rgb_and_flow_microshot_boundaries = shot_boundaries.set_union(heavy_flow_microshot_boundaries)
# # rgb_and_flow_microshot_boundaries = shot_boundaries.overlaps(flow_microshot_boundaries)
# rgb_and_flow_microshot_boundaries = shot_boundaries.overlaps(
#     flow_microshot_boundaries).set_union(heavy_flow_microshot_boundaries)
# rgb_and_flow_microshots = rgb_and_flow_microshot_boundaries.fold_list(boundaries_to_shots_fold, [])
# short_rgb_and_flow = rgb_and_flow_microshots.filter_length(max_length=10)
# rgb_and_flow_shots = rgb_and_flow_microshots.set_union(
#     short_rgb_and_flow.map(lambda i: (i.start, i.end + 1, i.payload)).coalesce()
# ).coalesce()

In [None]:
#shot_boundaries = rgb_and_flow_shots.map(lambda i: (i.start, i.start, i.payload))

### Compute which shots are accurate/inaccurate

In [None]:
# Precision side: detected boundaries that overlap a human boundary are accurate;
# the remaining detected boundaries are inaccurate.
accurate_shot_boundaries = shot_boundaries.filter_against(human_shot_boundaries, predicate=overlaps())
inaccurate_shot_boundaries = shot_boundaries.minus(accurate_shot_boundaries)

# Recall side: human boundaries that overlap a detected boundary were found;
# the remaining human boundaries were missed.
found_human_shot_boundaries = human_shot_boundaries.filter_against(shot_boundaries, predicate=overlaps())
missed_human_shot_boundaries = human_shot_boundaries.minus(found_human_shot_boundaries)

### Precision/Recall across the three clips
Let's graph precision/recall numbers for our three clips.

In [None]:
# Per-clip precision and recall of the detected boundaries against the human
# annotations, collected as (precision, recall, clip name) tuples.
data = []
for i, clip in enumerate(clips.get_intervals()):
    def filter_fn(intrvl):
        """Keep only intervals that overlap the clip currently being scored."""
        return overlaps()(intrvl, clip)

    # Count each restricted set once and reuse the counts below.
    num_detected = shot_boundaries.filter(filter_fn).size()
    num_accurate = accurate_shot_boundaries.filter(filter_fn).size()
    num_human = human_shot_boundaries.filter(filter_fn).size()
    num_found = found_human_shot_boundaries.filter(filter_fn).size()

    # Guard the ratios: a clip with no detected (or no annotated) boundaries would
    # otherwise raise ZeroDivisionError; such a clip is reported as 0.0.
    precision = num_accurate / num_detected if num_detected else 0.0
    recall = num_found / num_human if num_human else 0.0
    data.append((precision, recall, 'Clip {}'.format(i + 1)))

    print("Total human shot boundaries in Clip {}: {}".format(i + 1, num_human))

precisions = [precision for precision, _, _ in data]
recalls = [recall for _, recall, _ in data]
names = [name for _, _, name in data]
N = len(names)

ax = plt.gca()

# Grouped bar chart: one precision bar and one recall bar per clip.
width = 0.35
ind = np.arange(N)
p1 = ax.bar(ind, precisions, width)
p2 = ax.bar(ind + width, recalls, width)

ax.set_title('Precision and Recall by clip')
# Center each tick label between the two bars of its clip.
ax.set_xticks(ind + width / 2)
ax.set_xticklabels(names)
ax.set_ylim((0, 1))

ax.legend((p1[0], p2[0]), ('Precision', 'Recall'))

def autolabel(rects):
    """
    Attach a text label above each bar displaying its height
    """
    for rect in rects:
        height = rect.get_height()
        ax.text(rect.get_x() + rect.get_width()/2., 1.05*height,
                '%f' % height,
                ha='center', va='bottom')

autolabel(p1)
autolabel(p2)

plt.show()

### Visualizing Mistakes and Missed Shot Boundaries
Let's visualize where the mistakes happen.

In [None]:
# Each series is drawn as a horizontal row of points at a fixed y level:
# (interval list, y level, legend label), in drawing order.
boundary_series = [
    (shot_boundaries, 2.2, 'Histogram shot boundaries'),
    (inaccurate_shot_boundaries, 2.1, 'Inaccurate histogram shot boundaries'),
    (missed_human_shot_boundaries, 2, 'Missed human shot boundaries'),
    (human_shot_boundaries, 1.9, 'All human shot boundaries'),
]

# One figure per clip; every figure plots all boundaries and is then cropped
# along x to the clip's frame range.
for i, clip in enumerate(clips.get_intervals()):
    ax = plt.gca()
    ax.set_title("Clip {}".format(i + 1))
    for boundary_list, y_level, series_label in boundary_series:
        x_values = [intrvl.get_start() for intrvl in boundary_list.get_intervals()]
        y_values = [y_level] * boundary_list.size()
        ax.scatter(x_values, y_values, label=series_label)
    ax.set_ylim(0, 4.0)
    # Widen the x range outward to the nearest multiples of 100 frames.
    x_min = int(math.floor(clip.get_start() / 100.0)) * 100
    x_max = int(math.ceil(clip.get_end() / 100.0)) * 100
    ax.set_xlim(x_min, x_max)
    ax.set_xlabel('frame number')
    ax.legend()
    plt.show()

### Frames at Mistakes and Missed Boundaries
Let's look at the frames where the histogram shot boundary detector made mistakes or where it missed a shot boundary.

In [None]:
def visualize_frames(video_id, boundaries, clip):
    """Build an esper 'frames' result for the boundaries that fall in a clip.

    Args:
        video_id: id stored under the ``'video'`` key of every result entry.
        boundaries: interval list of boundaries; only those overlapping
            ``clip`` are kept.
        clip: the interval the boundaries are restricted to.

    Returns:
        ``simple_result(..., 'frames')`` over one entry per kept boundary. Each
        entry holds the boundary's start frame as ``'min_frame'`` and an empty
        ``'objects'`` list.
    """
    boundaries_in_clip = boundaries.filter(lambda intrvl: overlaps()(intrvl, clip))
    frame_nums = [intrvl.get_start() for intrvl in boundaries_in_clip.get_intervals()]

    from esper.stdlib import simple_result
    materialized_result = [
        {'video': video_id, 'min_frame': frame_num, 'objects': []}
        for frame_num in frame_nums
    ]
    return simple_result(materialized_result, 'frames')

In [None]:
# Name the first three clips individually; raises IndexError if fewer than
# three clips were found.
clip_intervals = clips.get_intervals()
clip1, clip2, clip3 = clip_intervals[0], clip_intervals[1], clip_intervals[2]

#### Clip 1

In [None]:
# Inaccuracies in clip 1: detected boundaries that overlap no human-annotated boundary
print("Inaccurate shot boundaries in Clip 1")
esper_widget(visualize_frames(video_id, inaccurate_shot_boundaries, clip1))

In [None]:
# Missed shot boundaries in clip 1: human-annotated boundaries that overlap no detected boundary
print("Missed shot boundaries in Clip 1")
esper_widget(visualize_frames(video_id, missed_human_shot_boundaries, clip1))

#### Clip 2

In [None]:
# Inaccuracies in clip 2: detected boundaries that overlap no human-annotated boundary
print("Inaccurate shot boundaries in Clip 2")
esper_widget(visualize_frames(video_id, inaccurate_shot_boundaries, clip2))

In [None]:
# Missed shot boundaries in clip 2: human-annotated boundaries that overlap no detected boundary
print("Missed shot boundaries in Clip 2")
esper_widget(visualize_frames(video_id, missed_human_shot_boundaries, clip2))

#### Clip 3

In [None]:
# Inaccuracies in clip 3: detected boundaries that overlap no human-annotated boundary
print("Inaccurate shot boundaries in Clip 3")
esper_widget(visualize_frames(video_id, inaccurate_shot_boundaries, clip3))

In [None]:
# Missed shot boundaries in clip 3: human-annotated boundaries that overlap no detected boundary
print("Missed shot boundaries in Clip 3")
esper_widget(visualize_frames(video_id, missed_human_shot_boundaries, clip3))