# Weak labels for timeseries model

To use Shiori's timeseries model for shot detection, we need to make the following changes to the label matrix:
* Each window of 16 frames is a single datapoint
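* The windows overlap with a stride of 8 frames, so the first window is frames `[1, 2, ..., 16]`, the second is `[9, 10, ..., 24]`, etc. (frame numbers here are 1-based for illustration; the code below indexes frames from 0)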
* The output label matrix needs to put these frames/predictions sequentially:
  * `[1, 2, ..., 16, 9, 10, ..., 24, 17, 18, ..., 32, ...]`
  * So if the original label matrix of all the frames is `N x 5`, the output matrix is roughly `(2N) x 5`, because with a window of 16 frames and a stride of 8 almost every frame appears in two windows

In [32]:
import numpy as np
from scipy.sparse import csr_matrix
import pickle
import rekall
from rekall.video_interval_collection import VideoIntervalCollection
from rekall.interval_list import IntervalList
from rekall.temporal_predicates import *

## Load manually annotated data

In [17]:
# Read the manually annotated shots and wrap them in a rekall collection.
# NOTE: pickle.load can execute arbitrary code; only load trusted project data.
with open('../../data/manually_annotated_shots.pkl', mode='rb') as shots_file:
    manual_shot_intervals = pickle.load(shots_file)
shots = VideoIntervalCollection(manual_shot_intervals)

In [18]:
# Read the pickled shot-detection folds.
# NOTE: pickle.load can execute arbitrary code; only load trusted project data.
with open('../../data/shot_detection_folds.pkl', mode='rb') as folds_file:
    shot_detection_folds = pickle.load(folds_file)

In [19]:
# Dilate every shot by 1, coalesce, then dilate by -1.
# NOTE(review): presumably this merges back-to-back shots into contiguous
# clips while restoring the original endpoints — confirm against rekall.
clips = (
    shots
    .dilate(1)
    .coalesce()
    .dilate(-1)
)

100%|██████████| 28/28 [00:00<00:00, 12072.42it/s]
100%|██████████| 28/28 [00:00<00:00, 49407.03it/s]


In [20]:
# A boundary is a zero-length interval (start == end). One is placed on the
# first frame of each shot and one on the frame right after its last frame.
shot_start_points = shots.map(
    lambda shot: (shot.start, shot.start, shot.payload)
)
shot_end_points = shots.map(
    lambda shot: (shot.end + 1, shot.end + 1, shot.payload)
)
shot_boundaries = shot_start_points.set_union(shot_end_points).coalesce()

In [21]:
# Map each video id to the list of frame numbers at which a boundary starts.
boundary_frames = {}
for vid in shot_boundaries.get_allintervals():
    boundary_intervals = shot_boundaries.get_intervallist(vid).get_intervals()
    boundary_frames[vid] = [boundary.start for boundary in boundary_intervals]

In [22]:
# Sorted ids of the videos that have manually annotated clips.
video_ids = sorted(clips.get_allintervals().keys())

In [23]:
# For every annotated video, collect the sorted frame numbers covered by its
# clips.
frames_per_video = {}
for vid in video_ids:
    clip_frames = []
    for clip in clips.get_intervallist(vid).get_intervals():
        # end + 2 makes the range run through clip.end + 1, the frame on which
        # the boundary after the clip's last shot is recorded.
        clip_frames.extend(range(clip.start, clip.end + 2))
    frames_per_video[vid] = sorted(clip_frames)

In [24]:
def _label_frames(frames, boundaries):
    """Label each frame 1 if it is a boundary frame, otherwise 2.

    Args:
        frames: iterable of frame numbers to label, in output order.
        boundaries: iterable of frame numbers that are shot boundaries.

    Returns:
        A list with one label per entry of ``frames``.
    """
    # Build a set once so each membership test is O(1) instead of a linear
    # scan of the boundary list for every frame.
    boundary_set = set(boundaries)
    return [1 if frame in boundary_set else 2 for frame in frames]


# Per-video ground-truth labels, aligned with frames_per_video[video_id].
ground_truth = {
    video_id: _label_frames(frames_per_video[video_id], boundary_frames[video_id])
    for video_id in video_ids
}

## Load label matrix with all the frames in it

In [4]:
# Read the pickled weak labels covering all frames of all movies.
# NOTE: pickle.load can execute arbitrary code; only load trusted project data.
with open('../../data/shot_detection_weak_labels/all_labels.pkl', mode='rb') as labels_file:
    weak_labels_all_movies = pickle.load(labels_file)

## Load videos and number of frames per video

In [5]:
# Read the pickled per-video frame counts (indexed by video id below).
# NOTE: pickle.load can execute arbitrary code; only load trusted project data.
with open('../../data/frame_counts.pkl', mode='rb') as counts_file:
    frame_counts = pickle.load(counts_file)

In [27]:
# Sorted ids of every video that has a frame count.
video_ids_all = sorted(frame_counts.keys())

In [38]:
# Training videos are all videos except the manually annotated (dev) ones.
video_ids_train = sorted(set(video_ids_all) - set(video_ids))

## Construct windows for each video

In [28]:
# First, construct windows of WINDOW_SIZE frames for each video, advancing by
# WINDOW_STRIDE frames so that consecutive windows overlap by half.
WINDOW_SIZE = 16
WINDOW_STRIDE = 8

# Each window is stored as (start, start + WINDOW_SIZE, video_id); downstream
# cells iterate range(start, end), i.e. they treat the end as exclusive.
# NOTE(review): the range stops before start == frame_count - WINDOW_SIZE, so a
# final window ending exactly on the last frame is never produced (assuming
# frame_counts holds the number of frames). Left unchanged to keep the window
# ordering stable — confirm against the timeseries model's own windowing.
windows = VideoIntervalCollection({
    video_id: [
        (start, start + WINDOW_SIZE, video_id)
        for start in range(0, frame_counts[video_id] - WINDOW_SIZE, WINDOW_STRIDE)
    ]
    for video_id in video_ids_all
})

## Construct L/Y matrices for each video

In [37]:
%%time
# Construct L_dev
L_dev = csr_matrix([
    [lf_labels[video_id][f] for lf_labels in weak_labels_all_movies]
    for video_id in video_ids
    for intrvl in windows.filter_against(
        clips, predicate=overlaps(), working_window=1
    ).get_intervallist(video_id).get_intervals()
    for f in range(intrvl.start, intrvl.end)
])

CPU times: user 15.6 s, sys: 208 ms, total: 15.9 s
Wall time: 15.8 s


In [None]:
%%time
# TODO: construct Y_dev. Open question: how to build labels whose rows line up with the window-ordered rows of L_dev.

In [42]:
%%time
# Construct L_train
L_train = csr_matrix([
    [lf_labels[video_id][f] for lf_labels in weak_labels_all_movies]
    for video_id in video_ids_train
    for intrvl in windows.get_intervallist(video_id).get_intervals()
    for f in range(intrvl.start, intrvl.end)
])

CPU times: user 5min 50s, sys: 30 s, total: 6min 20s
Wall time: 6min 19s
