In [1]:
import sys
sys.path.append('/lfs/1/danfu/metal')
import metal

In [2]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [3]:
import pickle
import rekall
from rekall.video_interval_collection import VideoIntervalCollection
from rekall.interval_list import IntervalList
from rekall.temporal_predicates import *
import numpy as np
from scipy.sparse import csr_matrix
import os
from tqdm import tqdm
import random

from metal.analysis import lf_summary
from metal.label_model.baselines import MajorityLabelVoter
from metal.label_model import LabelModel

# Load Shot Data

In [4]:
# Read the pickled shot-detection folds from the shared data directory.
# NOTE(review): pickle.load can execute arbitrary code; only use trusted files.
with open('../../data/shot_detection_folds.pkl', 'rb') as folds_file:
    shot_detection_folds = pickle.load(folds_file)

In [5]:
# Load the manually annotated shots and wrap them in a VideoIntervalCollection.
# NOTE(review): pickle.load can execute arbitrary code; only use trusted files.
with open('../../data/manually_annotated_shots.pkl', 'rb') as shots_file:
    shots = VideoIntervalCollection(pickle.load(shots_file))

In [6]:
# Grow every shot by one frame, coalesce, then shrink back by one frame.
# Presumably this fuses back-to-back shots into contiguous annotated clips --
# confirm against rekall's dilate/coalesce semantics.
clips = (
    shots
    .dilate(1)
    .coalesce()
    .dilate(-1)
)

100%|██████████| 28/28 [00:00<00:00, 12147.34it/s]
100%|██████████| 28/28 [00:00<00:00, 38874.71it/s]


In [7]:
# One zero-length interval at each shot's first frame and one at the frame
# right after each shot's last frame, unioned together and then coalesced.
shot_boundaries = (
    shots
    .map(lambda shot: (shot.start, shot.start, shot.payload))
    .set_union(
        shots.map(lambda shot: (shot.end + 1, shot.end + 1, shot.payload))
    )
    .coalesce()
)

In [8]:
# Map each video id to the list of frame numbers at which a shot boundary starts.
boundary_frames = {
    vid: [
        boundary.start
        for boundary in shot_boundaries.get_intervallist(vid).get_intervals()
    ]
    for vid in shot_boundaries.get_allintervals()
}

In [9]:
# Sorted ids of every video that has annotated clips.
video_ids = sorted(clips.get_allintervals().keys())

In [10]:
# For each annotated video, the sorted frame numbers covered by its clips.
# The range runs through clip.end + 1 inclusive, matching the boundary that is
# placed at shot.end + 1 when shot_boundaries is built.
frames_per_video = {
    vid: sorted(
        frame
        for clip in clips.get_intervallist(vid).get_intervals()
        for frame in range(clip.start, clip.end + 2)
    )
    for vid in video_ids
}

In [11]:
# Per-frame ground truth: 1 where the frame is a shot boundary, 2 otherwise.
# The boundary list is converted to a set once per video so each membership
# test is O(1) instead of scanning the whole list for every frame.
ground_truth = {
    video_id: [
        1 if frame in boundary_set else 2
        for frame in frames_per_video[video_id]
    ]
    for video_id in video_ids
    # Single-element loop: binds the per-video set without leaking a global.
    for boundary_set in (set(boundary_frames[video_id]),)
}

# Load Weak Labels

In [12]:
# Load the pickled frame counts (indexed by video id later in this notebook).
# NOTE(review): pickle.load can execute arbitrary code; only use trusted files.
with open('../../data/frame_counts.pkl', 'rb') as counts_file:
    frame_counts = pickle.load(counts_file)

In [13]:
# Folders containing the weak labels of the individual labeling functions.
# NOTE(review): this list is not referenced again in the visible notebook; the
# labels actually used are loaded from a single pickle in the next cell.
labeling_function_folders = [
    '../../data/shot_detection_weak_labels/rgb_hists_high_pre',
    '../../data/shot_detection_weak_labels/hsv_hists_high_pre',
#     '../../data/shot_detection_weak_labels/flow_hists_magnitude', # this is just really really bad
    '../../data/shot_detection_weak_labels/flow_hists_diffs_high_pre',
    '../../data/shot_detection_weak_labels/face_counts',
    '../../data/shot_detection_weak_labels/face_positions'
]

In [14]:
# Or load the precomputed weak labels of all labeling functions in one go.
# NOTE(review): pickle.load can execute arbitrary code; only use trusted files.
with open('../../data/shot_detection_weak_labels/all_labels_high_pre.pkl', 'rb') as labels_file:
    weak_labels_all_movies = pickle.load(labels_file)

In [15]:
# Restrict every labeling function's output to the frames that have ground
# truth. Frame number `frame` is read from list position `frame - 1`.
weak_labels_gt_only = [
    {
        vid: [lf_labels[vid][frame - 1] for frame in frames_per_video[vid]]
        for vid in sorted(clips.get_allintervals().keys())
    }
    for lf_labels in weak_labels_all_movies
]

# Train LabelModel

In [17]:
# Sorted ids of every video for which a frame count is available.
video_ids_all = sorted(frame_counts.keys())

In [18]:
# Training videos: every video that has no manually annotated clips.
train_movies_all = sorted(
    set(video_ids_all) - set(clips.get_allintervals().keys())
)

In [19]:
# Test videos: exactly the videos that have manually annotated clips.
test_videos = sorted(clips.get_allintervals().keys())

## Frame Based

In [20]:
# Label matrices are assembled with one row per labeling function and then
# transposed, so the final layout is (frames x labeling functions).

# Training matrix: all frames of all un-annotated videos.
L_train_everything = csr_matrix([
    [lbl for vid in train_movies_all for lbl in lf_labels[vid]]
    for lf_labels in weak_labels_all_movies
]).T

# Ground-truth vector for the annotated frames of the test videos.
Y_test = np.array([
    gt_label
    for vid in test_videos
    for gt_label in ground_truth[vid]
])

# Test matrix: weak labels restricted to the annotated frames, built in the
# same video order as Y_test.
L_test = csr_matrix([
    [lbl for vid in test_videos for lbl in lf_labels[vid]]
    for lf_labels in weak_labels_gt_only
]).T

In [21]:
# Baseline: score a majority vote over the labeling functions on the test frames.
MajorityLabelVoter(seed=123).score(
    (L_test, Y_test),
    metric=['accuracy', 'precision', 'recall', 'f1'],
)

Accuracy: 0.968
Precision: 0.248
Recall: 0.920
F1: 0.391
        y=1    y=2   
 l=1    600   1818   
 l=2    52    55683  


[0.9678434474575688,
 0.24813895781637718,
 0.9202453987730062,
 0.3908794788273615]

In [22]:
# Fit a two-class label model on the weak labels of the un-annotated videos.
# class_balance is given as (0.01, 0.99); presumably ordered as (class 1,
# class 2), i.e. boundaries are assumed to be rare -- confirm against MeTaL.
label_model_everything = LabelModel(k=2, seed=123)
label_model_everything.train_model(
    L_train_everything,
    class_balance=(0.01, 0.99),
    n_epochs=5000,
    log_train_every=50,
)

Computing O...
Estimating \mu...
[50 epo]: TRAIN:[loss=0.077]
[100 epo]: TRAIN:[loss=0.063]
[150 epo]: TRAIN:[loss=0.063]
[200 epo]: TRAIN:[loss=0.063]
[250 epo]: TRAIN:[loss=0.063]
[300 epo]: TRAIN:[loss=0.063]
[350 epo]: TRAIN:[loss=0.063]
[400 epo]: TRAIN:[loss=0.063]
[450 epo]: TRAIN:[loss=0.063]
[500 epo]: TRAIN:[loss=0.063]
[550 epo]: TRAIN:[loss=0.063]
[600 epo]: TRAIN:[loss=0.063]
[650 epo]: TRAIN:[loss=0.062]
[700 epo]: TRAIN:[loss=0.062]
[750 epo]: TRAIN:[loss=0.062]
[800 epo]: TRAIN:[loss=0.062]
[850 epo]: TRAIN:[loss=0.062]
[900 epo]: TRAIN:[loss=0.062]
[950 epo]: TRAIN:[loss=0.062]
[1000 epo]: TRAIN:[loss=0.062]
[1050 epo]: TRAIN:[loss=0.062]
[1100 epo]: TRAIN:[loss=0.062]
[1150 epo]: TRAIN:[loss=0.062]
[1200 epo]: TRAIN:[loss=0.062]
[1250 epo]: TRAIN:[loss=0.062]
[1300 epo]: TRAIN:[loss=0.062]
[1350 epo]: TRAIN:[loss=0.062]
[1400 epo]: TRAIN:[loss=0.062]
[1450 epo]: TRAIN:[loss=0.062]
[1500 epo]: TRAIN:[loss=0.062]
[1550 epo]: TRAIN:[loss=0.062]
[1600 epo]: TRAIN:[loss=0.

In [23]:
# Score the frame-based label model on the annotated test frames.
label_model_everything.score(
    (L_test, Y_test),
    metric=['accuracy', 'precision', 'recall', 'f1'],
)

Accuracy: 0.996
Precision: 0.891
Recall: 0.741
F1: 0.809
        y=1    y=2   
 l=1    483    59    
 l=2    169   57442  


[0.9960793080322597, 0.8911439114391144, 0.74079754601227, 0.8090452261306532]

### Predict on everything

In [24]:
# Label matrix over every frame of every video (frames x labeling functions).
# NOTE(review): the recorded run of this cell ended in a KeyboardInterrupt.
L_everything_frame = csr_matrix([
    [lbl for vid in sorted(video_ids_all) for lbl in lf_labels[vid]]
    for lf_labels in weak_labels_all_movies
]).T

KeyboardInterrupt: 

In [None]:
# Probabilistic labels from the frame-based label model for every frame.
frame_predictions_everything = label_model_everything.predict_proba(L_everything_frame)

In [None]:
# (video_id, frame number) keys with frame numbers starting at 1, enumerated in
# the same video order used to build L_everything_frame.
video_frame_nums = [
    (vid, frame_num)
    for vid in sorted(video_ids_all)
    for frame_num in range(1, frame_counts[vid] + 1)
]

In [None]:
# Pair each (video_id, frame) key with its prediction row as a plain list.
predictions_to_save = [
    (frame_key, frame_probs.tolist())
    for frame_key, frame_probs in zip(video_frame_nums, frame_predictions_everything)
]

In [None]:
# save predictions to disk
# The original cell saved `preds_np`, which is never defined in this notebook
# and would raise NameError. Build it from `predictions_to_save`, the same way
# the window-based section converts its predictions before saving.
preds_np = np.array(predictions_to_save)
with open('../../data/shot_detection_weak_labels/noisy_labels_all_frame_high_pre.npy', 'wb') as f:
    np.save(f, preds_np)

## Window Based

In [25]:
# First, construct windows of 16 frames for each video.
# Window starts advance by 8 frames, so consecutive windows overlap by half;
# the payload of every window is the id of the video it belongs to.
windows_train = VideoIntervalCollection({
    vid: [
        (window_start, window_start + 16, vid)
        for window_start in range(0, frame_counts[vid] - 16, 8)
    ]
    for vid in train_movies_all
})

In [26]:
# Same 16-frame windows with a stride of 8, built for the annotated test
# videos; the payload of every window is the id of the video it belongs to.
windows_test = VideoIntervalCollection({
    vid: [
        (window_start, window_start + 16, vid)
        for window_start in range(0, frame_counts[vid] - 16, 8)
    ]
    for vid in test_videos
})

In [27]:
# Next, intersect the windows with ground truth and get ground truth labels for the windows.

# Every test window that overlaps an annotated clip starts out with payload 2.
windows_intersecting_ground_truth = (
    windows_test
    .filter_against(clips, predicate=overlaps())
    .map(lambda win: (win.start, win.end, 2))
)

# Windows whose [start, end) range contains a shot-boundary frame get payload 1.
windows_with_shot_boundaries = (
    windows_intersecting_ground_truth
    .filter_against(
        shot_boundaries,
        predicate=lambda win, boundary: win.start <= boundary.start < win.end,
    )
    .map(lambda win: (win.start, win.end, 1))
)

# Union both collections and coalesce intervals matched by equal(), keeping
# the smaller payload, so a window labelled both 1 and 2 ends up as 1.
windows_with_labels = (
    windows_with_shot_boundaries
    .set_union(windows_intersecting_ground_truth)
    .coalesce(
        predicate=equal(),
        payload_merge_op=lambda left, right: min(left, right),
    )
)

In [28]:
# Label windows with the weak labels in our labeling functions
def label_window(per_frame_weak_labels):
    """Collapse the per-frame weak labels of one window into one window label.

    Returns 1 if any frame is labelled 1; otherwise 2 if at least half of the
    frames are labelled 2; otherwise 0.
    """
    if any(label == 1 for label in per_frame_weak_labels):
        return 1
    twos = sum(1 for label in per_frame_weak_labels if label == 2)
    # Integer form of `twos >= len(...) / 2`.
    return 2 if 2 * twos >= len(per_frame_weak_labels) else 0

# Replace every training window's payload (its video id) with a list holding
# one window-level weak label per labeling function.
windows_with_weak_labels_train = windows_train.map(
    lambda window: (
        window.start,
        window.end,
        [
            label_window([
                lf[window.payload][f - 1]
                # Frame f is stored at index f - 1, but windows start at 0.
                # Starting at 1 keeps f = 0 from reading index -1, which would
                # silently pull in the label of the video's last frame.
                for f in range(max(window.start, 1), window.end)
            ])
            for lf in weak_labels_all_movies
        ]
    )
)

In [29]:
# Replace every test window's payload (its video id) with a list holding one
# window-level weak label per labeling function.
windows_with_weak_labels_test = windows_test.map(
    lambda window: (
        window.start,
        window.end,
        [
            label_window([
                lf[window.payload][f - 1]
                # Frame f is stored at index f - 1, but windows start at 0.
                # Starting at 1 keeps f = 0 from reading index -1, which would
                # silently pull in the label of the video's last frame.
                for f in range(max(window.start, 1), window.end)
            ])
            for lf in weak_labels_all_movies
        ]
    )
)

In [30]:
# Keep only the weakly labelled test windows that overlap an annotated clip.
# NOTE(review): the ground-truth windows are filtered with overlaps() but
# without working_window=1; the two collections are later zipped row-by-row,
# so confirm they always select the same windows in the same order.
windows_with_weak_labels_gt_only = windows_with_weak_labels_test.filter_against(
    clips, predicate=overlaps(), working_window=1
)

In [31]:
# Weakly labelled windows of the training and the test videos combined.
windows_with_weak_labels_all = windows_with_weak_labels_train.set_union(
    windows_with_weak_labels_test
)

In [32]:
# Ground-truth label (1 or 2) of every annotated test window.
Y_test_windows = np.array([
    gt_window.payload
    for vid in test_videos
    for gt_window in windows_with_labels.get_intervallist(vid).get_intervals()
])

# Matching weak-label rows, one column per labeling function. Rows are assumed
# to line up with Y_test_windows because both iterate test_videos in order.
L_test_windows = csr_matrix([
    weak_window.payload
    for vid in test_videos
    for weak_window in windows_with_weak_labels_gt_only.get_intervallist(vid).get_intervals()
])

In [42]:
# Weak-label rows of every training window (windows x labeling functions).
L_train_windows_all = csr_matrix([
    weak_window.payload
    for vid in train_movies_all
    for weak_window in windows_with_weak_labels_train.get_intervallist(vid).get_intervals()
])

In [34]:
# Fit a two-class label model on the window-level weak labels.
# NOTE(review): Y_dev is the test-window ground truth, so whatever train_model
# derives from Y_dev is informed by the labels the model is scored on later.
label_model_everything_windows = LabelModel(k=2, seed=123)
label_model_everything_windows.train_model(
    L_train_windows_all,
    Y_dev=Y_test_windows,
    n_epochs=100000,
    log_train_every=1000,
)

Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.048]
[2000 epo]: TRAIN:[loss=0.048]
[3000 epo]: TRAIN:[loss=0.048]
[4000 epo]: TRAIN:[loss=0.048]
[5000 epo]: TRAIN:[loss=0.048]
[6000 epo]: TRAIN:[loss=0.048]
[7000 epo]: TRAIN:[loss=0.048]
[8000 epo]: TRAIN:[loss=0.048]
[9000 epo]: TRAIN:[loss=0.048]
[10000 epo]: TRAIN:[loss=0.048]
[11000 epo]: TRAIN:[loss=0.048]
[12000 epo]: TRAIN:[loss=0.048]
[13000 epo]: TRAIN:[loss=0.048]
[14000 epo]: TRAIN:[loss=0.048]
[15000 epo]: TRAIN:[loss=0.048]
[16000 epo]: TRAIN:[loss=0.048]
[17000 epo]: TRAIN:[loss=0.048]
[18000 epo]: TRAIN:[loss=0.048]
[19000 epo]: TRAIN:[loss=0.048]
[20000 epo]: TRAIN:[loss=0.048]
[21000 epo]: TRAIN:[loss=0.048]
[22000 epo]: TRAIN:[loss=0.048]
[23000 epo]: TRAIN:[loss=0.048]
[24000 epo]: TRAIN:[loss=0.048]
[25000 epo]: TRAIN:[loss=0.048]
[26000 epo]: TRAIN:[loss=0.048]
[27000 epo]: TRAIN:[loss=0.048]
[28000 epo]: TRAIN:[loss=0.048]
[29000 epo]: TRAIN:[loss=0.048]
[30000 epo]: TRAIN:[loss=0.048]
[31000 epo]: TRA

In [35]:
# Score the window-based label model on the annotated test windows.
label_model_everything_windows.score(
    (L_test_windows, Y_test_windows),
    metric=['accuracy', 'precision', 'recall', 'f1'],
)

Accuracy: 0.938
Precision: 0.856
Recall: 0.780
F1: 0.816
        y=1    y=2   
 l=1   1006    169   
 l=2    284   5867   


[0.9381654381654382,
 0.8561702127659574,
 0.7798449612403101,
 0.8162271805273833]

In [51]:
# Eyeball the weak labels of the first 500 test windows. The sparse matrix is
# sliced before densifying so only those rows are materialised.
for dense_row in L_test_windows[:500].todense():
    print(dense_row)

[[1 1 1 2 2]]
[[1 1 1 0 0]]
[[2 2 1 0 0]]
[[2 2 1 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[1 1 1 0 0]]
[[1 1 1 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[2 2 1 0 0]]
[[2 2 1 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[1 1 2 0 0]]
[[1 1 2 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[1 1 1 0 0]]
[[1 1 1 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[1 1 1 0 0]]
[[1 1 1 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[1 2 1 0 0]]
[[1 2 1 0 0]]
[[2 2 2 0 0]]
[[1 1 1 0 0]]
[[1 1 1 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[1 1 1 0 0]]
[[1 1 1 0 0]]
[[2 2 2 0 0]]
[[1 1 1 0 0]]
[[1 1 1 0 0]]
[[2 2 2 0 0]]
[[2 1 1 0 1]]
[[2 1 1 2 1]]
[[2 2 2 2 1]]
[[2 2 2 2 1]]
[[2 2 2 0 1]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[2 2 2 2 2]]
[[2 2 2 2 1]]
[[2 2 2 2 1]]
[[2 2 2 2 1]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[2 2 2 0 0]]
[[2 2 

## Tune the Training

In [36]:
# Random hyper-parameter search over LabelModel, selecting models by F1.
from metal.tuners.random_tuner import RandomSearchTuner
#label_model_everything_windows_tuned = LabelModel(k=2, seed=123)
random_tuner = RandomSearchTuner(LabelModel, seed=123, validation_metric='f1')

In [37]:
# Hyper-parameter search space for the random tuner: list values are discrete
# choices, dict values describe a log-scaled continuous range.
# Key order is kept as-is in case it influences the tuner's sampling.
search_space = {
    'seed': [123],
    'n_epochs': list(range(1000, 20000, 1000)),
    'lr': {'range': [1e-5, .1], 'scale': 'log'},
    'l2': {'range': [1e-5, .1], 'scale': 'log'},
    'log_train_every': [1000],
    # Class priors (p, 1 - p) for p = 0.1, 0.2, ..., 0.9.
    'class_balance': [
        (tenths * .1, 1 - tenths * .1)
        for tenths in range(1, 10)
    ],
    # 'Y_dev': [Y_test_windows]
}

In [38]:
# Run the random search: each candidate is trained on the training windows and
# passed the test windows as the evaluation split.
# NOTE(review): model selection therefore uses the same windows that the
# reported scores are computed on.
# Previously tried train_kwargs (left disabled):
#     'Y_dev': Y_test_windows
#     'class_balance': (0.2, 0.8)
best_random_model = random_tuner.search(
    search_space,
    (L_test_windows, Y_test_windows),
    train_args=[L_train_windows_all],
    train_kwargs={},
    init_kwargs={'k': 2},
    verbose=True,
)

[0] Testing {'seed': 123, 'n_epochs': 14000, 'log_train_every': 1000, 'class_balance': (0.5, 0.5), 'lr': 3.205382306597884e-05, 'l2': 0.07938131059226453}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.213]
[2000 epo]: TRAIN:[loss=0.125]
[3000 epo]: TRAIN:[loss=0.113]
[4000 epo]: TRAIN:[loss=0.105]
[5000 epo]: TRAIN:[loss=0.098]
[6000 epo]: TRAIN:[loss=0.093]
[7000 epo]: TRAIN:[loss=0.087]
[8000 epo]: TRAIN:[loss=0.083]
[9000 epo]: TRAIN:[loss=0.079]
[10000 epo]: TRAIN:[loss=0.076]
[11000 epo]: TRAIN:[loss=0.073]
[12000 epo]: TRAIN:[loss=0.070]
[13000 epo]: TRAIN:[loss=0.068]
[14000 epo]: TRAIN:[loss=0.066]
Finished Training
[1] Testing {'seed': 123, 'n_epochs': 3000, 'log_train_every': 1000, 'class_balance': (0.1, 0.9), 'lr': 0.0011706146276222952, 'l2': 0.0032996699441735027}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.054]
[2000 epo]: TRAIN:[loss=0.049]
[3000 epo]: TRAIN:[loss=0.048]
Finished Training
[2] Testing {'seed': 123, 'n_epochs': 8000, 'log_train_

[13] Testing {'seed': 123, 'n_epochs': 1000, 'log_train_every': 1000, 'class_balance': (0.4, 0.6), 'lr': 0.00012218152840205883, 'l2': 0.0010597561541543527}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.098]
Finished Training
[14] Testing {'seed': 123, 'n_epochs': 13000, 'log_train_every': 1000, 'class_balance': (0.9, 0.09999999999999998), 'lr': 6.502617448607791e-05, 'l2': 0.00019419091457447823}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.118]
[2000 epo]: TRAIN:[loss=0.115]
[3000 epo]: TRAIN:[loss=0.115]
[4000 epo]: TRAIN:[loss=0.114]
[5000 epo]: TRAIN:[loss=0.114]
[6000 epo]: TRAIN:[loss=0.114]
[7000 epo]: TRAIN:[loss=0.113]
[8000 epo]: TRAIN:[loss=0.113]
[9000 epo]: TRAIN:[loss=0.112]
[10000 epo]: TRAIN:[loss=0.112]
[11000 epo]: TRAIN:[loss=0.112]
[12000 epo]: TRAIN:[loss=0.111]
[13000 epo]: TRAIN:[loss=0.111]
Finished Training
[15] Testing {'seed': 123, 'n_epochs': 11000, 'log_train_every': 1000, 'class_balance': (0.1, 0.9), 'lr': 0.0003914539897032439

Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.048]
[2000 epo]: TRAIN:[loss=0.048]
[3000 epo]: TRAIN:[loss=0.048]
[4000 epo]: TRAIN:[loss=0.048]
[5000 epo]: TRAIN:[loss=0.048]
[6000 epo]: TRAIN:[loss=0.048]
[7000 epo]: TRAIN:[loss=0.048]
[8000 epo]: TRAIN:[loss=0.048]
[9000 epo]: TRAIN:[loss=0.048]
[10000 epo]: TRAIN:[loss=0.048]
[11000 epo]: TRAIN:[loss=0.048]
[12000 epo]: TRAIN:[loss=0.048]
[13000 epo]: TRAIN:[loss=0.048]
[14000 epo]: TRAIN:[loss=0.048]
[15000 epo]: TRAIN:[loss=0.048]
Finished Training
[27] Testing {'seed': 123, 'n_epochs': 2000, 'log_train_every': 1000, 'class_balance': (0.7000000000000001, 0.29999999999999993), 'lr': 0.0003830480553627742, 'l2': 7.881324848230943e-05}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.072]
[2000 epo]: TRAIN:[loss=0.054]
Finished Training
[28] Testing {'seed': 123, 'n_epochs': 13000, 'log_train_every': 1000, 'class_balance': (0.6000000000000001, 0.3999999999999999), 'lr': 0.0010116498292228329, 'l2': 3.1946620152

Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.064]
[2000 epo]: TRAIN:[loss=0.051]
[3000 epo]: TRAIN:[loss=0.049]
[4000 epo]: TRAIN:[loss=0.048]
[5000 epo]: TRAIN:[loss=0.048]
[6000 epo]: TRAIN:[loss=0.048]
[7000 epo]: TRAIN:[loss=0.048]
[8000 epo]: TRAIN:[loss=0.048]
[9000 epo]: TRAIN:[loss=0.048]
[10000 epo]: TRAIN:[loss=0.048]
[11000 epo]: TRAIN:[loss=0.048]
[12000 epo]: TRAIN:[loss=0.048]
[13000 epo]: TRAIN:[loss=0.048]
[14000 epo]: TRAIN:[loss=0.048]
[15000 epo]: TRAIN:[loss=0.048]
[16000 epo]: TRAIN:[loss=0.048]
[17000 epo]: TRAIN:[loss=0.048]
[18000 epo]: TRAIN:[loss=0.048]
Finished Training
[40] Testing {'seed': 123, 'n_epochs': 16000, 'log_train_every': 1000, 'class_balance': (0.4, 0.6), 'lr': 0.0207697343098668, 'l2': 5.00627511985727e-05}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.048]
[2000 epo]: TRAIN:[loss=0.048]
[3000 epo]: TRAIN:[loss=0.048]
[4000 epo]: TRAIN:[loss=0.048]
[5000 epo]: TRAIN:[loss=0.048]
[6000 epo]: TRAIN:[loss=0.048]
[7000 epo

Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.111]
[2000 epo]: TRAIN:[loss=0.089]
[3000 epo]: TRAIN:[loss=0.075]
[4000 epo]: TRAIN:[loss=0.065]
[5000 epo]: TRAIN:[loss=0.059]
[6000 epo]: TRAIN:[loss=0.055]
[7000 epo]: TRAIN:[loss=0.053]
[8000 epo]: TRAIN:[loss=0.051]
[9000 epo]: TRAIN:[loss=0.050]
[10000 epo]: TRAIN:[loss=0.050]
[11000 epo]: TRAIN:[loss=0.049]
[12000 epo]: TRAIN:[loss=0.049]
[13000 epo]: TRAIN:[loss=0.048]
Finished Training
[51] Testing {'seed': 123, 'n_epochs': 2000, 'log_train_every': 1000, 'class_balance': (0.8, 0.19999999999999996), 'lr': 0.0010092139536042226, 'l2': 0.041865162672497444}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.074]
[2000 epo]: TRAIN:[loss=0.055]
Finished Training
[52] Testing {'seed': 123, 'n_epochs': 15000, 'log_train_every': 1000, 'class_balance': (0.5, 0.5), 'lr': 0.0001259172092579234, 'l2': 0.06598094345516514}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.105]
[2000 epo]: TRAIN:[loss=0.083]
[3000 

[15000 epo]: TRAIN:[loss=0.048]
[16000 epo]: TRAIN:[loss=0.048]
[17000 epo]: TRAIN:[loss=0.048]
[18000 epo]: TRAIN:[loss=0.048]
Finished Training
[63] Testing {'seed': 123, 'n_epochs': 3000, 'log_train_every': 1000, 'class_balance': (0.2, 0.8), 'lr': 0.00035156327173305435, 'l2': 0.00564881347364476}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.073]
[2000 epo]: TRAIN:[loss=0.058]
[3000 epo]: TRAIN:[loss=0.052]
Finished Training
[64] Testing {'seed': 123, 'n_epochs': 17000, 'log_train_every': 1000, 'class_balance': (0.5, 0.5), 'lr': 1.7805024567544152e-05, 'l2': 0.0001031449096631051}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.583]
[2000 epo]: TRAIN:[loss=0.181]
[3000 epo]: TRAIN:[loss=0.130]
[4000 epo]: TRAIN:[loss=0.120]
[5000 epo]: TRAIN:[loss=0.114]
[6000 epo]: TRAIN:[loss=0.108]
[7000 epo]: TRAIN:[loss=0.104]
[8000 epo]: TRAIN:[loss=0.100]
[9000 epo]: TRAIN:[loss=0.096]
[10000 epo]: TRAIN:[loss=0.092]
[11000 epo]: TRAIN:[loss=0.089]
[12000 epo]: TRAIN:

[6000 epo]: TRAIN:[loss=0.048]
[7000 epo]: TRAIN:[loss=0.048]
[8000 epo]: TRAIN:[loss=0.048]
[9000 epo]: TRAIN:[loss=0.048]
[10000 epo]: TRAIN:[loss=0.048]
[11000 epo]: TRAIN:[loss=0.048]
[12000 epo]: TRAIN:[loss=0.048]
[13000 epo]: TRAIN:[loss=0.048]
[14000 epo]: TRAIN:[loss=0.048]
Finished Training
[76] Testing {'seed': 123, 'n_epochs': 6000, 'log_train_every': 1000, 'class_balance': (0.2, 0.8), 'lr': 0.05722278972677177, 'l2': 0.0004223299007678817}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.048]
[2000 epo]: TRAIN:[loss=0.048]
[3000 epo]: TRAIN:[loss=0.048]
[4000 epo]: TRAIN:[loss=0.048]
[5000 epo]: TRAIN:[loss=0.048]
[6000 epo]: TRAIN:[loss=0.048]
Finished Training
[77] Testing {'seed': 123, 'n_epochs': 16000, 'log_train_every': 1000, 'class_balance': (0.7000000000000001, 0.29999999999999993), 'lr': 0.014657879938286433, 'l2': 2.7783835304337104e-05}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.048]
[2000 epo]: TRAIN:[loss=0.048]
[3000 epo]: TRAIN:[los

Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.789]
[2000 epo]: TRAIN:[loss=0.161]
[3000 epo]: TRAIN:[loss=0.109]
[4000 epo]: TRAIN:[loss=0.101]
[5000 epo]: TRAIN:[loss=0.097]
Finished Training
[90] Testing {'seed': 123, 'n_epochs': 3000, 'log_train_every': 1000, 'class_balance': (0.4, 0.6), 'lr': 0.019372566873631984, 'l2': 7.047651826940335e-05}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.048]
[2000 epo]: TRAIN:[loss=0.048]
[3000 epo]: TRAIN:[loss=0.048]
Finished Training
[91] Testing {'seed': 123, 'n_epochs': 4000, 'log_train_every': 1000, 'class_balance': (0.7000000000000001, 0.29999999999999993), 'lr': 1.2701797040380528e-05, 'l2': 0.00027933879336273284}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=1.886]
[2000 epo]: TRAIN:[loss=0.938]
[3000 epo]: TRAIN:[loss=0.408]
[4000 epo]: TRAIN:[loss=0.200]
Finished Training
[92] Testing {'seed': 123, 'n_epochs': 10000, 'log_train_every': 1000, 'class_balance': (0.4, 0.6), 'lr': 4.087885261759692e-05, 

[4000 epo]: TRAIN:[loss=0.120]
[5000 epo]: TRAIN:[loss=0.120]
[6000 epo]: TRAIN:[loss=0.120]
Finished Training
[103] Testing {'seed': 123, 'n_epochs': 13000, 'log_train_every': 1000, 'class_balance': (0.1, 0.9), 'lr': 2.6479378336275887e-05, 'l2': 0.020835404488834464}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.105]
[2000 epo]: TRAIN:[loss=0.079]
[3000 epo]: TRAIN:[loss=0.078]
[4000 epo]: TRAIN:[loss=0.077]
[5000 epo]: TRAIN:[loss=0.076]
[6000 epo]: TRAIN:[loss=0.076]
[7000 epo]: TRAIN:[loss=0.075]
[8000 epo]: TRAIN:[loss=0.074]
[9000 epo]: TRAIN:[loss=0.073]
[10000 epo]: TRAIN:[loss=0.072]
[11000 epo]: TRAIN:[loss=0.072]
[12000 epo]: TRAIN:[loss=0.071]
[13000 epo]: TRAIN:[loss=0.070]
Finished Training
[104] Testing {'seed': 123, 'n_epochs': 8000, 'log_train_every': 1000, 'class_balance': (0.4, 0.6), 'lr': 0.01028108568744907, 'l2': 0.0023512169304270077}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.048]
[2000 epo]: TRAIN:[loss=0.048]
[3000 epo]: TRAIN:[lo

[5000 epo]: TRAIN:[loss=0.090]
[6000 epo]: TRAIN:[loss=0.088]
[7000 epo]: TRAIN:[loss=0.087]
Finished Training
[114] Testing {'seed': 123, 'n_epochs': 8000, 'log_train_every': 1000, 'class_balance': (0.30000000000000004, 0.7), 'lr': 0.0007003874658021285, 'l2': 2.2969386970650398e-05}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.055]
[2000 epo]: TRAIN:[loss=0.049]
[3000 epo]: TRAIN:[loss=0.048]
[4000 epo]: TRAIN:[loss=0.048]
[5000 epo]: TRAIN:[loss=0.048]
[6000 epo]: TRAIN:[loss=0.048]
[7000 epo]: TRAIN:[loss=0.048]
[8000 epo]: TRAIN:[loss=0.048]
Finished Training
[115] Testing {'seed': 123, 'n_epochs': 10000, 'log_train_every': 1000, 'class_balance': (0.2, 0.8), 'lr': 0.004714096150812326, 'l2': 0.0002727292150283849}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.048]
[2000 epo]: TRAIN:[loss=0.048]
[3000 epo]: TRAIN:[loss=0.048]
[4000 epo]: TRAIN:[loss=0.048]
[5000 epo]: TRAIN:[loss=0.048]
[6000 epo]: TRAIN:[loss=0.048]
[7000 epo]: TRAIN:[loss=0.048]
[8000 e

Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.048]
[2000 epo]: TRAIN:[loss=0.048]
[3000 epo]: TRAIN:[loss=0.048]
[4000 epo]: TRAIN:[loss=0.048]
[5000 epo]: TRAIN:[loss=0.048]
[6000 epo]: TRAIN:[loss=0.048]
[7000 epo]: TRAIN:[loss=0.048]
[8000 epo]: TRAIN:[loss=0.048]
[9000 epo]: TRAIN:[loss=0.048]
Finished Training
[126] Testing {'seed': 123, 'n_epochs': 12000, 'log_train_every': 1000, 'class_balance': (0.7000000000000001, 0.29999999999999993), 'lr': 0.011740394135690729, 'l2': 0.027440437176223496}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.049]
[2000 epo]: TRAIN:[loss=0.049]
[3000 epo]: TRAIN:[loss=0.049]
[4000 epo]: TRAIN:[loss=0.049]
[5000 epo]: TRAIN:[loss=0.049]
[6000 epo]: TRAIN:[loss=0.049]
[7000 epo]: TRAIN:[loss=0.049]
[8000 epo]: TRAIN:[loss=0.049]
[9000 epo]: TRAIN:[loss=0.049]
[10000 epo]: TRAIN:[loss=0.049]
[11000 epo]: TRAIN:[loss=0.049]
[12000 epo]: TRAIN:[loss=0.049]
Finished Training
[127] Testing {'seed': 123, 'n_epochs': 6000, 'log_train

[3000 epo]: TRAIN:[loss=0.048]
[4000 epo]: TRAIN:[loss=0.048]
[5000 epo]: TRAIN:[loss=0.048]
[6000 epo]: TRAIN:[loss=0.048]
[7000 epo]: TRAIN:[loss=0.048]
[8000 epo]: TRAIN:[loss=0.048]
[9000 epo]: TRAIN:[loss=0.048]
Finished Training
[137] Testing {'seed': 123, 'n_epochs': 1000, 'log_train_every': 1000, 'class_balance': (0.6000000000000001, 0.3999999999999999), 'lr': 0.00242839494996605, 'l2': 0.009600758437598087}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.048]
Finished Training
[138] Testing {'seed': 123, 'n_epochs': 4000, 'log_train_every': 1000, 'class_balance': (0.6000000000000001, 0.3999999999999999), 'lr': 6.235938593136109e-05, 'l2': 0.003464353557005218}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.132]
[2000 epo]: TRAIN:[loss=0.104]
[3000 epo]: TRAIN:[loss=0.091]
[4000 epo]: TRAIN:[loss=0.081]
Finished Training
[139] Testing {'seed': 123, 'n_epochs': 5000, 'log_train_every': 1000, 'class_balance': (0.1, 0.9), 'lr': 1.916169542475139e-05, 'l2': 6

[2000 epo]: TRAIN:[loss=0.050]
[3000 epo]: TRAIN:[loss=0.048]
[4000 epo]: TRAIN:[loss=0.048]
[5000 epo]: TRAIN:[loss=0.048]
Finished Training
[152] Testing {'seed': 123, 'n_epochs': 7000, 'log_train_every': 1000, 'class_balance': (0.9, 0.09999999999999998), 'lr': 0.0011958536911892395, 'l2': 0.0016400853726943386}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.106]
[2000 epo]: TRAIN:[loss=0.092]
[3000 epo]: TRAIN:[loss=0.084]
[4000 epo]: TRAIN:[loss=0.081]
[5000 epo]: TRAIN:[loss=0.080]
[6000 epo]: TRAIN:[loss=0.078]
[7000 epo]: TRAIN:[loss=0.073]
Finished Training
[153] Testing {'seed': 123, 'n_epochs': 18000, 'log_train_every': 1000, 'class_balance': (0.8, 0.19999999999999996), 'lr': 3.002700744491432e-05, 'l2': 0.0013795543080363612}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.987]
[2000 epo]: TRAIN:[loss=0.134]
[3000 epo]: TRAIN:[loss=0.107]
[4000 epo]: TRAIN:[loss=0.105]
[5000 epo]: TRAIN:[loss=0.104]
[6000 epo]: TRAIN:[loss=0.103]
[7000 epo]: TRAIN:[los

Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.731]
[2000 epo]: TRAIN:[loss=0.118]
[3000 epo]: TRAIN:[loss=0.106]
[4000 epo]: TRAIN:[loss=0.105]
[5000 epo]: TRAIN:[loss=0.103]
[6000 epo]: TRAIN:[loss=0.102]
[7000 epo]: TRAIN:[loss=0.101]
[8000 epo]: TRAIN:[loss=0.100]
[9000 epo]: TRAIN:[loss=0.099]
[10000 epo]: TRAIN:[loss=0.097]
[11000 epo]: TRAIN:[loss=0.096]
Finished Training
[164] Testing {'seed': 123, 'n_epochs': 2000, 'log_train_every': 1000, 'class_balance': (0.1, 0.9), 'lr': 6.896414905691217e-05, 'l2': 0.04603152787966457}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.080]
[2000 epo]: TRAIN:[loss=0.077]
Finished Training
[165] Testing {'seed': 123, 'n_epochs': 4000, 'log_train_every': 1000, 'class_balance': (0.1, 0.9), 'lr': 0.017009446974337213, 'l2': 3.300421246076478e-05}
Computing O...
Estimating \mu...
[1000 epo]: TRAIN:[loss=0.048]
[2000 epo]: TRAIN:[loss=0.048]
[3000 epo]: TRAIN:[loss=0.048]
[4000 epo]: TRAIN:[loss=0.048]
Finished Training
[166]

In [39]:
# Persist the best model found by the random search (path is relative to the
# notebook's working directory).
best_random_model.save('tuned_label_model_high_pre.pkl')

## Predict on everything

In [41]:
# Weak-label rows for every window of every video (train and test), iterated
# in sorted video-id order.
L_everything_windows_all = csr_matrix([
    weak_window.payload
    for vid in sorted(video_ids_all)
    for weak_window in windows_with_weak_labels_all.get_intervallist(vid).get_intervals()
])

In [43]:
# Predict with the tuned model. These predictions are saved below as
# '..._high_pre_tuned.npy', but the original cell used the untuned
# `label_model_everything_windows`, so the file name did not match its content.
# NOTE(review): switch back if the untuned model was in fact intended.
window_predictions_everything = best_random_model.predict_proba(L_everything_windows_all)

In [44]:
# (video_id, start, end) key for every window, enumerated in the same order as
# the rows of L_everything_windows_all.
window_nums = [
    (vid, weak_window.start, weak_window.end)
    for vid in sorted(video_ids_all)
    for weak_window in windows_with_weak_labels_all.get_intervallist(vid).get_intervals()
]

In [45]:
# Pair each window key with its prediction row; zip already yields exactly the
# (window_info, prediction) 2-tuples that are wanted.
predictions_to_save_windows = list(
    zip(window_nums, window_predictions_everything)
)

In [46]:
# Each row pairs a 3-element window key with a predict_proba row, so the rows
# are ragged. dtype=object requests the object array explicitly; without it
# NumPy >= 1.24 raises ValueError and older versions emit a deprecation warning.
preds_np_windows = np.array(predictions_to_save_windows, dtype=object)

In [47]:
# Write the window-level predictions to disk.
with open('../../data/shot_detection_weak_labels/noisy_labels_all_windows_high_pre_tuned.npy', 'wb') as out_file:
    np.save(out_file, preds_np_windows)

## Save Non-high pre predictions

In [48]:
# Load a previously tuned label model from disk (path is relative to the
# notebook's working directory).
# NOTE(review): 'tuned_label_model.pkl' is not produced by this notebook;
# presumably it comes from the non-"high_pre" variant of this analysis.
best_random_model_non_high_pre = LabelModel.load('tuned_label_model.pkl')

In [49]:
# Predict with the loaded model on the window label matrix built above.
# NOTE(review): L_everything_windows_all comes from the "high_pre" weak labels;
# confirm that is the intended input for the non-high-pre model.
window_predictions_low_pre = best_random_model_non_high_pre.predict_proba(L_everything_windows_all)

In [50]:
# Pair each window key with its prediction row from the loaded model; zip
# already yields exactly the (window_info, prediction) 2-tuples that are wanted.
predictions_to_save_windows_low_pre = list(
    zip(window_nums, window_predictions_low_pre)
)

In [51]:
# Each row pairs a 3-element window key with a predict_proba row, so the rows
# are ragged. dtype=object requests the object array explicitly; without it
# NumPy >= 1.24 raises ValueError and older versions emit a deprecation warning.
preds_np_windows_low_pre = np.array(predictions_to_save_windows_low_pre, dtype=object)

In [52]:
# save predictions to disk
# Save the predictions of the loaded (non-high-pre) model. The original cell
# wrote `preds_np_windows` here, i.e. a second copy of the high-pre
# predictions, under this file name.
with open('../../data/shot_detection_weak_labels/noisy_labels_all_windows_tuned.npy', 'wb') as f:
    np.save(f, preds_np_windows_low_pre)