In [1]:
import os
import torch

In [2]:
os.chdir("/home/shuaman/video_sm/video_summarization")

In [3]:
!pwd

/home/shuaman/video_sm/video_summarization


In [4]:
from src.models import MSVA
from src.utils import VSMDataset, parse_configuration
from src.utils.utils_model import *

In [5]:
msva = MSVA()

In [6]:
path_weights_summe = "/home/shuaman/video_sm/MSVA/model_weights/summe_random_non_overlap_0.5359.tar.pth"
path_weights_tvsum = "/home/shuaman/video_sm/MSVA/model_weights/tvsum_random_non_overlap_0.6271.tar.pth"

In [31]:
# Deserialize the checkpoint onto the CPU first: without map_location,
# torch.load restores each tensor to the device it was saved from, which
# fails when that GPU index is unavailable here. load_state_dict then copies
# the values into msva's existing parameters on their current device.
msva.load_state_dict(torch.load(path_weights_summe, map_location="cpu"))

<All keys matched successfully>

In [7]:
# Keyword -> processed .h5 file for each supported dataset (later unpacked
# into get_paths(..., **dict_paths)).
# Alternative tvsum / summe files kept for reference:
#   'path_tvsum': "/data/shuaman/video_summarization/datasets/processed_datasets/dataset_tvsum_processed.h5"
#   'path_summe': "/data/shuaman/video_summarization/datasets/processed_datasets/dataset_summe_processed.h5"
dict_paths = dict([
    ('path_tvsum', "/data/shuaman/video_summarization/datasets/processed_datasets/eccv16_dataset_tvsum_google_pool5.h5"),
    ('path_summe', "/data/shuaman/video_summarization/datasets/processed_datasets/eccv16_dataset_summe_google_pool5.h5"),
    ('path_ovp', "/data/shuaman/video_summarization/datasets/processed_datasets/eccv16_dataset_ovp_google_pool5.h5"),
    ('path_youtube', "/data/shuaman/video_summarization/datasets/processed_datasets/eccv16_dataset_youtube_google_pool5.h5"),
    ('path_cosum', "/data/shuaman/video_summarization/datasets/processed_datasets/dataset_cosum_processed.h5"),
])

In [8]:
dict_use_feature = get_flags_features("i3d", "googlenet")

In [9]:
dict_use_feature

{'googlenet': True,
 'resnext': False,
 'inceptionv3': False,
 'i3d_rgb': True,
 'i3d_flow': True,
 'resnet3d': False}

In [10]:
params = {
        'batch_size': 1,
        'num_workers': 4
        }


In [11]:
path_split = "/home/shuaman/video_sm/video_summarization/splits/vasnet_splits/summe_splits.json"
splits = parse_configuration(path_split)
split = splits[0]

In [12]:
split.keys()

dict_keys(['train_keys', 'test_keys'])

In [13]:
dataset_paths = get_paths('summe', 'canonical', **dict_paths)
dataset_paths

'/data/shuaman/video_summarization/datasets/processed_datasets/eccv16_dataset_summe_google_pool5.h5'

In [14]:
training_generator, test_generator = get_dataloaders(dataset_paths, split, 
                                                     dict_use_feature, params,
                                                    "/data/shuaman/video_summarization/datasets/processed_datasets/transformations.pk")

In [15]:
# Iterate the training loader once, only to count how many batches it yields.
it = 0
for it, _batch in enumerate(training_generator, start=1):
    pass
print(it)

20


In [16]:
# Iterate the test loader once, only to count how many batches it yields.
it = 0
for it, _batch in enumerate(test_generator, start=1):
    pass
print(it)

5


In [17]:
device = torch.device("cuda:3")

In [18]:
device

device(type='cuda', index=3)

In [19]:
optimizer = init_optimizer(msva, 0.00005, 0.00001)
criterion = torch.nn.MSELoss()
criterion.to(device)

MSELoss()

In [20]:
sameCount = 0
max_val_fscore = 0
maxkt = 0
maxsp = 0
maxtrl = 0
maxtsl = 0
max_val_fscoreLs=[]

In [21]:
import cv2
import numpy as np
import sys 

In [22]:
def train_step(training_generator, criterion, optimizer):
    """Run one optimisation pass over ``training_generator`` and return the mean loss.

    Relies on two notebook-level globals: ``msva`` (the model being trained)
    and ``device`` (where batch tensors are moved before the forward pass).

    Args:
        training_generator: iterable yielding ``(video_info, label)`` pairs.
            Every ``video_info`` entry whose key contains ``'features'`` is
            passed to the model; ``label['gtscore']`` is the regression target.
            NOTE(review): the ``squeeze(0)`` calls assume a leading batch
            dimension of 1 -- confirm against the DataLoader settings.
        criterion: loss callable invoked as ``criterion(prediction, target)``.
        optimizer: optimizer whose ``zero_grad()`` / ``step()`` are called once
            per batch.

    Returns:
        The mean of the per-batch loss values (``np.mean`` of the collected
        ``loss.item()`` values).
    """
    msva.train()

    batch_losses = []

    for video_info, label in training_generator:
        target = label['gtscore'].squeeze(0).cpu().numpy()
        features = [video_info[key].squeeze(0).cpu().numpy()
                    for key in video_info.keys() if 'features' in key]

        # Resample every feature stream to one row per target step.
        # cv2.resize takes dsize as (width, height), so the second axis is kept
        # and only the number of rows changes.
        # NOTE(review): assumes each feature array is 2-D -- confirm.
        n_steps = target.shape[0]
        features = [cv2.resize(feature, (feature.shape[1], n_steps), interpolation=cv2.INTER_AREA)
                    for feature in features]
        features = [torch.from_numpy(feature).unsqueeze(0).float().to(device)
                    for feature in features]

        # Min-max scale the target with torch ops. This is done out of place so
        # the batch's own label tensor (which shares memory with `target`) is
        # not modified, and the division is skipped for a constant target,
        # which would otherwise produce 0/0 = NaN and poison the gradients.
        target = torch.from_numpy(target).unsqueeze(0)
        target = target - target.min()
        peak = target.max()
        if peak > 0:
            target = target / peak
        target = target.float().to(device)

        seq_len = features[0].shape[1]
        y, _ = msva(features, seq_len)

        loss = criterion(y, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    avg_loss = np.mean(np.array(batch_losses))

    return avg_loss

In [23]:
msva.to(device)

MSVA(
  (att1_3): SelfAttention(
    (K): Linear(in_features=1024, out_features=1024, bias=False)
    (Q): Linear(in_features=1024, out_features=1024, bias=False)
    (V): Linear(in_features=1024, out_features=1024, bias=False)
    (output_linear): Linear(in_features=1024, out_features=1024, bias=False)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (ka1_3): Linear(in_features=1024, out_features=365, bias=True)
  (kb): Linear(in_features=365, out_features=365, bias=True)
  (kc): Linear(in_features=365, out_features=512, bias=True)
  (kd): Linear(in_features=512, out_features=1, bias=True)
  (sig): Sigmoid()
  (relu): ReLU()
  (dropout): Dropout(p=0.5, inplace=False)
  (softmax): Softmax(dim=0)
  (layer_norm_y_1_3): LayerNorm()
  (layer_norm_y_4): LayerNorm()
  (layer_norm_kc): LayerNorm()
  (layer_norm_kd): LayerNorm()
)

In [24]:
from src.utils import generate_summary, evaluate_summary
from scipy.stats import kendalltau, spearmanr, rankdata

In [25]:
def eval_function(test_generator):
    """Evaluate the global ``msva`` model on every item of ``test_generator``.

    Relies on two notebook-level globals: ``msva`` (the model) and ``device``
    (where batch tensors and the loss module are placed).

    For each video the function computes the MSE between the model output and
    the min-max scaled ``label['gtscore']``, builds a machine summary with
    ``generate_summary``, scores it with ``evaluate_summary`` against
    ``label['user_summary']`` (``'avg'`` when the dataset name is ``"tvsum"``,
    ``'max'`` otherwise), and computes Spearman / Kendall correlations between
    the machine summary and the mean of the user summaries over axis 0.

    Args:
        test_generator: iterable yielding ``(video_info, label)`` pairs.
            NOTE(review): the ``squeeze(0)`` calls assume a leading batch
            dimension of 1 -- confirm against the DataLoader settings.

    Returns:
        Tuple ``(f_score, kt, sp, avg_loss)``: the means over all videos of the
        F-measure, Kendall correlation, Spearman correlation and MSE loss.
        NOTE(review): SciPy reports ``nan`` for a constant input, and a single
        ``nan`` propagates into the corresponding mean.
    """
    msva.eval()

    # The loss module keeps no per-video state, so build it once, not per video.
    criterion = torch.nn.MSELoss()
    criterion.to(device)

    losses = []
    fms = []
    kts = []
    sps = []

    with torch.no_grad():
        for video_info, label in test_generator:
            target = label['gtscore'].squeeze(0).cpu().numpy()
            features = [video_info[key].squeeze(0).cpu().numpy()
                        for key in video_info.keys() if 'features' in key]

            # Resample every feature stream to one row per target step.
            # cv2.resize takes dsize as (width, height), so the second axis is
            # kept and only the number of rows changes.
            n_steps = target.shape[0]
            features = [cv2.resize(feature, (feature.shape[1], n_steps), interpolation=cv2.INTER_AREA)
                        for feature in features]
            features = [torch.from_numpy(feature).unsqueeze(0).float().to(device)
                        for feature in features]

            # Min-max scale the target with torch ops, out of place, and skip
            # the division for a constant target (it would give 0/0 = NaN).
            target = torch.from_numpy(target).unsqueeze(0)
            target = target - target.min()
            peak = target.max()
            if peak > 0:
                target = target / peak
            target = target.float().to(device)

            y, _ = msva(features, n_steps)

            test_loss = criterion(y, target)
            losses.append(test_loss.item())

            summary = y[0].detach().cpu().numpy()
            machine_summary = generate_summary(
                summary,
                video_info["change_points"].squeeze(0).cpu().numpy(),
                video_info["n_frames"].squeeze(0).cpu().numpy(),
                video_info["n_frame_per_seg"].squeeze(0).cpu().numpy(),
                video_info["picks"].squeeze(0).cpu().numpy(),
            )

            # Convert the user annotations once; they feed both the F-measure
            # and the rank correlations.
            user_summary = label["user_summary"].squeeze(0).cpu().numpy()

            eval_metric = 'avg' if video_info["name_dataset"][0] == "tvsum" else 'max'
            fm, _, _ = evaluate_summary(machine_summary, user_summary, eval_metric)
            fms.append(fm)

            y_pred2 = machine_summary
            y_true2 = user_summary.mean(axis=0)
            pS = spearmanr(y_pred2, y_true2)[0]
            kT = kendalltau(rankdata(-np.array(y_true2)), rankdata(-np.array(y_pred2)))[0]
            kts.append(kT)
            sps.append(pS)

    f_score = np.mean(fms)
    kt = np.mean(kts)
    sp = np.mean(sps)
    avg_loss = np.mean(np.array(losses))

    return f_score, kt, sp, avg_loss

In [32]:
for epoch in range(2):
    train_loss = train_step(training_generator, criterion, optimizer)
    f_score, kt, sp, test_loss = eval_function(test_generator)

In [33]:
sp

0.12407581160149403

In [34]:
kt

0.1102586827115101

In [35]:
f_score

0.43061976583898265

In [36]:
train_loss

0.03655345905572176

# TRAINING WITH SCRIPT

In [1]:
import os
import sys

os.chdir("/home/shuaman/video_sm/video_summarization")

In [None]:
# !wandb agent --count 10 stevramos/sports_video_summarization/bqfn7u4k

In [2]:
!python3 train_cross_val.py --use_sweep --params "/home/shuaman/video_sm/video_summarization/config.json"

[34m[1mwandb[0m: Currently logged in as: [33mstevramos[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: Tracking run with wandb version 0.12.4
[34m[1mwandb[0m: Syncing run [33mExp. i3d - inceptionv3[0m
[34m[1mwandb[0m:  View project at [34m[4mhttps://wandb.ai/stevramos/sports_video_summarization[0m
[34m[1mwandb[0m:  View run at [34m[4mhttps://wandb.ai/stevramos/sports_video_summarization/runs/hek4jblp[0m
[34m[1mwandb[0m: Run data is saved locally in /home/shuaman/video_sm/video_summarization/wandb/run-20211012_031203-hek4jblp
[34m[1mwandb[0m: Run `wandb offline` to turn off syncing.

Using device cuda:0
Split number 1
Training in NVIDIA GeForce RTX 2080 Ti
Losses/Metrics
Epoch [1/2], Train loss: 0.0926
Epoch [1/2], Test loss: 0.0808
 50%|██████████████████████▌                      | 1/2 [00:01<00:01,  1.19s/it]Epoch [1/2], F1 score: 0.2570
Epoch [1/2], Spearman s correlation: nan
Epoch [1/2], Kendall s correlation: nan
Losses/Metrics
10

In [3]:
from train import train

In [4]:
config_file = "/home/shuaman/video_sm/video_summarization/config.json"

In [6]:
train(config_file, use_wandb=True, pretrained_model="/home/shuaman/video_sm/MSVA/model_weights/summe_random_non_overlap_0.5359.tar.pth")

Using device cuda:0
Split number 1
Training in NVIDIA GeForce RTX 2080 Ti


 50%|█████     | 1/2 [00:01<00:01,  1.17s/it]

Losses/Metrics
Epoch [1/2], Train loss: 0.0395
Epoch [1/2], Test loss: 0.0343
Epoch [1/2], F1 score: 0.4336
Epoch [1/2], Spearman s correlation: 0.1293
Epoch [1/2], Kendall s correlation: 0.1151


100%|██████████| 2/2 [00:02<00:00,  1.23s/it]

Losses/Metrics
Epoch [2/2], Train loss: 0.0378
Epoch [2/2], Test loss: 0.0366
Epoch [2/2], F1 score: 0.4553
Epoch [2/2], Spearman s correlation: 0.1526
Epoch [2/2], Kendall s correlation: 0.1358
Split number 2
Training in NVIDIA GeForce RTX 2080 Ti



 50%|█████     | 1/2 [00:01<00:01,  1.24s/it]

Losses/Metrics
Epoch [1/2], Train loss: 0.0403
Epoch [1/2], Test loss: 0.0481
Epoch [1/2], F1 score: 0.5423
Epoch [1/2], Spearman s correlation: 0.3137
Epoch [1/2], Kendall s correlation: 0.2776


100%|██████████| 2/2 [00:02<00:00,  1.26s/it]

Losses/Metrics
Epoch [2/2], Train loss: 0.0372
Epoch [2/2], Test loss: 0.0509
Epoch [2/2], F1 score: 0.5136
Epoch [2/2], Spearman s correlation: 0.3236
Epoch [2/2], Kendall s correlation: 0.2866
Split number 3
Training in NVIDIA GeForce RTX 2080 Ti



 50%|█████     | 1/2 [00:01<00:01,  1.21s/it]

Losses/Metrics
Epoch [1/2], Train loss: 0.0408
Epoch [1/2], Test loss: 0.0323
Epoch [1/2], F1 score: 0.5600
Epoch [1/2], Spearman s correlation: 0.3565
Epoch [1/2], Kendall s correlation: 0.3202


100%|██████████| 2/2 [00:02<00:00,  1.25s/it]

Losses/Metrics
Epoch [2/2], Train loss: 0.0392
Epoch [2/2], Test loss: 0.0377
Epoch [2/2], F1 score: 0.5661
Epoch [2/2], Spearman s correlation: 0.3995
Epoch [2/2], Kendall s correlation: 0.3593
Split number 4
Training in NVIDIA GeForce RTX 2080 Ti



 50%|█████     | 1/2 [00:01<00:01,  1.23s/it]

Losses/Metrics
Epoch [1/2], Train loss: 0.0398
Epoch [1/2], Test loss: 0.0341
Epoch [1/2], F1 score: 0.5396
Epoch [1/2], Spearman s correlation: 0.2891
Epoch [1/2], Kendall s correlation: 0.2577


100%|██████████| 2/2 [00:02<00:00,  1.23s/it]

Losses/Metrics
Epoch [2/2], Train loss: 0.0370
Epoch [2/2], Test loss: 0.0342
Epoch [2/2], F1 score: 0.5393
Epoch [2/2], Spearman s correlation: 0.3374
Epoch [2/2], Kendall s correlation: 0.3012
Split number 5
Training in NVIDIA GeForce RTX 2080 Ti



 50%|█████     | 1/2 [00:01<00:01,  1.19s/it]

Losses/Metrics
Epoch [1/2], Train loss: 0.0419
Epoch [1/2], Test loss: 0.0423
Epoch [1/2], F1 score: 0.5248
Epoch [1/2], Spearman s correlation: 0.2632
Epoch [1/2], Kendall s correlation: 0.2367


100%|██████████| 2/2 [00:02<00:00,  1.22s/it]

Losses/Metrics
Epoch [2/2], Train loss: 0.0369
Epoch [2/2], Test loss: 0.0356
Epoch [2/2], F1 score: 0.5494
Epoch [2/2], Spearman s correlation: 0.3698
Epoch [2/2], Kendall s correlation: 0.3321
Metrics - cross validation
Train loss: 0.0388
Test loss: 0.0384
F1 score: 0.5305
Spearman s correlation: 0.3049
Kendall s correlation: 0.2725





VBox(children=(Label(value=' 18.68MB of 37.36MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.500011117…

0,1
epochsplit_2,▁█
epochsplit_3,▁█
epochsplit_4,▁█
epochsplit_5,▁█
epochsplit_6,▁█
f_score,▁
f_scoresplit_2,▁█
f_scoresplit_3,█▁
f_scoresplit_4,▁█
f_scoresplit_5,█▁

0,1
epochsplit_2,2.0
epochsplit_3,2.0
epochsplit_4,2.0
epochsplit_5,2.0
epochsplit_6,2.0
f_score,0.53054
f_scoresplit_2,0.45525
f_scoresplit_3,0.51364
f_scoresplit_4,0.56613
f_scoresplit_5,0.53927


# Using class VSM

In [1]:
import os
import sys

os.chdir("/home/shuaman/video_sm/video_summarization")

import wandb
import tqdm
from src.utils import parse_arguments_train, set_seed, configure_model, parse_configuration
from src.models import VideoSumarizer

In [2]:
PROJECT_WANDB = "sports_video_summarization"

In [3]:
use_wandb = True

In [4]:
config_file = "/home/shuaman/video_sm/video_summarization/config.json"

In [5]:
path_weights_summe = "/home/shuaman/video_sm/MSVA/model_weights/summe_random_non_overlap_0.5359.tar.pth"
path_weights_tvsum = "/home/shuaman/video_sm/MSVA/model_weights/tvsum_random_non_overlap_0.6271.tar.pth"

In [13]:
set_seed(12345)
config = configure_model(config_file, use_wandb)

#config.type_setting = "non_overlap_rand"

# Look up the split file registered in the config for the selected dataset
# and setting.
# (attribute-style alternative kept for reference:
#  split_name = f'path_split_{config.type_dataset}_{config.type_setting}'
#  path_split = vars(config)[split_name])
split_name = f'path_split_{config["type_dataset"]}_{config["type_setting"]}'
path_split = config[split_name]

splits = parse_configuration(path_split)

# Running sums of the five values returned by vsm.train for every split;
# they are turned into averages after the loop.
f_avg = kt_avg = sp_avg = trl_avg = tsl_avg = 0

for n_split in range(len(splits)):
    run_name = f'xd_{n_split+1}'

    # Reload the configuration for every split, so each run starts from the
    # file contents rather than from the previous split's overrides.
    config = configure_model(config_file, use_wandb)
    if use_wandb:
        wandb.init(project=PROJECT_WANDB, config=config, name=run_name)
        config = wandb.config
        wandb.watch_called = False

    # Override three configuration fields before building the summarizer.
    config.epochs_max = 2
    config.num_backups = 2
    config.save_weights = False

    vsm = VideoSumarizer(config, use_wandb)
    vsm.load_weights(path_weights_summe)

    max_val_fscore, maxkt, maxsp, maxtrl, maxtsl = vsm.train(splits[n_split], n_split+1)

    f_avg += max_val_fscore
    kt_avg += maxkt
    sp_avg += maxsp
    trl_avg += maxtrl
    tsl_avg += maxtsl
    print()

# Turn the sums into per-split averages.
f_avg /= len(splits)
kt_avg /= len(splits)
sp_avg /= len(splits)
trl_avg /= len(splits)
tsl_avg /= len(splits)

for caption, value in (
    ("f_avg:", f_avg),
    ("kt_avg:", kt_avg),
    ("sp_avg:", sp_avg),
    ("trl_avg:", trl_avg),
    ("tsl_avg:", tsl_avg),
):
    print(caption, value)

Using device cuda:0
Training in NVIDIA GeForce RTX 2080 Ti


 50%|█████     | 1/2 [00:01<00:01,  1.15s/it]

Losses/Metrics
Epoch [1/2], Train loss: 0.0385
Epoch [1/2], Test loss: 0.0456
Epoch [1/2], F1 score: 0.4414
Epoch [1/2], Spearman s correlation: 0.1387
Epoch [1/2], Kendall s correlation: 0.1235


100%|██████████| 2/2 [00:02<00:00,  1.15s/it]

Losses/Metrics
Epoch [2/2], Train loss: 0.0365
Epoch [2/2], Test loss: 0.0363
Epoch [2/2], F1 score: 0.4953
Epoch [2/2], Spearman s correlation: 0.1950
Epoch [2/2], Kendall s correlation: 0.1740





VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁█
f_score,▁█
kt,▁█
sp,▁█
test_loss,█▁
train_loss,█▁

0,1
epoch,2.0
f_score,0.49531
kt,0.17399
sp,0.195
test_loss,0.03626
train_loss,0.03646





Using device cuda:0
Training in NVIDIA GeForce RTX 2080 Ti


 50%|█████     | 1/2 [00:01<00:01,  1.18s/it]

Losses/Metrics
Epoch [1/2], Train loss: 0.0388
Epoch [1/2], Test loss: 0.0521
Epoch [1/2], F1 score: 0.5691
Epoch [1/2], Spearman s correlation: 0.3421
Epoch [1/2], Kendall s correlation: 0.3031


100%|██████████| 2/2 [00:02<00:00,  1.17s/it]

Losses/Metrics
Epoch [2/2], Train loss: 0.0375
Epoch [2/2], Test loss: 0.0542
Epoch [2/2], F1 score: 0.5995
Epoch [2/2], Spearman s correlation: 0.3666
Epoch [2/2], Kendall s correlation: 0.3248





VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁█
f_score,▁█
kt,▁█
sp,▁█
test_loss,▁█
train_loss,█▁

0,1
epoch,2.0
f_score,0.59951
kt,0.32478
sp,0.3666
test_loss,0.05423
train_loss,0.03751





Using device cuda:0
Training in NVIDIA GeForce RTX 2080 Ti


 50%|█████     | 1/2 [00:01<00:01,  1.15s/it]

Losses/Metrics
Epoch [1/2], Train loss: 0.0423
Epoch [1/2], Test loss: 0.0324
Epoch [1/2], F1 score: 0.5515
Epoch [1/2], Spearman s correlation: 0.3319
Epoch [1/2], Kendall s correlation: 0.2986


100%|██████████| 2/2 [00:02<00:00,  1.15s/it]

Losses/Metrics
Epoch [2/2], Train loss: 0.0377
Epoch [2/2], Test loss: 0.0360
Epoch [2/2], F1 score: 0.5757
Epoch [2/2], Spearman s correlation: 0.3570
Epoch [2/2], Kendall s correlation: 0.3218





VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁█
f_score,▁█
kt,▁█
sp,▁█
test_loss,▁█
train_loss,█▁

0,1
epoch,2.0
f_score,0.57566
kt,0.32176
sp,0.357
test_loss,0.03603
train_loss,0.03772





Using device cuda:0
Training in NVIDIA GeForce RTX 2080 Ti


 50%|█████     | 1/2 [00:01<00:01,  1.14s/it]

Losses/Metrics
Epoch [1/2], Train loss: 0.0396
Epoch [1/2], Test loss: 0.0410
Epoch [1/2], F1 score: 0.5676
Epoch [1/2], Spearman s correlation: 0.3059
Epoch [1/2], Kendall s correlation: 0.2733


100%|██████████| 2/2 [00:02<00:00,  1.13s/it]

Losses/Metrics
Epoch [2/2], Train loss: 0.0401
Epoch [2/2], Test loss: 0.0438
Epoch [2/2], F1 score: 0.5675
Epoch [2/2], Spearman s correlation: 0.3219
Epoch [2/2], Kendall s correlation: 0.2876





VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁█
f_score,█▁
kt,▁█
sp,▁█
test_loss,▁█
train_loss,▁█

0,1
epoch,2.0
f_score,0.56748
kt,0.28758
sp,0.32192
test_loss,0.04379
train_loss,0.04007





Using device cuda:0
Training in NVIDIA GeForce RTX 2080 Ti


 50%|█████     | 1/2 [00:01<00:01,  1.10s/it]

Losses/Metrics
Epoch [1/2], Train loss: 0.0385
Epoch [1/2], Test loss: 0.0354
Epoch [1/2], F1 score: 0.5406
Epoch [1/2], Spearman s correlation: 0.3599
Epoch [1/2], Kendall s correlation: 0.3234


100%|██████████| 2/2 [00:02<00:00,  1.12s/it]

Losses/Metrics
Epoch [2/2], Train loss: 0.0369
Epoch [2/2], Test loss: 0.0368
Epoch [2/2], F1 score: 0.5058
Epoch [2/2], Spearman s correlation: 0.3562
Epoch [2/2], Kendall s correlation: 0.3203





VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁█
f_score,█▁
kt,█▁
sp,█▁
test_loss,▁█
train_loss,█▁

0,1
epoch,2.0
f_score,0.50577
kt,0.3203
sp,0.35622
test_loss,0.0368
train_loss,0.03688



f_avg: 0.5557189257213155
kt_avg: 0.2834319440237536
sp_avg: 0.3168712526714493
trl_avg: 0.03797306406311691
tsl_avg: 0.040591421052813534


# Infer a summary

In [1]:
import os
import sys

os.chdir("/home/shuaman/video_sm/video_summarization")

import wandb
import tqdm
from src.utils import parse_arguments_train, set_seed, configure_model, parse_configuration
from src.models import VideoSumarizer
import math

In [2]:
config_file = "/home/shuaman/video_sm/video_summarization/configs/config_deployment.json"

In [3]:
use_wandb = False

In [4]:
path_weights_summe = "/home/shuaman/video_sm/MSVA/model_weights/summe_random_non_overlap_0.5359.tar.pth"
path_weights_tvsum = "/home/shuaman/video_sm/MSVA/model_weights/tvsum_random_non_overlap_0.6271.tar.pth"

In [5]:
set_seed(12345)

In [6]:
config = configure_model(config_file, use_wandb)

In [7]:
vsm = VideoSumarizer(config, use_wandb)

Using device cuda:0


In [8]:
vsm.load_weights_descriptor_models()

In [9]:
%%time
video_name, tam, res_w, res_h, fps, dur_orig, dur_spotlight, n_segments = vsm.infer('/data/shuaman/video_summarization/datasets/raw_datasets/VSUMM/new_database/v71.avi',
                                                                                             proportion=0.15)

processing video


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


flow optical
extracting featres of rgb
extracting featres of flow
calculatin change points
forward prop
generating summary
CPU times: user 5min 17s, sys: 4.13 s, total: 5min 21s
Wall time: 1min 58s


In [10]:
video_name, tam, res_w, res_h, fps, dur_orig, dur_spotlight, n_segments

('v71.avi',
 16553384,
 320,
 240,
 29.916666666666668,
 275.0306406685237,
 41.1142061281337,
 11)

In [11]:
import mimetypes

In [12]:
mimetypes.init()

In [24]:
mimetypes.types_map['.csv']

'text/csv'

In [25]:
video_name, tam, res_w, res_h, fps, dur_orig, summary, change_points, n_frames, n_frame_per_seg, picks = vsm.summarize_video('/data/shuaman/video_summarization/datasets/raw_datasets/VSUMM/new_database/v71.avi')

processing video
flow optical
extracting featres of rgb
extracting featres of flow
calculatin change points
forward prop


In [27]:
video_name, tam, res_w, res_h, fps, dur_orig

('v71.avi', 16553384, 320, 240, 29.916666666666668, 275.0306406685237)

In [28]:
dur_spotlight, n_segments = vsm.generate_summary_proportion('/data/shuaman/video_summarization/datasets/raw_datasets/VSUMM/new_database/v71.avi',
                                                            summary, change_points, n_frames, n_frame_per_seg, picks, 0.15, "output.mp4")

generating summary


In [29]:
dur_spotlight, n_segments

(41.1142061281337, 11)

In [10]:
tam, res_w, res_h, fps, dur_orig, dur_spotlight, n_segments

(16553384,
 320,
 240,
 29.916666666666668,
 275.0306406685237,
 41.1142061281337,
 11)

In [13]:
round(tam/1e6,1)

16.6

In [15]:
round(fps,1)

29.9

In [25]:
math.floor(round(dur_orig,0)/60)

4

In [18]:
round(dur_orig,0)%60

35.0

In [None]:
round(fps,1)

In [10]:
tam, res_w, res_h, fps, dur_orig, dur_spotlight, n_segments

(16553384,
 320,
 240,
 29.916666666666668,
 275.0306406685237,
 41.1142061281337,
 11)

In [13]:
tam/1e6

16.553384

In [11]:
machine_summary

array([0., 0., 0., ..., 0., 0., 0.], dtype=float32)

In [12]:
machine_summary.sum()

1230.0

In [24]:
len(machine_summary)

8228

In [18]:
from itertools import groupby

In [19]:
def groups(l):
    """Return the sum of each maximal run of consecutive elements equal to 1.

    For a 0/1 sequence this is the length of every run of ones, in order of
    appearance. Runs of any other value are ignored.
    """
    run_totals = []
    for value, run in groupby(l):
        if value == 1:
            run_totals.append(sum(run))
    return run_totals

In [29]:
groups(machine_summary)

[30.0, 255.0, 165.0, 120.0, 90.0, 30.0, 45.0, 150.0, 195.0, 90.0, 60.0]

In [26]:
len(groups(machine_summary))

11

In [14]:
# Count the selected frames and the contiguous selected segments in one pass.
# A segment starts at every selected frame whose predecessor is not selected
# (or that is the very first frame). The previous version tested
# `machine_summary[frame_idx] == 0` inside the "frame is selected" branch,
# which is never true, so n_segments stayed at 0.
n_segments = 0
n_frames_spotlight = 0
for frame_idx in range(len(machine_summary)):
    if machine_summary[frame_idx]:
        n_frames_spotlight += 1
        if frame_idx == 0 or not machine_summary[frame_idx - 1]:
            n_segments += 1
    
        
    

array([0., 0., 0., ..., 0., 0., 0.], dtype=float32)

In [None]:
# video  3 min duration
# CPU times: user 5min 22s, sys: 5.13 s, total: 5min 27s
# Wall time: 1min 27s