# Notebook to retrieve the results reported in the paper 

# Helper functions and model loading:

In [23]:
import h5py 
import numpy as np 
import json 
from Utils import *
from Model import model_dict,params_dict
import os 
import torch
from Data import VideoData
from torch.utils.data import DataLoader
import torch.optim as optim
from tqdm import tqdm
seeds = [12412,31235,123123,53216,123151]

In [19]:
def correlation_single_pred(score,video_name,dataset,dataset_name='tvsum',downsample_gt=True):
    "This compares the scores with a downsampled version of the ground truth, Scenario 1"
    kendall_spearman_scores = []
    if dataset_name=="tvsum":
        data = load_tvsum_mat('Utils//ydata-tvsum50.mat')
        video_number = int(video_name.split('_')[1])
        all_user_summary = data[video_number-1]['user_anno'].T
        pick = dataset[video_name]['picks']
        all_correlations_tau = []
        all_correlations_spearman = []
        for user_summary in all_user_summary:
            if downsample_gt:
                down_sampled_summary = (user_summary/user_summary.max())[pick] # Change this to take the picks from which a certain frame was sampled from
            else:
                down_sampled_summary = (user_summary/user_summary.max())
        
            correlation_tau = kendalltau(-rankdata(down_sampled_summary),-rankdata(score))[0]
            correlation_spear = spearmanr(down_sampled_summary,score)[0]
            all_correlations_tau.append(correlation_tau)
            all_correlations_spearman.append(correlation_spear)
        kendall_spearman_scores.append(np.mean(all_correlations_tau))
        kendall_spearman_scores.append(np.mean(all_correlations_spearman))
    elif dataset_name =="summe":
        user_summarie = dataset[video_name]['user_summary']
        pick = dataset[video_name]['picks']
        if downsample_gt:
            averaged_downsampled_summary = np.average(user_summarie,axis=0)[::15]
        else:
            averaged_downsampled_summary = np.average(user_summarie,axis=0)
        kendall_score = kendalltau(rankdata(averaged_downsampled_summary),rankdata(score))[0]
        spearman_score = spearmanr(averaged_downsampled_summary,score)[0]
        kendall_spearman_scores.append(np.mean(kendall_score))
        kendall_spearman_scores.append(np.mean(spearman_score))
    
    return kendall_spearman_scores

# This should take an Upsampled score, or post knapsack score and then compare the correlation between them
def correlation_with_knapsack_scores(score,video_name,dataset):
    ''' This function first performs the knapsack processing'''
    kendall_spearman_scores = []
    avg_correlation_kendall = []
    avg_correlation_spearman = []
    user_summaries = dataset[video_name]['user_summary'][...]
    for user_summary in user_summaries:
        avg_correlation_kendall.append(kendalltau(-rankdata(user_summary),-rankdata(score))[0])
        avg_correlation_spearman.append(spearmanr(user_summary,score)[0])
    kendall_spearman_scores.append(np.mean(avg_correlation_kendall))
    kendall_spearman_scores.append(np.mean(avg_correlation_spearman))

    return kendall_spearman_scores

def correlation_with_average_gt(score,video_name,dataset):
    kendall_spearman_scores = []
    user_summary = dataset[video_name]['gtscore'][...]
    kendall_spearman_scores.append(kendalltau(-rankdata(user_summary),-rankdata(score))[0])
    kendall_spearman_scores.append(spearmanr(user_summary,score)[0])

    return kendall_spearman_scores


def upsample_prediction(score,picks,video_length):
    upsampled_pred = np.zeros(video_length)
    for i in range(len(picks)-1):
        upsampled_pred[picks[i]:picks[i+1]] = score[i]

    return upsampled_pred 
def knapsack_wrapper_with_rating(score,test_index,dataset,dataset_name):
    ''' This wrapper is used for scenario 2, Knapsack into evaluation of the correlation '''
    shot_boundaries = dataset[test_index]['change_points'][...]
    positions = dataset[test_index]['picks'][...]
    n_frames = dataset[test_index]['n_frames'][...]
    knapsack_pred = generate_summary_single(shot_boundaries,score,n_frames,positions)
    return correlation_single_pred(knapsack_pred,test_index,dataset,dataset_name,False)

    

def upsample_wrapper(score,test_index,dataset,dataset_name):
    '''This wrapper performs Scenario 3 post-processing, upsampling model prediction into evaluation'''
    positions = dataset[test_index]['picks'][...]
    n_frames = dataset[test_index]['n_frames'][...]
    upsampled_pred = upsample_prediction(score,positions,n_frames)
    return correlation_single_pred(upsampled_pred,test_index,dataset,dataset_name,False)

In [26]:
def evaluate(run_number, config_path,save_path = 'weights'):
    with open(config_path,'r') as config_file:
        config = json.load(config_file)
    assert config['Model'] in model_dict.keys(), "Model is not available, modify dictionary to include them or check spelling"
    dataset_name = config['split'].split("_")[0]
    split_string = config['split'].strip(dataset_name).strip('.json')
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    modelclass = model_dict[config['Model']]
    criterion = loss_dict[config['loss_function']]()
    num_epochs = config["num_epochs"]
    feature_extractor = config['feature_extractor']
    save_name = f'{feature_extractor}_{dataset_name}{split_string}'
    save_path = os.path.join(save_path,save_name,dataset_name,config['Model'])
    params = params_dict[config['Model']][config['feature_extractor']]

    if config['data_aug'] :  # Unused function for this work
        pass
    else:
        data_augmentations = []
    weight_path =save_path
    splits = config['total_splits'] if 'total_splits' in config.keys() else 5
    dataset = h5py.File(os.path.join('Data',config['feature_extractor'],f'{config["feature_extractor"]}_{dataset_name}.h5'))
    print(params)
    split_perfs_1 = [] 
    split_perfs_2 = []
    split_perfs_3 = []
    split_perfs_4 = []
    for split in range(splits):
        print(f"Running Split:  {split+1}  for model: {config['Model']}")
        model = modelclass(**params)
        testdata = VideoData('test',config['split'],split,feature_extractor=feature_extractor,trainval=True)
        testloader = DataLoader(testdata,batch_size=1,shuffle=False)
        test_datapoints = []
        test_names = []
        # Scenario 1 
        weight_path_split = os.path.join(weight_path,f"split_{split+1}",f'best_run_corr_run_{run_number}_scenario_1.pth')  
        model.load_state_dict(torch.load(weight_path_split,map_location=device))
        model.to(device)
        model.eval()
# Adding the correlation scores to have the picks from the datapoints 
        for inputs_t,names in testloader:
            with torch.no_grad():
                importance_scores = model(inputs_t.to(device))
            importance_scores = importance_scores[0].to('cpu').tolist()
            test_datapoints.append(importance_scores)
            test_names.append(names[0])
        correlation_dict = evaluate_correlation(test_datapoints ,dataset,test_names,dataset_name)
        split_perfs_1.append(correlation_dict['Average_Kendall'])
        del model   
# Adding the correlation scores to have the picks from the datapoints 
        test_datapoints = []
        test_names = []
        weight_path_split = os.path.join(weight_path,f"split_{split+1}",f'best_run_corr_run_{run_number}_scenario_2.pth')  
        model = modelclass(**params)
        model.load_state_dict(torch.load(weight_path_split,map_location=device))
        model.to(device)
        model.eval()
        for inputs_t,names in testloader:
            with torch.no_grad():
                importance_scores = model(inputs_t.to(device))
            importance_scores = importance_scores[0].to('cpu').tolist()
            test_datapoints.append(importance_scores)
            test_names.append(names[0])
        split_perfs_2.append(np.mean(np.array([knapsack_wrapper_with_rating(score,test_name,dataset,dataset_name) for score,test_name in zip(test_datapoints,test_names)])[:,0]))
        del model   
        test_datapoints = []
        test_names = []        
        weight_path_split = os.path.join(weight_path,f"split_{split+1}",f'best_run_corr_run_{run_number}_scenario_3.pth')  
        model = modelclass(**params)
        
        model.load_state_dict(torch.load(weight_path_split,map_location=device))
        model.to(device)

        model.eval()
        for inputs_t,names in testloader:
            with torch.no_grad():
                importance_scores = model(inputs_t.to(device))
            importance_scores = importance_scores[0].to('cpu').tolist()
            test_datapoints.append(importance_scores)
            test_names.append(names[0])
        del model
        split_perfs_3.append(np.mean(np.array([upsample_wrapper(score,test_name,dataset,dataset_name) for score,test_name in zip(test_datapoints,test_names)])[:,0]))        
        test_datapoints = []
        test_names = []        
        weight_path_split = os.path.join(weight_path,f"split_{split+1}",f'best_run_corr_run_{run_number}_scenario_4.pth')  
        model = modelclass(**params)
        model.load_state_dict(torch.load(weight_path_split,map_location=device))
        model.to(device)        
        model.eval()
        for inputs_t,names in testloader:
            with torch.no_grad():
                importance_scores = model(inputs_t.to(device))
            importance_scores = importance_scores[0].to('cpu').tolist()
            test_datapoints.append(importance_scores)
            test_names.append(names[0])
        split_perfs_4.append(np.mean(np.array([correlation_with_average_gt(score,test_name,dataset) for score,test_name in zip(test_datapoints,test_names)])[:,0]))        
    return np.mean(split_perfs_1),np.mean(split_perfs_2),np.mean(split_perfs_3),np.mean(split_perfs_4)

In [15]:
five_trial_scenario_1 = []
five_trial_scenario_2 = []
five_trial_scenario_3 = []
five_trial_scenario_4 = []
for i in range(5):
    torch.manual_seed(seeds[i])
    best_correlation,best_correlation_scenario_2,best_correlation_scenario_3,best_correlation_scenario_4  = evaluate(i,'Configs/MLP/googlenet_tvsum_can_1.json')
    five_trial_scenario_1.append(best_correlation)
    five_trial_scenario_2.append(best_correlation_scenario_2)
    five_trial_scenario_3.append(best_correlation_scenario_3)
    five_trial_scenario_4.append(best_correlation_scenario_4)

{'input_dims': 1024, 'feedforward_dims': 512}
Running Split:  1  for model: MLP
Data\googlenet
Data\googlenet\googlenet_summe.h5
googlenet_summe.h5
summe
Data\googlenet\googlenet_tvsum.h5
googlenet_tvsum.h5
tvsum


KeyError: 'tvsum50'

In [21]:
print('Mean over five iterations')
print(np.mean(five_trial_scenario_1))
print(np.mean(five_trial_scenario_2))
print(np.mean(five_trial_scenario_3))
print(np.mean(five_trial_scenario_4))

Mean over five iterations
0.10813986140117286
0.22213794167927006
0.09152080062119303
0.09762554758569422


In [27]:
five_trial_scenario_1 = []
five_trial_scenario_2 = []
five_trial_scenario_3 = []
five_trial_scenario_4 = []
for i in range(5):
    torch.manual_seed(seeds[i])
    best_correlation,best_correlation_scenario_2,best_correlation_scenario_3,best_correlation_scenario_4  = evaluate(i,'Configs/MLP/googlenet_summe_can_1.json')
    five_trial_scenario_1.append(best_correlation)
    five_trial_scenario_2.append(best_correlation_scenario_2)
    five_trial_scenario_3.append(best_correlation_scenario_3)
    five_trial_scenario_4.append(best_correlation_scenario_4)


{'input_dims': 1024, 'feedforward_dims': 512}
Running Split:  1  for model: MLP
Data\googlenet
Data\googlenet\googlenet_summe.h5
googlenet_summe.h5
summe
Data\googlenet\googlenet_tvsum.h5
googlenet_tvsum.h5
tvsum
Running Split:  2  for model: MLP
Data\googlenet
Data\googlenet\googlenet_summe.h5
googlenet_summe.h5
summe
Data\googlenet\googlenet_tvsum.h5
googlenet_tvsum.h5
tvsum
Running Split:  3  for model: MLP
Data\googlenet
Data\googlenet\googlenet_summe.h5
googlenet_summe.h5
summe
Data\googlenet\googlenet_tvsum.h5
googlenet_tvsum.h5
tvsum
Running Split:  4  for model: MLP
Data\googlenet
Data\googlenet\googlenet_summe.h5
googlenet_summe.h5
summe
Data\googlenet\googlenet_tvsum.h5
googlenet_tvsum.h5
tvsum
Running Split:  5  for model: MLP
Data\googlenet
Data\googlenet\googlenet_summe.h5
googlenet_summe.h5
summe
Data\googlenet\googlenet_tvsum.h5
googlenet_tvsum.h5
tvsum
{'input_dims': 1024, 'feedforward_dims': 512}
Running Split:  1  for model: MLP
Data\googlenet
Data\googlenet\googlenet

In [28]:
print('Mean over five iterations')
print(np.mean(five_trial_scenario_1))
print(np.mean(five_trial_scenario_2))
print(np.mean(five_trial_scenario_3))
print(np.mean(five_trial_scenario_4))

Mean over five iterations
0.08415445081748876
0.15273205176557683
0.08621844444669655
0.08415445081748876


# Replication 

In [25]:
def g(seq):
    # http://stackoverflow.com/questions/3382352/equivalent-of-numpy-argsort-in-basic-python/3383106#3383106
    #lambda version by Tony Veijalainen
    return [x for x,y in sorted(enumerate(seq), key = lambda x: x[1])]

In [33]:
gt_shot_boundary = h5py.File('Data/googlenet/googlenet_tvsum.h5')
googlenet_shots = np.load('googlenet_shot_boundaries.npy',allow_pickle=True)
resnet_shots = np.load('resnet_shot_boundaries.npy',allow_pickle=True)
densenet_shots = np.load('densenet_shot_boundaries.npy',allow_pickle=True)
dataset_keys = list(gt_shot_boundary.keys())
lengths  = [(gt_shot_boundary[key]['n_frames'][...].item()) for key in list(gt_shot_boundary.keys())]

indices =g(lengths)


In [34]:
googlenet_f1_scores = []
resnet_f1_scores = []
densenet_f1_scores = []

In [35]:
for i,index in enumerate(indices[:10]):
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),googlenet_shots[i].flatten())
    googlenet_f1_scores.append(f1)
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),resnet_shots[i].flatten())
    resnet_f1_scores.append(f1)
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),densenet_shots[i].flatten())
    densenet_f1_scores.append(f1)

In [36]:
perfs_avg = []
for i in range(10):
    _,_,f1_goog_res = calculate_metrics(resnet_shots[i].flatten(),googlenet_shots[i].flatten())
    _,_,f1_res_dense= calculate_metrics(densenet_shots[i].flatten(),resnet_shots[i].flatten())
    _,_,f1_dens_gog= calculate_metrics(densenet_shots[i].flatten(),googlenet_shots[i].flatten())
    perfs_avg.append( np.mean([f1_goog_res,f1_res_dense]))

In [37]:
print(np.mean(perfs_avg))

0.7576484500440622


In [38]:

print(f'Googlenet average f1 : {np.mean(googlenet_f1_scores)}')
print(f'resnet average f1 : {np.mean(resnet_f1_scores)}')
print(f'DenseNet average f1 : {np.mean(densenet_f1_scores)}')

Googlenet average f1 : 0.4517452491313002
resnet average f1 : 0.35952287214641804
DenseNet average f1 : 0.35952287214641804


In [None]:
googlenet_shots = np.load('googlenet_shot_boundaries_0.8_summe.npy',allow_pickle=True)
resnet_shots = np.load('resnet_shot_boundaries_vmax_0.8_summe.npy',allow_pickle=True)
densenet_shots = np.load('densenet_shot_boundaries_0.8_summe.npy',allow_pickle=True)
googlenet_f1_scores = []
resnet_f1_scores = []
densenet_f1_scores = []
for i,index in enumerate(indices[:10]):
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),googlenet_shots[i].flatten())
    googlenet_f1_scores.append(f1)
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),resnet_shots[i].flatten())
    resnet_f1_scores.append(f1)
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),densenet_shots[i].flatten())
    densenet_f1_scores.append(f1)


print(f'Googlenet average f1 : {np.mean(googlenet_f1_scores)}')
print(f'resnet average f1 : {np.mean(resnet_f1_scores)}')
print(f'DenseNet average f1 : {np.mean(densenet_f1_scores)}')

In [None]:
perfs_avg = []
for i in range(10):
    _,_,f1_goog_res = calculate_metrics(resnet_shots[i].flatten(),googlenet_shots[i].flatten())
    _,_,f1_res_dense= calculate_metrics(densenet_shots[i].flatten(),resnet_shots[i].flatten())
    _,_,f1_dens_gog= calculate_metrics(densenet_shots[i].flatten(),googlenet_shots[i].flatten())
    perfs_avg.append( np.mean([f1_goog_res,f1_res_dense]))
print(np.mean(perfs_avg))

In [None]:
googlenet_shots = np.load('googlenet_shot_boundaries_0.6_summe.npy',allow_pickle=True)
resnet_shots = np.load('resnet_shot_boundaries_vmax_0.6_summe.npy',allow_pickle=True)
densenet_shots = np.load('densenet_shot_boundaries_0.6_summe.npy',allow_pickle=True)
googlenet_f1_scores = []
resnet_f1_scores = []
densenet_f1_scores = []
for i,index in enumerate(indices[:10]):
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),googlenet_shots[i].flatten())
    googlenet_f1_scores.append(f1)
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),resnet_shots[i].flatten())
    resnet_f1_scores.append(f1)
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),densenet_shots[i].flatten())
    densenet_f1_scores.append(f1)


print(f'Googlenet average f1 : {np.mean(googlenet_f1_scores)}')
print(f'resnet average f1 : {np.mean(resnet_f1_scores)}')
print(f'DenseNet average f1 : {np.mean(densenet_f1_scores)}')

In [None]:
perfs_avg = []
for i in range(10):
    _,_,f1_goog_res = calculate_metrics(resnet_shots[i].flatten(),googlenet_shots[i].flatten())
    _,_,f1_res_dense= calculate_metrics(densenet_shots[i].flatten(),resnet_shots[i].flatten())
    _,_,f1_dens_gog= calculate_metrics(densenet_shots[i].flatten(),googlenet_shots[i].flatten())
    perfs_avg.append( np.mean([f1_goog_res,f1_res_dense]))
print(np.mean(perfs_avg))

In [None]:
googlenet_shots = np.load('googlenet_shot_boundaries_0.4_summe.npy',allow_pickle=True)
resnet_shots = np.load('resnet_shot_boundaries_vmax_0.4_summe.npy',allow_pickle=True)
densenet_shots = np.load('densenet_shot_boundaries_0.4_summe.npy',allow_pickle=True)
googlenet_f1_scores = []
resnet_f1_scores = []
densenet_f1_scores = []
for i,index in enumerate(indices[:10]):
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),googlenet_shots[i].flatten())
    googlenet_f1_scores.append(f1)
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),resnet_shots[i].flatten())
    resnet_f1_scores.append(f1)
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),densenet_shots[i].flatten())
    densenet_f1_scores.append(f1)


print(f'Googlenet average f1 : {np.mean(googlenet_f1_scores)}')
print(f'resnet average f1 : {np.mean(resnet_f1_scores)}')
print(f'DenseNet average f1 : {np.mean(densenet_f1_scores)}')

In [None]:
perfs_avg = []
for i in range(10):
    _,_,f1_goog_res = calculate_metrics(resnet_shots[i].flatten(),googlenet_shots[i].flatten())
    _,_,f1_res_dense= calculate_metrics(densenet_shots[i].flatten(),resnet_shots[i].flatten())
    _,_,f1_dens_gog= calculate_metrics(densenet_shots[i].flatten(),googlenet_shots[i].flatten())
    perfs_avg.append( np.mean([f1_goog_res,f1_res_dense]))
print(np.mean(perfs_avg))

In [None]:
gt_shot_boundary = h5py.File('Data/googlenet/googlenet_summe.h5')
googlenet_shots = np.load('Fisher_shot_boundaries.npy',allow_pickle=True)
Fishcher_1_f1_scores = []

for i,index in enumerate(indices[:10]):
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),googlenet_shots[i].flatten())
    Fishcher_1_f1_scores.append(f1)
print(f'Fisher average f1 : {np.mean(Fishcher_1_f1_scores)}')

gt_shot_boundary = h5py.File('Data/googlenet/googlenet_summe.h5')
googlenet_shots = np.load('Fisher_shot_boundaries_summe_0.8.npy',allow_pickle=True)
Fishcher_0_8_f1_scores = []

for i,index in enumerate(indices[:10]):
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),googlenet_shots[i].flatten())
    Fishcher_0_8_f1_scores.append(f1)
print(f'Fisher 0.8 average f1 : {np.mean(Fishcher_0_8_f1_scores)}')
Fishcher_0_6_f1_scores = []
googlenet_shots = np.load('Fisher_shot_boundaries_summe_0.6.npy',allow_pickle=True)
for i,index in enumerate(indices[:10]):
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),googlenet_shots[i].flatten())
    Fishcher_0_6_f1_scores.append(f1)
print(f'Fisher 0.6 average f1 : {np.mean(Fishcher_0_6_f1_scores)}')

Fishcher_0_4_f1_scores = []
googlenet_shots = np.load('Fisher_shot_boundaries_summe_0.4.npy',allow_pickle=True)
for i,index in enumerate(indices[:10]):
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),googlenet_shots[i].flatten())
    Fishcher_0_4_f1_scores.append(f1)
print(f'Fisher 0.4 average f1 : {np.mean(Fishcher_0_4_f1_scores)}')



results_dict = {'Vmax 1.0 ':np.mean(Fishcher_1_f1_scores) , 'Vmax 0.8':np.mean(Fishcher_0_8_f1_scores),'Vmax 0.6':np.mean(Fishcher_0_6_f1_scores),'Vmax 0.4':np.mean(Fishcher_0_8_f1_scores)}

json.dump(results_dict,open('Results/Fisher_Shot_boundary_results.json','w'),indent=4)