# Notebook to demonstrate evaluation divergence of video summarization models 

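Rough approach: take a trained model's output and check whether its rank correlation (Kendall's tau, Spearman's rho) with the human annotations diverges across evaluation settings: downsampled scores, post-knapsack summaries, and upsampled scores.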

# Helper functions and model loading:

In [None]:
import h5py 
import numpy as np 
import json 
from Utils import *

In [None]:
 # Should do comparisons of any pred with avg/user annotations 
def correlation_single_pred(score,video_name,dataset,dataset_name='tvsum',downsample_gt=True):
    """Correlate one prediction with the human annotations of a single video.

    Args:
        score: Predicted per-position scores; must have the same length as the
            (optionally downsampled) annotation it is compared with.
        video_name: Key of the video inside ``dataset``. For ``'tvsum'`` the
            number after the first underscore (e.g. ``video_12`` -> 12) is used
            as a 1-based index into the TVSum ``.mat`` annotations.
        dataset: Mapping indexed by ``video_name`` whose entries provide
            ``'picks'`` and, for ``'summe'``, ``'user_summary'``.
        dataset_name: ``'tvsum'`` or ``'summe'``.
        downsample_gt: When true, the annotation is sub-sampled at ``'picks'``
            before the comparison; otherwise it is used at full length.

    Returns:
        ``[kendall_tau, spearman_rho]``. For ``'tvsum'`` both values are means
        over the individual annotators; for ``'summe'`` they are computed
        against the annotator-averaged summary. An unrecognised
        ``dataset_name`` yields an empty list.
    """
    def _load_picks():
        # np.asarray materialises the stored picks so they can be used as a
        # NumPy index regardless of the container they are stored in.
        return np.asarray(dataset[video_name]['picks'])

    kendall_spearman_scores = []
    if dataset_name == "tvsum":
        data = load_tvsum_mat('Utils//ydata-tvsum50.mat')
        video_number = int(video_name.split('_')[1])
        # The per-user annotations come from the .mat file (indexed by video
        # number), not from `dataset`, which is keyed by video name.
        all_user_summary = data[video_number-1]['user_anno'].T
        pick = _load_picks() if downsample_gt else None
        all_correlations_tau = []
        all_correlations_spearman = []
        for user_summary in all_user_summary:
            # Scale each annotator by their own maximum before comparing.
            normalised_summary = user_summary/user_summary.max()
            if downsample_gt:
                normalised_summary = normalised_summary[pick]
            all_correlations_tau.append(
                kendalltau(-rankdata(normalised_summary),-rankdata(score))[0])
            all_correlations_spearman.append(
                spearmanr(normalised_summary,score)[0])
        kendall_spearman_scores.append(np.mean(all_correlations_tau))
        kendall_spearman_scores.append(np.mean(all_correlations_spearman))
    elif dataset_name == "summe":
        user_summarie = dataset[video_name]['user_summary']
        averaged_summary = np.average(user_summarie,axis=0)
        if downsample_gt:
            # `picks` must be read here as well; it used to be defined only in
            # the TVSum branch, which made this path raise NameError.
            averaged_summary = averaged_summary[_load_picks()]
        kendall_score = kendalltau(rankdata(averaged_summary),rankdata(score))[0]
        spearman_score = spearmanr(averaged_summary,score)[0]
        kendall_spearman_scores.append(np.mean(kendall_score))
        kendall_spearman_scores.append(np.mean(spearman_score))

    return kendall_spearman_scores

# This should take an Upsampled score, or post knapsack score and then compare the correlation between them
def correlation_with_knapsack_scores(score,video_name,dataset):
    """Average the rank correlation between ``score`` and every user summary.

    Each row of ``dataset[video_name]['user_summary']`` is compared with
    ``score`` (so both must have the same length) using Kendall's tau and
    Spearman's rho.

    Returns:
        ``[mean_kendall_tau, mean_spearman_rho]`` across all user summaries.
    """
    annotations = dataset[video_name]['user_summary'][...]
    # One (tau, rho) pair per annotator.
    per_user = [
        (
            kendalltau(-rankdata(annotation),-rankdata(score))[0],
            spearmanr(annotation,score)[0],
        )
        for annotation in annotations
    ]
    tau_values = [tau for tau, _ in per_user]
    rho_values = [rho for _, rho in per_user]
    return [np.mean(tau_values), np.mean(rho_values)]

def upsample_prediction(score,picks,video_length):
    """Expand sub-sampled scores to one value per frame (step function).

    ``score[i]`` is written to every frame from ``picks[i]`` up to, but not
    including, the next pick. The last pick's score is carried through to the
    final frame; previously that tail segment was never written and stayed 0.

    Args:
        score: One score per entry of ``picks``.
        picks: Increasing frame indices at which the scores were sampled.
        video_length: Total number of frames in the video.

    Returns:
        1-D float array of length ``video_length``. Frames before the first
        pick remain 0.
    """
    upsampled_pred = np.zeros(video_length)
    n_picks = len(picks)
    if n_picks == 0:
        return upsampled_pred

    for i in range(n_picks-1):
        upsampled_pred[picks[i]:picks[i+1]] = score[i]

    # Tail segment: from the last pick to the end of the video. Guarded so a
    # score vector one element shorter than `picks` is still accepted.
    if len(score) >= n_picks:
        upsampled_pred[picks[n_picks-1]:] = score[n_picks-1]

    return upsampled_pred


Testing before main experiment      

In [None]:
# Sanity run: for every test video of one split, measure the correlation
# between the model output and the human annotations under four settings.
# NOTE(review): the HDF5 file opened here is the SumMe one, while the split
# file and the dataset_name passed below are TVSum - confirm which dataset is
# intended before trusting the numbers.
dataset = h5py.File('Data/original/googlenet_summe.h5', 'r')  # read-only; nothing is written
with open('Splits/tvsum_can_1.json') as split_file:
    test_split = json.load(split_file)
test_split_index = test_split[0]['test_keys']

with open('SensitivityExpt/Attention/Output/outputs.json') as outputs_file:
    model_outputs = json.load(outputs_file)
scenario_1 = []
scenario_2 = []
scenario_3 = []
scenario_4 = []
for test_index in test_split_index:
    shot_boundaries = dataset[test_index]['change_points'][...]
    scores = model_outputs[test_index]
    positions = dataset[test_index]['picks'][...]
    n_frames = dataset[test_index]['n_frames'][...]
    # Key aligned with the helper functions, which read 'user_summary'.
    user_summaries = dataset[test_index]['user_summary'][...]
    post_knapsack_pred = generate_summary_single(shot_boundaries,scores,n_frames,positions)
    upsampled_pred = upsample_prediction(scores,positions,n_frames)
    print('Evaluating Correlation with Downsampled prediction and 0 to 1 scores')
    correlation_scores = correlation_single_pred(scores,test_index,dataset,'tvsum')
    print(f'Kendall scores:  {correlation_scores[0]}')
    scenario_1.append(correlation_scores)
    print('Evaluating scores with Post knapsack Processing and 0 to 1 scores')
    correlation_scores = correlation_single_pred(post_knapsack_pred,test_index,dataset,'tvsum',False)
    print(f'Kendall scores:  {correlation_scores[0]}')
    scenario_2.append(correlation_scores)
    # Scenarios 3 and 4 used to pass each other's prediction; the argument now
    # matches the printed description.
    print('Evaluating upsampled predictions with 0/1 selection annotations')
    correlation_scores = correlation_with_knapsack_scores(upsampled_pred,test_index,dataset)
    print(f'Kendall scores:  {correlation_scores[0]}')
    scenario_3.append(correlation_scores)
    print('Evaluating scores with Post knapsack Processing  with 0/1 selection annotations ')
    correlation_scores = correlation_with_knapsack_scores(post_knapsack_pred,test_index,dataset)
    print(f'Kendall scores:  {correlation_scores[0]}')
    scenario_4.append(correlation_scores)
    

    # Comparing correlation between the scores and the predictions directly 

    

In [None]:
# Each scenario list holds one [kendall, spearman] pair per test video.
# Average column-wise so the two metrics are reported separately instead of
# being blended into a single number.
for scenario_label, scenario_results in (
    ('Scenario 1', scenario_1),
    ('Scenario 2', scenario_2),
    ('Scenario 3', scenario_3),
    ('Scenario 4', scenario_4),
):
    # reshape(-1, 2) keeps the unpacking valid even when a list is empty.
    pairs = np.asarray(scenario_results, dtype=float).reshape(-1, 2)
    kendall_mean, spearman_mean = np.mean(pairs, axis=0)
    print(f'{scenario_label} scores : Kendall {kendall_mean}, Spearman {spearman_mean}')
