In [1]:
from Data import *
import h5py 
import torch

In [2]:

import numpy as np

def levenstein_distance(word1, word2):
    m, n = len(word1), len(word2)

    matrix = torch.zeros((m+1, n+1), dtype=int)

    matrix[:, 0] = torch.arange(m+1)
    matrix[0, :] = torch.arange(n+1)

    for i in range(1, m+1):
        for j in range(1, n+1):
            if word1[i-1] == word2[j-1]:
                substitution_cost = 0
            else:
                substitution_cost = 1

            matrix[i, j] = min(
                matrix[i-1, j] + 1,                # deletion
                matrix[i, j-1] + 1,                # insertion
                matrix[i-1, j-1] + substitution_cost    # substitution
            )

    similarity = 1 - matrix[m, n] / max(m, n)
    similarity_percentage = similarity * 100

    return similarity_percentage

In [3]:
flip      =      Flip(probability = 0)
shufflen  =  Shufflebylength(probability = 0,segment_length = 24)
intrashot = IntraShotShuffle(probability = 0)
intershot = ShuffleShots(probability = 0)
neighbourshot = ShuffleNeighbourShots(probability = 0)


TVSUM

In [4]:
video_dataset = h5py.File(os.path.join('Data\\h5datasets','eccv16_dataset_tvsum_google_pool5'+'.h5'),'r')


Flip

In [5]:
levenstein_distances = []
for index_in_question in video_dataset:
    original_video = torch.arange(0,len(video_dataset[index_in_question]['gtscore'][...]))
    levenstein_distances.append(levenstein_distance(original_video,flip.shuffle(original_video,original_video)[0]).item())
print(f' Average Levenstein distance for flip: {np.mean(levenstein_distances)}')
print(f'Levenstein distance variance for flip: {np.var(levenstein_distances)}')

 Average Levenstein distance for flip: 0.15411686897277832
Levenstein distance variance for flip: 0.025596522909040686


Intrashot

In [6]:
levenstein_distances = []
for index_in_question in video_dataset:
    shot_boundaries = video_dataset[index_in_question]['downsampled_shot_boundaries']
    shuffled_scores = []
    original_video = torch.arange(0,len(video_dataset[index_in_question]['gtscore'][...]))
    for i in range (3):
            shuffled_scores.append(levenstein_distance(original_video,intrashot.shuffle(original_video,original_video,shot_bounds = shot_boundaries)[0]).item())
    levenstein_distances.append(np.mean(shuffled_scores))
print(f'Average Levenstein distance for intrashot: {np.mean(levenstein_distances)}')
print(f'Levenstein distance variance for intrashot: {np.mean(levenstein_distances)}')

  for shot_bound in torch.asarray(shot_boundaries):


Average Levenstein distance for intrashot: 100.0
Levenstein distance variance for intrashot: 100.0


Neighbours

In [7]:
levenstein_distances = []
for index_in_question in video_dataset:
    shot_boundaries = video_dataset[index_in_question]['downsampled_shot_boundaries']
    shuffled_scores = []
    original_video = torch.arange(0,len(video_dataset[index_in_question]['gtscore'][...]))
    for i in range (3):
            shuffled_scores.append(levenstein_distance(original_video,neighbourshot.shuffle(original_video,original_video,shot_bounds = shot_boundaries)[0]).item())
    levenstein_distances.append(np.mean(shuffled_scores))
print(f'Average Levenstein distance for intrashot: {np.mean(levenstein_distances)}')
print(f'Levenstein distance variance for neighbourshot: {np.var(levenstein_distances)}')

Average Levenstein distance for intrashot: 58.513739242553704
Levenstein distance variance for neighbourshot: 23.627426546520663


Intershot

In [8]:
levenstein_distances = []
for index_in_question in video_dataset:
    shot_boundaries = video_dataset[index_in_question]['downsampled_shot_boundaries']
    shuffled_scores = []
    original_video = torch.arange(0,len(video_dataset[index_in_question]['gtscore'][...]))
    for i in range (3):
            shuffled_scores.append(levenstein_distance(original_video,intershot.shuffle(original_video,original_video,shot_bounds = shot_boundaries)[0]).item())
    levenstein_distances.append(np.mean(shuffled_scores))
print(f'Levenstein distance for intershot: {np.mean(levenstein_distances)}')
print(f'Levenstein distance variance for intershot: {np.var(levenstein_distances)}')

Levenstein distance for intershot: 6.439960781733195
Levenstein distance variance for intershot: 13.740797710901138


Segments

In [9]:
levenstein_distances = []
for index_in_question in video_dataset:
    shot_boundaries = video_dataset[index_in_question]['downsampled_shot_boundaries']
    shuffled_scores = []
    original_video = torch.arange(0,len(video_dataset[index_in_question]['gtscore'][...]))
    for i in range (3):
        shuffled_scores.append(levenstein_distance(original_video,shufflen.shuffle(original_video,original_video)[0]).item())
    levenstein_distances.append(np.mean(shuffled_scores))
print(f'Levenstein distance for segments: {np.mean(levenstein_distances)}')
print(f'Levenstein distance variance for segments: {np.var(levenstein_distances)}')

Levenstein distance for segments: 11.030543835957841
Levenstein distance variance for segments: 33.42716896825168


SumMe

In [10]:
video_dataset = h5py.File(os.path.join('Data\\h5datasets','eccv16_dataset_summe_google_pool5'+'.h5'),'r')

Flip

In [11]:
levenstein_distances = []
for index_in_question in video_dataset:
    original_video = torch.arange(0,len(video_dataset[index_in_question]['gtscore'][...]))
    levenstein_distances.append(levenstein_distance(original_video,flip.shuffle(original_video,original_video)[0]).item())
print(f' Average Levenstein distance for flip: {np.mean(levenstein_distances)}')
print(f'Levenstein distance variance for flip: {np.var(levenstein_distances)}')

 Average Levenstein distance for flip: 0.19608044624328613
Levenstein distance variance for flip: 0.06055240414093532


IntraShot

In [12]:
levenstein_distances = []
for index_in_question in video_dataset:
    shot_boundaries = video_dataset[index_in_question]['downsampled_shot_boundaries']
    shuffled_scores = []
    original_video = torch.arange(0,len(video_dataset[index_in_question]['gtscore'][...]))
    for i in range (3):
            shuffled_scores.append(levenstein_distance(original_video,intrashot.shuffle(original_video,original_video,shot_bounds = shot_boundaries)[0]).item())
    levenstein_distances.append(np.mean(shuffled_scores))
print(f'Average Levenstein distance for intrashot: {np.mean(levenstein_distances)}')
print(f'Levenstein distance variance for intrashot: {np.var(levenstein_distances)}')

Average Levenstein distance for intrashot: 100.0
Levenstein distance variance for intrashot: 0.0


Neighbours

In [13]:
levenstein_distances = []
for index_in_question in video_dataset:
    shot_boundaries = video_dataset[index_in_question]['downsampled_shot_boundaries']
    shuffled_scores = []
    original_video = torch.arange(0,len(video_dataset[index_in_question]['gtscore'][...]))
    for i in range (3):
            shuffled_scores.append(levenstein_distance(original_video,neighbourshot.shuffle(original_video,original_video,shot_bounds = shot_boundaries)[0]).item())
    levenstein_distances.append(np.mean(shuffled_scores))
print(f'Average Levenstein distance for neighboursshot: {np.mean(levenstein_distances)}')
print(f'Levenstein distance variance for neighbourshot: {np.var(levenstein_distances)}')

Average Levenstein distance for neighboursshot: 59.71926767985026
Levenstein distance variance for neighbourshot: 50.702490501236724


Intershot

In [14]:
levenstein_distances = []
for index_in_question in video_dataset:
    shot_boundaries = video_dataset[index_in_question]['downsampled_shot_boundaries']
    shuffled_scores = []
    original_video = torch.arange(0,len(video_dataset[index_in_question]['gtscore'][...]))
    for i in range (3):
            shuffled_scores.append(levenstein_distance(original_video,intershot.shuffle(original_video,original_video,shot_bounds = shot_boundaries)[0]).item())
    levenstein_distances.append(np.mean(shuffled_scores))
print(f'Levenstein distance for intershot: {np.mean(levenstein_distances)}')
print(f'Levenstein distance variance for intershot: {np.var(levenstein_distances)}')

Levenstein distance for intershot: 13.229750630060833
Levenstein distance variance for intershot: 106.09865319315887


Segment

In [15]:
levenstein_distances = []
for index_in_question in video_dataset:
    shot_boundaries = video_dataset[index_in_question]['downsampled_shot_boundaries']
    shuffled_scores = []
    original_video = torch.arange(0,len(video_dataset[index_in_question]['gtscore'][...]))
    for i in range (3):
        shuffled_scores.append(levenstein_distance(original_video,shufflen.shuffle(original_video,original_video)[0]).item())
    levenstein_distances.append(np.mean(shuffled_scores))
print(f'Levenstein distance for segments: {np.mean(levenstein_distances)}')
print(f'Levenstein distance variance for segments: {np.var(levenstein_distances)}')

Levenstein distance for segments: 18.79139928181966
Levenstein distance variance for segments: 110.29835594223745
