# Notebook to demonstrate the inconsistency in the Shot boundaries based on the feature representations

In [1]:
from PIL import Image
import cv2
import numpy as np
import torch
import torchvision
from tqdm import tqdm
import h5py

from torchvision.models import resnet50, ResNet50_Weights,googlenet,GoogLeNet_Weights
from torchvision.models.video import r3d_18, R3D_18_Weights
from torchvision.models import densenet121, DenseNet121_Weights
import torchvision.transforms as transforms

import torch.nn as nn
import cv2
import numpy as np
from sklearn.decomposition import PCA


In [None]:
import skimage
skimage.__version__

In [None]:

from sklearn.mixture import GaussianMixture

# Function to extract dense SIFT descriptors
def dense_sift(img, step=10, window_size=16):
    # Create SIFT detector
    sift = cv2.SIFT_create()
    
    # Get keypoints on a dense grid
    keypoints = [cv2.KeyPoint(x, y, window_size) for y in range(0, img.shape[0], step)
                 for x in range(0, img.shape[1], step)]
    
    # Compute descriptors
    _, descriptors = sift.compute(img, keypoints)
    
    return descriptors

# Function to encode the frame with Fisher Vector
def fisher_vector(gmm, descriptors):
    # Compute the responsibilities
    responsibilities = gmm.predict_proba(descriptors)
    
    # Compute the Fisher Vector
    means = gmm.means_
    covariances = gmm.covariances_
    priors = gmm.weights_

    # Initialize Fisher Vector
    fisher_vector = np.zeros(2 * gmm.n_components * descriptors.shape[1])
    
    # Compute mean and covariance gradient
    for i in range(gmm.n_components):
        diff = descriptors - means[i]
        fisher_vector[i * descriptors.shape[1]:(i + 1) * descriptors.shape[1]] = \
            np.sum(responsibilities[:, i][:, np.newaxis] * diff, axis=0) / np.sum(responsibilities[:, i])
        fisher_vector[(gmm.n_components + i) * descriptors.shape[1]:(gmm.n_components + i + 1) * descriptors.shape[1]] = \
            np.sum(responsibilities[:, i][:, np.newaxis] * (diff ** 2 - covariances[i]), axis=0) / np.sum(responsibilities[:, i])

    return fisher_vector

# Main function to process video frames
def process_video(video_path):
    cap = cv2.VideoCapture(video_path)

    # Step 1: Extract SIFT descriptors from video frames
    all_descriptors = []
    frame_count = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        if frame_count % 5 == 0:  # Process every 5th frame
            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            descriptors = dense_sift(gray_frame)
            if descriptors is not None:
                all_descriptors.append(descriptors)

        frame_count += 1

    cap.release()

    # Stack all descriptors into a single array
    all_descriptors = np.vstack(all_descriptors) if all_descriptors else None

    # Step 2: PCA reduction
    if all_descriptors is not None:
        pca = PCA(n_components=64)
        reduced_descriptors = pca.fit_transform(all_descriptors)

        # Step 3: Fit GMM
        gmm = GaussianMixture(n_components=128)
        gmm.fit(reduced_descriptors)

        # Step 4: Encode each frame with Fisher Vector
        fisher_vectors = []

        for descriptors in all_descriptors:
            reduced = pca.transform(descriptors)
            fisher_vec = fisher_vector(gmm, reduced)
            fisher_vectors.append(fisher_vec)

        fisher_vectors = np.array(fisher_vectors)

        # Output shape of Fisher Vectors
        print("Shape of Fisher Vectors:", fisher_vectors.shape)

# Example usage
process_video('path_to_your_video.mp4')


In [26]:
def popatov_feat_extract(video_path):
    '''A description of the video feature extraction used by Popatov et al on Category Specific video summarization. Which is described as SIFT feature extraction, PCA and Fisher model'''
    cap = cv2.VideoCapture(video_path)

    # Step 1: Extract SIFT descriptors from video frames
    all_descriptors = []
    frame_count = 0
    sift = cv2.SIFT_create() # Create the sift feature extracot
    pca = PCA(n_components=64)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        if frame_count % 5 == 0:  # Process every 5th frame
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            _,descriptors = sift.detectAndCompute(frame)
            if descriptors is not None:
                all_descriptors.append(pca.fit_transform(descriptors)) # Apply PCA to the SIFT features

        frame_count += 1
    k = 128
    gmm = learn_gmm(all_descriptors, n_modes=k)
    cap.release()
    def normalize(fisher_vector):
        fisher_vector = (fisher_vector-np.mean(fisher_vector,axis=0))/(np.std(fisher_vector,axis=0))
        v = np.sqrt(abs(fisher_vector)) * np.sign(fisher_vector)
        return v / np.sqrt(np.dot(v, v))

    fisher_vectors_array = np.array([normalize(fisher_vector(descriptor,gmm)) for descriptor in all_descriptors])
    return fisher_vectors_array
    

In [None]:
fisher_vector_arr = popatov_feat_extract(f'C:\\Users\\test\\Project-order\\Videos\\tvsum/video_10.mp4')

In [4]:
def cpd_auto(K, ncp, vmax, desc_rate=1, **kwargs):
    """Main interface

    Detect change points automatically selecting their number
        K       - kernel between each pair of frames in video
        ncp     - maximum ncp
        vmax    - special parameter
    Optional arguments:
        lmin     - minimum segment length
        lmax     - maximum segment length
        desc_rate - rate of descriptor sampling (vmax always corresponds to 1x)

    Note:
        - cps are always calculated in subsampled coordinates irrespective to
            desc_rate
        - lmin and m should be in agreement
    ---
    Returns: (cps, costs)
        cps   - best selected change-points
        costs - costs for 0,1,2,...,m change-points

    Memory requirement: ~ (3*N*N + N*ncp)*4 bytes ~= 16 * N^2 bytes
    That is 1,6 Gb for the N=10000.
    """
    m = ncp
    (_, scores) = cpd_nonlin(K, m, backtrack=False, **kwargs)

    N = K.shape[0]
    N2 = N*desc_rate  # length of the video before subsampling

    penalties = np.zeros(m+1)
    # Prevent division by zero (in case of 0 changes)
    ncp = np.arange(1, m+1)
    penalties[1:] = (vmax*ncp/(2.0*N2))*(np.log(float(N2)/ncp)+1)

    costs = scores/float(N) + penalties
    m_best = np.argmin(costs)
    (cps, scores2) = cpd_nonlin(K, m_best, **kwargs)

    return (cps, scores2)


#from scipy import weave

def calc_scatters(K):
    """
    Calculate scatter matrix:
    scatters[i,j] = {scatter of the sequence with starting frame i and ending frame j}
    """
    n = K.shape[0]
    K1 = np.cumsum([0] + list(np.diag(K)))
    K2 = np.zeros((n+1, n+1))
    K2[1:, 1:] = np.cumsum(np.cumsum(K, 0), 1) # TODO: use the fact that K - symmetric

    scatters = np.zeros((n, n))

    diagK2 = np.diag(K2)

    i = np.arange(n).reshape((-1,1))
    j = np.arange(n).reshape((1,-1))
    scatters = (K1[1:].reshape((1,-1))-K1[:-1].reshape((-1,1))
                - (diagK2[1:].reshape((1,-1)) + diagK2[:-1].reshape((-1,1)) - K2[1:,:-1].T - K2[:-1,1:]) / ((j-i+1).astype(float) + (j==i-1).astype(float)))
    scatters[j<i]=0
    #code = r"""
    #for (int i = 0; i < n; i++) {
    #    for (int j = i; j < n; j++) {
    #        scatters(i,j) = K1(j+1)-K1(i) - (K2(j+1,j+1)+K2(i,i)-K2(j+1,i)-K2(i,j+1))/(j-i+1);
    #    }
    #}
    #"""
    #weave.inline(code, ['K1','K2','scatters','n'], global_dict = \
    #    {'K1':K1, 'K2':K2, 'scatters':scatters, 'n':n}, type_converters=weave.converters.blitz)

    return scatters

def cpd_nonlin(K, ncp, lmin=1, lmax=100000, backtrack=True, verbose=True,
    out_scatters=None):
    """ Change point detection with dynamic programming
    K - square kernel matrix
    ncp - number of change points to detect (ncp >= 0)
    lmin - minimal length of a segment
    lmax - maximal length of a segment
    backtrack - when False - only evaluate objective scores (to save memory)

    Returns: (cps, obj)
        cps - detected array of change points: mean is thought to be constant on [ cps[i], cps[i+1] )
        obj_vals - values of the objective function for 0..m changepoints

    """
    m = int(ncp)  # prevent numpy.int64

    (n, n1) = K.shape
    assert(n == n1), "Kernel matrix awaited."

    assert(n >= (m + 1)*lmin)
    assert(n <= (m + 1)*lmax)
    assert(lmax >= lmin >= 1)

    if verbose:
        #print "n =", n
        print ("Precomputing scatters...")
    J = calc_scatters(K)

    if out_scatters != None:
        out_scatters[0] = J

    if verbose:
        print ("Inferring best change points...")
    # I[k, l] - value of the objective for k change-points and l first frames
    I = 1e101*np.ones((m+1, n+1))
    I[0, lmin:lmax] = J[0, lmin-1:lmax-1]

    if backtrack:
        # p[k, l] --- "previous change" --- best t[k] when t[k+1] equals l
        p = np.zeros((m+1, n+1), dtype=int)
    else:
        p = np.zeros((1,1), dtype=int)

    for k in range(1,m+1):
        for l in range((k+1)*lmin, n+1):
            tmin = max(k*lmin, l-lmax)
            tmax = l-lmin+1
            c = J[tmin:tmax,l-1].reshape(-1) + I[k-1, tmin:tmax].reshape(-1)
            I[k,l] = np.min(c)
            if backtrack:
                p[k,l] = np.argmin(c)+tmin

    #code = r"""
    ##define max(x,y) ((x)>(y)?(x):(y))
    #for (int k=1; k<m+1; k++) {
    #    for (int l=(k+1)*lmin; l<n+1; l++) {
    #        I(k, l) = 1e100; //nearly infinity
    #        for (int t=max(k*lmin,l-lmax); t<l-lmin+1; t++) {
    #            double c = I(k-1, t) + J(t, l-1);
    #            if (c < I(k, l)) {
    #                I(k, l) = c;
    #                if (backtrack == 1) {
    #                    p(k, l) = t;
    #                }
    #            }
    #        }
    #    }
    #}
    #"""

    #weave.inline(code, ['m','n','p','I', 'J', 'lmin', 'lmax', 'backtrack'], \
    #    global_dict={'m':m, 'n':n, 'p':p, 'I':I, 'J':J, \
    #    'lmin':lmin, 'lmax':lmax, 'backtrack': int(1) if backtrack else int(0)},
    #    type_converters=weave.converters.blitz)

    # Collect change points
    cps = np.zeros(m, dtype=int)

    if backtrack:
        cur = n
        for k in range(m, 0, -1):
            cps[k-1] = p[k, cur]
            cur = cps[k-1]

    scores = I[:, n].copy()
    scores[scores > 1e99] = np.inf
    return cps, scores

In [5]:
def kts(n_frames,features,vmax=1, frame_skip = 1):
      """ Receives the frame features from the CNN to do the Shot division based on KTS #TODO need to see how exactly this functions
      """
      seq_len = len(features)
      picks = np.arange(0, seq_len) * frame_skip

      # compute change points using KTS
      kernel = np.matmul(features, features.T)
      change_points, _ = cpd_auto(kernel, seq_len - 1, vmax, verbose=False)
      change_points *= frame_skip
      change_points = np.hstack((0, change_points, n_frames))
      begin_frames = change_points[:-1]
      end_frames = change_points[1:]
      change_points = np.vstack((begin_frames, end_frames - 1)).T

      n_frame_per_seg = end_frames - begin_frames
      return change_points, n_frame_per_seg, picks


class THWC_to_CTHW(torch.nn.Module):
    def forward(self, data):
        # Do some transformations
        return data.permute(3, 0, 1, 2)
class PreProcessorVidSum(object):
    def __init__(self,feature_extractor,target_downsample=2,shot_aware = True):
        self.target_downsample = target_downsample
        self.feature_extractor = feature_extractor # TODO add support for GPU
        self.shot_aware = shot_aware
    def run(self,video_path,shot_boundaries = []):
        ''' This is using the shot boundaries from the h5 datasets to frames to pick the selected, so it returns all the frames features and the selected ones
        '''
        shot_boundaries = np.array(shot_boundaries).astype(int)
        cap = cv2.VideoCapture(video_path)
        frame_rate = int(cap.get(cv2.CAP_PROP_FPS))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        print(frame_rate)
        print(total_frames)
        downsample_target = frame_rate//self.target_downsample if self.target_downsample!=0 else 1
        picked_frames = np.arange(0,total_frames,downsample_target )
        selected_frames = np.union1d(shot_boundaries,picked_frames)
        print(len(selected_frames))
        if selected_frames[-1]>total_frames-1: selected_frames[-1]=total_frames-1
        print(selected_frames[-1],selected_frames[-2])
        all_frames = []
        for sub_frame in tqdm(selected_frames):
            cap.set(cv2.CAP_PROP_POS_FRAMES,sub_frame)
            ret,frame = cap.read()
            if not ret:
                print(f"Error reading frame at index {sub_frame}")
                continue
            all_frames.append(self.feature_extractor.run(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))).numpy())
        cap.release()
        return all_frames, selected_frames

class FeatureExtractor():
    def __init__(self,model,transforms):
        self.model = model
        self.transforms = transforms # Transforms should act like one function, otherwise, one should do this outside and pass identity through this transform

    def run(self,input):
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # The model has to be in eval mode and on the GPU/CPU set outside.
        with torch.no_grad():
            return self.model(self.transforms(input).unsqueeze(0).to(device)).squeeze().to('cpu')


In [6]:
def g(seq):
    # http://stackoverflow.com/questions/3382352/equivalent-of-numpy-argsort-in-basic-python/3383106#3383106
    #lambda version by Tony Veijalainen
    return [x for x,y in sorted(enumerate(seq), key = lambda x: x[1])]



In [7]:
tvsum_dataset = h5py.File('Data/googlenet/googlenet_tvsum.h5')

lengths  = [(tvsum_dataset[key]['n_frames'][...].item()) for key in list(tvsum_dataset.keys())]
indices =g(lengths)
dataset_keys = list(tvsum_dataset.keys())




Googlenet

In [None]:
model = googlenet(weights = GoogLeNet_Weights.IMAGENET1K_V1)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
preprocess = ResNet50_Weights.IMAGENET1K_V2.transforms()

submodel = nn.Sequential(*list(model.children())[:-2]).to(device).eval()
feature_extractor = FeatureExtractor(submodel,preprocess)
preprocesser_sum = PreProcessorVidSum(feature_extractor,target_downsample=0)
dataset_features = []
for i in range(10):
    index = indices[i]
    video_path = f'C:\\Users\\test\\Project-order\\Videos\\tvsum/{dataset_keys[index]}.mp4'
    features,_ = preprocesser_sum.run(video_path)
    dataset_features.append(features)



np.save('GoogleNet_Features_tvsum.npy',np.array(dataset_features, dtype=object), allow_pickle=True)

ResNet

In [None]:
hdf5file = tvsum_dataset


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)

submodel = nn.Sequential(*list(model.children())[:-1])
submodel.eval().to(device)
feature_extractor = FeatureExtractor(submodel,preprocess)
preprocesser_sum = PreProcessorVidSum(feature_extractor,target_downsample=0)
dataset_features = []
for i in range(10):
    index = indices[i]
    video_path = f'C:\\Users\\test\\Project-order\\Videos\\tvsum/{dataset_keys[index]}.mp4'
    features,_ = preprocesser_sum.run(video_path)
    dataset_features.append(features)



np.save('Resnet_Features_tvsum.npy',np.array(dataset_features, dtype=object), allow_pickle=True)



DenseNet

In [None]:
model = densenet121(weights =DenseNet121_Weights.IMAGENET1K_V1)
submodel = nn.Sequential(*list(model.children())[:-1],nn.AdaptiveAvgPool2d(1)).to('cuda')

processed_dataset = 'densnet'
feature_extractor = FeatureExtractor(submodel,preprocess)
preprocesser_sum = PreProcessorVidSum(feature_extractor,target_downsample=0)
dataset_features = []
for i in range(10):
    index = indices[i]
    video_path = f'C:\\Users\\test\\Project-order\\Videos\\tvsum/{dataset_keys[index]}.mp4'
    features,_ = preprocesser_sum.run(video_path)
    dataset_features.append(features)



np.save('Densnet_Features_tvsum.npy',np.array(dataset_features, dtype=object), allow_pickle=True)

Creation of Shot Boundaries

In [73]:
from numpy import linalg as LA

In [74]:
googlenet_features= np.load('GoogleNet_Features_tvsum.npy',allow_pickle = True)
resnet_features = np.load('Resnet_Features_tvsum.npy',allow_pickle = True)
densenet_features = np.load('Densnet_Features_tvsum.npy',allow_pickle = True)


In [29]:


shot_boundary = []

for feature in resnet_features:
    feature = [feat/LA.norm(feat) for feat in feature]
    n_frames = len(feature)
    change_points,_ ,_= kts(n_frames,np.array(feature))
    shot_boundary.append(change_points)
np.save('resnet_shot_boundaries.npy',np.array(shot_boundary, dtype=object), allow_pickle=True)

for feature in googlenet_features:
    feature = [feat/LA.norm(feat) for feat in feature]
    n_frames = len(feature)
    change_points,_ ,_= kts(n_frames,np.array(feature))
    shot_boundary.append(change_points)
np.save('googlenet_shot_boundaries.npy',np.array(shot_boundary, dtype=object), allow_pickle=True)

for feature in densenet_features:
    feature = [feat/LA.norm(feat) for feat in feature]
    n_frames = len(feature)
    change_points,_ ,_= kts(n_frames,np.array(feature))
    shot_boundary.append(change_points)
np.save('densenet_shot_boundaries.npy',np.array(shot_boundary, dtype=object), allow_pickle=True)

In [27]:


shot_boundary = []

for feature in resnet_features:
    feature = [feat/LA.norm(feat) for feat in feature]
    n_frames = len(feature)
    change_points,_ ,_= kts(n_frames,np.array(feature),vmax=0.8)
    shot_boundary.append(change_points)
np.save('resnet_shot_boundaries_vmax_0.8.npy',np.array(shot_boundary, dtype=object), allow_pickle=True)

for feature in googlenet_features:
    feature = [feat/LA.norm(feat) for feat in feature]
    n_frames = len(feature)
    change_points,_ ,_= kts(n_frames,np.array(feature),vmax=0.8)
    shot_boundary.append(change_points)
np.save('googlenet_shot_boundaries_0.8.npy',np.array(shot_boundary, dtype=object), allow_pickle=True)

for feature in densenet_features:
    feature = [feat/LA.norm(feat) for feat in feature]
    n_frames = len(feature)
    change_points,_ ,_= kts(n_frames,np.array(feature),vmax=0.8)
    shot_boundary.append(change_points)
np.save('densenet_shot_boundaries_0.8.npy',np.array(shot_boundary, dtype=object), allow_pickle=True)



shot_boundary = []

for feature in resnet_features:
    feature = [feat/LA.norm(feat) for feat in feature]
    n_frames = len(feature)
    change_points,_ ,_= kts(n_frames,np.array(feature),vmax=0.6)
    shot_boundary.append(change_points)
np.save('resnet_shot_boundaries_vmax_0.6.npy',np.array(shot_boundary, dtype=object), allow_pickle=True)

for feature in googlenet_features:
    feature = [feat/LA.norm(feat) for feat in feature]
    n_frames = len(feature)
    change_points,_ ,_= kts(n_frames,np.array(feature),vmax=0.6)
    shot_boundary.append(change_points)
np.save('googlenet_shot_boundaries_0.6.npy',np.array(shot_boundary, dtype=object), allow_pickle=True)

for feature in densenet_features:
    feature = [feat/LA.norm(feat) for feat in feature]
    n_frames = len(feature)
    change_points,_ ,_= kts(n_frames,np.array(feature),vmax=0.6)
    shot_boundary.append(change_points)
np.save('densenet_shot_boundaries_0.6.npy',np.array(shot_boundary, dtype=object), allow_pickle=True)

In [None]:
shot_boundary = []

for feature in resnet_features:
    feature = [feat/LA.norm(feat) for feat in feature]
    n_frames = len(feature)
    change_points,_ ,_= kts(n_frames,np.array(feature),vmax=0.4)
    shot_boundary.append(change_points)
np.save('resnet_shot_boundaries_vmax_0.4.npy',np.array(shot_boundary, dtype=object), allow_pickle=True)
shot_boundary = []

for feature in googlenet_features:
    feature = [feat/LA.norm(feat) for feat in feature]
    n_frames = len(feature)
    change_points,_ ,_= kts(n_frames,np.array(feature),vmax=0.4)
    shot_boundary.append(change_points)
np.save('googlenet_shot_boundaries_0.4.npy',np.array(shot_boundary, dtype=object), allow_pickle=True)
shot_boundary = []
for feature in densenet_features:
    feature = [feat/LA.norm(feat) for feat in feature]
    n_frames = len(feature)
    change_points,_ ,_= kts(n_frames,np.array(feature),vmax=0.4)
    shot_boundary.append(change_points)
np.save('densenet_shot_boundaries_0.4.npy',np.array(shot_boundary, dtype=object), allow_pickle=True)

In [None]:
Fishers_features= np.load('Fishers_Features_tvsum.npy',allow_pickle = True)

shot_boundary = []
for feature in resnet_features:
    feature = [feat/LA.norm(feat) for feat in feature]
    n_frames = len(feature)
    change_points,_ ,_= kts(n_frames,np.array(feature),vmax=1.0)
    shot_boundary.append(change_points)
np.save('Fisher_shot_boundaries.npy',np.array(shot_boundary, dtype=object), allow_pickle=True)


In [2]:
def calculate_metrics(true_boundaries, predicted_boundaries):
    TP = len(set(true_boundaries) & set(predicted_boundaries))
    FP = len(set(predicted_boundaries) - set(true_boundaries))
    FN = len(set(true_boundaries) - set(predicted_boundaries))

    precision = TP / (TP + FP) if (TP + FP) > 0 else 0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0
    f1_score = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
  

    return precision, recall, f1_score

In [17]:
gt_shot_boundary = h5py.File('Data/googlenet/googlenet_tvsum.h5')
googlenet_shots = np.load('googlenet_shot_boundaries.npy',allow_pickle=True)[10:20]
resnet_shots = np.load('resnet_shot_boundaries.npy',allow_pickle=True)
densenet_shots = np.load('densenet_shot_boundaries.npy',allow_pickle=True)[20:30]

In [10]:
len(densenet_shots)

10

In [18]:
googlenet_f1_scores = []
resnet_f1_scores = []
densenet_f1_scores = []

In [19]:
for i,index in enumerate(indices[:10]):
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),googlenet_shots[i].flatten())
    googlenet_f1_scores.append(f1)
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),resnet_shots[i].flatten())
    resnet_f1_scores.append(f1)
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),densenet_shots[i].flatten())
    densenet_f1_scores.append(f1)


In [20]:
perfs_avg = []
for i in range(10):
    _,_,f1_goog_res = calculate_metrics(resnet_shots[i].flatten(),googlenet_shots[i].flatten())
    _,_,f1_res_dense= calculate_metrics(densenet_shots[i].flatten(),resnet_shots[i].flatten())
    _,_,f1_dens_gog= calculate_metrics(densenet_shots[i].flatten(),googlenet_shots[i].flatten())
    perfs_avg.append( np.mean([f1_goog_res,f1_res_dense]))

In [21]:
print(np.mean(perfs_avg))

0.27122742499849734


In [22]:

print(f'Googlenet average f1 : {np.mean(googlenet_f1_scores)}')
print(f'resnet average f1 : {np.mean(resnet_f1_scores)}')
print(f'DenseNet average f1 : {np.mean(densenet_f1_scores)}')

Googlenet average f1 : 0.4517452491313002
resnet average f1 : 0.35952287214641804
DenseNet average f1 : 0.08209442409442409


0.8 Vmax

In [70]:
densenet_shots[0]

array([[   0, 2499]])

In [16]:
googlenet_shots = np.load('googlenet_shot_boundaries_0.8.npy',allow_pickle=True)[10:20]
resnet_shots = np.load('resnet_shot_boundaries_vmax_0.8.npy',allow_pickle=True)
densenet_shots = np.load('densenet_shot_boundaries_0.8.npy',allow_pickle=True)[20:30]
googlenet_f1_scores = []
resnet_f1_scores = []
densenet_f1_scores = []
for i,index in enumerate(indices[:10]):
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),googlenet_shots[i].flatten())
    googlenet_f1_scores.append(f1)
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),resnet_shots[i].flatten())
    resnet_f1_scores.append(f1)
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),densenet_shots[i].flatten())
    densenet_f1_scores.append(f1)


print(f'Googlenet average f1 : {np.mean(googlenet_f1_scores)}')
print(f'resnet average f1 : {np.mean(resnet_f1_scores)}')
print(f'DenseNet average f1 : {np.mean(densenet_f1_scores)}')

Googlenet average f1 : 0.39347434440659274
resnet average f1 : 0.3168795153733152
DenseNet average f1 : 0.08209442409442409


In [None]:
perfs_avg = []
for i in range(10):
    _,_,f1_goog_res = calculate_metrics(resnet_shots[i].flatten(),googlenet_shots[i].flatten())
    _,_,f1_res_dense= calculate_metrics(densenet_shots[i].flatten(),resnet_shots[i].flatten())
    _,_,f1_dens_gog= calculate_metrics(densenet_shots[i].flatten(),googlenet_shots[i].flatten())
    perfs_avg.append( np.mean([f1_goog_res,f1_res_dense]))
print(np.mean(perfs_avg))

In [23]:
googlenet_shots = np.load('googlenet_shot_boundaries_0.6.npy',allow_pickle=True)[10:20]
resnet_shots = np.load('resnet_shot_boundaries_vmax_0.6.npy',allow_pickle=True)
densenet_shots = np.load('densenet_shot_boundaries_0.6.npy',allow_pickle=True)[20:30]
googlenet_f1_scores = []
resnet_f1_scores = []
densenet_f1_scores = []
for i,index in enumerate(indices[:10]):
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),googlenet_shots[i].flatten())
    googlenet_f1_scores.append(f1)
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),resnet_shots[i].flatten())
    resnet_f1_scores.append(f1)
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),densenet_shots[i].flatten())
    densenet_f1_scores.append(f1)


print(f'Googlenet average f1 : {np.mean(googlenet_f1_scores)}')
print(f'resnet average f1 : {np.mean(resnet_f1_scores)}')
print(f'DenseNet average f1 : {np.mean(densenet_f1_scores)}')

Googlenet average f1 : 0.32693164649341244
resnet average f1 : 0.2633254972947965
DenseNet average f1 : 0.08209442409442409


In [86]:
perfs_avg = []
for i in range(10):
    _,_,f1_goog_res = calculate_metrics(resnet_shots[i].flatten(),googlenet_shots[i].flatten())
    _,_,f1_res_dense= calculate_metrics(densenet_shots[i].flatten(),resnet_shots[i].flatten())
    _,_,f1_dens_gog= calculate_metrics(densenet_shots[i].flatten(),googlenet_shots[i].flatten())
    perfs_avg.append( np.mean([f1_goog_res,f1_res_dense]))
print(np.mean(perfs_avg))

0.23142867232486375


In [90]:
googlenet_shots = np.load('googlenet_shot_boundaries_0.4.npy',allow_pickle=True)
resnet_shots = np.load('resnet_shot_boundaries_vmax_0.4.npy',allow_pickle=True)
densenet_shots = np.load('densenet_shot_boundaries_0.4.npy',allow_pickle=True)
googlenet_f1_scores = []
resnet_f1_scores = []
densenet_f1_scores = []
for i,index in enumerate(indices[:10]):
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),googlenet_shots[i].flatten())
    googlenet_f1_scores.append(f1)
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),resnet_shots[i].flatten())
    resnet_f1_scores.append(f1)
    precison, recall, f1 = calculate_metrics(gt_shot_boundary[dataset_keys[index]]['change_points'][...].flatten(),densenet_shots[i].flatten())
    densenet_f1_scores.append(f1)


print(f'Googlenet average f1 : {np.mean(googlenet_f1_scores)}')
print(f'resnet average f1 : {np.mean(resnet_f1_scores)}')
print(f'DenseNet average f1 : {np.mean(densenet_f1_scores)}')

Googlenet average f1 : 0.2372799281259132
resnet average f1 : 0.19722524250195686
DenseNet average f1 : 0.08209442409442409


In [91]:
perfs_avg = []
for i in range(10):
    _,_,f1_goog_res = calculate_metrics(resnet_shots[i].flatten(),googlenet_shots[i].flatten())
    _,_,f1_res_dense= calculate_metrics(densenet_shots[i].flatten(),resnet_shots[i].flatten())
    _,_,f1_dens_gog= calculate_metrics(densenet_shots[i].flatten(),googlenet_shots[i].flatten())
    perfs_avg.append( np.mean([f1_goog_res,f1_res_dense]))
print(np.mean(perfs_avg))

0.21228872323701808
