# 3D CNN Feature Extraction and kNN Testing

In [485]:
import os
import pickle
import h5py
import json
import numpy as np
import importlib
import imageio
# from skimage import transform
import matplotlib.pyplot as plt
%matplotlib inline
from math import sqrt
import glob
import subprocess


# custom modules
import cnn3d_similarity_inference
import knn_cnn_features
importlib.reload(knn_cnn_features)

<module 'knn_cnn_features' from '/mnt/c/Users/RyanBae/Dropbox/uw_data_science/capstone/VideoSimilarity/knn_cnn_features.py'>

## Accuracy Test

In [505]:
# Load the stored feature vectors and their video labels from ./cnn3d_features.
feature_vectors = cnn3d_similarity_inference.load_feature_vectors(
    'feature_vectors_cnn3d_ucf101', './cnn3d_features'
)
# Each loaded label is passed through .decode() so it can be split as text later on.
video_labels = np.array([
    raw_label.decode()
    for raw_label in cnn3d_similarity_inference.load_video_labels(
        'video_labels_cnn3d_ucf101', './cnn3d_features'
    )
])

loading feature vectors...
loading video labels...


In [506]:
# Sanity check: show the feature matrix shape, then the number of labels (one per line).
print(feature_vectors.shape, len(video_labels), sep='\n')

(13320, 512)
13320


In [486]:
# Benchmark the CPU (gpu=False) kNN search with k=3 over the loaded feature vectors.
# NOTE: a name assigned inside %timeit is not kept in the notebook namespace, which is
# why the same call is repeated in the next cell to actually obtain feature_indices.
%timeit feature_indices = knn_cnn_features.run_knn_features(feature_vectors=feature_vectors, k=3, gpu=False)

1.04 s ± 109 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [487]:
# perform kNN search for every video in the UCF-101 dataset
# NOTE(review): no separate test_vectors are passed here, so the queries appear to be
# the indexed vectors themselves. If run_knn_features does not exclude each query from
# its own neighbours, every video retrieves itself and the accuracy reported below is
# inflated — confirm against knn_cnn_features.
feature_indices = knn_cnn_features.run_knn_features(feature_vectors=feature_vectors, k=3, gpu=False)

In [514]:
# get similar video labels
# Indexing the label array with the neighbour indices gives, for each query video,
# the labels of its retrieved neighbours.
# presumably feature_indices has shape (n_videos, k) — TODO confirm
similar_videos = video_labels[feature_indices]

In [524]:
def get_cls_accuracy(similar_videos, feature_labels, k=3):
    """Return the mean fraction of retrieved neighbours that share the query's class.

    The class of a label is the second '_'-separated field of the label string
    (``label.split('_')[1]``).

    Args:
        similar_videos: Sequence with one entry per query; each entry is an
            iterable of the label strings of that query's retrieved neighbours.
        feature_labels: Sequence of query label strings, aligned by position
            with ``similar_videos``.
        k: Divisor applied to each query's match count. It should equal the
            number of neighbours per query; it is not inferred from the data.

    Returns:
        float: ``matches / (k * len(feature_labels))``, or ``0.0`` when
        ``feature_labels`` is empty.

    Raises:
        ZeroDivisionError: If ``k`` is 0 and there is at least one query.
        IndexError: If a label has no '_' separator, or ``similar_videos`` has
            more entries than ``feature_labels``.
    """
    n_queries = len(feature_labels)
    if n_queries == 0:
        # Nothing to score; return a defined value instead of dividing by zero.
        return 0.0

    matches = 0
    for i, sim_vids in enumerate(similar_videos):
        true_label = feature_labels[i].split('_')[1]
        matches += sum(sim_vid.split('_')[1] == true_label for sim_vid in sim_vids)

    # Divide once on an integer count rather than adding 1/k per match, so the
    # result does not pick up accumulated floating-point rounding error.
    return matches / (k * n_queries)

In [527]:
# Score the full-dataset retrieval. k is left at its default of 3, which matches the
# k=3 used for the kNN search above; the result is rounded to 3 decimals for display.
accuracy = get_cls_accuracy(similar_videos, video_labels)
print("Accuracy:", round(accuracy, 3))

Accuracy: 0.897


## Accuracy Test with Test/Train Split

In [538]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the videos as queries; the fixed random_state keeps the split repeatable.
# NOTE(review): the split is random per clip. If related clips of the same source video
# can land on both sides, the held-out accuracy may be optimistic — confirm.
X_train, X_test, y_train, y_test = train_test_split(
    feature_vectors,
    video_labels,
    test_size=0.3,
    random_state=42,
)

In [539]:
# Benchmark the CPU (gpu=False) kNN search with k=3, using X_train as the first argument
# and X_test as test_vectors.
# NOTE: a name assigned inside %timeit is not kept in the notebook namespace, so the
# call is repeated in the next cell to actually obtain feature_indices.
%timeit feature_indices = knn_cnn_features.run_knn_features(X_train, test_vectors=X_test, k=3, gpu=False)

246 ms ± 3.54 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [540]:
# Run the k=3 search with X_test as the query vectors; presumably the returned indices
# point into X_train (they are used to index y_train below) — TODO confirm.
# Note: this rebinds feature_indices, replacing the full-dataset result computed earlier.
feature_indices = knn_cnn_features.run_knn_features(X_train, test_vectors=X_test, k=3, gpu=False)

In [541]:
# Look up the training labels of each test query's retrieved neighbours.
# Note: this rebinds similar_videos from the earlier full-dataset test.
similar_videos = y_train[feature_indices]

In [542]:
# Score the held-out queries against y_test. k is left at its default of 3, which
# matches the k=3 used for the kNN search above. The value is printed unrounded.
print("Accuracy:",get_cls_accuracy(similar_videos, y_test))

Accuracy: 0.7467467467468066
