In [1]:
import glob
import subprocess
from math import sqrt

import imageio
import matplotlib.pyplot as plt
import numpy as np
from skimage import transform

import extract_features
import knn_cnn_features
%matplotlib inline

In [2]:
def load_image(img_path, resize=True, output_shape=(200, 200)):
    """Read an image from disk and optionally resize it.

    Parameters
    ----------
    img_path : str
        Path of the image file, passed straight to ``imageio.imread``.
    resize : bool, optional
        When True (default) the image is resized to ``output_shape`` with
        anti-aliasing; when False the image is returned exactly as read.
    output_shape : tuple of int, optional
        Target ``(rows, cols)`` used when ``resize`` is True. Defaults to
        ``(200, 200)``, the size that was previously hard-coded.

    Returns
    -------
    The image as returned by ``imageio.imread``, or the result of
    ``skimage.transform.resize`` when ``resize`` is True.

    Notes
    -----
    NOTE(review): per the scikit-image documentation, ``transform.resize``
    converts its input to floating point unless ``preserve_range=True``, so
    the resized and un-resized results may differ in dtype/value range --
    confirm callers expect this.
    """
    image = imageio.imread(img_path)
    if not resize:
        return image
    return transform.resize(
        image=image,
        output_shape=output_shape,
        anti_aliasing=True,
        mode='constant',
    )

In [2]:
# Benchmark: time extract_features.get_vector_resnet50 on a single image.
%timeit feature = extract_features.get_vector_resnet50('data/images/golden1.jpg')

20 ms ± 290 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [2]:
# Benchmark: time extract_features.get_vector_resnet18 on the same image for comparison.
%timeit feature = extract_features.get_vector_resnet18('data/images/golden1.jpg')

10.4 ms ± 65.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


**ResNet-50** takes roughly $2\times$ as long as **ResNet-18** to extract a feature vector from one image (about $20$ ms vs. $10.4$ ms).

In [3]:
# Every entry directly under data/videos is treated as a class name: keep only
# the last path component of each match and sort the names alphabetically.
classes = sorted(map(lambda entry: entry.split("/")[-1], glob.glob("data/videos/*")))
print(classes)

['animals', 'children', 'christmas', 'nature', 'school', 'snow', 'work']


In [2]:
# Extract one ResNet-50 feature vector to use as sample data in the cells below.
feature = extract_features.get_vector_resnet50('data/images/golden1.jpg')

In [7]:
# Inspect the dimensionality of the extracted feature vector.
feature.shape

(2048,)

In [3]:
# Build a synthetic feature matrix for benchmarking: add a leading axis to the
# single feature vector and repeat it 1000 times along that axis.
concat_features = np.repeat(np.expand_dims(feature, axis=0), 1000, axis=0)
concat_features.shape

(1000, 2048)

In [6]:
import knn_cnn_features
%time feature_indices = knn_cnn_features.run_knn_features(concat_features,k=3)

Number of records: 1000 
Number of dimensions: 2048
Running with Flat index for 1000 records of with dimensionality 2048
CPU times: user 9.75 ms, sys: 86.7 ms, total: 96.4 ms
Wall time: 94.9 ms


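For $1000$ feature vectors of dimensionality $2048$, the KNN search is very fast, i.e. around $95$ ms of wall time with a flat index. Note that this benchmark uses $1000$ copies of the same vector, so it measures speed only, not retrieval quality. The indices returned for the feature vectors can be mapped back to the corresponding videos, which is how similarity search can be implemented.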

## Testing with real features (UCF101 subset)

In [None]:
# Run the KNN search on features stored in 'image_features.hdf5'.
# NOTE(review): feature_file is a plain str at this point, so the
# feature_file['labels'] and feature_file['features'] lookups below index a
# string with a string key and raise TypeError. Presumably the HDF5 file was
# meant to be opened first (e.g. with an HDF5 reader such as h5py) and the
# resulting file object indexed instead -- TODO confirm and fix.
feature_file = 'image_features.hdf5'
feature_labels = np.array(feature_file['labels'])
%time feature_indices = knn_cnn_features.run_knn_features(feature_file['features'],k=3)

In [5]:
def vid_from_path(ss = 'data/videos/animals/0.mp4/output0001.jpg'):
    """Return the video path that a frame path belongs to.

    The video path is everything before the last ``/`` in ``ss``; e.g.
    ``'data/videos/animals/0.mp4/output0001.jpg'`` gives
    ``'data/videos/animals/0.mp4'``.

    Parameters
    ----------
    ss : str
        Frame path using ``/`` as the separator.

    Returns
    -------
    str
        The part of ``ss`` before its last ``/``. When ``ss`` contains no
        ``/`` an empty string is returned (previously the last character of
        ``ss`` was silently dropped because ``rfind`` returned -1).
    """
    # rpartition yields ('', '', ss) when the separator is absent, so the
    # no-slash case maps to '' instead of slicing with index -1.
    return ss.rpartition("/")[0]

In [6]:
def get_ordered_unique(listed):
    """Return the distinct items of ``listed`` in first-occurrence order.

    Items must be hashable, since a set is used to track what has already
    been emitted.
    """
    already_seen = set()
    unique_items = []
    for item in listed:
        if item in already_seen:
            continue
        already_seen.add(item)
        unique_items.append(item)
    return unique_items

In [8]:
# Look up the labels of the nearest neighbours, map each label to its video
# path, and de-duplicate while keeping first-occurrence order.
# NOTE(review): assumes feature_labels[feature_indices] iterates over individual
# path strings; if feature_indices is 2-D (one row of neighbours per query) the
# map would receive arrays instead -- TODO confirm the shape.
similar_videos_ordered = get_ordered_unique(list(map(vid_from_path, feature_labels[feature_indices])))