In [None]:
# Move the working directory two levels up from the notebook's folder
# (presumably to the repository root so that `emv` is importable - TODO confirm).
# NOTE(review): this is a relative move, so re-running this cell without
# restarting the kernel climbs two more levels each time.
%cd ../..
%load_ext autoreload

# Mode 2: reload all modules (except those excluded via %aimport) before
# executing each cell, so edits to project modules are picked up live.
%autoreload 2

In [None]:

import os
import random
import textwrap as tw
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from PIL import Image
from ast import literal_eval
from sklearn.metrics import pairwise_distances

from emv.settings import DRIVE_PATH

# Clustering
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import DBSCAN, KMeans
from hdbscan import HDBSCAN

# DR
from umap import UMAP
from umap.umap_ import nearest_neighbors
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from trimap import TRIMAP

# Metrics
from emv.embeddings.dr_eval import compute_embeddings, compute_umap_embeddings, plot_embeddings
from emv.embeddings.dr_eval import \
    compute_coranking_metrics, \
    random_triplet_accuracy, \
    compute_pcc, \
    global_score

# Load ImageNet features

In [None]:
# List what is available in the AI-box vectors folder.
vectors_dir = DRIVE_PATH + "rts/aibox-vectors"
os.listdir(vectors_dir)

In [None]:
# Root folder of the AI-box export. Paths below are built by plain string
# concatenation, so the trailing slash is significant.
SAMPLE_FOLDER = DRIVE_PATH + "rts/aibox-vectors/"

# Load the video index table.
rts_videos = pd.read_csv(SAMPLE_FOLDER + "videos.csv")

# Each video's folder is nested under the last three characters of its
# id_result, taken in reverse order:
#   videos/<last char>/<2nd last>/<3rd last>/<id_result>/
rts_videos["path"] = rts_videos.id_result.map(
    lambda rid: SAMPLE_FOLDER + "videos/" + "/".join((rid[-1], rid[-2], rid[-3], rid)) + "/"
)

In [None]:
# Read every video's `features_mean.npz` archive into memory.
# Result: {umid: {archive entry name: array}}.
imagenet_features_per_video = {}
for umid, video in zip(rts_videos.umid, rts_videos.path):
    # NOTE(review): allow_pickle=True lets NumPy unpickle object arrays, which
    # can execute arbitrary code - only acceptable for trusted archives.
    # Confirm object arrays are really stored here; otherwise drop the flag.
    #
    # np.load on an .npz returns a lazily-read NpzFile that keeps its file
    # handle open. Use it as a context manager so one handle is not leaked per
    # video; the entries are materialised inside the `with`, before it closes.
    with np.load(video + "features_mean.npz", allow_pickle=True) as npz:
        imagenet_features_per_video[umid] = {item: npz[item] for item in npz.files}

In [None]:
# Look up each video's loaded archive entries (a dict) by umid.
per_video_arrays = rts_videos.umid.map(lambda umid: imagenet_features_per_video[umid])

# Pull the two archive entries of interest into their own columns
# (`.get` yields None when an archive lacks the entry).
rts_videos["imagenet_features"] = per_video_arrays.map(lambda arrays: arrays.get("imagenet_features_mean"))
rts_videos["scenes_tc"] = per_video_arrays.map(lambda arrays: arrays.get("scenes_tc"))

# Positional id of every scene within its video: 0, 1, ..., len(scenes_tc) - 1.
rts_videos["scenes_ids"] = rts_videos.scenes_tc.map(lambda scene_tcs: list(range(len(scene_tcs))))

In [None]:
# Expand the three list-like columns in lockstep so that every element gets its
# own row (one row per scene instead of one row per video), then drop rows with
# any missing value. Chained reassignment is used instead of `inplace=True`.
rts_videos = (
    rts_videos
    .explode(["imagenet_features", "scenes_tc", "scenes_ids"])
    .reset_index(drop=True)
    .dropna()
)

# Difference between the two entries of scenes_tc
# (presumably end - start timecode - TODO confirm ordering and units).
rts_videos["scenes_length"] = rts_videos.scenes_tc.map(lambda tc: tc[1] - tc[0])

# Thumbnail path: <video folder>ims_scene/<scene id>.jpg
# Built with vectorised string concatenation rather than a row-wise
# `apply(axis=1)`; same values, but faster and well-defined on an empty frame.
rts_videos["thumbnail"] = rts_videos["path"] + "ims_scene/" + rts_videos["scenes_ids"].map(str) + ".jpg"

# Keep only scenes with a strictly positive length.
rts_videos = rts_videos[rts_videos.scenes_length > 0]

In [None]:
# Preview the first rows of the table as a sanity check.
rts_videos.head()

# Embeddings

In [None]:
# Work on a fixed-seed random subset of at most SAMPLE_SIZE scenes.
# The size is clamped to the number of available rows: DataFrame.sample raises
# ValueError when asked for more rows than exist (without replacement).
SAMPLE_SIZE = 1000
sample = rts_videos.sample(min(SAMPLE_SIZE, len(rts_videos)), random_state=42)

# Stack the per-scene feature vectors into one array, one row per sampled scene
# (assumes all vectors share the same length - TODO confirm).
features = np.array(sample.imagenet_features.tolist())

In [None]:
# Results are accumulated in a fixed order - PCA, UMAP, t-SNE, TriMap - and the
# thumbnail cell further down reads the last entry, so keep that order.
# NOTE(review): no random_state is passed to the stochastic reducers here, so
# their layouts may differ between runs - confirm whether the helpers seed them.
embeddings_results = []

# PCA embeddings (2 components)
embeddings_results.append(
    compute_embeddings(features=features, reducer=PCA, params={"n_components": 2})
)

# UMAP embeddings, one per neighbourhood size
n_neighbors = [50, 100, 500]
embeddings_results.extend(
    compute_umap_embeddings(features=features, n_neighbors=n_neighbors)
)

# TSNE embeddings (cosine metric), one per perplexity
perps = [5, 10, 50, 100]
embeddings_results.extend(
    compute_embeddings(
        features=features,
        reducer=TSNE,
        params={"n_components": 2, "metric": "cosine", "perplexity": perplexity},
    )
    for perplexity in perps
)

# TRIMAP embeddings (cosine distance), one per n_inliers value.
# n_outliers and n_random are each half of n_inliers, i.e. the 2:1:1 ratio
# for n_inliers:n_outliers:n_random (as recommended in the paper).
n_inliers_values = [10, 20, 50]
embeddings_results.extend(
    compute_embeddings(
        features=features,
        reducer=TRIMAP,
        params={
            "n_inliers": inliers,
            "n_outliers": inliers // 2,
            "n_random": inliers // 2,
            "distance": "cosine",
        },
    )
    for inliers in n_inliers_values
)

In [None]:
# Hand every collected embedding result to the plotting helper, together with
# the title string.
embeddings_title = "Embeddings on the RTS sample imagenet features"
plot_embeddings(embeddings_results, embeddings_title)

In [None]:
# Show a maximum of MAX_THUMBNAILS thumbnails on the plot.
MAX_THUMBNAILS = 1000

# Stride used to thin out the sample. It is derived from `sample` itself: the
# previous version read `thumbnails` before assigning it, which raises
# NameError on a fresh kernel. Ceiling division (instead of flooring) keeps the
# count at or below the cap (e.g. 1999 rows -> stride 2, not 1), and max(1, ...)
# covers an empty sample.
EVERY_N = max(1, -(-len(sample) // MAX_THUMBNAILS))

thumbnail_paths = sample.thumbnail.tolist()[::EVERY_N]
# NOTE(review): PIL opens images lazily and keeps each file open until the
# pixel data is read; with many thumbnails this can approach the OS
# open-file limit.
thumbnails = [Image.open(path) for path in thumbnail_paths]

# Apply the same stride to the last computed embedding so that positions keep
# matching the thumbnails (assumes embedding rows follow the order of `sample`,
# which `features` was built from - TODO confirm).
embeddings = embeddings_results[-1]["embeddings"][::EVERY_N]

# NOTE(review): `plot_embeddings_with_images` is not among the names imported
# in the notebook's import cell - confirm where it is defined or imported.
plot_embeddings_with_images(embeddings, thumbnails, zoom = 0.15)