In [34]:
from tqdm import tqdm
import json
import copy

In [51]:
import numpy as np

# Load the saved variables. np.load(...).item() unwraps the single Python
# object stored in the .npy file; it is indexed by string keys below.
# NOTE(review): allow_pickle=True unpickles the file contents, which can run
# arbitrary code -- only load 'embeddings.npy' from a trusted source.
load_file = 'embeddings.npy'
save_dict = np.load(load_file, allow_pickle=True).item()

# Training-side entries.
embedding_dict = save_dict['training_embedding_dict']
label_dict = save_dict['training_label_dict']
# all_embeddings[i] and all_labels[i] are looked up with the same index i by
# the nearest-neighbour helpers in this notebook.
all_embeddings = save_dict['all_embeddings']
all_labels = save_dict['all_labels']
# presumably class centres of the training embeddings -- TODO confirm
centers = save_dict['centers']
# Test-side entries; the evaluation loop iterates test_embedding_dict by key.
test_embedding_dict = save_dict['test_embedding_dict']
test_label_dict = save_dict['test_label_dict']


In [5]:
def calculate_distance(test_embedding, support_embedding, method):
    """Return the distance between two embeddings under the chosen metric.

    Args:
        test_embedding: Embedding of the test sample.
        support_embedding: Embedding to compare against.
        method: 'euclidean' for the norm of the difference, or 'cosine' for
            one minus the cosine similarity.

    Returns:
        The distance as computed by NumPy.

    Raises:
        ValueError: If ``method`` is neither 'euclidean' nor 'cosine'.
    """
    if method == 'euclidean':
        difference = test_embedding - support_embedding
        return np.linalg.norm(difference)
    if method != 'cosine':
        raise ValueError('Not Implemented')
    # Cosine distance = 1 - (a . b) / (|a| * |b|).
    norm_product = np.linalg.norm(test_embedding) * np.linalg.norm(support_embedding)
    similarity = np.dot(test_embedding, support_embedding) / norm_product
    return 1 - similarity

In [6]:
def get_support_embedding_indices(test_embedding, all_training_embeddings, k=20):
    """Return the indices of the ``k`` training embeddings nearest to a test embedding.

    Distances are Euclidean, computed one pair at a time with
    ``calculate_distance``.

    Args:
        test_embedding: Embedding to find neighbours for.
        all_training_embeddings: Sequence of training embeddings; returned
            indices are positions in this sequence.
        k: Maximum number of neighbours to return. Defaults to 20, the value
            that used to be hard-coded.

    Returns:
        NumPy integer array of at most ``k`` positions, ordered from the
        nearest embedding to the farthest. Fewer than ``k`` are returned when
        there are fewer training embeddings.

    Raises:
        ValueError: If ``k`` is negative.
    """
    if k < 0:
        # A negative k would silently slice from the end instead of failing.
        raise ValueError('k must be non-negative')
    # Calculate the distance from the test embedding to every training embedding.
    distances = [
        calculate_distance(test_embedding, training_embedding, 'euclidean')
        for training_embedding in all_training_embeddings
    ]
    # argsort is ascending, so the first k positions are the k nearest neighbours.
    return np.argsort(distances)[:k]

In [13]:
def is_ood(test_embedding, support_indices, threshold):
    """Decide whether a test embedding is out-of-distribution.

    The embedding counts as out-of-distribution when it has no support
    neighbours, or when even its closest support embedding (Euclidean
    distance, looked up in the global ``all_embeddings``) is farther away
    than ``threshold``.

    Args:
        test_embedding: Embedding being checked.
        support_indices: Indices into ``all_embeddings`` of the support set.
        threshold: Largest nearest-neighbour distance still considered
            in-distribution.

    Returns:
        True if the embedding is out-of-distribution, otherwise False.
    """
    # No neighbours at all: nothing supports this sample.
    if len(support_indices) == 0:
        return True
    nearest = min(
        calculate_distance(test_embedding, all_embeddings[idx], 'euclidean')
        for idx in support_indices
    )
    return bool(nearest > threshold)

In [8]:
def get_sample_confidence(label_probs):
    """Return the confidence of a sample: its largest label probability.

    Args:
        label_probs: Iterable of per-label probabilities for one sample.

    Returns:
        The maximum entry of ``label_probs``.
    """
    highest_prob = max(label_probs)
    return highest_prob

In [9]:
def get_sample_label_probs(test_embedding, support_indices, num_labels=4, eps=1e-12):
    """Score each label by the inverse mean distance to its support neighbours.

    For every index in ``support_indices`` the Euclidean distance between
    ``test_embedding`` and ``all_embeddings[i]`` is accumulated under the label
    ``all_labels[i]`` (both are notebook globals). A label's score is
    ``1 / mean_distance``; labels with no neighbour keep a score of 0.

    Args:
        test_embedding: Embedding of the sample being scored.
        support_indices: Indices into ``all_embeddings`` / ``all_labels`` of
            the neighbours to use.
        num_labels: Number of labels; labels are expected to be the integers
            ``0 .. num_labels - 1``. Defaults to 4, the value that used to be
            hard-coded.
        eps: Lower bound applied to the mean distance before inverting it, so
            a sample that coincides with its neighbours (mean distance 0) gets
            a large finite score instead of ``inf``.

    Returns:
        List of ``num_labels`` unnormalised scores (not yet probabilities).
    """
    sum_distances = [0.0] * num_labels
    num_distances = [0] * num_labels
    for i in support_indices:
        label = all_labels[i]
        sum_distances[label] += calculate_distance(test_embedding, all_embeddings[i], 'euclidean')
        num_distances[label] += 1

    label_probs = [0 for _ in range(num_labels)]
    for label in range(num_labels):
        if num_distances[label] != 0:
            mean_distance = sum_distances[label] / num_distances[label]
            # Clamp so a zero mean distance does not divide by zero.
            label_probs[label] = 1 / max(mean_distance, eps)
    return label_probs




In [10]:
def prob_normalization(label_probs):
    """Turn per-label scores into probabilities with a softmax.

    Args:
        label_probs: Array-like of raw per-label scores.

    Returns:
        NumPy array of the same shape whose entries are
        ``exp(score) / sum(exp(scores))`` and therefore sum to 1.
    """
    scores = np.asarray(label_probs)
    if scores.size == 0:
        # Nothing to normalise; return an empty float array as before.
        return np.exp(scores)
    # Subtracting the maximum leaves the softmax unchanged mathematically but
    # keeps every exponent <= 0, so large scores no longer overflow to inf
    # (which previously turned the result into NaN).
    exp_scores = np.exp(scores - np.max(scores))
    return exp_scores / np.sum(exp_scores)

In [40]:
def get_video_label_confidence(sample_results):
    """Aggregate per-sample label probabilities into one video-level prediction.

    The per-sample rows are summed per label, the sums are normalised to add
    up to 1, and the label with the largest share wins.

    Args:
        sample_results: 2-D array-like with one row of label probabilities
            (or votes) per sample.

    Returns:
        Tuple ``(video_label, label_confidence)``: the index of the winning
        label and its normalised share of the total.

    Raises:
        ValueError: If ``sample_results`` is empty.
    """
    sample_results = np.asarray(sample_results)
    if sample_results.size == 0:
        # Previously this surfaced as an opaque IndexError further down.
        raise ValueError('sample_results is empty; cannot derive a video label')
    label_confidences = np.sum(sample_results, axis=0)
    # Normalize the confidence. Out-of-place division also works for integer
    # votes, where an in-place ``/=`` raises a casting error.
    label_confidences = label_confidences / np.sum(label_confidences)
    # Get the video label.
    video_label = np.argmax(label_confidences)
    label_confidence = label_confidences[video_label]

    return video_label, label_confidence
    



In [42]:

# Classify every test video. Each embedding of a video is scored against its
# nearest training neighbours, the scores are turned into probabilities, and
# the per-sample probabilities are aggregated into one label and confidence.
results = {}
for video in tqdm(test_embedding_dict):
    test_embeddings = test_embedding_dict[video]
    sample_results = []
    sample_confidences = []
    for test_embedding in test_embeddings:
        support_indices = get_support_embedding_indices(test_embedding, all_embeddings)
        label_probs = get_sample_label_probs(test_embedding, support_indices)
        label_probs = prob_normalization(label_probs)
        sample_confidences.append(get_sample_confidence(label_probs))
        sample_results.append(label_probs)
    sample_results = np.array(sample_results)
    sample_confidences = np.array(sample_confidences)
    video_label, video_confidence = get_video_label_confidence(sample_results)
    # Keep one entry per video. Re-creating `results` inside the loop used to
    # discard every prediction except the last video's.
    results[video] = {
        'confidence': video_confidence,
        'label': video_label,
    }