In [15]:
import json
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits import mplot3d
from numpy import linalg as LA
from scipy.spatial import geometric_slerp

In [16]:
# Reference statistics, three components each (presumably per-channel RGB mean / std of
# CIFAR-10 -- confirm against the training pipeline).
cifar10_mean = np.array([0.4914, 0.4822, 0.4465])
cifar10_std = np.array([0.2471, 0.2435, 0.2616])

In [17]:
def cosine_similarity(A: np.ndarray, B: np.ndarray):
    """Cosine similarity between ``A`` and the reference vector ``B``.

    Parameters
    ----------
    A : np.ndarray
        Either a single vector of shape ``(d,)`` or a stack of vectors whose
        last axis has length ``d`` (e.g. ``(n, d)``).
    B : np.ndarray
        1-D reference vector of shape ``(d,)``.

    Returns
    -------
    A scalar when ``A`` is 1-D, otherwise an array shaped like ``A`` without
    its last axis, holding one similarity per vector in ``A``.

    Notes
    -----
    A zero-length vector in ``A`` or ``B`` leads to a division by zero, which
    NumPy reports as a warning and returns as ``nan``/``inf``; no exception is
    raised here.
    """
    # Taking the norm along the last axis covers the single-vector case and any
    # stack of vectors alike (the former hard-coded ``axis=1`` only worked for 2-D).
    A_dist = LA.norm(A, 2, axis=-1)
    return np.dot(A, B) / (A_dist * LA.norm(B, 2))

- Find 10 mean vectors whose cosine similarity to the CIFAR-10 mean falls inside a target range

In [18]:
# generate 10000 random mean samples
sample_num = 10000
# Fixed seed: a fresh kernel + Run All reproduces exactly the same candidates.
# (Annotated with the public ``np.random.Generator`` rather than the private module path.)
rng: np.random.Generator = np.random.default_rng(seed=123)
# One candidate 3-component vector per row, each component uniform in [-1, 1).
rnd_vectors = rng.uniform(-1, 1, size=(sample_num, 3))

In [19]:
# cosine similarity of every candidate vector against `cifar10_mean`, together with the
# index order that sorts those similarities ascending
cossim_values_np = cosine_similarity(rnd_vectors, cifar10_mean)
cossim_value_sort_indices_np = cossim_values_np.argsort()
print(cossim_value_sort_indices_np.shape)


(10000,)


In [20]:
# find the left bound & right bound of indices for slicing the values in range
left_limit = -0.381
right_limit = 0.108

# Similarities laid out in ascending order, i.e. position k here corresponds to
# cossim_value_sort_indices_np[k].
sorted_cossim_values_np = cossim_values_np[cossim_value_sort_indices_np]

# Binary search instead of scanning element by element:
#   left_index  -> first sorted position whose value is strictly greater than left_limit
#   right_index -> last sorted position whose value is strictly less than right_limit
left_index = int(np.searchsorted(sorted_cossim_values_np, left_limit, side="right"))
right_index = int(np.searchsorted(sorted_cossim_values_np, right_limit, side="left")) - 1

# Fail loudly if no sample lies strictly between the limits, rather than letting a
# later cell slice in an out-of-range sample (or nothing at all).
assert left_index <= right_index, "no sample falls strictly between left_limit and right_limit"

print(left_index)
print(right_index)


2980
5639


In [21]:
# cosine similarity of the sample sitting at the right bound (should be just below right_limit)
right_bound_sample = cossim_value_sort_indices_np[right_index]
print(cossim_values_np[right_bound_sample])

0.10742645621914504


In [22]:
# keep only the sort indices between the two bounds (both bounds included)
in_range_positions = slice(left_index, right_index + 1)
sliced_cossim_value_sort_indices_np = cossim_value_sort_indices_np[in_range_positions]

In [23]:
# smallest cosine similarity kept by the slice (should be just above left_limit)
first_in_range_sample = sliced_cossim_value_sort_indices_np[0]
print(cossim_values_np[first_in_range_sample])

-0.38071099610408654


In [24]:
# how many candidate samples remain after slicing to the range
sliced_shape = sliced_cossim_value_sort_indices_np.shape
print(sliced_shape)

(2660,)


In [25]:
# prepare the 10 evenly spaced interior points of (left_limit, right_limit)
num_targets = 10
# num_targets + 1 samples without the endpoint; the first one is left_limit itself and is
# dropped below, leaving exactly num_targets interior points
target_cossim_range_np = np.linspace(
    left_limit, right_limit, num_targets + 1, endpoint=False
)
target_cossim_range = target_cossim_range_np[1:].round(3).tolist()
pprint(target_cossim_range)

[-0.337, -0.292, -0.248, -0.203, -0.159, -0.114, -0.07, -0.025, 0.019, 0.064]


In [26]:
# walk the in-range samples in ascending cosine-similarity order and, for each target in
# turn, keep the first sample whose similarity is strictly greater than that target
selected_sort_indices = []
target_counter = 0

for candidate_index in sliced_cossim_value_sort_indices_np:
    if cossim_values_np[candidate_index] > target_cossim_range[target_counter]:
        selected_sort_indices.append(candidate_index)
        target_counter += 1
        # every target has been matched -> nothing left to look for
        if target_counter == len(target_cossim_range):
            break

pprint(selected_sort_indices)

[1664, 4309, 1765, 419, 1998, 248, 1583, 3509, 4524, 4497]


In [27]:
# round the selected vectors to 3 decimals and re-check their cosine similarity,
# since rounding shifts the value slightly
selected_vecs_rounded = np.round(rnd_vectors[selected_sort_indices], 3)
for selected_vec in selected_vecs_rounded:
    cossim_value = cosine_similarity(selected_vec, cifar10_mean)
    print(f"{selected_vec.tolist()}: {cossim_value}")

[-0.731, -0.311, 0.544]: -0.3370355084911757
[0.219, -0.247, -0.174]: -0.2912352734646942
[-0.721, 0.778, -0.601]: -0.24739262454028435
[0.749, -0.448, -0.776]: -0.20290863319577457
[-0.311, -0.857, 0.897]: -0.157751128468922
[0.205, 0.181, -0.549]: -0.11347859415543816
[-0.804, 0.685, 0.01]: -0.06958444047413434
[0.6, -0.734, 0.089]: -0.024772773350340133
[-0.511, -0.221, 0.837]: 0.019455797381099114
[0.853, -0.615, -0.149]: 0.06434965317391732


In [28]:
# save the selected vectors (rounded to 3 decimals) to json
select_vec_data = [
    np.round(rnd_vectors[selected_sort_index], 3).tolist()
    for selected_sort_index in selected_sort_indices
]

# Serializing json
json_object = json.dumps(select_vec_data, indent=4)

# Writing to mean_values.json; the encoding is spelled out so the file does not depend on
# the platform's default text encoding
with open("mean_values.json", "w", encoding="utf-8") as outfile:
    outfile.write(json_object)