In [80]:
# Imports
from src.vector_utils import *
from src.distances import *
from src.print import *
from typing import Callable
import pandas as pd
import time

In [81]:
# Dictionnary test and inputs
tests: dict = {
	"Manhattan":				{"function":distance_manhattan},
	"Euclidean":				{"function":distance_euclidean},
	"Tchebyshev":				{"function":distance_tchebyshev},
	"Minkowski":				{"function":distance_minkowski},
	"Histogram Intersection":	{"function":distance_histogram_intersection},
	"Swapped Histogram":		{"function":distance_histogram_intersection},
	"Khi2":						{"function":distance_khi2},
}

In [82]:
## Prepare inputs
# Two known vectors for other tests
v1: np.ndarray = np.array([2.7, 4.3,  0.2,   9,   -4])
v2: np.ndarray = np.array([7.6, 5.8, -3.2, 9.7, 12.3])

# Generate two random vectors
v3: np.ndarray = np.round(random_vector(10), 4)
v4: np.ndarray = np.round(random_vector(10), 4)

# Similar vectors
v5: np.ndarray = np.array([1, 2, 3, 4, 5])
v6: np.ndarray = np.array([1, 2, 3, 4, 5])

# Prepare input tuples
inputs: list[tuple] = [
	("v1/v2 (vecteurs du cours)", v1, v2),
	("v3/v4 (vecteurs aléatoires)", v3, v4),
	("v5/v6 (vecteurs similaires)", v5, v6),
]

In [83]:
# Test all the inputs
for input_type, x, y in inputs:
	for method, test in tests.items():
		f: Callable = test["function"]
		
		# Measure the time
		start: int = time.perf_counter_ns()
		distance: float = f(x,y) if not "Swap" in method else f(y,x)
		end: int = time.perf_counter_ns()
		duration: int = end - start

		# Store the result
		test[input_type] = {"distance":distance, "duration":duration}

In [84]:
# Util function to print the results
def get_dataframe(tests: dict, input_type: str) -> pd.DataFrame:
	""" Get the dataframe for the given input type\n
	Args:
		tests		(dict):	Dictionnary containing every tests
		input_type	(str):	Input type (text for which inputs are used)
	Returns:
		(pd.DataFrame):	Dataframe containing the results
	"""
	# Prepare the dataframe
	df: pd.DataFrame = pd.DataFrame()
	for name in tests.keys():
		df[name] = 0
	
	# Add the distances and duration line
	for name in tests.keys():
		df.loc["Distance", name] = round(tests[name][input_type]["distance"], 4)
		df.loc["Time (ns)", name] = round(tests[name][input_type]["duration"])

	# Return the dataframe
	return df

In [85]:
# Show first inputs (made in class)
input_type: str = inputs[0][0]
x = inputs[0][1]
y = inputs[0][2]
info(f"{input_type}\n x = {x}\n y = {y}")
get_dataframe(tests, input_type)

[92m[INFO  14:06:22] v1/v2 (vecteurs du cours)
 x = [ 2.7  4.3  0.2  9.  -4. ]
 y = [ 7.6  5.8 -3.2  9.7 12.3][0m


Unnamed: 0,Manhattan,Euclidean,Tchebyshev,Minkowski,Histogram Intersection,Swapped Histogram,Khi2
Distance,26.8,17.4356,16.3,19.3847,0.2733,0.7213,5.3909
Time (ns),127200.0,67000.0,44200.0,76000.0,59400.0,46500.0,101000.0


In [86]:
# Show second inputs (random vectors)
input_type: str = inputs[1][0]
x = inputs[1][1]
y = inputs[1][2]
info(f"{input_type}\n x = {x}\n y = {y}")
get_dataframe(tests, input_type)

[92m[INFO  14:06:24] v3/v4 (vecteurs aléatoires)
 x = [0.2772 0.6799 0.6052 0.5581 0.7441 0.2224 0.1991 0.7975 0.358  0.4378]
 y = [0.7417 0.5888 0.2735 0.8298 0.1888 0.0787 0.7859 0.81   0.7721 0.9144][0m


Unnamed: 0,Manhattan,Euclidean,Tchebyshev,Minkowski,Histogram Intersection,Swapped Histogram,Khi2
Distance,3.348,1.2166,0.5868,1.6871,0.628,0.7701,1.5893
Time (ns),41900.0,42000.0,35400.0,59500.0,53500.0,44300.0,89900.0


In [87]:
# Show third inputs (similar vectors)
input_type: str = inputs[2][0]
x = inputs[2][1]
y = inputs[2][2]
info(f"{input_type}\n x = {x}\n y = {y}")
get_dataframe(tests, input_type)

[92m[INFO  14:06:30] v5/v6 (vecteurs similaires)
 x = [1 2 3 4 5]
 y = [1 2 3 4 5][0m


Unnamed: 0,Manhattan,Euclidean,Tchebyshev,Minkowski,Histogram Intersection,Swapped Histogram,Khi2
Distance,0.0,0.0,0.0,0.0,1.0,1.0,0.0
Time (ns),36600.0,47000.0,37200.0,60200.0,58600.0,47000.0,91200.0


In [88]:
# Sort algorithms in terms of speed
df = pd.DataFrame()
time_str: str = "Total Time (ns)"
df[time_str] = 0
for method in tests.keys():
	df.loc[method, time_str] = sum(tests[method][input_type[0]]["duration"] for input_type in inputs)
df = df.sort_values(by=time_str)
df[time_str] = df[time_str].astype(int)
info("Total time for each method")
df

[92m[INFO  14:06:30] Total time for each method[0m


Unnamed: 0,Total Time (ns)
Tchebyshev,116800
Swapped Histogram,137800
Euclidean,156000
Histogram Intersection,171500
Minkowski,195700
Manhattan,205700
Khi2,282100
