In [12]:
import sys
import os
current_dir = os.path.abspath('')
sys.path.append(os.path.join(current_dir, '..', '..'))

from jffpy.data import load_features
from jffpy.printing import format_time, purify_name
import numpy as np
import pickle
from numba import njit, jit, prange
# Print current date
from datetime import datetime

In [3]:
now = datetime.now()
print(now.strftime("%d/%m/%Y %H:%M:%S"))

gallerypath = "C:\\Users\\Joao\\Documents\\Repos\\Griaule\\Dados\\tse1kflat_sd27"
querypath = "C:\\Users\\Joao\\Documents\\Repos\\Griaule\\Dados\\sd27\\latent"
qcache = "sd27_latent"

print(gallerypath)
print(querypath)

# Load all the features
features, shapes, filenames = load_features(gallerypath, log_info = True, progress_bar=True, cache=True, cache_name="tse1kflat_sd27")

result = np.repeat(np.arange(len(shapes)), shapes)
for i in range(len(filenames)):
    filenames[i] = purify_name(filenames[i])

queries, qshapes, qfilenames = load_features(querypath, log_info = True, progress_bar = False, cache=True, cache_name=qcache)

# Create two arrays to store mean and stddev of all features from gallery
# Each mean/stddev is computed between accumulated and accumulated + shapes[i]

# Preallocate arrays
mean = np.zeros((len(shapes), features.shape[1]))
stddev = np.ones((len(shapes), features.shape[1]))

accumulated = 0
for i in range(len(shapes)):
    f = features[accumulated:accumulated + shapes[i]]
    mean[i] = np.mean(f, axis=0)
    stddev[i] = np.std(f, axis=0)
    # Apply a shift for all gallery features (features - mean) / stddev
    features[accumulated:accumulated + shapes[i]] = (f - mean[i]) / stddev[i]

    accumulated += shapes[i]


13/12/2024 06:40:07
C:\Users\Joao\Documents\Repos\Griaule\Dados\tse1kflat_sd27
C:\Users\Joao\Documents\Repos\Griaule\Dados\sd27\latent
Loaded 20730 cached file(s) in 68.42 ms
Shape of loaded features: (1957935, 32)
Loaded 257 cached file(s) in 713.10 us
Shape of loaded features: (6658, 32)


In [45]:
@njit
def sequential_search(features, shapes, queries, qshapes, mean, stddev, result, filenames, max_queries):
    all_I = []
    all_D = []

    acc = 0
    for p in prange(max_queries):  # Paralelizar a nível de query
        query = queries[acc:acc + qshapes[p]]
        acc += qshapes[p]

        qidxs = np.zeros((len(query), 16), dtype=np.int32)
        qdist = np.zeros((len(query), 16), dtype=np.float64)

        for q_idx, qf in enumerate(query):
            distances = np.full(len(result), np.inf, dtype=np.float64)
            accumulated = 0

            for i in range(len(shapes)):
                sf = features[accumulated:accumulated + shapes[i]]  # Features para a classe i
                sqf = (qf - mean[i]) / stddev[i]  # Normalizar query feature

                # Calcular distâncias
                distances[accumulated:accumulated + shapes[i]] = np.sum((sf - sqf) ** 2, axis=1)
                accumulated += shapes[i]

            # Encontrar os k índices mais próximos
            k = 16
            idxs = np.argpartition(distances, k)[:k]
            sorted_idx = idxs[np.argsort(distances[idxs])]

            # Salvar resultados
            qidxs[q_idx, :] = sorted_idx
            qdist[q_idx, :] = distances[sorted_idx]

        all_I.append(qidxs)
        all_D.append(qdist)

    return all_I, all_D

In [46]:
max_queries = len(qshapes)
# Perform the sequential search using Numba
# time sequential_search
start = datetime.now()
all_I, all_D = sequential_search(features, shapes, queries, qshapes, mean, stddev, result, filenames, max_queries)
print("Sequential search time:", datetime.now() - start)

Sequential search time: 0:10:08.284359


In [43]:
for I, D in zip(all_I, all_D):
    I, D = np.array(I), np.array(D)
    print(I.shape, D.shape)

(14, 16) (14, 16)
(31, 16) (31, 16)
(24, 16) (24, 16)
(10, 16) (10, 16)


In [48]:
p = 0
for I, D in zip(all_I, all_D):
    I, D = np.array(I), np.array(D)
    print()
    print(f"Query {p}", qfilenames[p], "(queries:" + str(qshapes[p]) + ")")
    for i in range(I.shape[0]):
        idxs = I[i]
        for j, idx in enumerate(idxs):
            print(filenames[result[idx]], ":", f"{D[i][j]:.4f}", sep="", end="  ")
        print()
    p += 1

with open(qcache + '_results_numba.pkl', 'wb') as f:
    pickle.dump((all_I, all_D), f)


Query 0 b101-9_l.tpt (queries:14)
       b101-9_t:7.8211  32356_83269_d09:13.8738  71368_90526_d07:14.5712  61503_73930_d01:14.6893  50795_33306_d03:14.7006  31791_30963_d08:14.7206  61260_75724_d02:15.6334  12208_41399_d04:15.8714  61503_73930_d04:15.9345  02364_61693_d06:15.9710  02216_10157_d08:16.0482  20701_03675_d10:16.5823  72496_30100_d09:16.5953  91708_32774_d09:16.6183  72380_25903_d10:16.6357  80990_83236_d06:16.6721  
       b101-9_t:11.1473  52305_73746_d09:11.5326  42470_92684_d01:12.2609  12410_21461_d07:12.9099  42470_70101_d01:13.2085  00639_42968_d06:13.3622  70809_32410_d06:13.9445  32518_24626_d06:13.9832  12348_62403_d07:14.3267  42305_03673_d08:14.4180  02330_41068_d08:14.6246  41724_12121_d08:14.7070  32356_83269_d09:14.7314  32445_63961_d07:14.9526  62437_83610_d10:15.2823  81821_85355_d10:15.3084  
42453_13876_d04:8.5676  42380_51412_d07:8.7535  32437_70788_d06:11.0460  60469_21263_d04:12.7084  01708_82961_d06:12.9193  42356_01134_d09:13.0982  32470_15823_d10: