In [1]:
import os
import sys
import yaml
import time
import glob
import h5py
import faiss
import click
import curses
import librosa

import numpy as np
import pandas as pd
import tensorflow as tf
from pydub import AudioSegment


from model_RA.fp_RA.melspec.melspectrogram_RA import get_melspec_layer
from model_RA.fp_RA.nnfp import get_fingerprinter
from model_RA.utils.dataloader_keras import genUnbalSequence

2024-06-22 09:48:55.493988: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def load_h5_data(source_dir):
    h5Files = sorted(glob.glob(source_dir + '**/*.h5', recursive=True))

    embs_count = 0
    embs_info = []
    embs = []

    for i in range(len(h5Files[30005:])):
        with h5py.File(h5Files[i], "r") as f:
            #print(i)
            base_name = os.path.splitext(os.path.basename(h5Files[i]))[0]
            #primeiro objeto é o que contém os embeddings
            a_group_key = list(f.keys())[0]

            #Extração dos embs como um array
            ds_arr = f[a_group_key][()]  # returns as a numpy array
            #print(ds_arr.shape)
            embeddings = np.squeeze(ds_arr, axis=1)
            #print(embeddings.shape)
            embs.append(embeddings) #Guarda na lista os embs
            #arrayEmb = ds_arr

            embs_count += len(ds_arr) #conta quantos embs tem o vetor
            embs_info.append([i, base_name, embs_count, ds_arr]) #guarda numa lista o número de vetores até o momento.
            # embs_info = [indice, file_name, n_segs, array]
            
            f.close()
    return embs, embs_info

def create_index(db_embeddings, nogpu=True, n_centroids=256, code_sz=64, nbits=8):
    #faiss.IndexIVFPQ(quantizer, d, n_centroids, code_sz, nbits), com d=, nlist=n_centroids=50, m=code_sz=8, bits=nbits=8
    #faiss.IndexIVFPQ(quantizer, d, nlist, m, bits)
    #n_centroids -> clusters
    
    d = db_embeddings.shape[1]  # Dim emb #len(db_embeddings[0][0][0])

    quantizer = faiss.IndexFlatL2(d)

    code_sz = 64 # power of 2
    n_centroids = 256 #Veronoi Cells
    nbits = 8  # nbits must be 8, 12 or 16, The dimension d should be a multiple of M.
    index = faiss.IndexIVFPQ(quantizer, d, n_centroids, code_sz, nbits) #Adicona clustering

    # Se não usar GPU
    if not nogpu:
        res = faiss.StandardGpuResources()
        index = faiss.index_cpu_to_gpu(res, 0, index)

    if not index.is_trained:
        index.train(db_embeddings)

    # Adicionando os embeddings ao índice
    index.add(db_embeddings)
    print(f"Foram adicionados:{index.ntotal}")
    
    return index

In [3]:
data_dir = '/mnt/dev/rodrigoalmeida/neural-audio-fp/logs/emb/CHECK_BFTRI_100/101/'
h5Dir = '/mnt/dataset/public/Fingerprinting/Embeddings_BFTRI/dummy_db/'
h5Embs, embs_info = load_h5_data(h5Dir)
embsArrayDummy=np.vstack(h5Embs)

In [9]:
faiss_engine = create_index(embsArrayDummy, nogpu=True, n_centroids=256, code_sz=64, nbits=8) #dummy_db

KeyboardInterrupt: 

In [4]:
def load_config(config_fname):
    config_filepath = './config/' + config_fname + '.yaml'
    if os.path.exists(config_filepath):
        print(f'cli: Configuration from {config_filepath}')
    else:
        sys.exit(f'cli: ERROR! Configuration file {config_filepath} is missing!!')

    with open(config_filepath, 'r') as f:
        cfg = yaml.safe_load(f)
    return cfg


def build_fp(cfg):
    """ Build fingerprinter """
    # m_pre: log-power-Mel-spectrogram layer, S.
    m_pre = get_melspec_layer(cfg, trainable=False)

    # m_fp: fingerprinter g(f(.)).
    m_fp = get_fingerprinter(cfg, trainable=False)
    return m_pre, m_fp


@tf.function
def predict(X, m_pre, m_fp):
    """ 
    Test step used for mini-search-validation 
    X -> (B,1,8000)
    """
    #tf.print(X)
    feat = m_pre(X)  # (nA+nP, F, T, 1)
    m_fp.trainable = False
    emb_f = m_fp.front_conv(feat)  # (BSZ, Dim)
    #emb_f_postL2 = tf.math.l2_normalize(emb_f, axis=1)
    emb_gf = m_fp.div_enc(emb_f)
    emb_gf = tf.math.l2_normalize(emb_gf, axis=1)
    return emb_gf # f(.), L2(f(.)), L2(g(f(.))

In [5]:
config = "default_RA"
cfg = load_config(config)

m_pre, m_fp = build_fp(cfg)

checkpoint_root_dir:str = "./logs/CHECK_BFTRI_100/101/"
checkpoint = tf.train.Checkpoint(m_fp)
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_root_dir))

cli: Configuration from ./config/default_RA.yaml


<tensorflow.python.checkpoint.checkpoint.InitializationOnlyStatus at 0x7c1585ea5a60>

In [6]:
#source_root_dir = '/mnt/dataset/public/Fingerprinting/neural-audio-fp-dataset/music/'
audio_dir = '/mnt/dataset/public/Fingerprinting/query_procura/000003.wav' #audio query
ts_dummy_db_source_fps = sorted(
    glob.glob(audio_dir, recursive=True))

dur = cfg['MODEL']['DUR']
hop = cfg['MODEL']['HOP']
fs = cfg['MODEL']['FS']
bsz = ts_batch_sz = cfg['BSZ']['TS_BATCH_SZ']

_ts_n_anchor = ts_batch_sz
ds = genUnbalSequence(
    ts_dummy_db_source_fps,
    ts_batch_sz,
    _ts_n_anchor,
    dur,
    hop,
    fs,
    shuffle=False,
    random_offset_anchor=False,
    drop_the_last_non_full_batch=False)

enq = tf.keras.utils.OrderedEnqueuer(ds,use_multiprocessing=True,shuffle=False)
enq.start(workers=cfg['DEVICE']['CPU_N_WORKERS'], max_queue_size=cfg['DEVICE']['CPU_MAX_QUEUE'])

i = 0
emb_query_list = []

while i < len(enq.sequence):
    X, _ = next(enq.get())
    emb = predict(X, m_pre, m_fp)
    emb_query_list.append(emb.numpy())
    i += 1
enq.stop()

2024-06-22 09:49:49.880413: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 262144000 exceeds 10% of free system memory.
2024-06-22 09:49:50.018361: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 262144000 exceeds 10% of free system memory.
2024-06-22 09:49:50.018426: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 262144000 exceeds 10% of free system memory.
2024-06-22 09:49:50.018441: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 262144000 exceeds 10% of free system memory.
2024-06-22 09:49:51.168904: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 262144000 exceeds 10% of free system memory.


In [7]:
emb_query_array = np.vstack(emb_query_list) #emb_array[472] = emb_query_list[3][97] pois emb_query_list tem 4 batches, sendo os 3 primeiros preenchidos até 125 vetores, e o último com 98 vetores.
#Com esta conversão passo a ter um array com todos os vetores, ou seja, os 473
#genérico: O tem a seguinte forma emb_query_list[N_BSZ][BSZ], e quando o último não está preenchido tem o valor que entre 0 e 125. Pois, BSZ de teste = 125

In [None]:
print(faiss_engine.ntotal)
print(faiss_engine.nprobe)
topN = 1 #Numero de índices por vetor que retornar na comparação

# c) Buscar o índice
D, I = faiss_engine.search(emb_query_array, 1) # D: Distâncias, I: Índices dos resultados

NameError: name 'faiss_engine' is not defined

In [None]:
II=I
for offset in range(len(I)):
    II[offset, :] -= offset

candidates = np.unique(I[np.where(I >= 0)])   # ignore id < 0


x = 0
idx = 0
for i in range(len(embs_info)):
    if embs_info[i][2] < candidates[0]:
        x=embs_info[i+1][2]
        idx = i+1

print(idx, x, candidates[0])

In [None]:
metadata_file = "/mnt/dataset/public/Fingerprinting/selected_tracks.csv"
metadata_df = pd.read_csv(metadata_file)

In [None]:
# d) Recuperar os metadados

data = metadata_df.loc[metadata_df["track_id"] == int(embs_info[idx][1])]

# e) Retornar ao Cliente
print(data)