In [1]:
import os
import sys
import yaml

import tensorflow as tf
from tensorflow.keras.utils import Progbar
import tensorflow.keras as K
import librosa
import numpy as np
import glob

2024-04-28 22:06:30.010107: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

from model.dataset import Dataset
from model.fp.melspec.melspectrogram import get_melspec_layer
from model.fp.specaug_chain.specaug_chain import get_specaug_chain_layer
from model.fp.nnfp import get_fingerprinter


In [3]:
tf.config.list_physical_devices('GPU')

2024-04-28 22:06:41.381328: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-04-28 22:06:41.381574: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-04-28 22:06:41.414316: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]

In [4]:
tf.__version__

'2.16.1'

# Functions definition

In [37]:
def build_fp(cfg):
    """ Build fingerprinter """
    # m_pre: log-power-Mel-spectrogram layer, S.
    m_pre = get_melspec_layer(cfg, trainable=False)

    # m_specaug: spec-augmentation layer.
    m_specaug = get_specaug_chain_layer(cfg, trainable=False)
    assert(m_specaug.bypass==False) # Detachable by setting m_specaug.bypass.

    # m_fp: fingerprinter g(f(.)).
    m_fp = get_fingerprinter(cfg, trainable=False)
    return m_pre, m_specaug, m_fp

def load_config(config_fname):
    config_filepath = './config/' + config_fname + '.yaml'
    if os.path.exists(config_filepath):
        print(f'cli: Configuration from {config_filepath}')
    else:
        sys.exit(f'cli: ERROR! Configuration file {config_filepath} is missing!!')

    with open(config_filepath, 'r') as f:
        cfg = yaml.safe_load(f)
    return cfg

@tf.function
def test_step(X, m_pre, m_fp):
    """ Test step used for mini-search-validation """
    X = tf.concat(X, axis=0)
    feat = m_pre(X)  # (nA+nP, F, T, 1)
    m_fp.trainable = False
    emb_f = m_fp.front_conv(feat)  # (BSZ, Dim)
    emb_f_postL2 = tf.math.l2_normalize(emb_f, axis=1)
    emb_gf = m_fp.div_enc(emb_f)
    emb_gf = tf.math.l2_normalize(emb_gf, axis=1)
    return emb_f, emb_f_postL2, emb_gf # f(.), L2(f(.)), L2(g(f(.))

@tf.function
def predict(X, m_pre, m_fp):
    """ 
    Test step used for mini-search-validation 
    X -> (B,1,8000)
    """
    feat = m_pre(X)  # (nA+nP, F, T, 1)
    m_fp.trainable = False
    emb_f = m_fp.front_conv(feat)  # (BSZ, Dim)
    emb_f_postL2 = tf.math.l2_normalize(emb_f, axis=1)
    emb_gf = m_fp.div_enc(emb_f)
    emb_gf = tf.math.l2_normalize(emb_gf, axis=1)
    return emb_f, emb_f_postL2, emb_gf # f(.), L2(f(.)), L2(g(f(.))

# Load latest checkpoint

In [4]:
checkpoint_name_dir:str = "./logs/CHECKPOINT_BSZ_120"#"CHECKPOINT"   # string
checkpoint_index:int = None  # int
config:str = "default"   

In [5]:
cfg = load_config(config)


cli: Configuration from ./config/default.yaml


In [6]:
m_pre, m_specaug, m_fp = build_fp(cfg)

2024-04-28 11:26:42.175676: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1
2024-04-28 11:26:42.186565: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-04-28 11:26:42.186719: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: 
pciBusID: 0000:03:00.0 name: NVIDIA GeForce RTX 3090 computeCapability: 8.6
coreClock: 1.725GHz coreCount: 82 deviceMemorySize: 23.68GiB deviceMemoryBandwidth: 871.81GiB/s
2024-04-28 11:26:42.186794: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-04-28 11:26:42.186917: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 1 with properties: 
pciBusID: 0

In [7]:

checkpoint = tf.train.Checkpoint(m_fp)
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_name_dir))

<tensorflow.python.training.tracking.util.InitializationOnlyStatus at 0x76d1d29ae910>

# Load data

In [12]:
audio_path = ""

<model.fp.nnfp.FingerPrinter at 0x1a2d68f40>

In [None]:
audio,fs = librosa.load(audio_path, mono=True, sr=22050)

In [8]:
dataset = Dataset(cfg)

In [21]:
test_db_ds = dataset.get_test_dummy_db_ds()

In [27]:
len(test_db_ds)

430034

In [28]:
X = test_db_ds.__getitem__(0)

In [42]:
X[0][0][None,:].shape

(1, 1, 8000)

In [30]:
Y = tf.concat(X, axis=0)

In [31]:
Y.shape

TensorShape([125, 1, 8000])

# Model Predict

In [43]:
emb_f, emb_f_postL2, emb_gf = predict(X[0][0][None,:], m_pre, m_fp)

In [45]:
emb_gf.shape

TensorShape([1, 128])

In [46]:
emb_gf

<tf.Tensor: shape=(1, 128), dtype=float32, numpy=
array([[ 0.07539804, -0.16872996, -0.12115411, -0.01941191, -0.0148505 ,
         0.08560746, -0.0388802 ,  0.11214361,  0.02991246,  0.12607488,
         0.00300827,  0.06731218, -0.01309047,  0.03161237, -0.12762582,
         0.04633382,  0.11228829,  0.01818596, -0.09356156, -0.08653133,
        -0.15511537, -0.10948697, -0.00143263,  0.06219649,  0.00831405,
         0.04150734, -0.0522717 ,  0.0057195 , -0.0325827 , -0.03062948,
         0.03439471,  0.03571294, -0.08395135,  0.01864311,  0.03021016,
         0.02572118, -0.00137527,  0.01199023, -0.21145529, -0.03078636,
        -0.09409375,  0.08832733,  0.02622688, -0.0805587 , -0.04767458,
        -0.07374059, -0.07926158, -0.00388286,  0.06218813, -0.09946233,
        -0.14341326,  0.10287136,  0.0331257 ,  0.02296653,  0.05973741,
        -0.1981869 ,  0.04241665, -0.18951239,  0.12496328, -0.08082911,
        -0.00096449, -0.03653152, -0.02718567, -0.11009899,  0.1064317 ,
 

# System Audio Fingerprinting

### Load Reference Dataset and Extract Embedded

In [50]:
files_db_dir = '/mnt/dataset/public/Fingerprinting/neural-audio-fp-dataset/music/test-dummy-db-100k-full/fma_full'
files_query_dir = '/mnt/dataset/public/Fingerprinting/neural-audio-fp-dataset/music/test-dummy-db-100k-full/fma_full'

In [47]:
d = '/mnt/dataset/public/Fingerprinting/neural-audio-fp-dataset/music/test-query-db-500-30s/test_ids_icassp2021.npy'

In [51]:

y = np.load(d)

In [49]:
y

array([14655, 21662,  7594, ..., 15722,  1097,  4538])

In [52]:
files = glob.glob(os.path.join(files_db_dir,"**/*.wav"))

In [53]:
len(files)

93458

### Create Vector Database

### Search

# Ver o ficheiro pickle

In [1]:
root_out = '/mnt/dataset/features'

In [2]:
import pickle

In [11]:
# 1. Carregar o arquivo pickle
file_path = root_out + '/002/002000.pkl'

data = [] 
with open(file_path, "rb") as f:
    data = pickle.load(f)

In [12]:
# 2. Explorar os dados
print("Tipo de Dados:", type(data))

Tipo de Dados: <class 'tensorflow.python.framework.ops.EagerTensor'>


In [13]:
print(data)

tf.Tensor(
[[ 0.01796628 -0.10318357 -0.04985833 ... -0.15665779 -0.05464523
  -0.08112149]
 [ 0.01028875 -0.03953161  0.26217332 ...  0.03502701  0.00542068
   0.00784411]
 [ 0.0317118   0.0627202  -0.15309468 ...  0.12765522 -0.05246528
  -0.05398506]
 ...
 [ 0.03320696 -0.11092483 -0.05904498 ... -0.1455077  -0.18099743
   0.14961286]
 [ 0.01825167  0.01535849 -0.12124566 ...  0.03751192  0.03870636
   0.07288236]
 [ 0.00894083  0.02610302  0.0102381  ...  0.09504692  0.05563493
  -0.07567749]], shape=(549, 128), dtype=float32)


In [14]:
# 2. Converter o tensor TensorFlow para um array numpy
numpy_data = data.numpy()

# 3. Agora você pode analisar o array numpy como desejar
print("Tipo de Dados (Numpy):", type(numpy_data))
print("Forma do Array:", numpy_data.shape)
print("Dados:", numpy_data)

Tipo de Dados (Numpy): <class 'numpy.ndarray'>
Forma do Array: (549, 128)
Dados: [[ 0.01796628 -0.10318357 -0.04985833 ... -0.15665779 -0.05464523
  -0.08112149]
 [ 0.01028875 -0.03953161  0.26217332 ...  0.03502701  0.00542068
   0.00784411]
 [ 0.0317118   0.0627202  -0.15309468 ...  0.12765522 -0.05246528
  -0.05398506]
 ...
 [ 0.03320696 -0.11092483 -0.05904498 ... -0.1455077  -0.18099743
   0.14961286]
 [ 0.01825167  0.01535849 -0.12124566 ...  0.03751192  0.03870636
   0.07288236]
 [ 0.00894083  0.02610302  0.0102381  ...  0.09504692  0.05563493
  -0.07567749]]


In [None]:
root_out = '/mnt/dataset/featuresv2'

In [None]:
with open(dstpath, "rb") as f:
        x = pickle.load(f)

In [15]:
#x é emb

In [8]:
import pickle

In [9]:
file = '/mnt/dataset/test/features/000/000134.pkl'
with open(file, "rb") as f:
    x = pickle.load(f)

2024-04-28 22:14:37.274203: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-04-28 22:14:37.274374: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-04-28 22:14:37.274512: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

In [11]:
type(x)

tensorflow.python.framework.ops.EagerTensor

## Clean GPU memory

In [12]:
from numba import cuda 
device = cuda.get_current_device()
device.reset()