In [1]:
import os
import sys
import numpy as np
import tensorflow as tf
import yaml
import h5py
import glob
from tensorflow.keras.utils import Progbar
from model_RA.dataset_RA import Dataset
from model_RA.fp_RA.melspec.melspectrogram_RA import get_melspec_layer
from model_RA.fp_RA.nnfp import get_fingerprinter
import soundfile as sf

2024-06-18 15:39:00.902964: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


After the run, the output directory (containing the generated fingerprints) will be: <br>
$\;\;\;\;\;\;$    .<br>
$\;\;\;\;\;\;$    └──dataset<br>
$\;\;\;\;\;\;\;\;\;\;\;\;$        └── dummy_db<br>
$\;\;\;\;\;\;\;\;\;\;\;\;$            ├── 000<br>
$\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;$                ├── 000001.h5<br>
$\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;$                ├── 000002.h5<br>
$\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;$                └── 000003.h5<br>
$\;\;\;\;\;\;\;\;\;\;\;\;$            └── 001<br>
$\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;$                ├── 001001.h5<br>
$\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;$                ├── 001002.h5<br>
$\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;$                └── 001003.h5
                

In [2]:
def load_config(config_fname):
    """Read ``./config/<config_fname>.yaml`` and return its parsed content.

    Args:
        config_fname: Configuration name without directory and without the
            ``.yaml`` extension.

    Returns:
        The object produced by ``yaml.safe_load`` for that file.

    Note:
        When the file does not exist the interpreter is terminated through
        ``sys.exit`` with an error message instead of raising a regular
        exception.
    """
    yaml_path = './config/' + config_fname + '.yaml'

    # Guard clause: bail out early when there is nothing to read.
    if not os.path.exists(yaml_path):
        sys.exit(f'cli: ERROR! Configuration file {yaml_path} is missing!!')
    print(f'cli: Configuration from {yaml_path}')

    with open(yaml_path, 'r') as stream:
        return yaml.safe_load(stream)

def load_checkpoint(checkpoint_root_dir, checkpoint_name, checkpoint_index,
                    m_fp):
    """Restore a trained fingerprinter from a checkpoint.

    Args:
        checkpoint_root_dir: Directory holding one sub-directory per
            checkpoint name.
        checkpoint_name: Name of the sub-directory that contains the
            ``ckpt-<index>`` files.
        checkpoint_index: Index of the checkpoint to restore, or ``None`` to
            restore the latest checkpoint reported by the checkpoint manager.
        m_fp: Object tracked under the key ``model`` by
            ``tf.train.Checkpoint``; its variables are restored in place.

    Returns:
        The index of the checkpoint that was restored (parsed from the
        checkpoint path when ``checkpoint_index`` was ``None``).

    Raises:
        FileNotFoundError: ``checkpoint_index`` is ``None`` and the manager
            finds no checkpoint in the checkpoint directory.
    """
    checkpoint = tf.train.Checkpoint(model=m_fp)
    # os.path.join avoids the doubled separator ('checkpoint//NAME') that
    # plain concatenation produced when the root already ends with '/'.
    # The trailing '' keeps the directory path ending with a separator.
    checkpoint_dir = os.path.join(checkpoint_root_dir, checkpoint_name, '')
    c_manager = tf.train.CheckpointManager(checkpoint, checkpoint_dir,
                                           max_to_keep=None)

    if checkpoint_index is None:
        tf.print("\x1b[1;32mArgument 'checkpoint_index' was not specified.\x1b[0m")
        tf.print('\x1b[1;32mSearching for the latest checkpoint...\x1b[0m')
        restore_path = c_manager.latest_checkpoint
        if not restore_path:
            raise FileNotFoundError(f'Cannot find checkpoint in {checkpoint_dir}')
        # Checkpoint paths end with 'ckpt-<index>'.
        checkpoint_index = int(restore_path.split(sep='ckpt-')[-1])
    else:
        # An explicit index is not validated here; TF handles error cases.
        restore_path = os.path.join(checkpoint_dir, f'ckpt-{checkpoint_index}')

    status = checkpoint.restore(restore_path)
    # Only the model is tracked, so silence warnings about checkpoint values
    # (e.g. optimizer state) that have no matching object.
    status.expect_partial()
    tf.print(f'---Restored from {restore_path}---')
    return checkpoint_index


def prevent_overwrite(key, target_path):
    """Ask for confirmation before overwriting an existing 'dummy_db' target.

    The prompt is shown only when ``key`` equals ``'dummy_db'`` and
    ``target_path`` already exists. Any answer other than ``y`` / ``yes``
    (case-insensitive, surrounding whitespace ignored) terminates the
    interpreter through ``sys.exit()``.

    Args:
        key: Name of the item about to be written.
        target_path: Path that would be overwritten.

    Returns:
        None. The function returns normally when no confirmation is needed
        or when the user agreed to overwrite.
    """
    # Logical `and` (not bitwise `&`) short-circuits, so the filesystem is
    # only queried when the key is the guarded one.
    if key == 'dummy_db' and os.path.exists(target_path):
        answer = input(f'{target_path} exists. Will you overwrite (y/N)?')
        if answer.strip().lower() not in ('y', 'yes'):
            sys.exit()

In [3]:
def build_fp(cfg):
    """Create the two models needed to compute fingerprints.

    Args:
        cfg: Configuration object, forwarded unchanged to both factories.

    Returns:
        Tuple ``(m_pre, m_fp)``; both are requested with ``trainable=False``.
    """
    return (
        # m_pre: log-power-Mel-spectrogram layer, S.
        get_melspec_layer(cfg, trainable=False),
        # m_fp: fingerprinter g(f(.)).
        get_fingerprinter(cfg, trainable=False),
    )

@tf.function
def test_step(X, m_pre, m_fp):
    """Compute fingerprints for one input batch.

    Args:
        X: Input batch. Unlike a training pair ``(Xa, Xp)``, only a single
            element is passed here.
        m_pre: Callable front-end applied to ``X`` first.
        m_fp: Fingerprinter applied to the front-end output; its
            ``trainable`` attribute is set to ``False`` by this function.

    Returns:
        Output of ``m_fp``, documented by the original author as
        ``(BSZ, Dim)``.
    """
    m_fp.trainable = False
    features = m_pre(X)
    fingerprints = m_fp(features)  # (BSZ, Dim)
    return fingerprints

In [4]:
def load_audio(file_path):
    """Read an audio file, down-mix it to mono and peak-normalize it.

    Args:
        file_path: Path handed to ``soundfile.read``.

    Returns:
        NumPy array of samples. Multi-channel input is averaged over axis 1.
        The signal is divided by its maximum absolute value; an empty or
        all-zero signal is returned unscaled.

    Note:
        The sample rate returned by ``soundfile.read`` is discarded and no
        resampling is performed.
    """
    audio, _ = sf.read(file_path)

    if audio.ndim > 1:
        audio = np.mean(audio, axis=1)  # convert to mono when needed

    # Normalize the audio by its peak. An empty signal has no peak
    # (np.max would raise) and a silent one has peak 0 (the division would
    # produce NaN), so both are left untouched.
    peak = np.max(np.abs(audio)) if audio.size else 0.0
    if peak > 0:
        audio = audio / peak
    return audio

In [5]:
#file_path = '/mnt/dataset/public/Fingerprinting/neural-audio-fp-dataset/music/test-dummy-db-100k-full/fma_full/000/000003.wav'
#x = load_audio(file_path)

In [6]:
def split_audio_into_segments(audio, segment_duration=1.0, hop_size=0.5, sample_rate=8000):
    """Cut ``audio`` into fixed-length, possibly overlapping windows.

    Args:
        audio: Sliceable sequence of samples that supports ``len``.
        segment_duration: Window length in seconds.
        hop_size: Distance between consecutive window starts, in seconds.
        sample_rate: Samples per second, used to turn both durations into
            sample counts (truncated with ``int``).

    Returns:
        List of windows in order of their start position. Only windows with
        exactly the requested number of samples are returned, so a trailing
        remainder shorter than one window is dropped.
    """
    window_len = int(segment_duration * sample_rate)
    step = int(hop_size * sample_rate)

    # Start positions stop early enough for a full window to fit.
    stop = len(audio) - window_len + 1
    candidates = (audio[begin:begin + window_len]
                  for begin in range(0, stop, step))

    return [window for window in candidates if len(window) == window_len]

In [7]:
#segments = split_audio_into_segments(x, segment_duration=1.0, hop_size=0.5, sample_rate=8000)
#X = tf.convert_to_tensor(segments, dtype=tf.float32) #473,8000
#tensor_transformado = tf.expand_dims(tf.expand_dims(X, axis=0), axis=0)
#tensores_transformados = [tf.expand_dims(tf.expand_dims(tensor, axis=0), axis=0) for tensor in X]

In [8]:
# --- Run settings --------------------------------------------------------
config = "default_RA"  # resolved by load_config to ./config/default_RA.yaml
checkpoint_name: str = 'CHECK_BFTRI_100'
checkpoint_index: int = 101
output_root_dir: str = '/mnt/dataset/public/Fingerprinting/Embeddings_BFTRI/dummy_db'

# --- Values read from the YAML configuration -----------------------------
cfg = load_config(config)
source_root_dir = cfg['DIR']['SOURCE_ROOT_DIR']

# --- Build the models and restore the trained weights --------------------
m_pre, m_fp = build_fp(cfg)
checkpoint_root_dir = cfg['DIR']['LOG_ROOT_DIR'] + 'checkpoint/'
checkpoint_index = load_checkpoint(
    checkpoint_root_dir=checkpoint_root_dir,
    checkpoint_name=checkpoint_name,
    checkpoint_index=checkpoint_index,
    m_fp=m_fp,
)

cli: Configuration from ./config/default_RA.yaml
---Restored from ./logs/checkpoint//CHECK_BFTRI_100/ckpt-101---


In [13]:
def process_audio_files(source_root_dir, output_root_dir, m_pre, m_fp,
                        subset_dir='test-dummy-db-100k-full'):
    """Generate and store fingerprints for every ``.wav`` file of a subset.

    Each file is loaded with ``load_audio`` and cut into 1.0 s windows with a
    0.5 s hop, using a fixed sample rate of 8000. Every window is passed on
    its own through ``test_step``, and the stacked results are written to
    ``<output_root_dir>/<wav base name>.h5`` in a dataset named
    ``'embeddings'``.

    Output files are written flat into ``output_root_dir``: the
    sub-directory of the source file is not reproduced, so two source files
    with the same base name would overwrite each other.

    NOTE(review): ``load_audio`` does not resample, so the inputs are
    presumably already 8 kHz; confirm.

    Args:
        source_root_dir: Root directory of the audio dataset (with or without
            a trailing path separator).
        output_root_dir: Directory that receives the ``.h5`` files; created
            when missing.
        m_pre: Front-end model forwarded to ``test_step``.
        m_fp: Fingerprinter model forwarded to ``test_step``.
        subset_dir: Folder below ``source_root_dir`` that is searched
            recursively for ``.wav`` files.
    """
    # os.path.join keeps the pattern valid whether or not source_root_dir
    # ends with a separator; plain concatenation silently matched nothing
    # when the separator was missing.
    pattern = os.path.join(source_root_dir, subset_dir, '**', '*.wav')
    audio_files = sorted(glob.glob(pattern, recursive=True))

    # Every output lands in the same directory, so create it once instead of
    # once per file (and only when there is something to write).
    if audio_files:
        os.makedirs(output_root_dir, exist_ok=True)

    for file_path in audio_files:
        base_name = os.path.splitext(os.path.basename(file_path))[0]

        audio = load_audio(file_path)
        segments = split_audio_into_segments(audio, segment_duration=1.0, hop_size=0.5, sample_rate=8000)
        X = tf.convert_to_tensor(segments, dtype=tf.float32)

        # Each window is fed separately; two leading axes are prepended so
        # a 1-D window of n samples becomes a (1, 1, n) input.
        emb = [
            test_step(tf.expand_dims(tf.expand_dims(segment, axis=0), axis=0),
                      m_pre, m_fp).numpy()
            for segment in X
        ]
        emb_array = np.array(emb)

        output_file_path = os.path.join(output_root_dir, base_name + '.h5')
        with h5py.File(output_file_path, 'w') as hf:
            hf.create_dataset('embeddings', data=emb_array)

In [14]:
process_audio_files(
    source_root_dir=source_root_dir,
    output_root_dir=output_root_dir,
    m_pre=m_pre,
    m_fp=m_fp,
)
# 430034(?)

# 93458 songs

file_path: /mnt/dataset/public/Fingerprinting/neural-audio-fp-dataset/music/test-dummy-db-100k-full/fma_full/000/000003.wav
Saved /mnt/dataset/public/Fingerprinting/Embeddings_BFTRI/dummy_db/000003.h5
file_path: /mnt/dataset/public/Fingerprinting/neural-audio-fp-dataset/music/test-dummy-db-100k-full/fma_full/000/000020.wav
Saved /mnt/dataset/public/Fingerprinting/Embeddings_BFTRI/dummy_db/000020.h5
file_path: /mnt/dataset/public/Fingerprinting/neural-audio-fp-dataset/music/test-dummy-db-100k-full/fma_full/000/000026.wav
Saved /mnt/dataset/public/Fingerprinting/Embeddings_BFTRI/dummy_db/000026.h5
file_path: /mnt/dataset/public/Fingerprinting/neural-audio-fp-dataset/music/test-dummy-db-100k-full/fma_full/000/000030.wav
Saved /mnt/dataset/public/Fingerprinting/Embeddings_BFTRI/dummy_db/000030.h5
file_path: /mnt/dataset/public/Fingerprinting/neural-audio-fp-dataset/music/test-dummy-db-100k-full/fma_full/000/000046.wav
Saved /mnt/dataset/public/Fingerprinting/Embeddings_BFTRI/dummy_db/00004