In [2]:
import os
import sys
import yaml
import glob
import h5py
import click
import librosa

import numpy as np
import soundfile as sf
#import deepdish as dd
import tensorflow as tf

from model_RA.fp_RA.melspec.melspectrogram_RA import get_melspec_layer
from model_RA.fp_RA.nnfp import get_fingerprinter
from model_RA.utils.dataloader_keras import genUnbalSequence

In [3]:
def load_config(config_fname):
    config_filepath = './config/' + config_fname + '.yaml'
    if os.path.exists(config_filepath):
        print(f'cli: Configuration from {config_filepath}')
    else:
        sys.exit(f'cli: ERROR! Configuration file {config_filepath} is missing!!')

    with open(config_filepath, 'r') as f:
        cfg = yaml.safe_load(f)
    return cfg


def load_audio(file_path):
    audio, _ = librosa.load(file_path, sr=8000)
    return audio


def build_fp(cfg):
    """ Build fingerprinter """
    # m_pre: log-power-Mel-spectrogram layer, S.
    m_pre = get_melspec_layer(cfg, trainable=False)

    # m_fp: fingerprinter g(f(.)).
    m_fp = get_fingerprinter(cfg, trainable=False)
    return m_pre, m_fp


@tf.function
def embeddingGenerator(X, m_pre, m_fp):
    """ 
    X -> (B,1,8000)
    """
    #tf.print(f"X:{X}")
    #feat = m_pre(X)  # (nA+nP, F, T, 1)
    m_fp.trainable = False

    return m_fp(m_pre(X))

In [8]:
config = "default_RA"
cfg = load_config(config)

dur=cfg['MODEL']['DUR'] 
hop=cfg['MODEL']['HOP'] 
fs=cfg['MODEL']['FS'] 


audio_files = sorted(glob.glob('/mnt/dataset/public/Fingerprinting/Embeddings_BFTRI/debug_audios' + '/*.wav', recursive=True))
outputDir = '/mnt/dataset/public/Fingerprinting/Embeddings_BFTRI/debug_audios'

m_pre, m_fp = build_fp(cfg)

checkpoint_root_dir:str = "./logs/CHECK_BFTRI_100/101/"
checkpoint = tf.train.Checkpoint(m_fp)
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_root_dir))


cli: Configuration from ./config/default_RA.yaml


<tensorflow.python.checkpoint.checkpoint.InitializationOnlyStatus at 0x7c6a8c5f28e0>

In [9]:
for file_path in audio_files:
    base_name = os.path.splitext(os.path.basename(file_path))[0]

    dir_name = f'{base_name[:3]}' #nome das pastas

    #if not os.path.exists(dir_name):
    #    os.makedirs(dir_name)

    bsz = ts_batch_sz = 125

    _ts_n_anchor = ts_batch_sz
    ds = genUnbalSequence(
        list(file_path),
        ts_batch_sz,
        _ts_n_anchor,
        dur,
        hop,
        fs,
        shuffle=False,
        random_offset_anchor=False,
        drop_the_last_non_full_batch=False)

    enq = tf.keras.utils.OrderedEnqueuer(ds,use_multiprocessing=True,shuffle=False)
    enq.start(workers=cfg['DEVICE']['CPU_N_WORKERS'], max_queue_size=cfg['DEVICE']['CPU_MAX_QUEUE'])

    i = 0
    emb_list = []

    while i < len(enq.sequence):
        X, _ = next(enq.get())
        emb = embeddingGenerator(X, m_pre, m_fp)
        emb_list.append(emb.numpy())
        i += 1
    enq.stop()

    emb_array = np.concatenate(emb_list,axis=0)
    #emb_array_list = np.array(emb_array)

    output_file_path = os.path.join(outputDir, dir_name, base_name + '.h5')

    #deepdish - encapsula
    
    # save -> dd.io.save(emb_array, output_file_path)
    # load -> emb_array = dd.io.load(output_file_path)

    with h5py.File(output_file_path, 'w') as hf:
            hf.create_dataset('embeddings', data=emb_array)

NotImplementedError: /