In [110]:
import os
import sys
import yaml
import glob
import wave

import tensorflow as tf
import librosa
import numpy as np
import pickle

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

from model.fp.melspec.melspectrogram import get_melspec_layer
from model.fp.specaug_chain.specaug_chain import get_specaug_chain_layer
from model.fp.nnfp import get_fingerprinter

In [101]:
def build_fp(cfg):
    """ Build fingerprinter """
    # m_pre: log-power-Mel-spectrogram layer, S.
    m_pre = get_melspec_layer(cfg, trainable=False)

    # m_specaug: spec-augmentation layer.
    #m_specaug = get_specaug_chain_layer(cfg, trainable=False)
    #assert(m_specaug.bypass==False) # Detachable by setting m_specaug.bypass.

    # m_fp: fingerprinter g(f(.)).
    m_fp = get_fingerprinter(cfg, trainable=False)
    m_fp.trainable = False
    
    return m_pre, m_fp

In [102]:
def load_config(config_fname):
    config_filepath = './config/' + config_fname + '.yaml'
    if os.path.exists(config_filepath):
        print(f'cli: Configuration from {config_filepath}')
    else:
        sys.exit(f'cli: ERROR! Configuration file {config_filepath} is missing!!')

    with open(config_filepath, 'r') as f:
        cfg = yaml.safe_load(f)
    return cfg


@tf.function
def predict(X, m_fp):
    """ 
    Test step used for mini-search-validation 
    X -> (B,1,8000)
    """
    emb_gf = m_fp(X)

    return emb_gf


def load_model():

    checkpoint_name_dir:str = "./logs/CHECKPOINT_BSZ_120"  #"CHECKPOINT"   # string
    config:str = "default"   

    cfg = load_config(config)

    m_pre, m_fp = build_fp(cfg)

    checkpoint = tf.train.Checkpoint(m_fp)
    checkpoint.restore(tf.train.latest_checkpoint(checkpoint_name_dir))
        
    return m_fp

In [103]:
@tf.function
def predict(X, m_fp):
    """ 
    Test step used for mini-search-validation 
    X -> (B,1,8000)
    """
    emb_gf = m_fp(X)

    return emb_gf

In [104]:
audioDir = '/mnt/dataset/public/Fingerprinting/teste/uniqueFile/002000.wav'

fns_list = sorted(glob.glob(audioDir, recursive=True))

fns_seg_list = []
seg_list_test = []

fs:int = 8000
duration:int = 1
hop:int = 0.5

for offset_idx, filename in enumerate(fns_list):
    # Get audio info
    n_frames_in_seg = fs * duration
    n_frames_in_hop = fs * hop  # 2019 09.05
    file_ext = filename[-3:]

    if file_ext == 'wav':
        pt_wav = wave.open(filename, 'r')
        _fs = pt_wav.getframerate()

        if fs != _fs:
            raise ValueError('Sample rate should be {} but got {}'.format(
                str(fs), str(_fs)))

        n_frames = pt_wav.getnframes()
        #n_segs = n_frames // n_frames_in_seg
        if n_frames > n_frames_in_seg:
            n_segs = (n_frames - n_frames_in_seg +
                        n_frames_in_hop) // n_frames_in_hop
        else:
            n_segs = 1

        n_segs = int(n_segs)
        assert (n_segs > 0)
        residual_frames = np.max([
            0,
            n_frames - ((n_segs - 1) * n_frames_in_hop + n_frames_in_seg)
        ])
        
        #print(f"filename:{filename}\nn_frames_in_seg:{n_frames_in_seg}\nn_frames_in_hop:{n_frames_in_hop}\nfile_ext:{file_ext}\n")
        #print(f"_fs:{_fs}\nn_frames:{n_frames}\nn_segs:{n_segs}\nresidual_frames:{residual_frames}\n")
        
        pt_wav.close()

    else:
        raise NotImplementedError(file_ext)
    

    for seg_idx in range(n_segs):
        offset_min, offset_max = int(-1 * n_frames_in_hop), n_frames_in_hop
        
        #print(f"seg_idx:{seg_idx}\noffset_min:{offset_min}\noffset_max:{offset_max}\nfns_seg_list:{fns_seg_list}\n")

        if seg_idx == 0:  # first seg
            offset_min = 0
        if seg_idx == (n_segs - 1):  # last seg
            offset_max = residual_frames
        if seg_idx == 3:
            seg_list_test.append( [filename, seg_idx, offset_min, offset_max])
            print(f"filename:{filename}\nseg_idx:{seg_idx}\noffset_min:{offset_min}\noffset_max:{offset_max}\n")

        fns_seg_list.append(
            [filename, seg_idx, offset_min, offset_max])
        
        #print(f"seg_idx:{seg_idx}\noffset_min:{offset_min}\noffset_max:{offset_max}\nfns_seg_list:{fns_seg_list}\n")
    

filename:/mnt/dataset/public/Fingerprinting/teste/uniqueFile/002000.wav
seg_idx:3
offset_min:-4000
offset_max:4000.0



In [105]:
def load_audio(filename=str(),
               seg_start_sec=float(),
               offset_sec=0.0,
               seg_length_sec=float(),
               seg_pad_offset_sec=0.0,
               fs=22050,
               amp_mode='normal'):
    """
        Open file to get file info --> Calulate index range
        --> Load sample by index --> Padding --> Max-Normalize --> Out
        
    """
    start_frame_idx = np.floor((seg_start_sec + offset_sec) * fs).astype(int)
    seg_length_frame = np.floor(seg_length_sec * fs).astype(int)
    end_frame_idx = start_frame_idx + seg_length_frame

    # Get file-info
    file_ext = filename[-3:]
    print(start_frame_idx, end_frame_idx)

    if file_ext == 'wav':
        pt_wav = wave.open(filename, 'r')
        pt_wav.setpos(start_frame_idx)
        x = pt_wav.readframes(end_frame_idx - start_frame_idx)
        x = np.frombuffer(x, dtype=np.int16)
        x = x / 2**15  # dtype=float
    else:
        raise NotImplementedError(file_ext)

    # Max Normalize, random amplitude
    if amp_mode == 'normal':
        pass
    elif amp_mode == 'max_normalize':
        _x_max = np.max(np.abs(x))
        if _x_max != 0:
            x = x / _x_max
    else:
        raise ValueError('amp_mode={}'.format(amp_mode))

    # padding process. it works only when win_size> audio_size and padding='random'
    audio_arr = np.zeros(int(seg_length_sec * fs))
    seg_pad_offset_idx = int(seg_pad_offset_sec * fs)
    audio_arr[seg_pad_offset_idx:seg_pad_offset_idx + len(x)] = x
    return audio_arr

In [106]:
x = load_audio(filename=filename,
               seg_start_sec=1.5,
               seg_length_sec=1,
               fs=8000)

12000 20000


In [111]:
import numpy as np
print(type(x))
print(np.shape(x))
print(x)

<class 'numpy.ndarray'>
(8000,)
[0.0213623  0.01916504 0.021698   ... 0.02557373 0.0295105  0.02172852]


In [112]:
x = x.reshape((1, -1))

In [113]:
print(type(x))
print(np.shape(x))
print(x)

<class 'numpy.ndarray'>
(1, 8000)
[[0.0213623  0.01916504 0.021698   ... 0.02557373 0.0295105  0.02172852]]


In [114]:
def run(x, m_fp):
    '''
    filepath: (str, ndarray)
    '''

    # tenho as tramas e de 8000 amostras tenho de chamar a get_melspec, tenho de trazer o get_melspec e o get_fringerprint, tenho trazer uma matriz do input_shape para que consigo ver os pesos
    # tem de ser m_spec, _, m_fp = build_fp(cfg). matching. tirar o librosa pegar no sinal, passar no fingerprint. 3 tensores, um para mel spec, um para data_aug e outro para a rede.
    # (256,32,1) de entrada no X, tenho de alterar em vez de 8000
    
    emb = predict(x, m_fp)

    tf.config.run_functions_eagerly(True)

    return emb.numpy()

In [115]:
m_fp = load_model()
emb = run(x, m_fp)

cli: Configuration from ./config/default.yaml


ValueError: in user code:

    File "/tmp/ipykernel_3084456/2396453651.py", line 7, in predict  *
        emb_gf = m_fp(X)
    File "/home/rodrigo/anaconda3/envs/tf/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler  **
        raise e.with_traceback(filtered_tb) from None
    File "/tmp/__autograph_generated_file7mrpzq9p.py", line 10, in tf__call
        x = ag__.converted_call(ag__.ld(self).front_conv, (ag__.ld(inputs),), None, fscope)
    File "/tmp/__autograph_generated_fileu8kwn9i4.py", line 15, in tf__call
        raise

    ValueError: Exception encountered when calling layer 'finger_printer_5' (type FingerPrinter).
    
    in user code:
    
        File "/home/rodrigo/Documents/neural-audio-fp/model/fp/nnfp.py", line 229, in call  *
            x = self.front_conv(inputs) # (B,D) with D = (T/2^4) x last_hidden_ch
        File "/home/rodrigo/anaconda3/envs/tf/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler  **
            raise e.with_traceback(filtered_tb) from None
        File "/tmp/__autograph_generated_fileu8kwn9i4.py", line 15, in tf__call
            raise
    
        ValueError: Exception encountered when calling layer 'conv_layer_40' (type ConvLayer).
        
        in user code:
        
            File "/home/rodrigo/Documents/neural-audio-fp/model/fp/nnfp.py", line 83, in call  *
                return self.forward(x)
            File "/home/rodrigo/anaconda3/envs/tf/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler  **
                raise e.with_traceback(filtered_tb) from None
            File "/home/rodrigo/anaconda3/envs/tf/lib/python3.11/site-packages/keras/src/engine/input_spec.py", line 253, in assert_input_compatibility
                raise ValueError(
        
            ValueError: Exception encountered when calling layer 'sequential_683' (type Sequential).
            
            Input 0 of layer "conv2d_80" is incompatible with the layer: expected min_ndim=4, found ndim=2. Full shape received: (1, 8000)
            
            Call arguments received by layer 'sequential_683' (type Sequential):
              • inputs=tf.Tensor(shape=(1, 8000), dtype=float32)
              • training=None
              • mask=None
        
        
        Call arguments received by layer 'conv_layer_40' (type ConvLayer):
          • x=tf.Tensor(shape=(1, 8000), dtype=float32)
    
    
    Call arguments received by layer 'finger_printer_5' (type FingerPrinter):
      • inputs=tf.Tensor(shape=(1, 8000), dtype=float32)


In [81]:
#seg_list_test[0][2]

-4000

In [95]:
"""
seg_list_test[0][0]
seg_list_test[0][2]
duration = 1
fs = 8000
amp_mode = 'normal'
"""

"\nseg_list_test[0][0]\nseg_list_test[0][2]\nduration = 1\nfs = 8000\namp_mode = 'normal'\n"

In [None]:
#xs = load_audio_multi_start(fns_event_seg_list[idx][0], start_sec_list, duration, fs, amp_mode)  # xs: ((1+n_pos)),T)

## RASCUNHOS

In [None]:
emb = predict(X, m_fp)

In [51]:
modelo = load_model()

cli: Configuration from ./config/default.yaml


In [52]:
type(modelo)

model.fp.nnfp.FingerPrinter

In [10]:
modelo.div_enc

<model.fp.nnfp.DivEncLayer at 0x7aab17659750>

In [16]:
config = "default" 

In [17]:
cfg = load_config(config)

cli: Configuration from ./config/default.yaml


In [23]:
cfg['DIR']['SOURCE_ROOT_DIR']

'/mnt/dataset/public/Fingerprinting/neural-audio-fp-dataset/music/'

In [24]:
import math

In [27]:
x = tf.constant([5, 1, 2, 4])
y=tf.reduce_max(x)

In [31]:
print(x)

tf.Tensor([5 1 2 4], shape=(4,), dtype=int32)


In [33]:
x[1:]

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([1, 2, 4], dtype=int32)>

In [36]:
from tensorflow.keras.layers import Lambda, Permute

In [40]:
w=Permute((3, 2, 1), input_shape=x[1:])

In [42]:
print(w)

<keras.src.layers.reshaping.permute.Permute object at 0x7aab1769bd90>


In [48]:
import numpy as np

In [44]:
front_strides=[
                [(1,2), (2,1)], 
                [(1,2), (2,1)],
                [(1,2), (2,1)],
                [(1,2), (2,1)],
                [(1,1), (2,1)],
                [(1,2), (2,1)],
                [(1,1), (2,1)],
                [(1,2), (2,1)]
                ]
len(front_strides[])

8

In [49]:
np.shape(front_strides)

(8, 2, 2)