In [1]:
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Lambda, Permute
from kapre.time_frequency import STFT, Magnitude, ApplyFilterbank
import math

import numpy as np
from model.fp.melspec.melspectrogram import Melspec_layer

2024-05-23 15:16:18.619453: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
from tensorflow.keras import backend as K
from kapre import backend
from kapre.backend import _CH_FIRST_STR, _CH_LAST_STR, _CH_DEFAULT_STR

In [3]:
import tensorflow as tf
from model.dataset import Dataset
#from model.fp.melspec.melspectrogram import get_melspec_layer
from model.fp.specaug_chain.specaug_chain import get_specaug_chain_layer
from model.fp.nnfp import get_fingerprinter
from model.fp.NTxent_loss_single_gpu import NTxentLoss
from model.fp.online_triplet_loss import OnlineTripletLoss
from model.fp.lamb_optimizer import LAMB
from model.utils.experiment_helper import ExperimentHelper

In [4]:
from kapre.time_frequency import ApplyFilterbank

class CustomApplyFilterbank(ApplyFilterbank):
    """``ApplyFilterbank`` variant that adds a ``'tri'`` filterbank type.

    With ``type='tri'`` the layer multiplies the frequency axis of its input
    by a bank of 256 triangular filters whose centre frequencies are spaced
    logarithmically at 60 filters per octave (see
    ``filterbank_triangular_log``). Every ``type`` value is also forwarded
    unchanged to the parent constructor.
    """

    def __init__(self, type, filterbank_kwargs, data_format='default', **kwargs):
        """Create the layer and, for ``type='tri'``, its filterbank matrix.

        Args:
            type: Filterbank type. ``'tri'`` selects the triangular
                log-spaced bank built by this class.
            filterbank_kwargs: Dict expanded as keyword arguments into
                ``filterbank_triangular_log`` (``sample_rate``, ``n_fft``).
            data_format: ``'channels_first'``, ``'channels_last'`` or the
                default marker, which resolves to ``K.image_data_format()``.
            **kwargs: Forwarded to the parent constructor.
        """
        super(CustomApplyFilterbank, self).__init__(type, filterbank_kwargs, data_format, **kwargs)

        # Filterbank type handled by this subclass.
        if type == 'tri':
            self.filterbank = self.filterbank_triangular_log(**filterbank_kwargs)

        # Resolve the effective data format ...
        if data_format == _CH_DEFAULT_STR:
            self.data_format = K.image_data_format()
        else:
            self.data_format = data_format

        # ... and from it the index of the frequency axis in the input.
        if self.data_format == _CH_FIRST_STR:
            self.freq_axis = 3
        else:
            self.freq_axis = 2

    def filterbank_triangular_log(self, sample_rate, n_fft):
        """Build 256 triangular filters with logarithmically spaced centres.

        Design notes (from the original author): the bank was designed for
        ``sample_rate=8000`` and ``n_fft=2048``. With 60 filters per octave
        (5 * 12) the last centre frequency, f256, is B7 (~3951.07 Hz) and the
        upper edge is ~3996.98 Hz. At that resolution every filter receives
        at least one FFT bin, because f0 * (2^(2/60) - 1) = 4.7979 Hz exceeds
        the bin spacing 8000 / 2048 = 3.9062 Hz; with ``n_fft=1024`` that is
        not guaranteed. See line 24 of ``getOctaveFilterBanck2.m``.

        Args:
            sample_rate: Sampling rate in Hz, used to place the FFT bins.
            n_fft: FFT size; the bank covers ``n_fft // 2 + 1`` bins.

        Returns:
            A ``tf.float32`` tensor of shape ``(n_fft // 2 + 1, 256)``. Each
            filter (column) that captures at least one bin sums to 1; a
            filter that captures no bin is left as all zeros.
        """
        Nfpo = 60  # filters per octave (= 5 * 12)
        Nb = 256   # number of filters

        # Lower edge (f0) and upper edge (fmax) of the bank.
        f256 = 440 * 2.**(38 / 12)     # B7 = 3951.066410048992 Hz
        f0 = f256 / 2**(256 / Nfpo)    # fmin, 205.2672581380976 Hz
        fmax = f0 * 2**(257 / Nfpo)    # fmax, 3996.975590329487 Hz

        # Frequency (Hz) of every FFT bin.
        i = np.arange(1, Nb + 1, dtype=float)
        k = np.arange(n_fft // 2 + 1)
        f = k * sample_rate / n_fft

        # Centre frequencies, then the full edge list [f0, centres..., fmax].
        fcf = f0 * 2.**(i / Nfpo)
        fi = np.concatenate(([f0], fcf, [fmax]))

        # One row per filter, one column per FFT bin.
        H = np.zeros((Nb, n_fft // 2 + 1))

        for j in range(Nb):
            fLow = fi[j]
            fmid = fi[j + 1]
            fUpp = fi[j + 2]

            # Rising slope on (fLow, fmid], falling slope on (fmid, fUpp].
            H[j, :] = ((f - fLow) / (fmid - fLow)) * ((f > fLow) & (f <= fmid)) + \
                      ((f - fUpp) / (fmid - fUpp)) * ((f > fmid) & (f <= fUpp))

        # Normalise each filter so its weights sum to 1. Filters that caught
        # no bin (possible for coarse n_fft / sample_rate combinations) stay
        # at zero instead of turning into NaN through a 0 / 0 division.
        row_sums = np.sum(H, axis=1, keepdims=True)
        H = np.divide(H, row_sums, out=np.zeros_like(H), where=row_sums > 0)

        return tf.convert_to_tensor(H.T, dtype=tf.float32)

    def call(self, x):
        """Project the frequency axis of ``x`` onto the filterbank.

        Args:
            x: Input tensor whose frequency bins lie on axis
                ``self.freq_axis``.

        Returns:
            The tensor with the frequency axis replaced by the filter axis.
        """
        # tensordot contracts the frequency axis and appends the filter axis
        # as the last axis of the result.
        output = tf.tensordot(x, self.filterbank, axes=(self.freq_axis, 0))

        # For channels-last data, swap the last two axes of the result.
        if self.data_format == _CH_LAST_STR:
            output = tf.transpose(output, (0, 1, 3, 2))
        return output

In [5]:
class triBFMelspecLayer(Melspec_layer):
    """Log-magnitude spectrogram front end using the triangular log filterbank.

    The pipeline built here is: pad -> STFT -> magnitude ->
    ``CustomApplyFilterbank(type='tri')``, followed in ``call`` by a log10
    compression, dynamic-range clipping, optional segment normalisation and a
    final axis permutation.
    """

    def __init__(self,
                input_shape=(1, 8000),
                segment_norm=False,
                n_fft=2048,
                stft_hop=192,
                fs=8000,
                dur=1.,
                f_min=300.,
                f_max=4000.,
                amin=1e-10, # minimum amp.
                dynamic_range=80.,
                name='Mel-spectrogram',
                trainable=False,
                **kwargs
                ):
        """Build the spectrogram and permute sub-models.

        Args:
            input_shape: Shape of one input example (without batch axis).
            segment_norm: If True, ``call`` applies segment normalisation.
            n_fft: FFT size for the STFT and the filterbank.
            stft_hop: STFT hop length in samples.
            fs: Sampling rate in Hz, passed to the filterbank.
            dur, f_min, f_max: Accepted for signature compatibility; not
                used by this constructor.
            amin: Floor applied before taking the logarithm.
            dynamic_range: Clipping range below the maximum, in log10 units.
            name: Name of the layer and of the inner Sequential model.
            trainable: Accepted for signature compatibility; the parent
                constructor is always called with ``trainable=False``.
            **kwargs: Forwarded to the parent constructor.
        """
        super(triBFMelspecLayer, self).__init__(name=name, trainable=False, **kwargs)

        self.n_fft = n_fft
        self.stft_hop = stft_hop
        self.amin = amin
        self.dynamic_range = dynamic_range
        self.segment_norm = segment_norm

        self.tri_fb_kwargs = {
            'sample_rate': fs,
            'n_fft': n_fft,
            }

        # Construct the log-power spectrogram model used by call().
        self.mm = self.construct_melspec_layer(input_shape, name)

        # Permute layer. Its input shape must come from self.mm, the model
        # whose output is actually passed to self.p in call().
        self.p = tf.keras.Sequential(name='Permute')
        self.p.add(Permute((3, 2, 1), input_shape=self.mm.output_shape[1:]))

        super(triBFMelspecLayer, self).build((None, input_shape[0], input_shape[1]))

    def construct_melspec_layer(self, input_shape, name):
        """Assemble pad -> STFT -> Magnitude -> triangular filterbank.

        Args:
            input_shape: Shape of one input example (without batch axis).
            name: Name given to the returned Sequential model.

        Returns:
            A ``tf.keras.Sequential`` model.
        """
        mm = tf.keras.Sequential(name=name)
        mm.add(tf.keras.layers.InputLayer(input_shape=input_shape))
        # NOTE(review): self.pad_layer is not created in this class --
        # presumably it is set by Melspec_layer.__init__; confirm.
        mm.add(self.pad_layer)
        mm.add(
            STFT(
                n_fft=self.n_fft,
                hop_length=self.stft_hop,
                pad_begin=False, # We do not use Kapre's padding, due to the @tf.function compatibility
                pad_end=False, # We do not use Kapre's padding, due to the @tf.function compatibility
                input_data_format='channels_first',
                output_data_format='channels_first')
            )
        mm.add(
            Magnitude()
            )
        mm.add(
            CustomApplyFilterbank(type='tri',
                            filterbank_kwargs=self.tri_fb_kwargs,
                            data_format='channels_first'
                            )
            )
        return mm

    @tf.function
    def call(self, x):
        """Compute the clipped log10 filterbank spectrogram of ``x``.

        Args:
            x: Batch of inputs accepted by the inner spectrogram model.

        Returns:
            The spectrogram with its three non-batch axes reversed.
        """
        # Constant offset added to the filterbank magnitudes before the log.
        x = self.mm(x) + 0.06
        #x = tf.sqrt(x)

        # log10 with a floor at amin, referenced to the maximum of the batch,
        # then clipped to dynamic_range below that maximum.
        x = tf.math.log(tf.maximum(x, self.amin)) / math.log(10)
        x = x - tf.reduce_max(x)
        x = tf.maximum(x, -1 * self.dynamic_range)
        if self.segment_norm:
            x = (x - tf.reduce_min(x) / 2) / tf.abs(tf.reduce_min(x) / 2 + 1e-10)
        return self.p(x) # Permute((3,2,1))

    

# Helper that creates the spectrogram layer using the new triBFMelspecLayer class
def get_triBF_melspec_layer(cfg, trainable=False, new_param=None):
    """Create a ``triBFMelspecLayer`` with this notebook's fixed settings.

    Args:
        cfg: Configuration dict. Not read here; every setting is fixed below.
        trainable: Value assigned to the layer's ``trainable`` attribute.
        new_param: Unused.

    Returns:
        The configured ``triBFMelspecLayer`` instance.
    """
    sample_rate = 8000
    duration = 1.

    layer = triBFMelspecLayer(
        input_shape=(1, int(sample_rate * duration)),
        segment_norm=False,
        n_fft=2048,
        stft_hop=192,
        fs=sample_rate,
        dur=duration,
    )
    layer.trainable = trainable
    return layer


In [6]:
def build_fp(cfg):
    """Build the three stages of the fingerprinter.

    Args:
        cfg: Configuration dict handed to each stage factory.

    Returns:
        Tuple ``(m_pre, m_specaug, m_fp)``: the spectrogram front end, the
        spec-augmentation chain and the fingerprinter g(f(.)). All three are
        requested with ``trainable=False``.
    """
    # Spectrogram front end, S.
    preprocessor = get_triBF_melspec_layer(cfg, trainable=False)

    # Spec-augmentation chain; it can be detached by setting `.bypass`.
    augmenter = get_specaug_chain_layer(cfg, trainable=False)
    assert augmenter.bypass == False

    # Fingerprinter g(f(.)).
    fingerprinter = get_fingerprinter(cfg, trainable=False)

    return preprocessor, augmenter, fingerprinter

In [7]:
import os
import sys
import pathlib
import click
import yaml
# Set TensorFlow's C++ log-level environment variable.
# NOTE(review): tensorflow is already imported in the first cell, so this
# probably comes too late to affect its start-up logging -- consider setting
# it before the first `import tensorflow`.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

def load_config(config_fname):
    """Load ``./config/<config_fname>.yaml`` and return its parsed content.

    Args:
        config_fname: Configuration file name without directory or extension.

    Returns:
        The object produced by ``yaml.safe_load`` for that file.

    Raises:
        SystemExit: If the configuration file does not exist.
    """
    config_filepath = './config/' + config_fname + '.yaml'

    # Guard clause: stop immediately when the file is missing.
    if not os.path.exists(config_filepath):
        sys.exit(f'cli: ERROR! Configuration file {config_filepath} is missing!!')

    print(f'cli: Configuration from {config_filepath}')
    with open(config_filepath, 'r') as stream:
        return yaml.safe_load(stream)

In [8]:
# Name of the YAML file (without extension) under ./config/ to load.
config = "default"
cfg = load_config(config)
# Name passed to ExperimentHelper as `checkpoint_name` further down.
checkpoint_name = "Checks"

cli: Configuration from ./config/default.yaml


In [None]:
# Dataloader
dataset = Dataset(cfg)

# Build models.
m_pre, m_specaug, m_fp = build_fp(cfg)

# Learning schedule
# Total number of steps: MAX_EPOCH times the length of the training dataset.
total_nsteps = cfg['TRAIN']['MAX_EPOCH'] * len(dataset.get_train_ds())
if cfg['TRAIN']['LR_SCHEDULE'].upper() == 'COS':
    lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
        initial_learning_rate=float(cfg['TRAIN']['LR']),
        decay_steps=total_nsteps,
        alpha=1e-06)
elif cfg['TRAIN']['LR_SCHEDULE'].upper() == 'COS-RESTART':
    # CosineDecayRestarts takes no `num_periods` argument (that keyword
    # belongs to the linear-cosine schedules); passing it raises TypeError.
    lr_schedule = tf.keras.optimizers.schedules.CosineDecayRestarts(
        initial_learning_rate=float(cfg['TRAIN']['LR']),
        first_decay_steps=int(total_nsteps * 0.1),
        alpha=2e-06)
else:
    # Any other value: constant learning rate.
    lr_schedule = float(cfg['TRAIN']['LR'])

# Optimizer
if cfg['TRAIN']['OPTIMIZER'].upper() == 'LAMB':
    opt = LAMB(learning_rate=lr_schedule)
elif cfg['TRAIN']['OPTIMIZER'].upper() == 'ADAM':
    opt = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
else:
    raise NotImplementedError(cfg['TRAIN']['OPTIMIZER'])

# Experiment helper: see utils.experiment_helper.py for details.
helper = ExperimentHelper(
    checkpoint_name=checkpoint_name,
    optimizer=opt,
    model_to_checkpoint=m_fp,
    cfg=cfg)

# Loss objects
if cfg['LOSS']['LOSS_MODE'].upper() == 'NTXENT': # Default
    # n_rep is the part of each batch that is not an anchor.
    loss_obj_train = NTxentLoss(
        n_org=cfg['BSZ']['TR_N_ANCHOR'],
        n_rep=cfg['BSZ']['TR_BATCH_SZ'] - cfg['BSZ']['TR_N_ANCHOR'],
        tau=cfg['LOSS']['TAU'])
    loss_obj_val = NTxentLoss(
        n_org=cfg['BSZ']['VAL_N_ANCHOR'],
        n_rep=cfg['BSZ']['VAL_BATCH_SZ'] - cfg['BSZ']['VAL_N_ANCHOR'],
        tau=cfg['LOSS']['TAU'])
elif cfg['LOSS']['LOSS_MODE'].upper() == 'ONLINE-TRIPLET': # Now-playing
    loss_obj_train = OnlineTripletLoss(
        bsz=cfg['BSZ']['TR_BATCH_SZ'],
        n_anchor=cfg['BSZ']['TR_N_ANCHOR'],
        mode = 'semi-hard',
        margin=cfg['LOSS']['MARGIN'])
    loss_obj_val = OnlineTripletLoss(
        bsz=cfg['BSZ']['VAL_BATCH_SZ'],
        n_anchor=cfg['BSZ']['VAL_N_ANCHOR'],
        mode = 'all', # use 'all' mode for validation
        margin=0.)
else:
    raise NotImplementedError(cfg['LOSS']['LOSS_MODE'])

In [None]:
# Get the training dataset and wrap it in an OrderedEnqueuer so batches are
# prepared by background worker processes.
train_ds = dataset.get_train_ds(0)
enq = tf.keras.utils.OrderedEnqueuer(
        train_ds, use_multiprocessing=True, shuffle=train_ds.shuffle)
# Worker count and queue size come from the DEVICE section of the config.
# NOTE(review): the enqueuer is never stopped in this notebook -- consider
# calling enq.stop() once the batch has been fetched.
enq.start(workers=cfg['DEVICE']['CPU_N_WORKERS'],
        max_queue_size=cfg['DEVICE']['CPU_MAX_QUEUE'])

In [None]:
# Pull one batch from the enqueuer's generator.
# Presumably a sequence of tensors (it is indexed and concatenated in the
# next cell) -- TODO confirm against Dataset.
X = next(enq.get())

In [None]:
# Length of the first element of the batch.
n_anchors = len(X[0])
# Concatenate the batch elements along axis 0.
# NOTE(review): X is rebound here, so this cell is not idempotent -- running
# it a second time operates on the already-concatenated tensor.
X = tf.concat(X, axis=0)

In [None]:
# Run the spectrogram front end on the batch; as the last expression of the
# cell, its result is displayed.
m_pre(X)