In [1]:
 import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Lambda, Permute
from kapre.time_frequency import STFT, Magnitude, ApplyFilterbank
import math

2024-05-12 13:04:42.816799: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


input_shape=(1, 8000),
segment_norm=False,
n_fft=1024,
stft_hop=256,
n_mels=256,
fs=8000,
dur=1.,
f_min=300.,
f_max=4000.,
amin=1e-10, # minimum amp.
dynamic_range=80.,
name='Mel-spectrogram',
trainable=False

In [2]:
def construct_melspec_layer(input_shape, name, pad_layer, n_fft, stft_hop, mel_fb_kwargs):
    """Assemble the waveform-to-mel-spectrogram front end as a Sequential model.

    Layers are stacked in this order: input layer, `pad_layer`, Kapre STFT,
    Magnitude and a mel ApplyFilterbank. Every Kapre layer is configured with
    the 'channels_first' data format.

    Args:
        input_shape: per-example input shape (batch axis excluded).
        name: name given to the returned Sequential model.
        pad_layer: layer applied to the raw input before the STFT.
        n_fft: FFT size handed to the STFT layer.
        stft_hop: hop length handed to the STFT layer.
        mel_fb_kwargs: keyword arguments forwarded to the mel filterbank.

    Returns:
        The assembled tf.keras.Sequential model.
    """
    front_end = tf.keras.Sequential(name=name)
    stages = (
        tf.keras.layers.InputLayer(input_shape=input_shape),
        pad_layer,
        # Kapre's own padding is switched off for @tf.function compatibility;
        # padding is delegated to `pad_layer` instead.
        STFT(
            n_fft=n_fft,
            hop_length=stft_hop,
            pad_begin=False,
            pad_end=False,
            input_data_format='channels_first',
            output_data_format='channels_first',
        ),
        Magnitude(),
        ApplyFilterbank(
            type='mel',
            filterbank_kwargs=mel_fb_kwargs,
            data_format='channels_first',
        ),
    )
    for stage in stages:
        front_end.add(stage)
    return front_end

### Meramente para testar o `load_config`

In [3]:
import os
import sys
import yaml

def load_config(config_fname):
    """Load '../config/<config_fname>.yaml' and return its parsed content.

    Args:
        config_fname: configuration file name without directory or the
            '.yaml' extension.

    Returns:
        The object produced by yaml.safe_load for that file.

    Raises:
        SystemExit: if the configuration file does not exist.
    """
    config_filepath = ''.join(('../config/', config_fname, '.yaml'))

    # Bail out early when the file is missing; otherwise report which file is used.
    if not os.path.exists(config_filepath):
        sys.exit(f'cli: ERROR! Configuration file {config_filepath} is missing!!')
    print(f'cli: Configuration from {config_filepath}')

    with open(config_filepath, 'r') as stream:
        return yaml.safe_load(stream)

In [4]:
cfg = load_config("default")

# Pull the front-end hyper-parameters out of the MODEL section of the config.
fs, dur = cfg['MODEL']['FS'], cfg['MODEL']['DUR']
n_fft, stft_hop = cfg['MODEL']['STFT_WIN'], cfg['MODEL']['STFT_HOP']
n_mels = cfg['MODEL']['N_MELS']
f_min, f_max = cfg['MODEL']['F_MIN'], cfg['MODEL']['F_MAX']

# Only two feature types are supported; anything else is rejected.
if cfg['MODEL']['FEAT'] not in ('melspec', 'melspec_maxnorm'):
    raise NotImplementedError(cfg['MODEL']['FEAT'])
# 'melspec_maxnorm' enables the segment normalisation step; 'melspec' disables it.
segment_norm = cfg['MODEL']['FEAT'] == 'melspec_maxnorm'

cli: Configuration from ../config/default.yaml


In [5]:
# Single-channel waveform input: `dur` seconds sampled at `fs` Hz.
input_shape = (1, int(fs * dur))

# n_fft and stft_hop are the values read from the config (STFT_WIN / STFT_HOP),
# so the front end stays in sync with the configuration file.
amin = 1e-10  # minimum amplitude; a plain float (a trailing comma would make it a 1-tuple)
dynamic_range = 80.
name = 'Mel-spectrogram'

# Pad half an FFT window on each side of the time axis.
pad_l = n_fft // 2
pad_r = n_fft // 2

mel_fb_kwargs = {
    'sample_rate': fs,
    'n_freq': n_fft // 2 + 1,
    'n_mels': n_mels,
    'f_min': f_min,
    'f_max': f_max,
}

# Zero-pads only the last axis of a rank-3 input; Kapre's own padding is
# disabled in the STFT layer built by construct_melspec_layer.
pad_layer = Lambda(lambda z: tf.pad(z, tf.constant([[0, 0], [0, 0], [pad_l, pad_r]])))

In [6]:
# Build the mel-spectrogram front end from the parameters prepared above.
m = construct_melspec_layer(
    input_shape=input_shape,
    name=name,
    pad_layer=pad_layer,
    n_fft=n_fft,
    stft_hop=stft_hop,
    mel_fb_kwargs=mel_fb_kwargs,
)

In [7]:
# One-layer model that reorders the non-batch axes of the front-end output
# with the pattern (3, 2, 1).
p = tf.keras.Sequential(
    [Permute((3, 2, 1), input_shape=m.output_shape[1:])],
    name='Permute',
)

In [8]:
# Symbolic input with the same shape the front end was built for, so it
# follows the config-derived `input_shape` instead of a hard-coded (1, 8000).
x = tf.keras.Input(shape=input_shape)

In [9]:
# NOTE: this cell rebinds `x`, so it must be run exactly once after the cell
# that creates the input tensor; re-running it feeds the scaled output back
# into `m`.

# Mel magnitudes from the front end, plus a constant offset of 0.06.
x = m(x) + 0.06
# Convert to log10, flooring the argument at `amin` before taking the log.
x = tf.math.log(tf.maximum(x, amin)) / math.log(10)
# Shift so the maximum over the whole tensor is 0 (reduce_max has no axis argument).
x = x - tf.reduce_max(x)
# Clip from below at -dynamic_range.
x = tf.maximum(x, -1 * dynamic_range)

# Optional rescaling around half of the tensor-wide minimum.
if segment_norm:
    x = (x - tf.reduce_min(x) / 2) / tf.abs(tf.reduce_min(x) / 2 + 1e-10)

In [11]:
p(x)  # apply Permute((3, 2, 1)) to the non-batch axes; the cell displays the resulting tensor

<KerasTensor: shape=(None, 256, 32, 1) dtype=float32 (created by layer 'Permute')>

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Lambda, Permute
from kapre.time_frequency import STFT, Magnitude, ApplyFilterbank
import math

def construct_melspec_layer(input_shape, name, pad_layer, n_fft, stft_hop, mel_fb_kwargs):
    """Return a Sequential model mapping a waveform to a mel magnitude spectrogram.

    The stack is: InputLayer -> `pad_layer` -> STFT -> Magnitude ->
    ApplyFilterbank(type='mel'), with 'channels_first' used throughout.

    Args:
        input_shape: per-example input shape (batch axis excluded).
        name: name of the returned Sequential model.
        pad_layer: padding layer applied ahead of the STFT.
        n_fft: FFT size for the STFT layer.
        stft_hop: hop length for the STFT layer.
        mel_fb_kwargs: keyword arguments for the mel filterbank.

    Returns:
        A tf.keras.Sequential model containing the layers listed above.
    """
    # Kapre's built-in padding stays disabled for @tf.function compatibility;
    # the caller-supplied `pad_layer` does the padding.
    stft = STFT(
        n_fft=n_fft,
        hop_length=stft_hop,
        pad_begin=False,
        pad_end=False,
        input_data_format='channels_first',
        output_data_format='channels_first',
    )
    magnitude = Magnitude()
    mel_filterbank = ApplyFilterbank(
        type='mel',
        filterbank_kwargs=mel_fb_kwargs,
        data_format='channels_first',
    )

    m = tf.keras.Sequential(name=name)
    m.add(tf.keras.layers.InputLayer(input_shape=input_shape))
    for layer in (pad_layer, stft, magnitude, mel_filterbank):
        m.add(layer)
    return m



def mel_layer(cfg, x=None):
    """Build the log-mel-spectrogram graph described by `cfg` and return its output.

    Args:
        cfg: configuration mapping; the keys FS, DUR, STFT_WIN, STFT_HOP,
            N_MELS, F_MIN, F_MAX and FEAT are read from cfg['MODEL'].
        x: optional input tensor fed to the front end. When omitted, a
            symbolic tf.keras.Input of shape (1, int(FS * DUR)) is created.

    Returns:
        The output of a Permute((3, 2, 1)) layer applied to the scaled
        log-mel spectrogram.

    Raises:
        NotImplementedError: if cfg['MODEL']['FEAT'] is neither 'melspec'
            nor 'melspec_maxnorm'.
    """
    model_cfg = cfg['MODEL']
    fs = model_cfg['FS']
    dur = model_cfg['DUR']
    n_fft = model_cfg['STFT_WIN']
    stft_hop = model_cfg['STFT_HOP']
    n_mels = model_cfg['N_MELS']
    f_min = model_cfg['F_MIN']
    f_max = model_cfg['F_MAX']
    if model_cfg['FEAT'] == 'melspec':
        segment_norm = False
    elif model_cfg['FEAT'] == 'melspec_maxnorm':
        segment_norm = True
    else:
        raise NotImplementedError(model_cfg['FEAT'])

    input_shape = (1, int(fs * dur))
    amin = 1e-10  # minimum amplitude; plain float, not a 1-tuple
    dynamic_range = 80.
    name = 'Mel-spectrogram'

    # Pad half an FFT window on each side of the time axis.
    pad_l = n_fft // 2
    pad_r = n_fft // 2

    mel_fb_kwargs = {
        'sample_rate': fs,
        'n_freq': n_fft // 2 + 1,
        'n_mels': n_mels,
        'f_min': f_min,
        'f_max': f_max,
    }

    pad_layer = Lambda(lambda z: tf.pad(z, tf.constant([[0, 0], [0, 0], [pad_l, pad_r]])))

    m = construct_melspec_layer(input_shape, name, pad_layer, n_fft, stft_hop, mel_fb_kwargs)

    # The input must exist before it is transformed; without it the first use
    # of `x` below would raise UnboundLocalError.
    if x is None:
        x = tf.keras.Input(shape=input_shape)

    # Offset, convert to log10 (floored at `amin`), reference to the tensor-wide
    # maximum and clip at -dynamic_range.
    x = m(x) + 0.06
    x = tf.math.log(tf.maximum(x, amin)) / math.log(10)
    x = x - tf.reduce_max(x)
    x = tf.maximum(x, -1 * dynamic_range)

    if segment_norm:
        x = (x - tf.reduce_min(x) / 2) / tf.abs(tf.reduce_min(x) / 2 + 1e-10)

    # Reorder the non-batch axes with Permute((3, 2, 1)); applied exactly once.
    p = tf.keras.Sequential(name='Permute')
    p.add(Permute((3, 2, 1), input_shape=m.output_shape[1:]))

    return p(x)

# **Rascunhos**

In [None]:
def construct_melspec_layer(input_shape, n_fft, stft_hop, mel_fb_kwargs, pad_l, pad_r):
    """Build the 'Mel-spectrogram' Sequential model with its own padding layer.

    Args:
        input_shape: per-example input shape (batch axis excluded).
        n_fft: FFT size for the STFT layer.
        stft_hop: hop length for the STFT layer.
        mel_fb_kwargs: keyword arguments for the mel filterbank.
        pad_l: number of zeros prepended along the last axis.
        pad_r: number of zeros appended along the last axis.

    Returns:
        A tf.keras.Sequential model: InputLayer -> zero padding -> STFT ->
        Magnitude -> mel ApplyFilterbank (all 'channels_first').
    """
    layers = (
        tf.keras.layers.InputLayer(input_shape=input_shape),
        # Zero-pad only the last axis of the rank-3 input.
        Lambda(lambda z: tf.pad(z, tf.constant([[0, 0], [0, 0], [pad_l, pad_r]]))),
        # Kapre's own padding is disabled; the Lambda above handles it.
        STFT(
            n_fft=n_fft,
            hop_length=stft_hop,
            pad_begin=False,
            pad_end=False,
            input_data_format='channels_first',
            output_data_format='channels_first',
        ),
        Magnitude(),
        ApplyFilterbank(
            type='mel',
            filterbank_kwargs=mel_fb_kwargs,
            data_format='channels_first',
        ),
    )

    model = tf.keras.Sequential(name='Mel-spectrogram')
    for layer in layers:
        model.add(layer)
    return model


def melspec_layer(input_shape=(1, 8000), segment_norm=False, n_fft=1024, stft_hop=256,
                  n_mels=256, fs=8000, dur=1., f_min=300., f_max=4000., amin=1e-10,
                  dynamic_range=80., name='Mel-spectrogram', trainable=False):
    """Create a callable that maps a waveform batch to a scaled log-mel spectrogram.

    Args:
        input_shape: per-example input shape handed to the front end.
        segment_norm: when True, apply the extra rescaling around half of the
            tensor-wide minimum.
        n_fft: FFT size; also determines the padding (n_fft // 2 per side)
            and the filterbank's 'n_freq' (n_fft // 2 + 1).
        stft_hop: STFT hop length.
        n_mels: number of mel bins.
        fs: sample rate passed to the mel filterbank.
        f_min: lower frequency bound of the mel filterbank.
        f_max: upper frequency bound of the mel filterbank.
        amin: floor applied before taking the logarithm.
        dynamic_range: the output is clipped from below at -dynamic_range.
        dur, name, trainable: accepted for interface compatibility; this
            implementation does not use them.

    Returns:
        A function `call(x)` that runs the front end, the log scaling and the
        final Permute((3, 2, 1)).
    """
    mel_fb_kwargs = {'sample_rate': fs, 'n_freq': n_fft // 2 + 1, 'n_mels': n_mels,
                     'f_min': f_min, 'f_max': f_max}
    # Half an FFT window of zero padding on each side of the time axis.
    pad_l = n_fft // 2
    pad_r = n_fft // 2
    m = construct_melspec_layer(input_shape, n_fft, stft_hop, mel_fb_kwargs, pad_l, pad_r)

    p = tf.keras.Sequential(name='Permute')
    p.add(Permute((3, 2, 1), input_shape=m.output_shape[1:]))

    def call(x):
        # Offset, log10 with floor `amin`, reference to the tensor-wide
        # maximum, then clip at -dynamic_range.
        x = m(x) + 0.06
        x = tf.math.log(tf.maximum(x, amin)) / math.log(10)
        x = x - tf.reduce_max(x)
        x = tf.maximum(x, -1 * dynamic_range)
        if segment_norm:
            x = (x - tf.reduce_min(x) / 2) / tf.abs(tf.reduce_min(x) / 2 + 1e-10)
        return p(x)

    return call


def get_melspec_layer(cfg, trainable=False):
    """Create the mel-spectrogram callable from the MODEL section of `cfg`.

    Args:
        cfg: configuration mapping; FS, DUR, STFT_WIN, STFT_HOP, N_MELS,
            F_MIN, F_MAX and FEAT are read from cfg['MODEL'].
        trainable: forwarded unchanged to melspec_layer().

    Returns:
        Whatever melspec_layer() returns for these settings.

    Raises:
        NotImplementedError: if FEAT is neither 'melspec' nor 'melspec_maxnorm'.
    """
    model_cfg = cfg['MODEL']
    fs = model_cfg['FS']
    dur = model_cfg['DUR']
    n_fft = model_cfg['STFT_WIN']
    stft_hop = model_cfg['STFT_HOP']
    n_mels = model_cfg['N_MELS']
    f_min = model_cfg['F_MIN']
    f_max = model_cfg['F_MAX']

    # Reject unsupported feature types up front; only 'melspec_maxnorm'
    # switches the segment normalisation on.
    feat = model_cfg['FEAT']
    if feat not in ('melspec', 'melspec_maxnorm'):
        raise NotImplementedError(feat)
    segment_norm = feat == 'melspec_maxnorm'

    return melspec_layer(
        input_shape=(1, int(fs * dur)),
        segment_norm=segment_norm,
        n_fft=n_fft,
        stft_hop=stft_hop,
        n_mels=n_mels,
        fs=fs,
        dur=dur,
        f_min=f_min,
        f_max=f_max,
        trainable=trainable,
    )


In [None]:
class Melspec_layer(Model):
    """
    Log-scaled Mel-spectrogram front end, a wrapper class based on the
    implementation:
        https://github.com/keunwoochoi/kapre

    Input:
        (B,1,T)
    Output:
        (B,C,T,1) with C=Number of mel-bins

    USAGE:

        See get_melspec_layer() below.

    """
    def __init__(
            self,
            input_shape=(1, 8000),
            segment_norm=False,
            n_fft=1024,
            stft_hop=256,
            n_mels=256,
            fs=8000,
            dur=1.,
            f_min=300.,
            f_max=4000.,
            amin=1e-10, # minimum amp.
            dynamic_range=80.,
            name='Mel-spectrogram',
            trainable=False,
            **kwargs
            ):
        """Set up the padding, mel-spectrogram and permute sub-layers.

        Args:
            input_shape: per-example input shape (batch axis excluded).
            segment_norm: when True, call() applies the extra rescaling
                around half of the tensor-wide minimum.
            n_fft: FFT size; also sets the padding (n_fft // 2 per side) and
                the filterbank's 'n_freq' (n_fft // 2 + 1).
            stft_hop: STFT hop length.
            n_mels: number of mel bins.
            fs: sample rate passed to the mel filterbank.
            dur: duration used together with `fs` to compute
                `padded_input_shape`.
            f_min: lower frequency bound of the mel filterbank.
            f_max: upper frequency bound of the mel filterbank.
            amin: floor applied before taking the logarithm.
            dynamic_range: output is clipped from below at -dynamic_range.
            name: model name, also used for the inner Sequential model.
            trainable: forwarded to the Keras Model constructor.
            **kwargs: extra keyword arguments for the Keras Model constructor.
        """
        # Forward `trainable` rather than hard-coding False, so that the
        # constructor argument is honoured (the default is still False).
        super(Melspec_layer, self).__init__(name=name, trainable=trainable, **kwargs)

        self.mel_fb_kwargs = {
            'sample_rate': fs,
            'n_freq': n_fft // 2 + 1,
            'n_mels': n_mels,
            'f_min': f_min,
            'f_max': f_max,
            }
        self.n_fft = n_fft
        self.stft_hop = stft_hop
        self.n_mels = n_mels
        self.amin = amin
        self.dynamic_range = dynamic_range
        self.segment_norm = segment_norm

        # 'SAME' Padding layer: half an FFT window of zeros on each side of
        # the last axis.
        self.pad_l = n_fft // 2
        self.pad_r = n_fft // 2
        self.padded_input_shape = (1, int(fs * dur) + self.pad_l + self.pad_r)
        self.pad_layer = Lambda(
            lambda z: tf.pad(z, tf.constant([[0, 0], [0, 0],
                                             [self.pad_l, self.pad_r]]))
            )

        # Construct log-power Mel-spec layer
        self.m = self.construct_melspec_layer(input_shape, name)

        # Permute layer
        self.p = tf.keras.Sequential(name='Permute')
        self.p.add(Permute((3, 2, 1), input_shape=self.m.output_shape[1:]))

        super(Melspec_layer, self).build((None, input_shape[0], input_shape[1]))


    def construct_melspec_layer(self, input_shape, name):
        """Build the Sequential model: input -> pad -> STFT -> Magnitude -> mel filterbank.

        Args:
            input_shape: per-example input shape (batch axis excluded).
            name: name given to the Sequential model.

        Returns:
            The assembled tf.keras.Sequential model.
        """
        m = tf.keras.Sequential(name=name)
        m.add(tf.keras.layers.InputLayer(input_shape=input_shape))
        m.add(self.pad_layer)
        m.add(
            STFT(
                n_fft=self.n_fft,
                hop_length=self.stft_hop,
                pad_begin=False, # We do not use Kapre's padding, due to the @tf.function compatibility
                pad_end=False, # We do not use Kapre's padding, due to the @tf.function compatibility
                input_data_format='channels_first',
                output_data_format='channels_first')
            )
        m.add(
            Magnitude()
            )
        m.add(
            ApplyFilterbank(type='mel',
                            filterbank_kwargs=self.mel_fb_kwargs,
                            data_format='channels_first'
                            )
            )
        return m


    @tf.function
    def call(self, x):
        """Map a waveform batch to the scaled, permuted log-mel spectrogram.

        Args:
            x: input tensor fed to the mel-spectrogram sub-model.

        Returns:
            The tensor produced by the Permute((3, 2, 1)) layer.
        """
        # Mel magnitudes plus a constant offset of 0.06.
        x = self.m(x) + 0.06

        # log10 with floor `amin`, referenced to the tensor-wide maximum
        # (reduce_max is called without an axis), then clipped at
        # -dynamic_range.
        x = tf.math.log(tf.maximum(x, self.amin)) / math.log(10)
        x = x - tf.reduce_max(x)
        x = tf.maximum(x, -1 * self.dynamic_range)
        if self.segment_norm:
            x = (x - tf.reduce_min(x) / 2) / tf.abs(tf.reduce_min(x) / 2 + 1e-10)
        return self.p(x) # Permute((3,2,1))

    
def get_melspec_layer(cfg, trainable=False):
    """Instantiate a Melspec_layer from the MODEL section of `cfg`.

    Args:
        cfg: configuration mapping; FS, DUR, STFT_WIN, STFT_HOP, N_MELS,
            F_MIN, F_MAX and FEAT are read from cfg['MODEL'].
        trainable: value assigned to the layer's `trainable` attribute after
            construction.

    Returns:
        The configured Melspec_layer instance.

    Raises:
        NotImplementedError: if FEAT is neither 'melspec' nor 'melspec_maxnorm'.
    """
    model_cfg = cfg['MODEL']
    fs = model_cfg['FS']
    dur = model_cfg['DUR']
    n_fft = model_cfg['STFT_WIN']
    stft_hop = model_cfg['STFT_HOP']
    n_mels = model_cfg['N_MELS']
    f_min = model_cfg['F_MIN']
    f_max = model_cfg['F_MAX']

    # Reject unsupported feature types up front; only 'melspec_maxnorm'
    # switches the segment normalisation on.
    feat = model_cfg['FEAT']
    if feat not in ('melspec', 'melspec_maxnorm'):
        raise NotImplementedError(feat)
    segment_norm = feat == 'melspec_maxnorm'

    layer = Melspec_layer(
        input_shape=(1, int(fs * dur)),
        segment_norm=segment_norm,
        n_fft=n_fft,
        stft_hop=stft_hop,
        n_mels=n_mels,
        fs=fs,
        dur=dur,
        f_min=f_min,
        f_max=f_max,
    )
    layer.trainable = trainable
    return layer
                        