In [19]:
import numpy as np
import tensorflow as tf


def convlayer(hidden_ch=128,
              strides=[(1,1),(1,1)],
              norm='layer_norm2d'):
    
    
    conv2d_1x3 = tf.keras.layers.Conv2D(hidden_ch,
                                        kernel_size=(1, 3),
                                        strides=strides[0],
                                        padding='SAME',
                                        dilation_rate=(1, 1),
                                        kernel_initializer='glorot_uniform',
                                        bias_initializer='zeros')
    conv2d_3x1 = tf.keras.layers.Conv2D(hidden_ch,
                                        kernel_size=(3, 1),
                                        strides=strides[1],
                                        padding='SAME',
                                        dilation_rate=(1, 1),
                                        kernel_initializer='glorot_uniform',
                                        bias_initializer='zeros')
    
    if norm == 'layer_norm1d':
        BN_1x3 = tf.keras.layers.LayerNormalization(axis=-1)
        BN_3x1 = tf.keras.layers.LayerNormalization(axis=-1)
    elif norm == 'layer_norm2d':
        BN_1x3 = tf.keras.layers.LayerNormalization(axis=(1, 2, 3))
        BN_3x1 = tf.keras.layers.LayerNormalization(axis=(1, 2, 3))
    else:
        BN_1x3 = tf.keras.layers.BatchNormalization(axis=-1) # Fix axis: 2020 Apr20
        BN_3x1 = tf.keras.layers.BatchNormalization(axis=-1)
        
    forward = tf.keras.Sequential([conv2d_1x3,
                                    tf.keras.layers.ELU(),
                                    BN_1x3,
                                    conv2d_3x1,
                                    tf.keras.layers.ELU(),
                                    BN_3x1
                                    ])
    
    return forward


def divenclayer(q=128, unit_dim=[32, 1]):
    
    def unit_block(input):
        x = tf.keras.layers.Dense(unit_dim[0], activation='elu')(input)
        x = tf.keras.layers.Dense(unit_dim[1])(x)
        return x
    
    def construct_layers():
        layers = list()
        for _ in range(q): # q: num_slices
            layers.append(tf.keras.Sequential([tf.keras.layers.Dense(unit_dim[0], activation='elu'),
                                               tf.keras.layers.Dense(unit_dim[1])]))
        return layers
            
    x = tf.reshape(x, shape=[x.shape[0], q, -1])
    
    out = list()
    for i in range(q):
        out.append(construct_layers[i](x[:, i, :]))
    
    return tf.concat(out, axis=1)


class DivEncLayer(tf.keras.layers.Layer):
    """
    Multi-head projection a.k.a. 'divide and encode' layer:
        
    • The concept of 'divide and encode' was discovered  in Lai et.al.,
     'Simultaneous Feature Learning and Hash Coding with Deep Neural Networks',
      2015. https://arxiv.org/abs/1504.03410
    • It was also adopted in Gfeller et.al. 'Now Playing: Continuo-
      us low-power music recognition', 2017. https://arxiv.org/abs/1711.10958
    
    Arguments
    ---------
    q: (int) number of slices as 'slice_length = input_dim / q'
    unit_dim: [(int), (int)]
    norm: 'layer_norm1d' or 'layer_norm2d' uses 1D-layer normalization on the feature.
          'batch_norm' or else uses batch normalization. Default is 'layer_norm2d'.

    Input
    -----
    x: (B,1,1,C)
    
    Returns
    -------
    emb: (B,Q)
    
    """
    def __init__(self, q=128, unit_dim=[32, 1], norm='batch_norm'):
        super(DivEncLayer, self).__init__()

        self.q = q
        self.unit_dim = unit_dim
        self.norm = norm
        
        if norm in ['layer_norm1d', 'layer_norm2d']:
            self.BN = [tf.keras.layers.LayerNormalization(axis=-1) for i in range(q)]
        else:
            self.BN = [tf.keras.layers.BatchNormalization(axis=-1) for i in range(q)]
            
        self.split_fc_layers = self._construct_layers() 


    def build(self, input_shape):
        # Prepare output embedding variable for dynamic batch-size 
        self.slice_length = int(input_shape[-1] / self.q)

 
    def _construct_layers(self):
        layers = list()
        for i in range(self.q): # q: num_slices
            layers.append(tf.keras.Sequential([tf.keras.layers.Dense(self.unit_dim[0], activation='elu'),
                                               #self.BN[i],
                                               tf.keras.layers.Dense(self.unit_dim[1])]))
        return layers

 
    @tf.function
    def _split_encoding(self, x_slices):
        """
        Input: (B,Q,S)
        Returns: (B,Q)
        
        """
        out = list()
        for i in range(self.q):
            out.append(self.split_fc_layers[i](x_slices[:, i, :]))
        return tf.concat(out, axis=1)

    
    def call(self, x): # x: (B,1,1,2048)
        x = tf.reshape(x, shape=[x.shape[0], self.q, -1]) # (B,Q,S); Q=num_slices; S=slice length; (B,128,8 or 16)
        return self._split_encoding(x)
    
    


In [16]:
def create_sequential_front_conv(input_shape=(256,32,1),
                                    emb_sz=128, # q
                                    front_hidden_ch=[128, 128, 256, 256, 512, 512, 1024, 1024],
                                    front_strides=[[(1,2), (2,1)], [(1,2), (2,1)],
                                                   [(1,2), (2,1)], [(1,2), (2,1)],
                                                   [(1,1), (2,1)], [(1,2), (2,1)],
                                                   [(1,1), (2,1)], [(1,2), (2,1)]],
                                    norm='layer_norm2d'):

    front_conv = tf.keras.Sequential(name='ConvLayers')
    if ((front_hidden_ch[-1] % emb_sz) != 0):
        front_hidden_ch[-1] = ((front_hidden_ch[-1]//emb_sz) + 1) * emb_sz

    for i in range(len(front_strides)):
        front_conv.add(convlayer(hidden_ch=front_hidden_ch[i], strides=front_strides[i], norm=norm))
    front_conv.add(tf.keras.layers.Flatten())

    return front_conv

def div_enc_layer(emb_sz=128, fc_unit_dim=[32,1], norm='layer_norm2d',):
    
    div_enc = tf.keras.Sequential(name="DivEnclayer")
    div_enc.add(DivEncLayer(q=emb_sz, unit_dim=fc_unit_dim, norm=norm))

    return div_enc

def get_fingerprinting(conv_model, div_enc):
    
    fingerprinting_model = tf.keras.Sequential(name='Fingerprinting')
    fingerprinting_model.add(conv_model)
    fingerprinting_model.add(div_enc)
    fingerprinting_model.add(tf.keras.layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=1)))
    
    return fingerprinting_model


In [22]:
conv_layer = create_sequential_front_conv()
enc_layer = div_enc_layer()

finger_model = get_fingerprinting(conv_layer, enc_layer) 


In [20]:
y = finger_model(tf.ones(shape=(120, 256, 32, 1)))

In [21]:
finger_model.summary()

In [26]:
import tensorflow as tf

# Função para criar o bloco de camadas densas
def create_dense_block(units: list):
    return tf.keras.Sequential([
        tf.keras.layers.Dense(units[0], activation='elu'),
        tf.keras.layers.Dense(units[1])
    ])

# Criar o modelo
def create_model(input_shape=(256, 256, 3), num_blocks=128, block_size=8):
    model = tf.keras.Sequential()
    
    # Primeiro bloco convolucional e average pooling
    model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same', input_shape=input_shape))
    model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(tf.keras.layers.MaxPooling2D((2, 2)))
    model.add(tf.keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same'))
    model.add(tf.keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same'))
    model.add(tf.keras.layers.MaxPooling2D((2, 2)))
    model.add(tf.keras.layers.Conv2D(256, (3, 3), activation='relu', padding='same'))
    model.add(tf.keras.layers.Conv2D(256, (3, 3), activation='relu', padding='same'))
    model.add(tf.keras.layers.MaxPooling2D((2, 2)))
    model.add(tf.keras.layers.Conv2D(512, (3, 3), activation='relu', padding='same'))
    model.add(tf.keras.layers.Conv2D(512, (3, 3), activation='relu', padding='same'))
    model.add(tf.keras.layers.MaxPooling2D((2, 2)))
    model.add(tf.keras.layers.Conv2D(1024, (3, 3), activation='relu', padding='same'))
    model.add(tf.keras.layers.Conv2D(1024, (3, 3), activation='relu', padding='same'))
    model.add(tf.keras.layers.GlobalAveragePooling2D())  # Average pooling para saída 1024
    
    # Divisão em 128 blocos
    split_layer = tf.keras.layers.Lambda(lambda x: tf.split(x, num_blocks, axis=1))
    model.add(split_layer)
    
    # Sequencial de camadas densas para cada bloco
    for _ in range(num_blocks):
        model.add(create_dense_block(units=[32,1]))
    
    # Concatenação dos resultados
    model.add(tf.keras.layers.Concatenate(axis=1))
    
    return model

# Criar o modelo
model = create_model()
# Sumário do modelo
#model.summary()

ValueError: Exception encountered when calling Sequential.call().

[1mCannot convert '32' to a shape.[0m

Arguments received by Sequential.call():
  • args=(['<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3371>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3372>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3373>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3374>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3375>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3376>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3377>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3378>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3379>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3380>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3381>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3382>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3383>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3384>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3385>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3386>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3387>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3388>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3389>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3390>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3391>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3392>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3393>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3394>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3395>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3396>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3397>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3398>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3399>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3400>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3401>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3402>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3403>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3404>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3405>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3406>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3407>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3408>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3409>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3410>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3411>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3412>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3413>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3414>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3415>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3416>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3417>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3418>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3419>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3420>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3421>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3422>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3423>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3424>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3425>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3426>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3427>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3428>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3429>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3430>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3431>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3432>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3433>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3434>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3435>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3436>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3437>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3438>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3439>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3440>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3441>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3442>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3443>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3444>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3445>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3446>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3447>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3448>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3449>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3450>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3451>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3452>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3453>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3454>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3455>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3456>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3457>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3458>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3459>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3460>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3461>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3462>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3463>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3464>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3465>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3466>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3467>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3468>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3469>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3470>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3471>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3472>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3473>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3474>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3475>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3476>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3477>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3478>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3479>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3480>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3481>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3482>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3483>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3484>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3485>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3486>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3487>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3488>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3489>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3490>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3491>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3492>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3493>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3494>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3495>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3496>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3497>', '<KerasTensor shape=(None, 8), dtype=float32, sparse=False, name=keras_tensor_3498>'],)
  • kwargs={'mask': ['None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None']}