# **Train e validation**

## Imports

In [156]:
import os
import sys
import pathlib
import click
import yaml
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

In [171]:
import glob
import tensorflow as tf
from tensorflow.keras.utils import Progbar
from dataloader_keras import genUnbalSequence

## Load da config file

In [35]:
def load_config(config_fname):
    config_filepath = '../config/' + config_fname + '.yaml'
    if os.path.exists(config_filepath):
        print(f'cli: Configuration from {config_filepath}')
    else:
        sys.exit(f'cli: ERROR! Configuration file {config_filepath} is missing!!')

    with open(config_filepath, 'r') as f:
        cfg = yaml.safe_load(f)
    return cfg

In [36]:
cfg = load_config('default')

cli: Configuration from ../config/default.yaml


## step2: Dataset

In [67]:
class Dataset():
    def __init__(self, mode='train'):
        if mode == 'train': 
            _prefix = 'train-10k-30s/'
            self.filepath = cfg['DIR']['SOURCE_ROOT_DIR'] + _prefix + '**/*.wav'
        elif mode == 'val':
            self.filepath='data/SHS100K-VAL'

        # Data location
        self.bg_root_dir = cfg['DIR']['BG_ROOT_DIR'] #! é este que nao preciso? ver
        self.ir_root_dir = cfg['DIR']['IR_ROOT_DIR']
        self.speech_root_dir = cfg['DIR']['SPEECH_ROOT_DIR']

        # BSZ
        self.tr_batch_sz = cfg['BSZ']['TR_BATCH_SZ']
        self.tr_n_anchor = cfg['BSZ']['TR_N_ANCHOR']

        # Model parameters
        self.dur = cfg['MODEL']['DUR']
        self.hop = cfg['MODEL']['HOP']
        self.fs = cfg['MODEL']['FS']

        # Time-domain augmentation parameter
        self.tr_snr = cfg['TD_AUG']['TR_SNR']
        self.ts_snr = cfg['TD_AUG']['TS_SNR']
        self.val_snr = cfg['TD_AUG']['VAL_SNR']
        self.tr_use_bg_aug = cfg['TD_AUG']['TR_BG_AUG']
        self.ts_use_bg_aug = cfg['TD_AUG']['TS_BG_AUG']
        self.val_use_bg_aug = cfg['TD_AUG']['VAL_BG_AUG']
        self.tr_use_ir_aug = cfg['TD_AUG']['TR_IR_AUG']
        self.ts_use_ir_aug = cfg['TD_AUG']['TS_IR_AUG']
        self.val_use_ir_aug = cfg['TD_AUG']['VAL_IR_AUG']
        self.tr_use_speech_aug = cfg['TD_AUG']['TR_SPEECH_AUG']
        self.ts_use_speech_aug = cfg['TD_AUG']['TS_SPEECH_AUG']
        self.val_use_speech_aug = cfg['TD_AUG']['VAL_SPEECH_AUG']

        # Pre-load file paths for augmentation
        self.tr_bg_fps = self.ts_bg_fps = self.val_bg_fps = None
        self.tr_ir_fps = self.ts_ir_fps = self.val_ir_fps = None
        self.tr_speech_fps = self.ts_speech_fps = self.val_speech_fps = None
        self.__set_augmentation_fps()

        # Source (music) file paths
        self.tr_source_fps = self.val_source_fps = None


    def train_ds(self):    
        self.tr_source_fps = sorted(glob.glob(self.filepath, recursive=True))
        
        reduce_items_p = cfg['DATA_SEL']['REDUCE_ITEMS_P']

        ds = genUnbalSequence(
            fns_event_list=self.tr_source_fps,
            bsz=self.tr_batch_sz,
            n_anchor=self.tr_n_anchor, #ex) bsz=40, n_anchor=8: 4 positive samples per anchor
            duration=self.dur,  # duration in seconds
            hop=self.hop,
            fs=self.fs,
            shuffle=True,
            random_offset_anchor=True,
            bg_mix_parameter=[self.tr_use_bg_aug, self.tr_bg_fps, self.tr_snr],
            ir_mix_parameter=[self.tr_use_ir_aug, self.tr_ir_fps],
            speech_mix_parameter=[self.tr_use_speech_aug, self.tr_speech_fps,
                                  self.tr_snr],
            reduce_items_p=reduce_items_p)
        return ds
    
    def __set_augmentation_fps(self):
        """
        Set file path lists:

            If validation set was not available, we replace it with subset of
            the trainset.

        """
        # File lists for Augmentations
        if self.tr_use_bg_aug:
            self.tr_bg_fps = sorted(glob.glob(self.bg_root_dir +
                                              'tr/**/*.wav', recursive=True))
        if self.ts_use_bg_aug:
            self.ts_bg_fps = sorted(glob.glob(self.bg_root_dir +
                                              'ts/**/*.wav', recursive=True))
        if self.val_use_bg_aug:
            self.val_bg_fps = sorted(glob.glob(self.bg_root_dir +
                                               'tr/**/*.wav', recursive=True))

        if self.tr_use_ir_aug:
            self.tr_ir_fps = sorted(
                glob.glob(self.ir_root_dir + 'tr/**/*.wav', recursive=True))
        if self.ts_use_ir_aug:
            self.ts_ir_fps = sorted(
                glob.glob(self.ir_root_dir + 'ts/**/*.wav', recursive=True))
        if self.val_use_ir_aug:
            self.val_ir_fps = sorted(
                glob.glob(self.ir_root_dir + 'tr/**/*.wav', recursive=True))

        if self.tr_use_speech_aug:
            self.tr_speech_fps = sorted(
                glob.glob(self.speech_root_dir + 'train/**/*.wav',
                          recursive=True))
        self.ts_speech_fps = sorted(
            glob.glob(self.speech_root_dir + 'test/**/*.wav',
                      recursive=True))
        if self.val_use_speech_aug:
            self.val_speech_fps = sorted(
                glob.glob(self.speech_root_dir + 'dev/**/*.wav',
                          recursive=True))
        return

In [95]:
data = Dataset('train')
train_data = data.train_ds()

Próximo passo, meter o dados mais visíveis. Para caso eu queira ver uma musica, seja só fazer um print no main.ipynb

## Create the model

In [148]:
from melspectrogram import get_melspec_layer
from specaug_chain import get_specaug_chain_layer
from nnfp import get_fingerprinter

In [149]:
m_pre = get_melspec_layer(cfg, trainable=False)

# m_specaug: spec-augmentation layer.
m_specaug = get_specaug_chain_layer(cfg, trainable=False)

m_fp = get_fingerprinter(cfg, trainable=False)

In [None]:
import numpy as np

In [None]:
from tensorflow import keras

class SimpleDense(keras.layers.Layer): # All Keras layers inherit from the base Layer class.

  def __init__(self, units, activation=None): #construtor
    super().__init__()
    self.units = units
    self.activation = activation

  def build(self, input_shape): # Weight creation takes place in the build() method.
    input_dim = input_shape[-1]
    self.W = self.add_weight(shape=(input_dim, self.units), initializer="random_normal") # add_weight() is a shortcut method for creating weights. It is also possible to create standalone variables and assign them as layer attributes, like self.W = tf.Variable(tf.random.uniform(w_shape)).
    self.b = self.add_weight(shape=(self.units,), initializer="zeros")

  def call(self, inputs): # We define the forward pass computation in the call() method.
    y = tf.matmul(inputs, self.W) + self.b
    if self.activation is not None:
      y = self.activation(y)
    return y

In [153]:
class ConvLayer(tf.keras.layers.Layer):
    """
    Separable convolution layer
    
    Arguments
    ---------
    hidden_ch: (int)
    strides: [(int, int), (int, int)]
    norm: 'layer_norm1d' for normalization on Freq axis. (default)
          'layer_norm2d' for normalization on on FxT space 
          'batch_norm' or else, batch-normalization
    
    Input
    -----
    x: (B,F,T,1)
    
    [Conv1x3]>>[ELU]>>[BN]>>[Conv3x1]>>[ELU]>>[BN]
    
    Output
    ------
    x: (B,F,T,C) with {F=F/stride, T=T/stride, C=hidden_ch}
    
    """
    def __init__(self,
                 hidden_ch=128,
                 strides=[(1,1),(1,1)],
                 norm='layer_norm2d'):
        super(ConvLayer, self).__init__()
        self.conv2d_1x3 = tf.keras.layers.Conv2D(hidden_ch,
                                                 kernel_size=(1, 3),
                                                 strides=strides[0],
                                                 padding='SAME',
                                                 dilation_rate=(1, 1),
                                                 kernel_initializer='glorot_uniform',
                                                 bias_initializer='zeros')
        self.conv2d_3x1 = tf.keras.layers.Conv2D(hidden_ch,
                                                 kernel_size=(3, 1),
                                                 strides=strides[1],
                                                 padding='SAME',
                                                 dilation_rate=(1, 1),
                                                 kernel_initializer='glorot_uniform',
                                                 bias_initializer='zeros')
        
        if norm == 'layer_norm1d':
            self.BN_1x3 = tf.keras.layers.LayerNormalization(axis=-1)
            self.BN_3x1 = tf.keras.layers.LayerNormalization(axis=-1)
        elif norm == 'layer_norm2d':
            self.BN_1x3 = tf.keras.layers.LayerNormalization(axis=(1, 2, 3))
            self.BN_3x1 = tf.keras.layers.LayerNormalization(axis=(1, 2, 3))
        else:
            self.BN_1x3 = tf.keras.layers.BatchNormalization(axis=-1) # Fix axis: 2020 Apr20
            self.BN_3x1 = tf.keras.layers.BatchNormalization(axis=-1)
            
        self.forward = tf.keras.Sequential([self.conv2d_1x3,
                                            tf.keras.layers.ELU(),
                                            self.BN_1x3,
                                            self.conv2d_3x1,
                                            tf.keras.layers.ELU(),
                                            self.BN_3x1
                                            ])

       
    def call(self, x):
        return self.forward(x)


class DivEncLayer(tf.keras.layers.Layer):
    """
    Multi-head projection a.k.a. 'divide and encode' layer:
        
    • The concept of 'divide and encode' was discovered  in Lai et.al.,
     'Simultaneous Feature Learning and Hash Coding with Deep Neural Networks',
      2015. https://arxiv.org/abs/1504.03410
    • It was also adopted in Gfeller et.al. 'Now Playing: Continuo-
      us low-power music recognition', 2017. https://arxiv.org/abs/1711.10958
    
    Arguments
    ---------
    q: (int) number of slices as 'slice_length = input_dim / q'
    unit_dim: [(int), (int)]
    norm: 'layer_norm1d' or 'layer_norm2d' uses 1D-layer normalization on the feature.
          'batch_norm' or else uses batch normalization. Default is 'layer_norm2d'.

    Input
    -----
    x: (B,1,1,C)
    
    Returns
    -------
    emb: (B,Q)
    
    """
    def __init__(self, q=128, unit_dim=[32, 1], norm='batch_norm'):
        super(DivEncLayer, self).__init__()

        self.q = q
        self.unit_dim = unit_dim
        self.norm = norm
        
        if norm in ['layer_norm1d', 'layer_norm2d']:
            self.BN = [tf.keras.layers.LayerNormalization(axis=-1) for i in range(q)]
        else:
            self.BN = [tf.keras.layers.BatchNormalization(axis=-1) for i in range(q)]
            
        self.split_fc_layers = self._construct_layers() 


    def build(self, input_shape):
        # Prepare output embedding variable for dynamic batch-size 
        self.slice_length = int(input_shape[-1] / self.q)

 
    def _construct_layers(self):
        layers = list()
        for i in range(self.q): # q: num_slices
            layers.append(tf.keras.Sequential([tf.keras.layers.Dense(self.unit_dim[0], activation='elu'),
                                               #self.BN[i],
                                               tf.keras.layers.Dense(self.unit_dim[1])]))
        return layers

 
    @tf.function
    def _split_encoding(self, x_slices):
        """
        Input: (B,Q,S)
        Returns: (B,Q)
        
        """
        out = list()
        for i in range(self.q):
            out.append(self.split_fc_layers[i](x_slices[:, i, :]))
        return tf.concat(out, axis=1)

    
    def call(self, x): # x: (B,1,1,2048)
        x = tf.reshape(x, shape=[x.shape[0], self.q, -1]) # (B,Q,S); Q=num_slices; S=slice length; (B,128,8 or 16)
        return self._split_encoding(x)

In [158]:
class FingerPrinter(tf.keras.Model):
    """
    Fingerprinter: 'Neural Audio Fingerprint for High-specific Audio Retrieval
        based on Contrastive Learning', https://arxiv.org/abs/2010.11910
    
    IN >> [Convlayer]x8 >> [DivEncLayer] >> [L2Normalizer] >> OUT 
    
    Arguments
    ---------
    input_shape: tuple (int), not including the batch size
    front_hidden_ch: (list)
    front_strides: (list)
    emb_sz: (int) default=128
    fc_unit_dim: (list) default=[32,1]
    norm: 'layer_norm1d' for normalization on Freq axis. 
          'layer_norm2d' for normalization on on FxT space (default).
          'batch_norm' or else, batch-normalization.
    use_L2layer: True (default)
    
    • Note: batch-normalization will not work properly with TPUs.
                    
    
    Input
    -----
    x: (B,F,T,1)
    
        
    Returns
    -------
    emb: (B,Q) 
    
    """
    def __init__(self,
                 input_shape=(256,32,1),
                 front_hidden_ch=[128, 128, 256, 256, 512, 512, 1024, 1024],
                 front_strides=[[(1,2), (2,1)], [(1,2), (2,1)],
                                [(1,2), (2,1)], [(1,2), (2,1)],
                                [(1,1), (2,1)], [(1,2), (2,1)],
                                [(1,1), (2,1)], [(1,2), (2,1)]],
                 emb_sz=128, # q
                 fc_unit_dim=[32,1],
                 norm='layer_norm2d',
                 use_L2layer=True):
        super(FingerPrinter, self).__init__()
        self.front_hidden_ch = front_hidden_ch
        self.front_strides = front_strides
        self.emb_sz=emb_sz
        self.norm = norm
        self.use_L2layer = use_L2layer
        
        self.n_clayers = len(front_strides)
        self.front_conv = tf.keras.Sequential(name='ConvLayers')
        if ((front_hidden_ch[-1] % emb_sz) != 0):
            front_hidden_ch[-1] = ((front_hidden_ch[-1]//emb_sz) + 1) * emb_sz                
        
        # Front (sep-)conv layers
        #x = tf.zeros((1,)+ input_shape, dtype=tf.float32)
        #print(f"ConvLayer entrada: {self.front_conv(x).shape}")
        for i in range(self.n_clayers):
            self.front_conv.add(ConvLayer(hidden_ch=front_hidden_ch[i],
                strides=front_strides[i], norm=norm))
            #print(f"ConvLayer {i+1}: {self.front_conv(x).shape}")
        self.front_conv.add(tf.keras.layers.Flatten()) # (B,F',T',C) >> (B,D)
            
        # Divide & Encoder layer
        self.div_enc = DivEncLayer(q=emb_sz, unit_dim=fc_unit_dim, norm=norm)

        
    @tf.function
    def call(self, inputs):
        x = self.front_conv(inputs) # (B,D) with D = (T/2^4) x last_hidden_ch
        x = self.div_enc(x) # (B,Q)
        if self.use_L2layer:
            return tf.math.l2_normalize(x, axis=1) 
        else:
            return x

In [159]:
input_shape = (256, 32, 1) 
emb_sz = cfg['MODEL']['EMB_SZ']
norm = cfg['MODEL']['BN']
fc_unit_dim = [32, 1]

m = FingerPrinter(input_shape=input_shape,
                    emb_sz=emb_sz,
                    fc_unit_dim=fc_unit_dim,
                    norm=norm)
m.trainable = True

In [162]:
from NTxent_loss_single_gpu import NTxentLoss

In [163]:
total_nsteps = cfg['TRAIN']['MAX_EPOCH'] * len(train_data)

lr_schedule = tf.keras.experimental.CosineDecay(
            initial_learning_rate=float(cfg['TRAIN']['LR']),
            decay_steps=total_nsteps,
            alpha=1e-06)

opt = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

loss = NTxentLoss(
    n_org=cfg['BSZ']['TR_N_ANCHOR'],
    n_rep=cfg['BSZ']['TR_BATCH_SZ'] - cfg['BSZ']['TR_N_ANCHOR'],
    tau=cfg['LOSS']['TAU'])

In [165]:
m.compile(optimizer=opt, loss=loss, metrics=['accuracy', 'precision ', 'recall ', 'f1_score'])

In [169]:
history = m_fp.fit(train_data, train_labels, epochs=1, batch_size = 120)

TypeError: unsupported operand type(s) for *: 'slice' and 'int'

In [None]:
# Use fit() to train the model, optionally providing validation data to monitor performance on unseen data.
model.fit(train_images, train_labels, epochs=3, validation_data=(val_images, val_labels))

In [None]:
m.summary()

In [None]:
from tensorflow.keras.datasets import mnist

def get_mnist_model(): # Create a model (we factor this into a separate function so as to reuse it later).
  inputs = keras.Input(shape=(28 * 28,))
  FingerPrinter(input_shape=input_shape,
                      emb_sz=emb_sz,
                      fc_unit_dim=fc_unit_dim,
                      norm=norm)

  model = keras.Model(inputs, outputs)
  return model



(images, labels), (test_images, test_labels) = mnist.load_data() # Load your data, reserving some for validation.
images = images.reshape((60000, 28 * 28)).astype("float32") / 255
test_images = test_images.reshape((10000, 28 * 28)).astype("float32") / 255
train_images, val_images = images[10000:], images[:10000]
train_labels, val_labels = labels[10000:], labels[:10000]


model = get_mnist_model()

# Compile the model by specifying its optimizer, the loss function to minimize, and the metrics to monitor.
model.compile(optimizer="rmsprop", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Use fit() to train the model, optionally providing validation data to monitor performance on unseen data.
model.fit(train_images, train_labels, epochs=3, validation_data=(val_images, val_labels))


test_metrics = model.evaluate(test_images, test_labels) # Use evaluate() to compute the loss and metrics on new data.
predictions = model.predict(test_images) # Use predict() to compute classification probabilities on new data.

### Compile the model

In [132]:
from NTxent_loss_single_gpu import NTxentLoss

In [133]:
total_nsteps = cfg['TRAIN']['MAX_EPOCH'] * len(train_data)

lr_schedule = tf.keras.experimental.CosineDecay(
            initial_learning_rate=float(cfg['TRAIN']['LR']),
            decay_steps=total_nsteps,
            alpha=1e-06)

opt = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

loss = NTxentLoss(
    n_org=cfg['BSZ']['TR_N_ANCHOR'],
    n_rep=cfg['BSZ']['TR_BATCH_SZ'] - cfg['BSZ']['TR_N_ANCHOR'],
    tau=cfg['LOSS']['TAU'])

In [None]:
history.history

 ## step3: criterion and optimizer

In [None]:
# Training loop
ep_max = cfg['TRAIN']['MAX_EPOCH']
for ep in range(ep_max + 1):
    tf.print(f'EPOCH: {ep}/{ep_max}')

    # Train
    """ Parallelism to speed up preprocessing.............. """
    train_ds = dataset.get_train_ds(cfg['DATA_SEL']['REDUCE_ITEMS_P'])
    progbar = Progbar(len(train_ds))
    enq = tf.keras.utils.OrderedEnqueuer(
        train_ds, use_multiprocessing=True, shuffle=train_ds.shuffle)
    enq.start(workers=cfg['DEVICE']['CPU_N_WORKERS'],
                max_queue_size=cfg['DEVICE']['CPU_MAX_QUEUE'])
    i = 0
    while i < len(enq.sequence):
        X = next(enq.get()) # X: Tuple(Xa, Xp)
        avg_loss, sim_mtx = train_step(X, m_pre, m_specaug, m_fp,
                                        loss_obj_train, lr_schedule)
        progbar.add(1, values=[("tr loss", avg_loss)])
        i += 1
    enq.stop()
    """ End of Parallelism................................. """

## Train

In [85]:
def train_step(X, m_pre, m_specaug, m_fp, loss_obj, learning_rate):
    """ Train step """
    # X: (Xa, Xp)
    # Xa: anchors or originals, s.t. [xa_0, xa_1,...]
    # Xp: augmented replicas, s.t. [xp_0, xp_1] with xp_n = rand_aug(xa_n).
    n_anchors = len(X[0])
    X = tf.concat(X, axis=0)
    feat = m_specaug(m_pre(X))  # (nA+nP, F, T, 1)
    m_fp.trainable = True
    with tf.GradientTape() as tape:
        emb = m_fp(feat)  # (BSZ, Dim)
        loss, sim_mtx, _ = loss_obj.compute_loss(
            emb[:n_anchors, :], emb[n_anchors:, :]) # {emb_org, emb_rep}
    #g = t.gradient(loss, m_fp.trainable_variables)
    #helper.optimizer.apply_gradients(zip(g, m_fp.trainable_variables))
    
    grad_loss_wrt_m_fp = tape.gradient(loss, [m_fp]) # Retrieve the gradient of the loss with regard to weights.
    m_fp.assign_sub(grad_loss_wrt_m_fp * learning_rate) # Update the weights.

    #... # To tensorboard.
    return loss#, sim_mtx # avg_loss: average within the current epoch


### mini-batch?

In [107]:
def build_fp(cfg):
    """ Build fingerprinter """
    # m_pre: log-power-Mel-spectrogram layer, S.
    m_pre = get_melspec_layer(cfg, trainable=False)

    # m_specaug: spec-augmentation layer.
    m_specaug = get_specaug_chain_layer(cfg, trainable=False)
    assert(m_specaug.bypass==False) # Detachable by setting m_specaug.bypass.

    # m_fp: fingerprinter g(f(.)).
    m_fp = get_fingerprinter(cfg, trainable=False)
    return m_pre, m_specaug, m_fp

In [108]:
m_pre, m_specaug, m_fp = build_fp(cfg)

In [109]:
enq = tf.keras.utils.OrderedEnqueuer(
            train_data, use_multiprocessing=True, shuffle=train_data.shuffle)

enq.start(workers=cfg['DEVICE']['CPU_N_WORKERS'],
            max_queue_size=cfg['DEVICE']['CPU_MAX_QUEUE'])

In [110]:
i = 0
while i < len(enq.sequence):
  X = next(enq.get())
  loss = train_step(X, m_pre, m_specaug, m_fp, loss_obj_train, lr_schedule)
  print(f"Loss at step {step}: {loss:.4f}")

2024-04-25 15:26:10.289903: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:933] Skipping loop optimization for Merge node with control input: sequential_137/spec_n_cutout_3/StatefulPartitionedCall/cond/branch_executed/_104


AttributeError: 'FingerPrinter' object has no attribute '_id'

In [None]:
def training_step(inputs, targets):
  with tf.GradientTape() as tape: # Forward pass, inside a gradient tape scope
    predictions = model(inputs) # Forward pass, inside a gradient tape scope
    loss = square_loss(predictions, targets) # Forward pass, inside a gradient tape scope
  grad_loss_wrt_W, grad_loss_wrt_b = tape.gradient(loss, [W, b]) # Retrieve the gradient of the loss with regard to weights.
  W.assign_sub(grad_loss_wrt_W * learning_rate) # Update the weights.
  b.assign_sub(grad_loss_wrt_b * learning_rate) # Update the weights.
  return loss

In [180]:
import tensorflow
from tensorflow import keras
import tensorflow as tf

In [184]:
class Linear(keras.layers.Layer):
    def __init__(self, units=32):
        super().__init__()
        self.units = units

    def build(self, input_shape):
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,), initializer="random_normal", trainable=True
        )

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b

In [185]:
class MLPBlock(keras.layers.Layer):
    def __init__(self):
        super().__init__()
        self.linear_1 = Linear(32)
        self.linear_2 = Linear(32)
        self.linear_3 = Linear(1)

    def call(self, inputs):
        x = self.linear_1(inputs)
        x = tf.nn.relu(x)
        x = self.linear_2(x)
        x = tf.nn.relu(x)
        return self.linear_3(x)

        
mlp = MLPBlock()
y = mlp(tf.ones(shape=(3, 64)))  # The first call to the `mlp` will create the weights
print("weights:", len(mlp.weights))
print("trainable weights:", len(mlp.trainable_weights))

weights: 6
trainable weights: 6


In [None]:
mlp.summary()

In [187]:
# Define a classe Linear
class Linear(tf.keras.layers.Layer):
    def __init__(self, units):
        super(Linear, self).__init__()
        self.units = units
    def build(self, input_shape):
        self.w = self.add_weight(shape=(input_shape[-1], self.units),
                                 initializer='random_normal',
                                 trainable=True)
        self.b = self.add_weight(shape=(self.units,),
                                 initializer='zeros',
                                 trainable=True)
    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b

# Define a função que cria o modelo MLP usando a Sequential API
def create_mlp_model():
    model = tf.keras.Sequential([
        Linear(32),
        tf.keras.layers.Activation('relu'),
        Linear(32),
        tf.keras.layers.Activation('relu'),
        Linear(1)
    ])
    return model

# Crie o modelo MLP
mlp_model = create_mlp_model()

# Execute uma passagem de encaminhamento para construir os pesos
y = mlp_model(tf.ones(shape=(3, 64)))

# Imprima o número de pesos e pesos treináveis no modelo
print("weights:", len(mlp_model.weights))
print("trainable weights:", len(mlp_model.trainable_weights))

weights: 6
trainable weights: 6


In [188]:
mlp_model.summary()

Model: "sequential_960"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 linear_15 (Linear)          (3, 32)                   2080      
                                                                 
 activation_6 (Activation)   (3, 32)                   0         
                                                                 
 linear_16 (Linear)          (3, 32)                   1056      
                                                                 
 activation_7 (Activation)   (3, 32)                   0         
                                                                 
 linear_17 (Linear)          (3, 1)                    33        
                                                                 
Total params: 3169 (12.38 KB)
Trainable params: 3169 (12.38 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
