Same as [`model_pipeline.ipynb`](model_pipeline.ipynb), but experimental since we want to try and do things more efficiently

In [1]:
# from transformers import GPT2Config, TFGPT2Model
import tensorflow as tf
import numpy as np
import os

physical_devices = tf.config.list_physical_devices('GPU')
for physical_device in physical_devices:
  try:
    tf.config.experimental.set_memory_growth(physical_device, True)
  except:
    # Invalid device or cannot modify virtual devices once initialized.
    pass

import utils
import config

ROOT_PATH = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
conf = config.Config("single_instruments_type", ROOT_PATH)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# dataset = tf.data.Dataset.load(conf.lmda_genres_tf_data_path)   \
dataset = tf.data.Dataset.load(conf.tf_data_path)               \
    .cache()                                                    \
    .shuffle(conf.SHUFFLE_SIZE)                                 \
    .batch(conf.BATCH_SIZE)                                     \
    .prefetch(conf.PREFETCH_SIZE)                                                 

In [3]:
song_batch = next(dataset.take(1).as_numpy_iterator())[0][:, :conf.SEQ_LEN-1, :]
print("Song_shape: {}\n".format(song_batch.shape))

decoder_output = tf.random.uniform((conf.BATCH_SIZE, conf.SEQ_LEN, conf.TOKEN_DIM), minval=-1, maxval=1)
print(f"Decoder output shape: {decoder_output.shape}\n")

out_logits = [layer(decoder_output) for layer in conf.output_dense_layers]
for i, out_logit_part in enumerate(out_logits):
    print(f"Output logit #{i}: {out_logit_part.shape}")

Song_shape: (2, 1023, 11)

Decoder output shape: (2, 1024, 512)

Output logit #0: (2, 1024, 8)
Output logit #1: (2, 1024, 256)
Output logit #2: (2, 1024, 131)
Output logit #3: (2, 1024, 128)
Output logit #4: (2, 1024, 136)
Output logit #5: (2, 1024, 256)
Output logit #6: (2, 1024, 129)
Output logit #7: (2, 1024, 128)
Output logit #8: (2, 1024, 25)
Output logit #9: (2, 1024, 153)
Output logit #10: (2, 1024, 49)


In [4]:
class MaskTypeProbabilitiesLayer(tf.keras.layers.Layer):
    def __init__(self, trainable=False, name=None, dtype=None, dynamic=False, **kwargs):
        super().__init__(trainable, name, dtype, dynamic, **kwargs)

    @tf.function
    def create_mask(self, inputs):
        batch_gt_types = inputs
        mask = tf.TensorArray(tf.bool, size=conf.SEQ_LEN)
        mask = mask.write(0, tf.constant([True, False, False, False, False, False, False, False], dtype=tf.bool))
        for i in tf.range(conf.SEQ_LEN-1):
            token_type = batch_gt_types[i]
            if token_type == 0: # only start of song token: cannot be anything else than instrument choice (1)
                type_mask = tf.constant([False, True, False, False, False, False, False, False], dtype=tf.bool)
            elif token_type == 1: # we reached instrument choice: cannot be anything else than instrument choice (1) or start of events (2)
                type_mask = tf.constant([False, True, True, False, False, False, False, False], dtype=tf.bool)
            elif token_type >= 2 and token_type < 7: # we reached start of events or notes
                type_mask = tf.constant([False, False, False, True, True, True, True, True], dtype=tf.bool)
            elif token_type == 7: # at the end of the song we can ONLY GUESS "000000000" TODO: change ending token to type 7s -> 7000000000
                type_mask = tf.constant([True, False, False, False, False, False, False, False], dtype=tf.bool)
            else:
                # ERROR. Define a random type mask so that it's defined in all branches for tf.function
                type_mask = tf.constant([False, False, False, False, False, False, False, False], dtype=tf.bool)
            mask = mask.write(i+1, type_mask)
        return mask.stack()

    def call(self, inputs, training=True):
        '''
        Takes as input the ground truth song (at training time) or the logits (at testing time) 
        and computes a mask for the type probabilities.
        '''
        if training:
            # Use the groundtruth song as a target
            song        = inputs
            gt_types    = song[:,:,0]       # Get the token types from the song (batch_size x seq_len-1)
            # Iterate over the batch to collect the appropriate masks from the song
            masks = tf.map_fn(fn=self.create_mask, 
                elems=gt_types, 
                fn_output_signature=tf.TensorSpec(
                    (conf.SEQ_LEN, conf.INPUT_RANGES['type']), 
                    dtype=tf.bool)
            )
            return masks
        else:
            # Compute the types and their masks one by one based on the type chosen at the previous iteration
            # TODO: implement this branch
            pass

mask_probabilities = MaskTypeProbabilitiesLayer()(song_batch, training=True)
mask_probabilities.shape

TensorShape([2, 1024, 8])

In [5]:
# With these masks we can compute the probabilities for the token types
activations = [tf.keras.layers.Softmax()]*len(conf.INPUT_RANGES)

types_probabilities = activations[0](out_logits[0], mask_probabilities) # (last out logit predicts a token that's out of bound in our sequence)
types_probabilities[0, :5]

<tf.Tensor: shape=(5, 8), dtype=float32, numpy=
array([[1.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 1.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.7258223 , 0.27417767, 0.        , 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.6820952 , 0.31790477, 0.        , 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.2189493 , 0.7810507 , 0.        , 0.        ,
        0.        , 0.        , 0.        ]], dtype=float32)>

In [39]:
# Now the second part of the layer: given the type probabilities, compute the other constraints
class MaskingActivationLayer(tf.keras.layers.Layer):
    def __init__(self, trainable=False, name=None, dtype=None, dynamic=False, **kwargs):
        super().__init__(trainable, name, dtype, dynamic, **kwargs)
        self.default_mask = conf.default_mask
        self.full_mask    = conf.full_mask
        self._numerators  = conf.numerators
        self._tot_numerators = conf.tot_numerators

    @tf.function
    def get_max_beat_from_time_sign(self, time_sign):
        '''
        Since the time sign is defined (in utils.time_sign_map()) as: 
            conf.numerators.index(time_sign[0]) + conf.denominators.index(time_sign[1])*conf.tot_numerators

        to retrieve the NUMERATOR of the time_sign given the index you need to divide by conf.tot_numerators and take the rest of the division
        that gives you the index of the corresponding numerator in conf.numerators
        then you use gather or, more simply, a slice to get the actual value of the numerator

        You then subtract 1 because the beat is in [0, numerator)
        '''
        idx = tf.math.floormod(time_sign, self._tot_numerators)
        return tf.gather(self._numerators, idx) - 1

    @tf.function
    def get_mask_for_all_tokens(self, inputs): 
        '''
        Inputs:
        - chosen_types:         (SEQ_LEN-1)*1
        - song_tokens:          (SEQ_LEN-1)*11
        - seq_scores:           (SEQ_LEN-1)*1391

        Returns a list of ndarrays of bool type used for masking
        Inputs are for a SINGLE ELEMENT OF A BATCH of size SEQ_LEN*(1+11+1391) where 1391 is the summed length of logits (minus the type)
        '''
        # Collect inputs from longer tensor
        chosen_types, song_tokens, seq_scores = inputs
        chosen_types = tf.cast(chosen_types, dtype=tf.int32)
        song_tokens  = tf.cast(song_tokens , dtype=tf.int32)
        seq_scores   = tf.cast(seq_scores  , dtype=tf.int32)
        # Indexes
        index_tensor = tf.range(conf.SEQ_LEN-1, dtype=tf.int32)
        # Define mask (output) using a TensorArray
        mask = tf.TensorArray(dtype=tf.bool, size=conf.SEQ_LEN-1)
        # Iterate over the indexes
        for idx in index_tensor:
            ## SETUP ##
            # Define the default variables and flags
            default_token_parts   = [True]*(len(conf.INPUT_RANGES)-1)
            default_flag          = False
            min_measure           = tf.constant(-1, dtype=tf.int32)
            min_beat              = tf.constant(-1, dtype=tf.int32)
            min_position          = tf.constant(-1, dtype=tf.int32)
            allowed_instruments   = tf.constant([0]*conf.INPUT_RANGES["instrument"], dtype=tf.int32)
            allowed_key_sign      = tf.constant(-1, dtype=tf.int32)
            allowed_time_sign     = tf.constant(-1, dtype=tf.int32)
            allowed_tempo         = tf.constant(-1, dtype=tf.int32)
            forbidden_instruments_flag = False
            forbidden_instruments = tf.constant([0]*conf.INPUT_RANGES["instrument"], dtype=tf.int32)
            forbidden_key_sign    = tf.constant(-1, dtype=tf.int32)
            forbidden_time_sign   = tf.constant(-1, dtype=tf.int32)
            forbidden_tempo       = tf.constant(-1, dtype=tf.int32)
            # Define the inputs
            chosen_type = chosen_types[idx]
            scores      = seq_scores[idx]
            song        = song_tokens * (tf.expand_dims([1]*idx + [0]*(conf.SEQ_LEN-1-idx), axis=-1)) # Mask all tokens after index idx
            ## MAIN BODY ##
            if chosen_type == 0 or chosen_type == 2: # TODO: change 0s to 7s at the end of the song
                # Original comments: 
                # only way it chooses 0 is that max_type==7 --> AFTER END OF SONG --> only thing the model can do is guess all zeros
                # "does not have to learn nothing" --> it's all zeros just like the padding tensors
                default_token_parts = [True, True, True, True, True, True, True, True, True, True]
                default_flag = True
            elif chosen_type == 1: # Instrument selection, false only for type and instrument type (the ones that you can choose)
                # TODO: this function was GREATLY changed. Is it ok?    
                if tf.size(tf.where(song[:idx, 0] == 1)[:,0]) == 0:
                    # Choice of first instrument
                    default_token_parts = [True, True, True, True, True, False, True, True, True, True]  # TODO: Element 6 should not be default = True right?
                    default_flag = True
                else:
                    # TODO: Original is this:
                    # forbidden_instruments, _ = tf.unique(tf.gather(
                    #     song[:, 6],
                    #     tf.squeeze(tf.where(song[:, 0] == 2))           
                    # ))
                    # I don't think it does what it's supposed to. 
                    # Forbidden instruments should be a 1D tensor of all previously defined instruments, right?
                    # Instruments are defined with type 1, right?
                    forbidden_instruments, _ = tf.unique(tf.gather(
                        song[:idx, 6], 
                        tf.where(song[:idx, 0] == 1)[:,0]        # Cast to 1D array
                    ))
                    forbidden_instruments_flag = True
            elif chosen_type == 3: # Notes: They have the same key_sign, time_sign and tempo as last previous event, everything has to be manually decided
                min_measure = song[idx, 1]   # It has to be >= than the last measure
                # If in the MEASURE SCORES the MAX SCORE between all possible measures == min_measure, the measure is min_measure.
                # In this case, we need to make sure that beat >= last_beat
                # TODO: I changed this. Is it okay? We are trying to get the part of the score that's between 0 and 256 right? (there is no type in my scores)
                if tf.math.argmax(
                    scores[:conf.INPUT_RANGES["measure"]], 
                        output_type=tf.int32) == min_measure:  
                    min_beat = song[idx,2]      # It has to be >= than the last beat when measure is the same
                    if tf.math.argmax(scores[
                        conf.INPUT_RANGES["measure"] : 
                        conf.INPUT_RANGES["measure"] + conf.INPUT_RANGES["beat"]], 
                        output_type=tf.int32) == min_beat:
                        min_position = song[idx,3]  # It has to be >= than the last position (if beat and measure are the same)
                    else:
                        min_position = tf.constant(0, dtype=tf.int32)
                else:
                    min_beat = tf.constant(0, dtype=tf.int32)
                    min_position = tf.constant(0, dtype=tf.int32)
                # Only some instruments, key signs, time signs and tempos are allowed for these events: 
                # - for instruments, the allowed ones are the ones that have been defined previously with type = 1
                # - for the others, the allowed ones are the ones that are collected right before the note from event types 4, 5 and 6
                allowed_instruments, _ = tf.unique(tf.gather(
                    song[:idx, 6], 
                    tf.where(song[:idx, 0] == 1)[:,0]
                ))
                # TODO: There are cases where there is not a LAST key_sign/time_sign, ...
                # If the model chooses 3, we cannot be certain that there is at least a 4, 5 or 6 before it
                # In these cases we use the default masks
                allowed_key_signs = tf.gather(
                    song[:idx, 8], 
                    tf.where(song[:idx, 0] == 4)[:,0]) # if type == 4 --> read the LAST key_sign
                if tf.size(allowed_key_signs) > 0:
                    allowed_key_sign = allowed_key_signs[-1]
                # TODO: else?
                allowed_time_signs = tf.gather(
                    song[:idx, 9], 
                    tf.where(song[:idx, 0] == 5)[:,0]) # if type == 5 --> read the LAST time_sign
                if tf.size(allowed_time_signs) > 0:
                    allowed_time_sign = allowed_time_signs[-1]
                # TODO: else?
                allowed_tempos = tf.gather(
                    song[:idx, 10], 
                    tf.where(song[:idx, 0] == 6)[:,0]) # if type == 6 --> read the LAST tempo
                if tf.size(allowed_tempos) > 0:
                    allowed_tempo = allowed_tempos[-1]
                # TODO: else?
            elif chosen_type >= 4 and chosen_type <= 6:     # key_sign, time_sign, tempo
                # If last event is at the beginning of a measure, you can add an event at the same time
                if song[idx, 3] == 0 and song[idx, 2] == 0:  # if beat and position == 0, the event can be at this measure
                    min_measure = song[idx, 1]
                else:
                    min_measure = song[idx, 1] + 1                   # otherwise it goes to the next measure
                # Fine-grain checks
                # TODO: As before, there are cases where there is not a LAST key_sign/time_sign. 
                # In these cases we should use the default masks.
                if chosen_type == 4:
                    # Cannot put the same key_sign again
                    forbidden_key_signs = tf.gather(
                        song[:idx, 8], 
                        tf.where(song[:idx, 0] == 4)[:,0]) # if type == 4 --> read the LAST key_sign
                    if tf.size(forbidden_key_signs) > 0:
                        forbidden_key_sign = forbidden_key_signs[-1]
                elif chosen_type == 5:
                    # Cannot put the same time_sign again
                    forbidden_time_signs = tf.gather(
                        song[:idx, 9], 
                        tf.where(song[:idx, 0] == 5)[:,0]) # if type == 5 --> read the LAST time_sign
                    if tf.size(forbidden_time_signs) > 0:
                        forbidden_time_sign = forbidden_time_signs[-1]
                elif chosen_type == 6:
                    # Cannot put the same tempo again
                    forbidden_tempos = tf.gather(
                        song[:idx, 10], 
                        tf.where(song[:idx, 0] == 6)[:,0]) # if type == 6 --> read the LAST tempo
                    if tf.size(forbidden_tempos) > 9:
                        forbidden_tempo = forbidden_tempos[-1]
            elif chosen_type == 7: # end of song --> only type can be chosen, all the others are default
                default_token_parts = [True, True, True, True, True, True, True, True, True, True]
                default_flag = True


            ## ENDING PART ##
            # Put together the masks
            if default_flag: 
                # No manual masking required, either "can freely choose this part of the token" (True) or 
                # "can only choose default for this part of the token" (False)
                mask.write(idx, tf.concat(
                    # Default mask only allows to predict a 0
                    # Full mask allows to predict any value
                    [self.default_mask[i] if default_token_parts[i] else self.full_mask[i] 
                        for i in range(len(default_token_parts))], axis=-1)
                )
            else: 
                # We need to do manual masking. Define all tensors
                measure_mask     = self.default_mask[0]
                beat_mask        = self.default_mask[1]
                position_mask    = self.default_mask[2]
                duration_mask    = self.default_mask[3]
                pitch_mask       = self.default_mask[4]
                instruments_mask = self.default_mask[5]
                velocity_mask    = self.default_mask[6]
                key_sign_mask    = self.default_mask[7]
                time_sign_mask   = self.default_mask[8]
                tempo_mask       = self.default_mask[9]

                ## maybe it's better to repeat the same logic that is present in the first part of the code 
                ## just to avoid skipping any strange case
                
                if not forbidden_instruments_flag:
                    # TODO: I didn't understand this comment ## nemmeno io
                    # Measure mask, beat and position go to default if type==2 and forbidden_instruments_flag == True
                    # so if forbidden_instruments_flag == False --> you can change it
                    measure_mask = tf.cast(
                        tf.concat([
                            tf.repeat([False], min_measure),        # Can be equal to or greater than min_measure
                            tf.repeat([True],  conf.INPUT_RANGES["measure"]-min_measure)], 
                            axis=-1),
                        dtype=tf.dtypes.bool)
                    if min_beat != -1:
                        # oss: allowed_time_sign is always != None if min_beat != None
                        # TODO: Did not understand this function ## explained inside the func
                        max_beat = self.get_max_beat_from_time_sign(allowed_time_sign)
                        # allowed beats are only AFTER previous beat and BEFORE max_beat from the numerator of the time_sign
                        beat_mask = tf.cast(tf.concat([
                            tf.repeat([False], min_beat),
                            tf.repeat([True],  max_beat-min_beat), 
                            tf.repeat([False], conf.INPUT_RANGES["beat"]-max_beat)],
                            axis=-1), 
                        dtype=tf.dtypes.bool)
                    if min_position != -1:
                        position_mask = tf.cast(tf.concat([
                            tf.repeat([False], min_position), 
                            tf.repeat([True],  conf.INPUT_RANGES["position"]-min_position)],
                            axis=-1), 
                        dtype=tf.dtypes.bool)

                else:
                    instruments_mask = tf.sparse.SparseTensor(  # Forbidden instruments
                        indices= tf.expand_dims(tf.cast(forbidden_instruments, tf.int64), axis=-1),
                        values = tf.zeros_like(forbidden_instruments),
                        dense_shape=[conf.INPUT_RANGES["instrument"]]
                    )
                    instruments_mask = tf.cast(
                        tf.sparse.to_dense(tf.sparse.reorder(instruments_mask), default_value=1), 
                        dtype=tf.dtypes.bool)
                    # FORBIDDEN INSTRUMENTS is ONLY USED WHEN type==1 --> measure_mask, beat, position are all default
                
                if chosen_type==3:
                    # Mask that's true only for defined instruments
                    instruments_mask = tf.sparse.SparseTensor( # Allowed instruments
                        indices=tf.expand_dims(tf.cast(allowed_instruments, tf.int64), axis=-1),
                        values=tf.ones_like(allowed_instruments),               # TODO: this was zeros_like. Shouldn't it be inverted tho?
                        dense_shape=[conf.INPUT_RANGES["instrument"]]
                    )
                    instruments_mask = tf.cast(
                        tf.sparse.to_dense(tf.sparse.reorder(instruments_mask), default_value=1), # TODO: the default value was 0. Shouldn't it be 1 tho?
                        dtype=tf.dtypes.bool)
                    # TODO: I think this part should be indented like this
                    # Deal with key signs and time signs
                    if allowed_key_sign != -1:
                        key_sign_mask = tf.convert_to_tensor([
                            i == allowed_key_sign 
                            for i in range(conf.INPUT_RANGES["key_sign"])], 
                            dtype=tf.bool)
                    elif forbidden_key_sign != -1: ## forbidden_key_sign can only appear if chosen_type = 4!
                        # Inverse
                        key_sign_mask = tf.convert_to_tensor([
                            i != forbidden_key_sign 
                            for i in range(conf.INPUT_RANGES["key_sign"])], 
                            dtype=tf.bool)
                    else: 
                        pass
                    if allowed_time_sign != -1:
                        time_sign_mask = tf.convert_to_tensor([
                            i == allowed_time_sign 
                            for i in range(conf.INPUT_RANGES["time_sign"])], 
                            dtype=tf.bool)
                    elif forbidden_time_sign != -1: ## forbidden_time_sign can only appear if chosen_type = 5!
                            time_sign_mask = tf.convert_to_tensor([
                                i != forbidden_time_sign 
                                for i in range(conf.INPUT_RANGES["time_sign"])], 
                                dtype=tf.bool)
                    else:
                        pass
                    if allowed_tempo != -1:
                        tempo_mask = tf.convert_to_tensor([
                            i == allowed_tempo 
                            for i in range(conf.INPUT_RANGES["tempo"])], 
                            dtype=tf.bool)
                    elif forbidden_tempo != -1: ## forbidden_tempo can only appear if chosen_type = 6!
                            tempo_mask = tf.convert_to_tensor([
                                i != forbidden_tempo 
                                for i in range(conf.INPUT_RANGES["tempo"])], 
                                dtype=tf.bool)
                    else:
                        pass

                # Write on the mask
                mask.write(idx, tf.concat([
                    measure_mask, beat_mask, position_mask, duration_mask,
                    pitch_mask, instruments_mask, velocity_mask, key_sign_mask,
                    time_sign_mask, tempo_mask], axis=-1))
        # Return the whole mask
        return mask.stack()

    def call(self, inputs, training=True):
        '''
        Inputs:
        - songs:                BATCH*(SEQ_LEN-1)*11
        - out_logits:           BATCH*(SEQ_LEN-1)*1391 (all except type)
        - types_probabilities:  BATCH*(SEQ_LEN-1)*8 --> becomes chosen_types through argmax --> BATCH*(SEQ_LEN-1)*1

        passes through map_fn --> get_mask_fro_all_tokens to debatch
        '''
        songs, out_logits, types_probabilities = inputs
        chosen_types  = tf.expand_dims(tf.math.argmax(types_probabilities[:,:-1], axis=2), axis=-1)
        concat_logits = tf.concat(out_logits[1:], axis=-1)                 # Concatenate all logits (except type) into a tensor batch_size x seq_len x 1391
        masks = tf.map_fn(fn=self.get_mask_for_all_tokens, elems=(         # Iterate function over batch dimension 
                tf.cast(chosen_types, concat_logits.dtype),                # BATCH*(SEQ_LEN-1)*1
                tf.cast(songs,   concat_logits.dtype),                     # BATCH*(SEQ_LEN-1)*11
                concat_logits[:, :conf.SEQ_LEN-1, :]                       # BATCH*(SEQ_LEN-1)*1391
            ), fn_output_signature=tf.TensorSpec(                          # Total: a BATCH * SEQ_LEN-1 * 1403 tensor
                (conf.SEQ_LEN-1, conf.input_ranges_sum - conf.INPUT_RANGES['type']),
                dtype=tf.bool
            ))
        return masks

In [49]:
# TODO: It's not working. There must be some logic problems... But if it works it's definetely faster
# also there's a high chance that those problem are also in the original version... So it's worth looking into
mask = MaskingActivationLayer()([song_batch, out_logits, types_probabilities])      # It's much faster!! If we can make this work we're good
index = 0
masks = []
for key in conf.INPUT_RANGES:
    if key != 'type':       # We have already checked for the type
        masks.append(mask[:, :, index:index+conf.INPUT_RANGES[key]])
        index += conf.INPUT_RANGES[key]

<tf.Tensor: shape=(256,), dtype=bool, numpy=
array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, Fal

In [28]:
song_input = tf.keras.layers.Input(shape=(conf.SEQ_LEN-1, len(conf.INPUT_RANGES)), dtype=tf.int8)

mask_type_probabilities_layer = MaskTypeProbabilitiesLayer()
final_masking_layer = MaskingActivationLayer()
activations = [tf.keras.layers.Softmax()]*len(conf.INPUT_RANGES)

mask_for_type_probabilities = mask_type_probabilities_layer(song_input, training=True)
type_probabilities = activations[0](out_logits[0], mask_for_type_probabilities)
final_mask = final_masking_layer([song_input, out_logits, type_probabilities])

# Unpack the final masks
index = 0
masks = []
for key in conf.INPUT_RANGES:
    if key != 'type':       # We have already checked for the type
        masks.append(final_mask[:, :, index:index+conf.INPUT_RANGES[key]])
        index += conf.INPUT_RANGES[key]

model = tf.keras.Model(inputs=song_input, outputs=masks)

In [29]:
masks = model(song_batch)
[mask.shape for mask in masks]

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000001FA421CA8C0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "c:\Users\Volpe\anaconda3\envs\ai3i\lib\site-packages\tensorflow\python\autograph\operators\control_flow.py", line 1416, in _py_if_stmt
    return body() if cond else orelse()  File "C:\Users\Volpe\AppData\Local\Temp\__autograph_generated_filejtl6gjth.py", line 343, in if_body_28
    ag__.converted_call(ag__.ld(mask).write, (ag__.ld(idx), ag__.converted_call(ag__.ld(tf).concat, ([ag__.if_exp(ag__.ld(default_token_parts)[ag__.ld(i)], lambda : ag__.ld(self).default_mask[ag__.ld(i)], lambda : ag__.ld(self).full_mask[ag__.ld(i)], 'default_token_parts[i]') for i in ag__.converted_call(ag__.ld(range), (ag__.converted_call(ag__.ld(len), (ag__.ld(default_token_parts),), None, fscope),), None, fscope)],), dict(axis=-1), fscop

InvalidArgumentError: Exception encountered when calling layer 'masking_activation_layer' (type MaskingActivationLayer).

in user code:

    File "C:\Users\Volpe\AppData\Local\Temp\ipykernel_22544\2265276239.py", line 182, in get_mask_for_all_tokens  *
        measure_mask = tf.cast(

    InvalidArgumentError: {{function_node __wrapped__Tile_device_/job:localhost/replica:0/task:0/device:CPU:0}} Expected multiples[1] >= 0, but got -104 [Op:Tile]


Call arguments received by layer 'masking_activation_layer' (type MaskingActivationLayer):
  • inputs=['tf.Tensor(shape=(2, 1023, 11), dtype=int8)', ['tf.Tensor(shape=(2, 1024, 8), dtype=float32)', 'tf.Tensor(shape=(2, 1024, 256), dtype=float32)', 'tf.Tensor(shape=(2, 1024, 131), dtype=float32)', 'tf.Tensor(shape=(2, 1024, 128), dtype=float32)', 'tf.Tensor(shape=(2, 1024, 136), dtype=float32)', 'tf.Tensor(shape=(2, 1024, 256), dtype=float32)', 'tf.Tensor(shape=(2, 1024, 129), dtype=float32)', 'tf.Tensor(shape=(2, 1024, 128), dtype=float32)', 'tf.Tensor(shape=(2, 1024, 25), dtype=float32)', 'tf.Tensor(shape=(2, 1024, 153), dtype=float32)', 'tf.Tensor(shape=(2, 1024, 49), dtype=float32)'], 'tf.Tensor(shape=(2, 1024, 8), dtype=float32)']
  • training=True