# Testing Masking Logic to Ignore Padding

In [31]:
import numpy as np
# print the array with 2 decimal places
np.set_printoptions(precision=2)

# Generate a random 2D array
np.random.seed(0)
a = np.random.rand(3, 5)
a

array([[0.55, 0.72, 0.6 , 0.54, 0.42],
       [0.65, 0.44, 0.89, 0.96, 0.38],
       [0.79, 0.53, 0.57, 0.93, 0.07]])

In [32]:
# Add 2 columns to the array and fill them with zeros
b = np.zeros((3, 2))
c = np.concatenate((a, b), axis=1)
c

array([[0.55, 0.72, 0.6 , 0.54, 0.42, 0.  , 0.  ],
       [0.65, 0.44, 0.89, 0.96, 0.38, 0.  , 0.  ],
       [0.79, 0.53, 0.57, 0.93, 0.07, 0.  , 0.  ]])

In [33]:
# Generate a mask with 1s and 0s for the non padded columns of the array
mask = np.ones((3, 5+2), dtype=float)
mask[:, -2:] = 0
print(f"Mask:\n{mask} \nshape: {mask.shape}")

Mask:
[[1. 1. 1. 1. 1. 0. 0.]
 [1. 1. 1. 1. 1. 0. 0.]
 [1. 1. 1. 1. 1. 0. 0.]] 
shape: (3, 7)


In [34]:
# Calculate the number of non-padded columns using the only the mask matrix
non_padded_columns = np.sum(mask == 1) / mask.shape[0]
non_padded_columns

np.float64(5.0)

In [None]:
# Create a new masks with all ones for now
masks = np.ones((3, 5+2), dtype=float)
# Make a variable called masks that has an additional dimension in the beginning of size 2
# So 4 x 7 x 3, where 4 is batch size and 7 is columns and 3 is rows
masks = np.expand_dims(masks, 0)
masks = np.repeat(masks, 4, axis=0)
# edit the masks of different batch elements
masks[0][:, -2:] = 0
masks[1][:, -1] = 0
masks[2][:, -3:] = 0

print(f"Masks:\n{masks} \nshape: {masks.shape}")

Masks:
[[[1. 1. 1. 1. 1. 0. 0.]
  [1. 1. 1. 1. 1. 0. 0.]
  [1. 1. 1. 1. 1. 0. 0.]]

 [[1. 1. 1. 1. 1. 1. 0.]
  [1. 1. 1. 1. 1. 1. 0.]
  [1. 1. 1. 1. 1. 1. 0.]]

 [[1. 1. 1. 1. 0. 0. 0.]
  [1. 1. 1. 1. 0. 0. 0.]
  [1. 1. 1. 1. 0. 0. 0.]]

 [[1. 1. 1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1. 1. 1.]]] 
shape: (4, 3, 7)


In [36]:
# Calculate the number of non-padded columns for each batch element using the variable of dimension: batch size x rows x columns
non_padded_columns = np.sum(masks == 1, axis=(1, 2)) / masks.shape[1]
non_padded_columns

array([5., 6., 4., 7.])

In the case of GRU network, the `non-padded_columns` variable can be used a `variable_sequence_length` variable that is passed to the forward function to ignore the padded values. This way loss won't be calculated for the padded values as the padded values aren't forwarded to the model in the first place.

Also when comparing this example to dmatrix masks, the `masks.shape[1]` contains the octave bands dimension so that is what you have to divide the sum of all 1s in the mask matrix with. This gives the number of time frames that are not padded.

In [37]:
# Initialize the loss array with random number
loss = np.random.rand(3, 5+2)
print(f"Loss array:\n{loss}")

Loss array:
[[0.09 0.02 0.83 0.78 0.87 0.98 0.8 ]
 [0.46 0.78 0.12 0.64 0.14 0.94 0.52]
 [0.41 0.26 0.77 0.46 0.57 0.02 0.62]]


In [38]:
# Multiply loss by masks
loss = loss * masks
print(f"Loss array after multiplication:\n{loss}")

# loss = (loss * masks.mean(dim=2)).sum() / masks.sum() # This is how it is done in train.py for GRU

Loss array after multiplication:
[[[0.09 0.02 0.83 0.78 0.87 0.   0.  ]
  [0.46 0.78 0.12 0.64 0.14 0.   0.  ]
  [0.41 0.26 0.77 0.46 0.57 0.   0.  ]]

 [[0.09 0.02 0.83 0.78 0.87 0.98 0.  ]
  [0.46 0.78 0.12 0.64 0.14 0.94 0.  ]
  [0.41 0.26 0.77 0.46 0.57 0.02 0.  ]]

 [[0.09 0.02 0.83 0.78 0.   0.   0.  ]
  [0.46 0.78 0.12 0.64 0.   0.   0.  ]
  [0.41 0.26 0.77 0.46 0.   0.   0.  ]]

 [[0.09 0.02 0.83 0.78 0.87 0.98 0.8 ]
  [0.46 0.78 0.12 0.64 0.14 0.94 0.52]
  [0.41 0.26 0.77 0.46 0.57 0.02 0.62]]]


In [69]:
# Review variables
print(f"non_padded_columns:\n{non_padded_columns} \nshape: {non_padded_columns.shape}")

# Create a variable that contains input in the dimension of batch size x rows x columns in this case 4 x 3 x 7
input = np.random.rand(4, 3, 5+2)
# print(f"Input array:\n{input}")
print(f"Input array shape:\n{input.shape}")

# Use the non_padded_columns variable to remove the padding from the input variable for each batch element
# Remember that now the columns dimension will have different size for each batch element (maybe use torch tensor for this)
#TODO -  Can't figure out how to do this

# print(f"Input array after removing padding:\n{input}")
print(f"Input array shape after removing padding:\n{input.shape}")


non_padded_columns:
tensor([5, 6, 4, 7]) 
shape: torch.Size([4])
Input array shape:
(4, 3, 7)
Input array shape after removing padding:
(4, 3, 7)


## Something about packing and unpacking sequences

In [55]:
import torch

# Convert input and mask to PyTorch tensors
input_tensor = torch.tensor(input, dtype=torch.float32)  # Shape: (batch_size, rows, cols)
print(f"Input tensor shape: {input_tensor.shape}")
mask_tensor = torch.tensor(masks, dtype=torch.float32)  # Shape: (batch_size, rows, cols)
print(f"Mask tensor shape: {mask_tensor.shape}")

# Compute the number of non-padded time steps per batch element (sum along time axis)
non_padded_lengths = torch.sum(mask_tensor[:, 0, :] == 1, dim=1)  # Shape: (batch_size,)

print(f"Non-padded time steps per batch: {non_padded_lengths}")

# Permute input to (batch, sequence_length, feature_dim) for GRU compatibility
input_tensor = input_tensor.permute(0, 2, 1)  # Shape: (batch_size, sequence_length, feature_dim)
print(f"Input tensor shape after permutation: {input_tensor.shape}")

# Pack sequences for GRU
packed_input = torch.nn.utils.rnn.pack_padded_sequence(
    input_tensor, non_padded_lengths.cpu(), batch_first=True, enforce_sorted=False
)

print(f"Packed input: {packed_input.data}")
print(f"Packed input data: {packed_input.data.shape}")
print(f"Packed input batch_sizes: {packed_input.batch_sizes}")


Input tensor shape: torch.Size([4, 3, 7])
Mask tensor shape: torch.Size([4, 3, 7])
Non-padded time steps per batch: tensor([5, 6, 4, 7])
Input tensor shape after permutation: torch.Size([4, 7, 3])
Packed input: tensor([[0.8792, 0.5305, 0.2791],
        [0.9680, 0.1313, 0.5922],
        [0.6898, 0.7488, 0.5024],
        [0.2484, 0.9242, 0.7401],
        [0.5437, 0.9221, 0.2097],
        [0.9944, 0.6041, 0.8968],
        [0.0584, 0.2378, 0.9426],
        [0.5059, 0.8623, 0.6805],
        [0.2827, 0.0895, 0.1157],
        [0.4518, 0.3828, 0.4067],
        [0.7307, 0.1719, 0.6340],
        [0.3104, 0.0487, 0.6224],
        [0.0302, 0.4059, 0.5771],
        [0.0709, 0.8954, 0.5521],
        [0.8817, 0.4493, 0.8673],
        [0.3730, 0.2536, 0.7105],
        [0.7103, 0.0243, 0.6953],
        [0.2928, 0.9678, 0.2717],
        [0.2724, 0.3045, 0.9402],
        [0.0079, 0.3426, 0.6720],
        [0.1524, 0.5469, 0.4554],
        [0.3727, 0.6222, 0.9489]])
Packed input data: torch.Size([22, 3])
P