# Testing Masking Logic to Ignore Padding

In [31]:
import numpy as np
# Display array values rounded to two decimal places
np.set_printoptions(precision=2)

# Seed the legacy global RNG so the sample below is reproducible,
# then draw a random 2D array with 3 rows and 5 columns
np.random.seed(0)
shape_2d = (3, 5)
a = np.random.rand(*shape_2d)
a

array([[0.55, 0.72, 0.6 , 0.54, 0.42],
       [0.65, 0.44, 0.89, 0.96, 0.38],
       [0.79, 0.53, 0.57, 0.93, 0.07]])

In [32]:
# Pad the array on the right with 2 extra columns of zeros
b = np.zeros((3, 2))
# Stacking horizontally joins the two 2D arrays along the column axis
c = np.hstack((a, b))
c

array([[0.55, 0.72, 0.6 , 0.54, 0.42, 0.  , 0.  ],
       [0.65, 0.44, 0.89, 0.96, 0.38, 0.  , 0.  ],
       [0.79, 0.53, 0.57, 0.93, 0.07, 0.  , 0.  ]])

In [33]:
# Build the mask for the padded array: 1s over the 5 real (non-padded)
# columns, followed by 0s over the 2 padded columns
mask = np.concatenate(
    (np.ones((3, 5), dtype=float), np.zeros((3, 2), dtype=float)),
    axis=1,
)
print(f"Mask:\n{mask} \nshape: {mask.shape}")

Mask:
[[1. 1. 1. 1. 1. 0. 0.]
 [1. 1. 1. 1. 1. 0. 0.]
 [1. 1. 1. 1. 1. 0. 0.]] 
shape: (3, 7)


In [34]:
# Calculate the number of non-padded columns using only the mask matrix:
# count all the 1s and divide by the number of rows
# (assumes every row of the mask pads the same columns)
non_padded_columns = (mask == 1).sum() / len(mask)
non_padded_columns

np.float64(5.0)

In [35]:
# Create a batch of masks, all ones for now (nothing is padded yet).
# The layout is (batch, rows, columns) = (4, 3, 7): a leading batch
# dimension of size 4 in front of the 3 x 7 mask used above.
masks = np.ones((4, 3, 5+2), dtype=float)
# Edit the masks of the different batch elements so that each one has a
# different number of trailing padded columns
masks[0, :, -2:] = 0  # batch element 0: last 2 columns padded
masks[1, :, -1] = 0   # batch element 1: last column padded
masks[2, :, -3:] = 0  # batch element 2: last 3 columns padded
# batch element 3 is left as all ones, i.e. it has no padding

print(f"Masks:\n{masks} \nshape: {masks.shape}")

Masks:
[[[1. 1. 1. 1. 1. 0. 0.]
  [1. 1. 1. 1. 1. 0. 0.]
  [1. 1. 1. 1. 1. 0. 0.]]

 [[1. 1. 1. 1. 1. 1. 0.]
  [1. 1. 1. 1. 1. 1. 0.]
  [1. 1. 1. 1. 1. 1. 0.]]

 [[1. 1. 1. 1. 0. 0. 0.]
  [1. 1. 1. 1. 0. 0. 0.]
  [1. 1. 1. 1. 0. 0. 0.]]

 [[1. 1. 1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1. 1. 1.]]] 
shape: (4, 3, 7)


In [36]:
# Calculate the number of non-padded columns for each batch element from the
# (batch size x rows x columns) masks: count the 1s over the row and column
# axes, then divide by the number of rows
non_padded_columns = (masks == 1).sum(axis=(1, 2)) / masks.shape[1]
non_padded_columns

array([5., 6., 4., 7.])

In the case of a GRU network, the `non_padded_columns` variable can be used as a `variable_sequence_length` variable that is passed to the forward function so that the padded values are ignored. This way, no loss is calculated for the padded values, because they are never forwarded to the model in the first place.

Also, when comparing this example to the dmatrix masks, note that `masks.shape[1]` holds the octave-band dimension, so that is the value the sum of all 1s in the mask matrix must be divided by. The result is the number of time frames that are not padded.

In [37]:
# Initialize the loss array with random numbers, one value per entry of the
# padded 3 x 7 array
loss = np.random.random_sample((3, 5 + 2))
print(f"Loss array:\n{loss}")

Loss array:
[[0.09 0.02 0.83 0.78 0.87 0.98 0.8 ]
 [0.46 0.78 0.12 0.64 0.14 0.94 0.52]
 [0.41 0.26 0.77 0.46 0.57 0.02 0.62]]


In [38]:
# Multiply the loss element-wise by the masks so that padded positions become 0.
# The loss is broadcast against the leading batch dimension of the masks, so
# the result has the same shape as `masks`.
loss = np.multiply(loss, masks)
print(f"Loss array after multiplication:\n{loss}")

# For reference, this is how it is done in train.py for GRU:
# loss = (loss * masks.mean(dim=2)).sum() / masks.sum()

Loss array after multiplication:
[[[0.09 0.02 0.83 0.78 0.87 0.   0.  ]
  [0.46 0.78 0.12 0.64 0.14 0.   0.  ]
  [0.41 0.26 0.77 0.46 0.57 0.   0.  ]]

 [[0.09 0.02 0.83 0.78 0.87 0.98 0.  ]
  [0.46 0.78 0.12 0.64 0.14 0.94 0.  ]
  [0.41 0.26 0.77 0.46 0.57 0.02 0.  ]]

 [[0.09 0.02 0.83 0.78 0.   0.   0.  ]
  [0.46 0.78 0.12 0.64 0.   0.   0.  ]
  [0.41 0.26 0.77 0.46 0.   0.   0.  ]]

 [[0.09 0.02 0.83 0.78 0.87 0.98 0.8 ]
  [0.46 0.78 0.12 0.64 0.14 0.94 0.52]
  [0.41 0.26 0.77 0.46 0.57 0.02 0.62]]]
