### This notebook demonstrates padding variable-length NumPy arrays or tensors with the `tf.data.Dataset.padded_batch` method.


In [1]:
import tensorflow as tf

2022-10-26 17:14:47.375737: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-10-26 17:14:47.579788: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-10-26 17:14:47.579813: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2022-10-26 17:14:47.630419: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-10-26 17:14:48.905535: W tensorflow/stream_executor/platform/de

### `Dataset.padded_batch` combines consecutive elements of a dataset into padded batches.

```python
padded_batch(
    batch_size,
    padded_shapes=None,
    padding_values=None,
    drop_remainder=False,
    name=None
)
```

In [2]:
# Build a dataset of variable-length vectors: for each n in 1..4 the element
# is a 1-D tensor holding n copies of n.
lengths = tf.data.Dataset.range(1, 5)
A = lengths.map(lambda n: tf.fill([n], n))

# Show the raw, unpadded elements.
for row in A.as_numpy_iterator():
    print(row)

[1]
[2 2]
[3 3 3]
[4 4 4 4]


2022-10-26 17:14:50.630934: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-10-26 17:14:50.630964: W tensorflow/stream_executor/cuda/cuda_driver.cc:263] failed call to cuInit: UNKNOWN ERROR (303)
2022-10-26 17:14:50.630986: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (debonair): /proc/driver/nvidia/version does not exist
2022-10-26 17:14:50.631395: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Pad to the smallest per-batch size that fits all elements: with no
# `padded_shapes` given, each batch is padded only up to its own longest element.
B = A.padded_batch(batch_size=2)

# Print one batch per line group.
print(*B.as_numpy_iterator(), sep="\n")

[[1 0]
 [2 2]]
[[3 3 3 0]
 [4 4 4 4]]


In [4]:
# Pad to a fixed size: every element is padded out to length 5,
# regardless of the longest element in its batch.
C = A.padded_batch(
    batch_size=2,
    padded_shapes=5,
)

for batch in C.as_numpy_iterator():
    print(batch)

[[1 0 0 0 0]
 [2 2 0 0 0]]
[[3 3 3 0 0]
 [4 4 4 4 0]]


In [5]:
#Components of nested elements can be padded independently.
elements = [([1, 2, 3], [10]),
            ([4, 5], [11, 12])]

elements

[([1, 2, 3], [10]), ([4, 5], [11, 12])]

In [6]:
# Stream the nested Python tuples as a two-component dataset.
# `output_signature` replaces the deprecated positional `output_types`
# argument and declares each component as a variable-length 1-D int32 tensor.
dataset = tf.data.Dataset.from_generator(
    lambda: iter(elements),
    output_signature=(
        tf.TensorSpec(shape=(None,), dtype=tf.int32),
        tf.TensorSpec(shape=(None,), dtype=tf.int32),
    ),
)

# Pad the first component of the tuple to length 4 and the second component
# to the smallest size that fits the batch.
dataset = dataset.padded_batch(batch_size=2,
                               padded_shapes=([4], [None]))

list(dataset.as_numpy_iterator())

[(array([[1, 2, 3, 0],
         [4, 5, 0, 0]], dtype=int32),
  array([[10,  0],
         [11, 12]], dtype=int32))]

In [7]:
# Multiple components: zipping A with itself yields two-component elements,
# and each component is padded per batch. No `padding_values` is passed here,
# so both components are padded with the default value 0.
paired = tf.data.Dataset.zip((A, A))
E = paired.padded_batch(batch_size=2)

for batch in E.as_numpy_iterator():
    print(batch)

(array([[1, 0],
       [2, 2]]), array([[1, 0],
       [2, 2]]))
(array([[3, 3, 3, 0],
       [4, 4, 4, 4]]), array([[3, 3, 3, 0],
       [4, 4, 4, 4]]))
