<a href="https://colab.research.google.com/github/MohneetKaur/Computer-Vision-with-Python-and-Tensorflow/blob/main/Ragged_Tensors.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf

Ragged tensors are an advanced data structure in TensorFlow designed to efficiently handle non-uniform or "ragged" data. Unlike regular (dense) tensors, which require every dimension to have a uniform size, ragged tensors can store rows of varying length along specified dimensions. This makes them particularly useful for sequences or collections whose members differ in length, such as sentences in a text document or variable-length sequences in time-series data.

In [2]:
 # A dense tensor must be rectangular: every row needs the same number of
 # elements. These rows have lengths 3, 1, 3 and 2, so tf.constant rejects
 # them. The failure is the point of this cell, so it is caught and printed
 # instead of being left to abort a "Restart & Run All".
 try:
     tensor_two_d = tf.constant([[1, 2, 0],
                                 [3],
                                 [1, 5, 6],
                                 [2, 3]])
 except ValueError as err:
     # Same "ExceptionType: message" form the uncaught error would end with.
     print(f"{type(err).__name__}: {err}")
 else:
     # Only reached if the rows turn out to be rectangular.
     print(tensor_two_d.shape)

ValueError: Can't convert non-rectangular Python sequence to Tensor.

In [None]:
# Rows of different lengths cannot go into tf.constant (see the ValueError in
# the previous cell); tf.ragged.constant accepts them.

tensor_two_d = [
    [1, 2, 0],
    [3],
    [1, 5, 6],
    [2, 3],
]

# Build a RaggedTensor from the nested Python list, then show it.
tensor_ragged = tf.ragged.constant(tensor_two_d)
print(tensor_ragged)

In [3]:
# tf.ragged.boolean_mask: keep only the elements whose mask entry is True.

# Short aliases keep the mask rows visually aligned with the data rows.
T, F = True, False

data = [[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]]
mask = [[T, F, T],
        [F, F, F],
        [T, F, F]]

# Each row keeps a different number of elements, so the result is ragged.
tf.ragged.boolean_mask(data, mask)


<tf.RaggedTensor [[1, 3], [], [7]]>

In [4]:
# Masking whole rows of a 2D RaggedTensor: the mask has one entry per row.
tf.ragged.boolean_mask(
    data=tf.ragged.constant([[1, 2, 3], [4], [5, 6]]),
    mask=tf.ragged.constant([True, False, True]),
).to_list()

[[1, 2, 3], [5, 6]]

In [5]:
# RaggedTensor.from_row_splits: row i spans values[row_splits[i]:row_splits[i + 1]],
# so two equal consecutive splits produce an empty row.

tf.RaggedTensor.from_row_splits(
    values=[3, 1, 4, 1, 5, 9, 2, 6],
    row_splits=[0, 4, 4, 7, 8, 8],
)

<tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>

In [6]:
# Multiple ragged dimensions: a RaggedTensor can itself be the `values` of
# another RaggedTensor, which adds one more ragged dimension.

# Same values and row_splits as the from_row_splits example above.
inner_rt = tf.RaggedTensor.from_row_splits(
    values=[3, 1, 4, 1, 5, 9, 2, 6],
    row_splits=[0, 4, 4, 7, 8, 8],
)

# Group the five inner rows into three outer rows: rows 0-2, none, rows 3-4.
outer_rt = tf.RaggedTensor.from_row_splits(
    values=inner_rt,
    row_splits=[0, 3, 3, 5],
)

print(outer_rt.to_list(), '\n')
print(outer_rt.ragged_rank)

[[[3, 1, 4, 1], [], [5, 9, 2]], [], [[6], []]] 

2


In [7]:
# RaggedTensor.from_nested_row_splits: build the same nested structure in one
# call by listing the row_splits from the outermost dimension inward.

tf.RaggedTensor.from_nested_row_splits(
    flat_values=[3, 1, 4, 1, 5, 9, 2, 6],
    nested_row_splits=(
        [0, 3, 3, 5],          # outer rows
        [0, 4, 4, 7, 8, 8],    # inner rows
    ),
).to_list()

[[[3, 1, 4, 1], [], [5, 9, 2]], [], [[6], []]]

In [8]:
# Uniform inner dimensions: RaggedTensors with uniform inner dimensions can be
# defined by using a multidimensional Tensor for `values`.

# The five [3]-shaped rows of ones are split into two ragged rows (2, then 3).
rt = tf.RaggedTensor.from_row_splits(
    values=tf.ones([5, 3], tf.int32),
    row_splits=[0, 2, 5],
)

print(rt.to_list())
print(rt.shape)  # the ragged dimension is reported as None

[[[1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1], [1, 1, 1]]]
(2, None, 3)


In [17]:
# Example: sentences with different word counts.
sentences = [
    "Hello world",
    "TensorFlow is great",
    "Examples are useful",
]

# Fit a word -> integer-id vocabulary, then encode each sentence as a list of ids.
# NOTE(review): the TF docs mark this Tokenizer as deprecated in favor of
# tf.keras.layers.TextVectorization — confirm for the TF version in use.
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(sentences)
sequences = tokenizer.texts_to_sequences(sentences)

# The encoded sentences differ in length, so hold them in a RaggedTensor.
ragged_tensor = tf.ragged.constant(sequences)
print("Ragged Tensor:\n", ragged_tensor)

Ragged Tensor:
 <tf.RaggedTensor [[1, 2], [3, 4, 5], [6, 7, 8]]>
