In [None]:
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
from tensorflow import keras
print(tf.__version__)

2.19.0


# In this code we will learn about the fundamentals of tensorflow

In [None]:
# Creating tensors
scalar = tf.constant(100)
scalar

<tf.Tensor: shape=(), dtype=int32, numpy=100>

In [None]:
# Checking the number of dimensions of a tensor
scalar.ndim

0

In [None]:
array = np.array([[1,2,3],[7,8,9]])
arr = tf.constant(array)
arr


<tf.Tensor: shape=(2, 3), dtype=int64, numpy=
array([[1, 2, 3],
       [7, 8, 9]])>

In [None]:
arr.ndim # checking the dimension

2

In [None]:
# By default we get dtype as int32 or int64
# Now let's create our own dtype

another = tf.constant([[1.,2.,3.],
                       [6.,7.,8.]], dtype = tf.float16)
another

<tf.Tensor: shape=(2, 3), dtype=float16, numpy=
array([[1., 2., 3.],
       [6., 7., 8.]], dtype=float16)>

In [None]:
# A scalar has no dimensions (rank 0),
# a vector has one dimension (rank 1),
# and a matrix has two dimensions (rank 2).

In [None]:
# for a 3d tensor the shape goes as (no_of_blocks or slices or depth, rows, columns)
tensor = tf.constant([
    [[1,2,3],[4,5,6]],
    [[6,7,8],[0,1,2]]
    ]) # here are two slices, in each slice there is two rows and three columns
tensor

<tf.Tensor: shape=(2, 2, 3), dtype=int32, numpy=
array([[[1, 2, 3],
        [4, 5, 6]],

       [[6, 7, 8],
        [0, 1, 2]]], dtype=int32)>

In [None]:
tensor.ndim

3

# Using tf.Variable to create tensors (mutable or changeable)

In [None]:
change = tf.Variable([7,7])
change

<tf.Variable 'Variable:0' shape=(2,) dtype=int32, numpy=array([7, 7], dtype=int32)>

In [None]:
# We can't change our tensor in the below way
# change[0] = 8
# ---------------------------------------------------------------------------
# TypeError                                 Traceback (most recent call last)
# /tmp/ipython-input-14-1996702720.py in <cell line: 0>()
# ----> 1 change[0] = 8

# TypeError: 'ResourceVariable' object does not support item assignment

In [None]:
# Changing the tensor change
change[0].assign(8)

<tf.Variable 'UnreadVariable' shape=(2,) dtype=int32, numpy=array([8, 7], dtype=int32)>

In [None]:
# Another way of changing mutable tensor
change.assign([9,7])
change

<tf.Variable 'Variable:0' shape=(2,) dtype=int32, numpy=array([9, 7], dtype=int32)>

In [None]:
# We can add value too
change.assign_add([7,9])
change

<tf.Variable 'Variable:0' shape=(2,) dtype=int32, numpy=array([16, 16], dtype=int32)>

In [None]:
# we can subtract too
change.assign_sub([1,2])
change

<tf.Variable 'Variable:0' shape=(2,) dtype=int32, numpy=array([15, 14], dtype=int32)>

# Creating Random Tensors

In [None]:
random_42 = tf.random.Generator.from_seed(42) # Seed is used for reproducibility
random_1= random_42.normal(shape = (3,2))
random_2 = random_42.normal(shape = (3,2))
random_1


<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[-0.7565803 , -0.06854702],
       [ 0.07595026, -1.2573844 ],
       [-0.23193763, -1.8107855 ]], dtype=float32)>

# Creating Random Tensor of INTEGER only

In [None]:
soviyat = tf.random.uniform(shape = (2,2,3), minval = 0, maxval = 10, dtype = tf.int32) # NOTE: To create integer use dtype = tf.int32
print(soviyat)

tf.Tensor(
[[[8 3 1]
  [7 2 2]]

 [[8 9 8]
  [5 6 5]]], shape=(2, 2, 3), dtype=int32)


In [None]:
# Checking if random_1 and random_2 are equal or not as they are from the same seed i.e (42).
random_1 == random_2

<tf.Tensor: shape=(3, 2), dtype=bool, numpy=
array([[False, False],
       [False, False],
       [False, False]])>

In [None]:
# Even though you're using the same seed, you're generating two different tensors in sequence from the same generator object.
# random_1 = random_42.normal(shape=(3, 2))  # First draw
# random_2 = random_42.normal(shape=(3, 2))  # Second draw
# Each call advances the internal state of the generator. So:
# random_1 is based on the first random state.
# random_2 is based on the next state → hence different values.

In [None]:
# So to have identical random tensors
# We need to create two separate generator objects, each initialized with the same seed:
randomseed_42 = tf.random.Generator.from_seed(42)
random_3 = randomseed_42.normal(shape = (3,2))
random_1 == random_3

<tf.Tensor: shape=(3, 2), dtype=bool, numpy=
array([[ True,  True],
       [ True,  True],
       [ True,  True]])>

In [None]:
# Practice: two separate generators created from the same seed give the same
# first draw.
# The first generator gets its own name so that `random_1` (the tensor drawn in
# an earlier cell) is not rebound to a Generator object; rebinding it would
# break the earlier `random_1 == ...` comparison cells when they are re-run.
randomobj_1a = tf.random.Generator.from_seed(1)
randomobj_1 = tf.random.Generator.from_seed(1)
var = randomobj_1a.uniform(shape = (3,2))
# Drawing var2 from randomobj_1a instead would advance that same generator,
# and the comparison below would be all False.
var2 = randomobj_1.uniform(shape = (3,2))
var == var2

<tf.Tensor: shape=(3, 2), dtype=bool, numpy=
array([[ True,  True],
       [ True,  True],
       [ True,  True]])>

# Shuffling our tensor

In [None]:
not_shuffled = tf.constant([[1,2],[10,7],[8,9]])
shuffled = tf.random.shuffle(not_shuffled)
print(not_shuffled)
print(shuffled)


tf.Tensor(
[[ 1  2]
 [10  7]
 [ 8  9]], shape=(3, 2), dtype=int32)
tf.Tensor(
[[10  7]
 [ 1  2]
 [ 8  9]], shape=(3, 2), dtype=int32)


# Shuffling using seed for reproducibility

In [None]:
# seed = tf.random.Generator.from_seed(42) # For reproducibility
# not_shuffled2 = tf.constant([[1,2,3],
#                              [4,5,6],
#                              [7,8,9]])
# shuffled2 = seed.shuffle(not_shuffled2)
# shuffled2
# ---------------------------------------------------------------------------
# AttributeError                            Traceback (most recent call last)
# /tmp/ipython-input-28-977898508.py in <cell line: 0>()
#       3                              [4,5,6],
#       4                              [7,8,9]])
# ----> 5 shuffled2 = seed.shuffle(not_shuffled2)
#       6 shuffled2

# AttributeError: 'Generator' object has no attribute 'shuffle'

In [None]:
# Global random seed
# tf.random.set_seed(42) # Comment this line and there is no reproducibility
not_shuffled3 = tf.constant([[10,20,30],
                             [4,50,6],
                             [7,80,9]])
shuffled3 = tf.random.shuffle(not_shuffled3)
shuffled3

<tf.Tensor: shape=(3, 3), dtype=int32, numpy=
array([[ 7, 80,  9],
       [10, 20, 30],
       [ 4, 50,  6]], dtype=int32)>

In [None]:
# Op-level seed only: the `seed` argument is passed, but no global seed is set in this cell
not_shuffled4 = tf.constant([[100,200,300],
                             [40,500,60],
                             [70,800,90]])
shuffled4 = tf.random.shuffle(not_shuffled4, seed = 2)
print(shuffled4)
shuffled5 = tf.random.shuffle(not_shuffled4, seed = 2)
print(shuffled5)
# The two shuffles print different results even though both use seed = 2.
# Why is this?
# TensorFlow derives a random sequence from a combination of two seeds:
#   - a global seed (via tf.random.set_seed)
#   - an op-level seed (via the `seed` argument)
# Calls of a random op with the same arguments reuse the same kernel, and that kernel keeps
# an internal counter which is incremented on every execution, so consecutive calls differ.
# Calling tf.random.set_seed resets such counters; resetting the global seed right before
# each seeded call therefore reproduces the same result.

tf.Tensor(
[[100 200 300]
 [ 40 500  60]
 [ 70 800  90]], shape=(3, 3), dtype=int32)
tf.Tensor(
[[100 200 300]
 [ 70 800  90]
 [ 40 500  60]], shape=(3, 3), dtype=int32)


In [None]:
# ******* LOOK THIS *******
tf.random.set_seed(123)
not_shuffled6 = tf.constant([[1,2],
                             [3,4],
                             [5,6],
                             [7,8]])
shuffled6 = tf.random.shuffle(not_shuffled6)
print(shuffled6) # # Global seed 123 + auto-generated op-seed for this shuffle
shuffled7 = tf.random.shuffle(not_shuffled6)
print(shuffled7) # # Global seed 123 + auto-generated op-seed for this shuffle
shuffled8 = tf.random.shuffle(not_shuffled6)
print(shuffled8) # # Global seed 123 + auto-generated op-seed for this shuffle

tf.Tensor(
[[3 4]
 [5 6]
 [1 2]
 [7 8]], shape=(4, 2), dtype=int32)
tf.Tensor(
[[1 2]
 [7 8]
 [3 4]
 [5 6]], shape=(4, 2), dtype=int32)
tf.Tensor(
[[1 2]
 [7 8]
 [5 6]
 [3 4]], shape=(4, 2), dtype=int32)


In [None]:
 # Continued from above: global seed combined with an op-level seed
tf.random.set_seed(13)

shuffled9 = tf.random.shuffle(not_shuffled6, seed=12)
print(shuffled9)

# The global seed is deliberately NOT reset here, so the second shuffle below
# continues the same random sequence and differs from shuffled9.
# Uncomment the next line to reset the global seed to the same value; the
# second shuffle should then reproduce shuffled9.
# tf.random.set_seed(13)

shuffled10 = tf.random.shuffle(not_shuffled6, seed=12)
print(shuffled10)


tf.Tensor(
[[5 6]
 [3 4]
 [1 2]
 [7 8]], shape=(4, 2), dtype=int32)
tf.Tensor(
[[1 2]
 [7 8]
 [5 6]
 [3 4]], shape=(4, 2), dtype=int32)


# Understanding Global Level seed and Operational Level seed

In [None]:
# If neither the global seed nor the operation seed is set,
# we get different results for every call to the random op and every re-run of the program:

print(tf.random.uniform([1]))
print(tf.random.uniform([1]))

tf.Tensor([0.5983684], shape=(1,), dtype=float32)
tf.Tensor([0.53933334], shape=(1,), dtype=float32)


In [None]:
# If the global seed is set but the operation seed is not set, we get
# different results for every call to the random op, but the same sequence for every re-run of the program:

tf.random.set_seed(1234)
print(tf.random.uniform([1]))
print(tf.random.uniform([1]))

tf.Tensor([0.5380393], shape=(1,), dtype=float32)
tf.Tensor([0.3253647], shape=(1,), dtype=float32)


In [None]:
# Note that tf.function acts like a re-run of a program in this case:
# the global seed is set but the operation seed is not set.
tf.random.set_seed(1234)

@tf.function
def f():
  """Return two uniform samples of shape [1], drawn without an op-level seed."""
  a = tf.random.uniform([1])
  b = tf.random.uniform([1])
  return a, b

@tf.function
def g():
  """Same body as f(); defined separately to compare two distinct tf.functions."""
  a = tf.random.uniform([1])
  b = tf.random.uniform([1])
  return a, b

# In the recorded output, both calls print the same pair of values.
print(f())
print(g())

(<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.13047123], dtype=float32)>, <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.1689806], dtype=float32)>)
(<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.13047123], dtype=float32)>, <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.1689806], dtype=float32)>)


In [None]:
# If the operation seed is set, we get different results for every call to the random op,
# but the same sequence for every re-run of the program:

print(tf.random.uniform([1], seed=1))  # generates 'A1'
print(tf.random.uniform([1], seed=1))  # generates 'A2'

# Reason
# The reason we get 'A2' instead 'A1' on the second call of tf.random.uniform above is because the same tf.random.uniform
#  kernel (i.e. internal representation) is used by TensorFlow for all calls of it with the same arguments,
# and the kernel maintains an internal counter which is incremented every time it is executed, generating different results.

# Calling tf.random.set_seed will reset any such counters:

tf.Tensor([0.1689806], shape=(1,), dtype=float32)
tf.Tensor([0.7539084], shape=(1,), dtype=float32)


In [None]:
# Calling tf.random.set_seed will reset any such counters:

tf.random.set_seed(1234)
print(tf.random.uniform([1], seed=1))  # generates 'A1'
print(tf.random.uniform([1], seed=1))  # generates 'A2'
tf.random.set_seed(1234)
print(tf.random.uniform([1], seed=1))  # generates 'A1'
print(tf.random.uniform([1], seed=1))  # generates 'A2'

tf.Tensor([0.1689806], shape=(1,), dtype=float32)
tf.Tensor([0.7539084], shape=(1,), dtype=float32)
tf.Tensor([0.1689806], shape=(1,), dtype=float32)
tf.Tensor([0.7539084], shape=(1,), dtype=float32)


In [None]:
# When multiple identical random ops are wrapped in a tf.function, their behaviors
# change because the ops no longer share the same counter. For example:

@tf.function
def foo():
  """Return two uniform samples of shape [1], both drawn with op-level seed 1."""
  a = tf.random.uniform([1], seed=1)
  b = tf.random.uniform([1], seed=1)
  return a, b
print(foo())  # prints '(A1, A1)'
print(foo())  # prints '(A2, A2)'

@tf.function
def bar():
  """Return two uniform samples of shape [1], drawn without an op-level seed."""
  a = tf.random.uniform([1])
  b = tf.random.uniform([1])
  return a, b
print(bar())  # prints '(A1, A2)'
print(bar())  # prints '(A3, A4)'

(<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.1689806], dtype=float32)>, <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.1689806], dtype=float32)>)
(<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.7539084], dtype=float32)>, <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.7539084], dtype=float32)>)
(<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.13047123], dtype=float32)>, <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.1689806], dtype=float32)>)
(<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.6087816], dtype=float32)>, <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.7539084], dtype=float32)>)


In [None]:
# In simple terms:
# ✅ in eager mode → identical random ops (same arguments) share one kernel, and therefore one counter
# ✅ in tf.function → random ops inside the graph have separate counters, so even if they are identical, they behave independently

In [None]:
# practice
tf.random.set_seed(42) # global level random set
non_shuffled10 = tf.constant([[1,2],[9,0],[8,0],[7,6]])
shuffled10 = tf.random.shuffle(non_shuffled10, seed = 42) # operational level random seed
shuffled10

<tf.Tensor: shape=(4, 2), dtype=int32, numpy=
array([[9, 0],
       [8, 0],
       [1, 2],
       [7, 6]], dtype=int32)>

# Other ways to make Tensors

In [None]:
# just like np.ones
tf.ones([2,3], dtype = tf.int32)

<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[1, 1, 1],
       [1, 1, 1]], dtype=int32)>

In [None]:
tf.zeros(shape = (2,3), dtype = tf.int32)

<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[0, 0, 0],
       [0, 0, 0]], dtype=int32)>

# Creating Tensors using Numpy Array

In [None]:
# The main difference between Numpy array and Tensorflow tensors is that tensors can be run on GPU
# which is much faster for numerical computing

In [None]:
# Tip
# X = capital for matrix
# y = small for vector

In [None]:
numpy_A = np.arange(1 ,25, 2) # Start , Stop , step size
numpy_A

array([ 1,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23])

In [None]:
tensor1 = tf.constant(numpy_A) # in numpy_A there are total 12 elements
tensor2 = tf.constant(numpy_A, shape = (2,3,2)) # 2* 3* 2 = 12
print(tensor1)
print(tensor2)

tf.Tensor([ 1  3  5  7  9 11 13 15 17 19 21 23], shape=(12,), dtype=int64)
tf.Tensor(
[[[ 1  3]
  [ 5  7]
  [ 9 11]]

 [[13 15]
  [17 19]
  [21 23]]], shape=(2, 3, 2), dtype=int64)


# GETTING INFORMATION FROM TENSORS

In [None]:
# Creating a rank-4 tensor
tensor = tf.zeros(shape = (2, 3, 4, 5))
tensor # Think of it as two blocks (cubes), each of which has three slices of four rows and five columns

<tf.Tensor: shape=(2, 3, 4, 5), dtype=float32, numpy=
array([[[[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]]],


       [[[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]]]], dtype=float32)>

In [None]:
tensor[0]

<tf.Tensor: shape=(3, 4, 5), dtype=float32, numpy=
array([[[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]], dtype=float32)>

In [None]:
tensor[:,1]

# it means:
# : → take all elements along the first dimension (the 2 blocks)
# 1 → take only the second matrix in each block (Python indexing starts at 0)

<tf.Tensor: shape=(2, 4, 5), dtype=float32, numpy=
array([[[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]], dtype=float32)>

In [None]:
tensor.shape, tf.size(tensor) , tensor.ndim # Note: tf.size is a function, not an attribute;
# the size is simply the product of the shape entries: 2*3*4*5 = 120

(TensorShape([2, 3, 4, 5]), <tf.Tensor: shape=(), dtype=int32, numpy=120>, 4)

In [None]:
# Getting various attributes from our tensor
print("Dimensions : ", tensor.ndim)
print("Shape : ", tensor.shape)
print("Elements along the 0 axis/dimension : ", tensor.shape[0])
print("Elements along the last axis/dimension : ", tensor.shape[-1])
print("Total elements in our tensor : ", tf.size(tensor))
print("Total elements in our tensor : ", tf.size(tensor).numpy())

Dimensions :  4
Shape :  (2, 3, 4, 5)
Elements along the 0 axis/dimension :  2
Elements along the last axis/dimension :  5
Total elements in our tensor :  tf.Tensor(120, shape=(), dtype=int32)
Total elements in our tensor :  120


# INDEXING AND SLICING TENSORS

In [None]:
# Getting the first two elements of each dimension
tensor[:2, :2, :2, :2]

# Visually described:

# block dimension:
# 2 blocks selected (:2)
# within each block, you:
# select 2 matrices
# inside each matrix, you:
# select first 2 rows
# inside each row, first 2 columns

<tf.Tensor: shape=(2, 2, 2, 2), dtype=float32, numpy=
array([[[[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]]],


       [[[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]]]], dtype=float32)>

In [None]:
# Getting the first element from each dimension except the final one
tensor[:1, :1, :1, :] # all columns of the first row of the first matrix/slice of the first block/cube

<tf.Tensor: shape=(1, 1, 1, 5), dtype=float32, numpy=array([[[[0., 0., 0., 0., 0.]]]], dtype=float32)>

In [None]:
# practice
rank_4_tensor = tf.constant([
  [  # block 0
    [ [ 1,  2,  3],
      [ 4,  5,  6] ],
    [ [ 7,  8,  9],
      [10, 11, 12] ]
  ],
  [  # block 1
    [ [13, 14, 15],
      [16, 17, 18] ],
    [ [19, 20, 21],
      [22, 23, 24] ]
  ]
])

print("shape:", rank_4_tensor.shape)
print(rank_4_tensor)


shape: (2, 2, 2, 3)
tf.Tensor(
[[[[ 1  2  3]
   [ 4  5  6]]

  [[ 7  8  9]
   [10 11 12]]]


 [[[13 14 15]
   [16 17 18]]

  [[19 20 21]
   [22 23 24]]]], shape=(2, 2, 2, 3), dtype=int32)


In [None]:
# Indexing and slicing
rank_4_tensor[0, 1, 0, -1] # last column of the first row of the second slice of the first block, i.e. 9

<tf.Tensor: shape=(), dtype=int32, numpy=9>

# Expanding the dimension of tensor

In [None]:
# creating a rank two tensor
random_35 = tf.random.Generator.from_seed(35)
random = random_35.uniform(shape = (2,3))
random

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0.71682584, 0.39619052, 0.00361979],
       [0.9058354 , 0.79911244, 0.7933638 ]], dtype=float32)>

In [None]:
# Getting the last element of each row of the random tensor above
random[:, -1]

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([0.00361979, 0.7933638 ], dtype=float32)>

In [None]:
# Expanding rank2 tensor
rank_2_tensor = tf.constant([[1,2,3],
                             [4,5,6]])
print(rank_2_tensor.shape)
rank_3_tensor = rank_2_tensor[:,:, tf.newaxis] # adding dimension at the end
rank_3_tensor

(2, 3)


<tf.Tensor: shape=(2, 3, 1), dtype=int32, numpy=
array([[[1],
        [2],
        [3]],

       [[4],
        [5],
        [6]]], dtype=int32)>

In [None]:
rank_3_tensor = rank_2_tensor[:, tf.newaxis, :] # adding dimension in the middle
rank_3_tensor

<tf.Tensor: shape=(2, 1, 3), dtype=int32, numpy=
array([[[1, 2, 3]],

       [[4, 5, 6]]], dtype=int32)>

In [None]:
rank_3_tensor = rank_2_tensor[tf.newaxis, :, :] # adding dimension at the beginning
rank_3_tensor

<tf.Tensor: shape=(1, 2, 3), dtype=int32, numpy=
array([[[1, 2, 3],
        [4, 5, 6]]], dtype=int32)>

In [None]:
rank_3_tensor = rank_2_tensor[..., tf.newaxis] # ... means “keep all existing dimensions as they are”. and add dimension at the end
rank_3_tensor

<tf.Tensor: shape=(2, 3, 1), dtype=int32, numpy=
array([[[1],
        [2],
        [3]],

       [[4],
        [5],
        [6]]], dtype=int32)>

In [None]:
rank_3_tensor = rank_2_tensor[tf.newaxis, ...] # ... means “keep all existing dimensions as they are”. and add dimension at the beginning
rank_3_tensor

<tf.Tensor: shape=(1, 2, 3), dtype=int32, numpy=
array([[[1, 2, 3],
        [4, 5, 6]]], dtype=int32)>

In [None]:
# Alternative to tf.newaxis is tf.expand_dims()
x = tf.constant([1, 2, 3])     # shape (3,)
y = tf.expand_dims(x, axis=0)  # shape (1, 3)
print(y)

tf.Tensor([[1 2 3]], shape=(1, 3), dtype=int32)


In [None]:
x = tf.constant([1, 2, 3])
z = tf.expand_dims(x, axis=-1)  # shape (3, 1)
print(z)


tf.Tensor(
[[1]
 [2]
 [3]], shape=(3, 1), dtype=int32)


In [None]:
m = tf.constant([[1, 2, 3],
                 [4, 5, 6]])   # shape (2, 3)

m_exp = tf.expand_dims(m, axis=1)  # shape (2, 1, 3)
print(m_exp)


tf.Tensor(
[[[1 2 3]]

 [[4 5 6]]], shape=(2, 1, 3), dtype=int32)


# Manipulating tensor

In [None]:
# You can add values to the tensor using addition operator
tensor = tf.constant([[10,7],[3,4]])
tensor + 10

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[20, 17],
       [13, 14]], dtype=int32)>

In [None]:
# The original tensor is unchanged by the `tensor + 10` expression
print(tensor)
# To keep the result, rebind the name to the new tensor.
# Note: re-running this cell adds another 10 each time, because it reads and rebinds the same name.
tensor = tensor + 10
print(tensor)

tf.Tensor(
[[10  7]
 [ 3  4]], shape=(2, 2), dtype=int32)
tf.Tensor(
[[20 17]
 [13 14]], shape=(2, 2), dtype=int32)


In [None]:
# Similarly subtraction, multiplication and division(operators) also works

In [None]:
# we can use tensorflow built-in function too
tensor7 = tf.constant([[1,2,3],[4,5,6]])
y = tf.multiply(tensor7, 10)
# TensorFlow built-in math operations (like tf.multiply, tf.add, tf.subtract) do not modify the original tensor.
# They always return a new tensor with the result.
print(y)
print(tensor7)

tf.Tensor(
[[10 20 30]
 [40 50 60]], shape=(2, 3), dtype=int32)
tf.Tensor(
[[1 2 3]
 [4 5 6]], shape=(2, 3), dtype=int32)


In [None]:
# Testing
tensor7 = tf.add(tensor7,10)
print(tensor7)
# By assigning tensor7 = tf.add(tensor7,10)
# you rebind the variable tensor7 to the new tensor, but the original constant tensor is unaffected.

tf.Tensor(
[[11 12 13]
 [14 15 16]], shape=(2, 3), dtype=int32)


## tf.concat vs tf.stack

In [None]:
i = tf.constant([1,2])
j = tf.constant([3,4])
print(tf.concat([i,j], axis = 0)) # NOTE: here axis = 1 gives error as concat doesn't increase the rank/dimension
print(tf.stack([i,j], axis = 0)) # Stack increases rank/dimension
print(tf.stack([i,j], axis = 1)) # Stack increases rank/dimension

tf.Tensor([1 2 3 4], shape=(4,), dtype=int32)
tf.Tensor(
[[1 2]
 [3 4]], shape=(2, 2), dtype=int32)
tf.Tensor(
[[1 3]
 [2 4]], shape=(2, 2), dtype=int32)


In [None]:
### NOTE: For tf.concat all the shape must be same except the concat axis ###
### NOTE: For tf.stack all the shape must be same ###
k = tf.constant([[1,2,5],[3,4,5]])
l = tf.constant([[5,6],[7,8]])
# print(tf.concat([k,l], axis = 0)) # NOTE: It produces error
print(tf.concat([k,l], axis = 1)) # Column wise so no error think or visualize

### BOTH OF THE BELOW PRODUCES ERROR ###
# print(tf.stack([k,l], axis = 0))
# # print(tf.stack([k,l], axis = 1))

tf.Tensor(
[[1 2 5 5 6]
 [3 4 5 7 8]], shape=(2, 5), dtype=int32)


# MATRIX MULTIPLICATION

In [None]:
# Matrix multiplication is one of the most common tensor operations in machine learning.
X = tf.constant([[1,2,3],[4,5,6]]) # shape (2,3)
Y = tf.constant([[1,2,3],[3,4,5]]) # shape (2,3)
print(X.shape,Y.shape)
# Rule: the inner dimensions must match, and the result takes the outer dimensions.
# Y is reshaped once to (3,2) so that (2,3) @ (3,2) is valid.
# NOTE: reshape refills the values in row-major order; it is not the transpose of Y.
Y_as_3x2 = tf.reshape(Y, shape = (3,2))
print(X.shape, Y_as_3x2.shape)
# Using the Python matrix-multiplication operator
print(X @ Y_as_3x2)
# Using the TensorFlow function tf.matmul
print(tf.matmul(X, Y_as_3x2))

(2, 3) (2, 3)
(2, 3) (3, 2)
tf.Tensor(
[[19 23]
 [43 53]], shape=(2, 2), dtype=int32)
tf.Tensor(
[[19 23]
 [43 53]], shape=(2, 2), dtype=int32)


# FOR TRANSPOSE

In [None]:
# tf.transpose(X)
# tf.transpose(X, perm = None, conjugate = False, name = None)
# Let's see what perm does
tensor = tf.constant([
                      [[1,2,3],
                       [4,5,6]],
                      [[7,8,9],
                       [10,11,12]]
                      ])
# tf.rank(tensor) # for dimension/rank
print(tensor)
print(tf.transpose(tensor, perm = None)) # as perm is None the axes/dimensions are reversed i.e (0,1,2)-->(2,1,0)
# i.e (slice, row, column) --> (column, row, slice)
print(tf.transpose(tensor, perm = [0,2,1]))# we have provided our own perm(list) so the new tensor will have (slice, column, row)

# Batch of matrices: transpose last two dims only
# What “batch matrix” means
# In TensorFlow docs, a batch of matrices means:
# You have a tensor with rank ≥ 2
# The last two dimensions are treated as a matrix
# Any extra leading dimensions are the “batch”

y = tf.linalg.matrix_transpose(tensor) # meaning to understand only --> tf.transpose(x, perm=[..., -1, -2])
print(y)

tf.Tensor(
[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]], shape=(2, 2, 3), dtype=int32)
tf.Tensor(
[[[ 1  7]
  [ 4 10]]

 [[ 2  8]
  [ 5 11]]

 [[ 3  9]
  [ 6 12]]], shape=(3, 2, 2), dtype=int32)
tf.Tensor(
[[[ 1  4]
  [ 2  5]
  [ 3  6]]

 [[ 7 10]
  [ 8 11]
  [ 9 12]]], shape=(2, 3, 2), dtype=int32)
tf.Tensor(
[[[ 1  4]
  [ 2  5]
  [ 3  6]]

 [[ 7 10]
  [ 8 11]
  [ 9 12]]], shape=(2, 3, 2), dtype=int32)


#DYNAMIC TENSORFLOW TRICK

In [None]:
### MOVING LAST AXIS TO THE FRONT ###
tensor = tf.constant([
                      [[1,2,3],
                       [4,5,6]],
                      [[7,8,9],
                       [10,11,12]]
                      ])
rank = tf.rank(tensor)
print(rank)
print(tensor.shape)
last_to_front = tf.concat([[rank-1], tf.range(0,rank-1)],axis = 0)
print(last_to_front)
print(tf.transpose(tensor, perm = last_to_front))

tf.Tensor(3, shape=(), dtype=int32)
(2, 2, 3)
tf.Tensor([2 0 1], shape=(3,), dtype=int32)
tf.Tensor(
[[[ 1  4]
  [ 7 10]]

 [[ 2  5]
  [ 8 11]]

 [[ 3  6]
  [ 9 12]]], shape=(3, 2, 2), dtype=int32)


#DIFFERENCE BETWEEN RESHAPE AND TRANSPOSE

In [None]:
x = tf.constant([[1,2,3],
                 [4,5,6]])  # shape (2,3)

print(tf.transpose(x))        # shape (3,2)
print(tf.reshape(x, (3,2)))   # shape (3,2)

tf.Tensor(
[[1 4]
 [2 5]
 [3 6]], shape=(3, 2), dtype=int32)
tf.Tensor(
[[1 2]
 [3 4]
 [5 6]], shape=(3, 2), dtype=int32)


#USING TENSOR DOT FOR MATRIX MULTIPLICATION

In [None]:
a = tf.constant([1, 2, 3])   # shape (3,)
b = tf.constant([4, 5, 6])   # shape (3,)

c = tf.tensordot(a, b, axes=1) # NOTE: For matrix multiplication we should use axes = 1
print(c)

d = tf.tensordot(a, b, axes=0)
print(d)
# axes=0 → multiply but don’t collapse → outer product, result gets bigger.
# axes=1 → multiply and collapse 1 axis → dot product or matrix multiplication.
# More generally → axes=n means “collapse n dimensions.”

tf.Tensor(32, shape=(), dtype=int32)
tf.Tensor(
[[ 4  5  6]
 [ 8 10 12]
 [12 15 18]], shape=(3, 3), dtype=int32)


In [None]:
### Another Example ###
A = tf.constant([[1, 2],
                 [3, 4]])   # shape (2,2)

B = tf.constant([[5, 6],
                 [7, 8]])   # shape (2,2)
C = tf.tensordot(A, B, axes=0)
print(C.shape)   # (2,2,2,2)
print(C)
D = tf.tensordot(A, B, axes=1) # Matrix multiplication
print(D.shape)   # (2,2)
print(D)

(2, 2, 2, 2)
tf.Tensor(
[[[[ 5  6]
   [ 7  8]]

  [[10 12]
   [14 16]]]


 [[[15 18]
   [21 24]]

  [[20 24]
   [28 32]]]], shape=(2, 2, 2, 2), dtype=int32)
(2, 2)
tf.Tensor(
[[19 22]
 [43 50]], shape=(2, 2), dtype=int32)


#CHANGING THE DATATYPE OF TENSOR

In [None]:
 # The default datatype depends on the values the tensor is built from
B = tf.constant([1.2,3.4])  # Python floats -> float32
print(B.dtype)
C = tf.constant([7,8])  # Python ints -> int32
print(C.dtype)

<dtype: 'float32'>
<dtype: 'int32'>


In [None]:
### CHANGING FROM FLOAT32 TO FLOAT16 (REDUCED PRECISION) AND FROM INT32 TO FLOAT32 ###
B = tf.cast(B, dtype = tf.float16)
# Print the dtype of B, the tensor that was just cast (not the unrelated `tensor` variable).
print(B.dtype)  # expected: <dtype: 'float16'>
C = tf.cast(C, dtype = tf.float32)
print(C.dtype)

<dtype: 'int32'>
<dtype: 'float32'>


# AGGREGATION IN TENSORS

In [None]:
### GETTING THE ABSOLUTE VALUES ###
D = tf.constant([-7, -10])
print(D)
print(tf.abs(D))

tf.Tensor([ -7 -10], shape=(2,), dtype=int32)
tf.Tensor([ 7 10], shape=(2,), dtype=int32)


In [None]:
### CREATING A RANDOM TENSOR WITH LOW 0 AND HIGH 100 OF SIZE 50 ###
np.random.seed(42) # Fixing the seed for reproducibility
E = tf.constant(np.random.randint(0, 100, size= 50))
E

<tf.Tensor: shape=(50,), dtype=int64, numpy=
array([51, 92, 14, 71, 60, 20, 82, 86, 74, 74, 87, 99, 23,  2, 21, 52,  1,
       87, 29, 37,  1, 63, 59, 20, 32, 75, 57, 21, 88, 48, 90, 58, 41, 91,
       59, 79, 14, 61, 61, 46, 61, 50, 54, 63,  2, 50,  6, 20, 72, 38])>

In [None]:
tf.size(E), E.shape, E.ndim

(<tf.Tensor: shape=(), dtype=int32, numpy=50>, TensorShape([50]), 1)

In [None]:
# Finding the min
tf.reduce_min(E)

<tf.Tensor: shape=(), dtype=int64, numpy=1>

In [None]:
# Finding the max
tf.reduce_max(E)

<tf.Tensor: shape=(), dtype=int64, numpy=99>

In [None]:
# Finding the mean
# NOTE: the result keeps the input dtype, so on this int64 tensor the mean is
# truncated to an integer (2542 / 50 = 50.84 is reported as 50).
# For the exact mean, cast first: tf.reduce_mean(tf.cast(E, tf.float32))
tf.reduce_mean(E)

<tf.Tensor: shape=(), dtype=int64, numpy=50>

In [None]:
# Finding the sum
tf.reduce_sum(E)

<tf.Tensor: shape=(), dtype=int64, numpy=2542>

In [None]:
# Finding the product
# NOTE: the recorded result is negative although every element is >= 1, which means
# the true product does not fit in int64 and has wrapped around (integer overflow).
tf.reduce_prod(E)

<tf.Tensor: shape=(), dtype=int64, numpy=-4760612669386391552>

In [None]:
### AGGREGATING HIGHER RANK TENSORS ###
# Using Numpy to create higher Rank Tensors
# Using Nested List Comprehension

np.random.seed(42) # Using seed for reproducibility
tensor = tf.constant([[np.random.randint(0,9) for _ in range(4)] for _ in range(3)])
print(tensor)

tf.Tensor(
[[6 3 7 4]
 [6 2 6 7]
 [4 3 7 7]], shape=(3, 4), dtype=int32)


In [None]:
# Finding the mean by collapsing rows (one value per column)
# NOTE: the tensor is int32, so each mean is truncated to an integer
# (e.g. the first column 6, 6, 4 averages 5.33 but is reported as 5).
tf.reduce_mean(tensor, axis = 0)

<tf.Tensor: shape=(4,), dtype=int32, numpy=array([5, 2, 6, 6], dtype=int32)>

In [None]:
# Finding the mean by collapsing columns (one value per row)
# NOTE: the tensor is int32, so each mean is truncated to an integer
# (e.g. the second row 6, 2, 6, 7 averages 5.25 but is reported as 5).
tf.reduce_mean(tensor, axis = 1)

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([5, 5, 5], dtype=int32)>

In [None]:
# Finding the mean overall
# NOTE: the tensor is int32, so the mean is truncated to an integer
# (the 12 elements sum to 62, so 62 / 12 = 5.17 is reported as 5).
tf.reduce_mean(tensor)

<tf.Tensor: shape=(), dtype=int32, numpy=5>

In [None]:
# Finding the variance
# Inorder to find the variance first casting the tensor to float32
tensor = tf.cast(tensor, dtype = tf.float32)
variance = tf.math.reduce_variance(tensor)
print(variance)


tf.Tensor(3.138889, shape=(), dtype=float32)


In [None]:
# Finding the variance along an axis
variance0 = tf.math.reduce_variance(tensor, axis= 0)  # one value per column
print(variance0)
variance1 = tf.math.reduce_variance(tensor, axis = 1)  # one value per row
print(variance1)

tf.Tensor([0.88888884 0.22222221 0.22222222 2.        ], shape=(4,), dtype=float32)
tf.Tensor([2.5    3.6875 3.1875], shape=(3,), dtype=float32)


In [None]:
# Finding the standard deviation which is basically the squareroot of variance
tf.math.reduce_std(tensor)

<tf.Tensor: shape=(), dtype=float32, numpy=1.7716909646987915>

In [None]:
# We can use the tensorflow_probability library to calculate variance too
tfp.stats.variance(tensor) # The output matches reduce_variance with axis = 0, i.e. without sample_axis the variance is taken along axis 0

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([0.88888884, 0.22222221, 0.22222222, 2.        ], dtype=float32)>

In [None]:
tfp.stats.variance(tensor, sample_axis = None) # Now it has provided overall variance

<tf.Tensor: shape=(), dtype=float32, numpy=3.1388890743255615>

## FINDING THE POSITIONAL MAXIMUM AND MINIMUM

In [None]:
# First creating a tensor and flattening it
tf.random.set_seed(42) # For reproducibility
tensor = tf.random.uniform(shape = (3,4), minval = 10, maxval = 99)
print(tensor)

# Method 1 of flattening
tensorflat1 = tf.reshape(tensor, shape = (12,))
print(tensorflat1)

# Method 2 of flattening
tensorflat2 = tf.reshape(tensor, [-1])
print(tensorflat2)

tensorflat1 == tensorflat2
# By looking at the output we can say that Method1 of flattening == Method2 of flattening

tf.Tensor(
[[69.14603  49.249603 41.406544 51.338947]
 [12.995777 70.93584  75.87045  87.64756 ]
 [30.143045 29.864521 37.624542 74.28789 ]], shape=(3, 4), dtype=float32)
tf.Tensor(
[69.14603  49.249603 41.406544 51.338947 12.995777 70.93584  75.87045
 87.64756  30.143045 29.864521 37.624542 74.28789 ], shape=(12,), dtype=float32)
tf.Tensor(
[69.14603  49.249603 41.406544 51.338947 12.995777 70.93584  75.87045
 87.64756  30.143045 29.864521 37.624542 74.28789 ], shape=(12,), dtype=float32)


<tf.Tensor: shape=(12,), dtype=bool, numpy=
array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True])>

In [None]:
# Finding the maximum value and positional max_value
print(tf.reduce_max(tensorflat1))
print(tf.argmax(tensorflat1)) # SAME
print(tf.math.argmax(tensorflat1)) # SAME

tf.Tensor(87.64756, shape=(), dtype=float32)
tf.Tensor(7, shape=(), dtype=int64)
tf.Tensor(7, shape=(), dtype=int64)


In [None]:
# Indexing on our largest value position
tensorflat1[tf.argmax(tensorflat1)]

<tf.Tensor: shape=(), dtype=float32, numpy=87.6475601196289>

In [None]:
# Using reduce_max to check
tf.reduce_max(tensorflat1)

<tf.Tensor: shape=(), dtype=float32, numpy=87.6475601196289>

In [None]:
# Finding the positional maximum value in axis = 0 that is collapsing the rows
print(tensor)
tf.math.argmax(tensor, axis = 0)

tf.Tensor(
[[69.14603  49.249603 41.406544 51.338947]
 [12.995777 70.93584  75.87045  87.64756 ]
 [30.143045 29.864521 37.624542 74.28789 ]], shape=(3, 4), dtype=float32)


<tf.Tensor: shape=(4,), dtype=int64, numpy=array([0, 1, 1, 1])>

In [None]:
# Finding the positional maximum value in axis = 1 that is collapsing the columns
print(tensor)
print(tf.math.argmax(tensor, axis = 1)) # postion / indices
print(tf.reduce_max(tensor, axis = 1)) # values

tf.Tensor(
[[69.14603  49.249603 41.406544 51.338947]
 [12.995777 70.93584  75.87045  87.64756 ]
 [30.143045 29.864521 37.624542 74.28789 ]], shape=(3, 4), dtype=float32)
tf.Tensor([0 3 3], shape=(3,), dtype=int64)
tf.Tensor([69.14603 87.64756 74.28789], shape=(3,), dtype=float32)


In [None]:
# Using tf.where to find the row and column co-ordinates
print(tensor)
# So to find the overall maximum value we need to flatten
print(tf.math.argmax(tensorflat1))
max_value = tf.reduce_max(tensor)
print(max_value)
print(tf.where(tensor == max_value)) # It gives the row and column coordinates

tf.Tensor(
[[69.14603  49.249603 41.406544 51.338947]
 [12.995777 70.93584  75.87045  87.64756 ]
 [30.143045 29.864521 37.624542 74.28789 ]], shape=(3, 4), dtype=float32)
tf.Tensor(7, shape=(), dtype=int64)
tf.Tensor(87.64756, shape=(), dtype=float32)
tf.Tensor([[1 3]], shape=(1, 2), dtype=int64)


## SQUEEZING A TENSOR (Removing all dimensions of size 1)

In [None]:
tf.random.set_seed(7)
G = tf.constant(tf.random.uniform(shape = (20,), minval = 0, maxval = 99), shape = (1,1,1,1,20))
# Here in the above code, it is similar to below
# data = tf.random.uniform([20], minval = 0, maxval = 99) --> shape (20,)
# G = tf.reshape(data, (1,1,1,1,20)) --> shape (1,1,1,1,20)
G

<tf.Tensor: shape=(1, 1, 1, 1, 20), dtype=float32, numpy=
array([[[[[82.61093 , 23.103294, 87.08554 ,  4.618273, 79.54619 ,
           93.25897 , 48.07487 , 95.005516, 65.222755, 11.156461,
           42.56243 , 59.784126,  9.299885,  8.150338, 86.6343  ,
           95.00018 , 24.242455, 46.510883, 74.40475 , 82.76895 ]]]]],
      dtype=float32)>

In [None]:
### SQUEEZE ###
G_squeezed = tf.squeeze(G)
G_squeezed, G_squeezed.shape


(<tf.Tensor: shape=(20,), dtype=float32, numpy=
 array([82.61093 , 23.103294, 87.08554 ,  4.618273, 79.54619 , 93.25897 ,
        48.07487 , 95.005516, 65.222755, 11.156461, 42.56243 , 59.784126,
         9.299885,  8.150338, 86.6343  , 95.00018 , 24.242455, 46.510883,
        74.40475 , 82.76895 ], dtype=float32)>,
 TensorShape([20]))

## ONE HOT ENCODING

In [None]:
# Creating a list of indices
indices = [0,1,2,3] # could be categories such as red, green, blue, purple
depth = 4
# Below is the one-hot encoded version of indices
print(tf.one_hot(indices, depth)) # output = 4 * 4
# What happens when depth is 5
# Every row gets one extra trailing column that is never "hot"
depth = 5
print(tf.one_hot(indices, depth)) # output = 4 * 5
# What happens when depth = 3
# Index 3 does not fit in 3 columns, so its row comes out as all zeros (no error is raised)
depth = 3
print(tf.one_hot(indices, depth)) # output = 4 * 3

tf.Tensor(
[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]], shape=(4, 4), dtype=float32)
tf.Tensor(
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]], shape=(4, 5), dtype=float32)
tf.Tensor(
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 0.]], shape=(4, 3), dtype=float32)


In [None]:
### SPECIFYING CUSTOM VALUES FOR ONE_HOT ENCODING ###
print(tf.one_hot(indices, depth = 4, on_value = 5, off_value = -1))

tf.Tensor(
[[ 5 -1 -1 -1]
 [-1  5 -1 -1]
 [-1 -1  5 -1]
 [-1 -1 -1  5]], shape=(4, 4), dtype=int32)


## ONE HOT ENCODING OF RANK 2 TENSOR

In [None]:
rank2 = tf.constant([[0,1],
                     [2,2]])
print(tf.one_hot(rank2, depth = 7)) # NOTE: Output shape = (2,2,depth)

tf.Tensor(
[[[1. 0. 0. 0. 0. 0. 0.]
  [0. 1. 0. 0. 0. 0. 0.]]

 [[0. 0. 1. 0. 0. 0. 0.]
  [0. 0. 1. 0. 0. 0. 0.]]], shape=(2, 2, 7), dtype=float32)


## MATH OPERATION, SQUARING, LOG, SQUARE ROOT

In [None]:
import math # NOTE: For e, import math
tensor = tf.constant([1, math.e, math.e ** 2])
print(tensor)

tf.Tensor([1.        2.7182817 7.389056 ], shape=(3,), dtype=float32)


In [None]:
### Using Logarithm ###
log = tf.math.log(tensor)
print(log)

tf.Tensor([0.         0.99999994 2.        ], shape=(3,), dtype=float32)


## TENSORS AND NUMPY

In [None]:
### TENSORFLOW INTERACTS BEAUTIFULLY WITH NUMPY ARRAYS ###
# Creating tensor using Numpy array
tensor = tf.constant(np.array([1,2,3]))
print(tensor)

tf.Tensor([1 2 3], shape=(3,), dtype=int64)


In [None]:
# Converting tensor to numpy array
# Method 1
np.array(tensor), type(np.array(tensor))

(array([1, 2, 3]), numpy.ndarray)

In [None]:
# Converting tensor to numpy array
# Method 2
tensor.numpy(), type(tensor.numpy())

(array([1, 2, 3]), numpy.ndarray)

In [None]:
tensor.numpy()[1]

np.int64(2)

In [None]:
### The default types of each are slightly different ###
numpy_tensor = tf.constant(np.array([1,2,3]))
tensor = tf.constant([1,2,3])
numpy_tensor.dtype, tensor.dtype

(tf.int64, tf.int32)

In [None]:
### NOTE ###
tensor = tf.constant([1,2,3], shape = (3,1)) # Reshape in tf.constant() itself.
print(tensor)
tensor1 = tf.constant([1,2,3], dtype = tf.float32) # Cast in tf.constant() itself.
print(tensor1)

tf.Tensor(
[[1]
 [2]
 [3]], shape=(3, 1), dtype=int32)
tf.Tensor([1. 2. 3.], shape=(3,), dtype=float32)


# tf.data.Dataset.from_tensors() and tf.data.Dataset.from_tensor_slices()

Here, tf.data is an API and Dataset is a class. from_tensors() and from_tensor_slices() are static methods of the Dataset class; each has its own use, and both return a Dataset object.

In [None]:
### USING from_tensors() STATIC METHOD ###
tensor = tf.constant([[1,2], [3,4]])
dataset_obj = tf.data.Dataset.from_tensors(tensor)
dataset_obj

<_TensorDataset element_spec=TensorSpec(shape=(2, 2), dtype=tf.int32, name=None)>

In [None]:
### NOW USING from_tensor_slices() STATIC METHOD TO SEE THE DIFFERENCE ###
set_obj = tf.data.Dataset.from_tensor_slices(tensor)
set_obj

<_TensorSliceDataset element_spec=TensorSpec(shape=(2,), dtype=tf.int32, name=None)>

In [None]:
# The main difference we can see is in the shape i.e shape = (2,2) for TensorDataset element_spec
# and shape = (2,) for TensorSliceDataset element_spec

In [None]:
### FOR from_tensor_slices() static method ###
# Each element is one row of the source tensor.
# The loop variable is `element` rather than `slice`, so the built-in slice type
# is not shadowed in the notebook namespace.
for element in set_obj:
  print(element)

tf.Tensor([1 2], shape=(2,), dtype=int32)
tf.Tensor([3 4], shape=(2,), dtype=int32)


In [None]:
### FOR from_tensors() static method ###
# The whole tensor is a single element, so this loop runs exactly once.
# The loop variable is `element` rather than `slice`, so the built-in slice type
# is not shadowed in the notebook namespace.
for element in dataset_obj:
  print(element)

tf.Tensor(
[[1 2]
 [3 4]], shape=(2, 2), dtype=int32)


# ANOTHER EXAMPLE

In [None]:
### Using from_tensors() static method ###
tensor1 = tf.constant([[1,2],[3,4],[5,6]])
label = tf.constant([3,1,2])
# dataset_obj1 = tf.data.Dataset.from_tensors(tensor1, label)
# Note: The commented line just above raises an error, because from_tensors() takes the data as ONE argument;
# `label` is not treated as a second component of the dataset element.
# (Observed error: “truth value of an array with more than one element is ambiguous”.)
# NOTE(review): the second positional parameter is presumably the optional `name` — confirm against the API docs.

# SOLUTION
# If you want (features, label) pairs in one dataset element, you need to pass a tuple as a single argument:
# Wrap (tensor1, label) as a tuple
dataset_obj1 = tf.data.Dataset.from_tensors((tensor1, label))
dataset_obj1

<_TensorDataset element_spec=(TensorSpec(shape=(3, 2), dtype=tf.int32, name=None), TensorSpec(shape=(3,), dtype=tf.int32, name=None))>

In [None]:
### FOR from_tensors() static method ###
# The single element is the (tensor1, label) tuple as a whole.
# The loop variable is `element` rather than `slice`, so the built-in slice type
# is not shadowed in the notebook namespace.
for element in dataset_obj1:
  print(element)

(<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[1, 2],
       [3, 4],
       [5, 6]], dtype=int32)>, <tf.Tensor: shape=(3,), dtype=int32, numpy=array([3, 1, 2], dtype=int32)>)


In [None]:
### Using from_tensor_slices() static method ###
# Both tensors are sliced along their first dimension and paired up row by row
set_obj1 = tf.data.Dataset.from_tensor_slices((tensor1, label))
set_obj1

<_TensorSliceDataset element_spec=(TensorSpec(shape=(2,), dtype=tf.int32, name=None), TensorSpec(shape=(), dtype=tf.int32, name=None))>

In [None]:
### FOR from_tensor_slices() static method ###
# Each element is a (features_row, label) tuple.
# The loop variable is `element` rather than `slice`, so the built-in slice type
# is not shadowed in the notebook namespace.
for element in set_obj1:
  print(element)

(<tf.Tensor: shape=(2,), dtype=int32, numpy=array([1, 2], dtype=int32)>, <tf.Tensor: shape=(), dtype=int32, numpy=3>)
(<tf.Tensor: shape=(2,), dtype=int32, numpy=array([3, 4], dtype=int32)>, <tf.Tensor: shape=(), dtype=int32, numpy=1>)
(<tf.Tensor: shape=(2,), dtype=int32, numpy=array([5, 6], dtype=int32)>, <tf.Tensor: shape=(), dtype=int32, numpy=2>)


# Here, set_obj1 consists of three elements, and each element is a tuple of two components (features, label).

In [None]:
### VERY, VERY IMPORTANT: unpacking nested tuples while zipping a dataset with a list ###
# zip() yields ((features, label), name) pairs, so the dataset element needs its own
# parentheses in the loop target.
# The label is bound to `target`, not `label`, so the `label` tensor that set_obj1
# was built from is not overwritten (re-running the earlier cells would otherwise fail).
names = ["First_element", "Second_element", "Third_element"]
for (features, target), position in zip(set_obj1, names):
  print(f"{position}")
  print(f"Features: {features}")
  print(f"Label: {target}")

First_element
Features: [1 2]
Label: 3
Second_element
Features: [3 4]
Label: 1
Third_element
Features: [5 6]
Label: 2


In [None]:
# Workings: iterating the dataset directly yields (features, label) tuples.
# The label is bound to `target`, not `label`, so the `label` tensor defined
# earlier in the notebook is not overwritten by the loop.
for features, target in set_obj1:
  print(f"Features: {features}")
  print(f"Label: {target}")

Features: [1 2]
Label: 3
Features: [3 4]
Label: 1
Features: [5 6]
Label: 2


In [None]:
# # Workings
# names = ["First_element", "Second_element", "Third_element"]
# for features, label, position in zip(set_obj1, names):
#   print(f"{position}")
#   print(f"Features: {features}")
#   print(f"Label: {label}")
### This raises an error: zip() yields 2-tuples of the form ((features, label), name),
### which cannot be unpacked into three names. Write `for (features, label), position in ...` instead.

# Chaining transformations using tf.data.Dataset

In [None]:
# dataset = tf.data.Dataset(tf.range(10)) This gives error as we can't instantiate abstract class using constructor.
dataset = tf.data.Dataset.range(5)
print(type(dataset))
# Below code will create an infinite execution
# dataset = dataset.repeat() # As there is no argument provided in the repeat() method, there will be infinite repetition.
# for item in dataset:
#   print(item)

dataset = dataset.repeat(2)
for item in dataset:
  print(item)
  print(type(item))
print(type(dataset))

<class 'tensorflow.python.data.ops.range_op._RangeDataset'>
tf.Tensor(0, shape=(), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(1, shape=(), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(2, shape=(), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(3, shape=(), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(4, shape=(), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(0, shape=(), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(1, shape=(), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(2, shape=(), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(3, shape=(), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(4, shape=(), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>
<class 'tensorflow.python.data.ops.repea

In [None]:
# We can use batch() and repeat() methods together.
# Both call orders are valid, but they are NOT equivalent when the batch size does not divide
# the dataset length: batch().repeat() repeats the same batches (including the short last one),
# whereas repeat().batch() lets batches run across the boundary between repetitions.

dataset = tf.data.Dataset.from_tensor_slices(tf.range(5))
print(type(dataset))
# dataset = dataset.repeat(2)
# for item in dataset:
#   print(item)
#   print(type(item))
# print(dataset)
# print(type(dataset))

# Batch size 5 divides the 5 elements exactly, so the next two pipelines print the same batches
dataset_1 = dataset.repeat(2).batch(5)
for item in dataset_1:
  print(item)
  print(type(item))
print(dataset_1)
print(type(dataset_1))

print(f"\nNext Dataset")
dataset_2 = dataset.batch(5).repeat(2)
for item in dataset_2:
  print(item)
  print(type(item))
print(dataset_2)
print(type(dataset_2))

# Batch size 3 does not divide 5: batching first gives [0 1 2], [3 4] and that pair is repeated
print(f"\nNext Dataset")
dataset_3 = dataset.batch(3).repeat(2)
for item in dataset_3:
  print(item)

# Repeating first produces 10 elements, which are then cut into batches of 3
print(f"\nNext Dataset")
dataset_4 = dataset.repeat(2).batch(3)
for item in dataset_4:
  print(item)

print(f"\nNext Dataset")
dataset_5 = dataset.batch(3, drop_remainder = True).repeat(2)
for item in dataset_5:
  print(item)

# Note: You can call batch(), with drop_remainder = True if you want it to drop the final batch, such that all
# batches have the exact same size.


<class 'tensorflow.python.data.ops.from_tensor_slices_op._TensorSliceDataset'>
tf.Tensor([0 1 2 3 4], shape=(5,), dtype=int32)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor([0 1 2 3 4], shape=(5,), dtype=int32)
<class 'tensorflow.python.framework.ops.EagerTensor'>
<_BatchDataset element_spec=TensorSpec(shape=(None,), dtype=tf.int32, name=None)>
<class 'tensorflow.python.data.ops.batch_op._BatchDataset'>

Next Dataset
tf.Tensor([0 1 2 3 4], shape=(5,), dtype=int32)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor([0 1 2 3 4], shape=(5,), dtype=int32)
<class 'tensorflow.python.framework.ops.EagerTensor'>
<_RepeatDataset element_spec=TensorSpec(shape=(None,), dtype=tf.int32, name=None)>
<class 'tensorflow.python.data.ops.repeat_op._RepeatDataset'>

Next Dataset
tf.Tensor([0 1 2], shape=(3,), dtype=int32)
tf.Tensor([3 4], shape=(2,), dtype=int32)
tf.Tensor([0 1 2], shape=(3,), dtype=int32)
tf.Tensor([3 4], shape=(2,), dtype=int32)

Next Dataset
tf.Tensor([0

# Using map() method of tf.data.Dataset class for transforming

In [None]:
dataset = tf.data.Dataset.from_tensor_slices(tf.range(5))
print(dataset)
print(type(dataset))

for item in dataset:
  print(item)
  print(type(item))

# Now, using map() method from tf.data.Dataset class along with lambda function.
ds = dataset.map(lambda x : x + 10)
print("\n")
print(ds)
print(type(ds))
for item in ds:
  print(item)
  print(type(item))

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>
<class 'tensorflow.python.data.ops.from_tensor_slices_op._TensorSliceDataset'>
tf.Tensor(0, shape=(), dtype=int32)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(1, shape=(), dtype=int32)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(2, shape=(), dtype=int32)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(3, shape=(), dtype=int32)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(4, shape=(), dtype=int32)
<class 'tensorflow.python.framework.ops.EagerTensor'>


<_MapDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>
<class 'tensorflow.python.data.ops.map_op._MapDataset'>
tf.Tensor(10, shape=(), dtype=int32)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(11, shape=(), dtype=int32)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(12, shape=(), dtype=int32)
<class 'tensorflow.python.framework.

# Now using batch() method along with map() method

In [None]:
dataset = tf.data.Dataset.range(5)
ds = dataset.batch(3)
print(ds)
print(type(ds))
for item in ds:
  print(item)
  print(type(item))

ds = ds.map(lambda x : x + 2) # Here, x is a batch.
print("\n")
print(type(ds))
for item in ds:
  print(item)
  print(type(item))

print("\n")
ds = dataset.batch(4).map(lambda x: x*2)
print(type(ds))
for item in ds:
  print(item)
  print(type(item))

<_BatchDataset element_spec=TensorSpec(shape=(None,), dtype=tf.int64, name=None)>
<class 'tensorflow.python.data.ops.batch_op._BatchDataset'>
tf.Tensor([0 1 2], shape=(3,), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor([3 4], shape=(2,), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>


<class 'tensorflow.python.data.ops.map_op._MapDataset'>
tf.Tensor([2 3 4], shape=(3,), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor([5 6], shape=(2,), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>


<class 'tensorflow.python.data.ops.map_op._MapDataset'>
tf.Tensor([0 2 4 6], shape=(4,), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor([8], shape=(1,), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>


# Using filter() method of tf.data.Dataset class

In [None]:
# 5 elements repeated 3 times = 15 elements, cut into batches of 7 (the last batch holds the 1 leftover)
dataset = tf.data.Dataset.range(5)
ds = dataset.repeat(3).batch(7)
print(type(ds))
for item in ds:
  print(item)
  print(type(item))

# Using filter() method as shown below:
# The predicate is evaluated once per batch; only batches whose elements sum to more than 12 are kept.
# Batch sums here: [0 1 2 3 4 0 1] -> 11, [2 3 4 0 1 2 3] -> 15, [4] -> 4, so only the second batch survives.
ds = ds.filter(lambda x: tf.reduce_sum(x) > 12) # Here x is a batch.
print("\n")
print(ds)
print(type(ds))
for item in ds:
  print(item)
  print(type(item))

<class 'tensorflow.python.data.ops.batch_op._BatchDataset'>
tf.Tensor([0 1 2 3 4 0 1], shape=(7,), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor([2 3 4 0 1 2 3], shape=(7,), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor([4], shape=(1,), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>


<_FilterDataset element_spec=TensorSpec(shape=(None,), dtype=tf.int64, name=None)>
<class 'tensorflow.python.data.ops.filter_op._FilterDataset'>
tf.Tensor([2 3 4 0 1 2 3], shape=(7,), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>


# Using take() method of tf.data.Dataset class

In [None]:
dataset = tf.data.Dataset.range(5)
# 10 elements in batches of 4: two full batches; drop_remainder = True discards the 2 leftover elements
ds = dataset.repeat(2).batch(4, drop_remainder = True)
print(type(ds))
for item in ds:
  print(item)
  print(type(item))

# take(1) on the batched dataset keeps only the first batch
ds = ds.take(1)
print("\n")
print(ds)
print(type(ds))
for item in ds:
  print(item)
  print(type(item))

# Here take(2) is applied to the original unbatched `dataset`, so it yields the first two scalar elements
ds = dataset.take(2)
print("\n")
print(ds)
print(type(ds))
for item in ds:
  print(item)
  print(type(item))

<class 'tensorflow.python.data.ops.batch_op._BatchDataset'>
tf.Tensor([0 1 2 3], shape=(4,), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor([4 0 1 2], shape=(4,), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>


<_TakeDataset element_spec=TensorSpec(shape=(4,), dtype=tf.int64, name=None)>
<class 'tensorflow.python.data.ops.take_op._TakeDataset'>
tf.Tensor([0 1 2 3], shape=(4,), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>


<_TakeDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>
<class 'tensorflow.python.data.ops.take_op._TakeDataset'>
tf.Tensor(0, shape=(), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(1, shape=(), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>


# Learning about shuffle() method

In [None]:
dataset = tf.data.Dataset.range(10)
print(dataset)
print(type(dataset))

ds = dataset.shuffle(buffer_size = 10, seed = 42) # Using buffer_size that is same as the length of dataset helps in perfect shuffling.
print(ds)
print(type(ds))

for item in ds:
  print(item)
  # print(type(item)) # EagerTensor

sam = tf.data.Dataset.from_tensor_slices(tf.range(10)).shuffle(10, seed = 42)
print("\n")
print(sam)
print(type(sam))

for item in sam:
  print(item)
  # print(type(item)) # EagerTensor


<_RangeDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>
<class 'tensorflow.python.data.ops.range_op._RangeDataset'>
<_ShuffleDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>
<class 'tensorflow.python.data.ops.shuffle_op._ShuffleDataset'>
tf.Tensor(8, shape=(), dtype=int64)
tf.Tensor(4, shape=(), dtype=int64)
tf.Tensor(7, shape=(), dtype=int64)
tf.Tensor(2, shape=(), dtype=int64)
tf.Tensor(0, shape=(), dtype=int64)
tf.Tensor(6, shape=(), dtype=int64)
tf.Tensor(5, shape=(), dtype=int64)
tf.Tensor(3, shape=(), dtype=int64)
tf.Tensor(9, shape=(), dtype=int64)
tf.Tensor(1, shape=(), dtype=int64)


<_ShuffleDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>
<class 'tensorflow.python.data.ops.shuffle_op._ShuffleDataset'>
tf.Tensor(8, shape=(), dtype=int32)
tf.Tensor(4, shape=(), dtype=int32)
tf.Tensor(7, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(6, shape=(), dtype=int32)

# Using shuffle(), repeat() and batch() method together.

In [None]:
dataset = tf.data.Dataset.range(5).repeat(2).batch(4)
print(dataset)
print(type(dataset))

for item in dataset:
  print(item)
  # print(type(item))

# Now using shuffle() method too.
ds = dataset.shuffle(buffer_size = 4, reshuffle_each_iteration = False)
print("\n")
print(ds)
print(type(ds))
for item in ds:
  print(item)
  # print(type(item))

dataset = tf.data.Dataset.range(5).repeat(2)
print("\n")
print(dataset)
print(type(dataset))
ds = dataset.shuffle(buffer_size = 10, reshuffle_each_iteration = False, seed = 42).repeat(2).batch(10)
print(ds)
print(type(ds))
for item in ds:
  print(item)
  # print(type(item))

dataset = tf.data.Dataset.range(5).repeat(2)
print("\n")
print(dataset)
print(type(dataset))
ds = dataset.shuffle(buffer_size = 10, reshuffle_each_iteration = True).repeat(2).batch(10)
print(ds)
print(type(ds))
for item in ds:
  print(item)
  # print(type(item))


<_BatchDataset element_spec=TensorSpec(shape=(None,), dtype=tf.int64, name=None)>
<class 'tensorflow.python.data.ops.batch_op._BatchDataset'>
tf.Tensor([0 1 2 3], shape=(4,), dtype=int64)
tf.Tensor([4 0 1 2], shape=(4,), dtype=int64)
tf.Tensor([3 4], shape=(2,), dtype=int64)


<_ShuffleDataset element_spec=TensorSpec(shape=(None,), dtype=tf.int64, name=None)>
<class 'tensorflow.python.data.ops.shuffle_op._ShuffleDataset'>
tf.Tensor([4 0 1 2], shape=(4,), dtype=int64)
tf.Tensor([0 1 2 3], shape=(4,), dtype=int64)
tf.Tensor([3 4], shape=(2,), dtype=int64)


<_RepeatDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>
<class 'tensorflow.python.data.ops.repeat_op._RepeatDataset'>
<_BatchDataset element_spec=TensorSpec(shape=(None,), dtype=tf.int64, name=None)>
<class 'tensorflow.python.data.ops.batch_op._BatchDataset'>
tf.Tensor([0 4 2 3 2 1 4 3 1 0], shape=(10,), dtype=int64)
tf.Tensor([0 4 2 3 2 1 4 3 1 0], shape=(10,), dtype=int64)


<_RepeatDataset element_spec=TensorS

# Using os, shutil and pathlib modules to create a file named xyz.txt that helps us understand shuffling in large datasets.

In [None]:
import os
import shutil
import pathlib
import random
from pathlib import Path

In [None]:
base = os.getcwd()
print(base)
file_name = "xyz.txt"
file_path = os.path.join(base, file_name)

# Open in write mode ('w'). This creates the file if it doesn't exist,
# and overwrites it if it does exist.

with open(file_path, "w") as f:
  # Write header
  f.write("Feature,Target\n")

  for i in range(1, 51):
    target = 1 if (i % 2 == 1) else 0 # Here, 0 == even and 1 == odd.
    f.write(f"{i},{target}\n")

/content


In [None]:
# Now reading multiple lines from the xyz.txt file as list with the help of readlines() method and shuffling them using random.shuffle(x)

random.seed(42) # For reproducibility of random.shuffle() below
base = os.getcwd()
xyz_file = os.path.join(base, "xyz.txt")
shuffle_file = os.path.join(base, "shuffle.txt")

# readlines() returns every line of the file (newline included) as a list of strings
with open (xyz_file, "r") as f:
  lines = f.readlines()
  print(type(lines))

# The first line is the header; the remaining lines are the data rows
header = lines[0]
data_lines = lines[1:]
print(data_lines) # Data rows in their original order, before shuffling

# random.shuffle() reorders the list in place
random.shuffle(data_lines)
print(data_lines)

# Note: only the shuffled data rows are written here; shuffle.txt has no header line
with open(shuffle_file, "w") as f:
  f.writelines(data_lines)

# Split into 5 files with 10 pairs each, using the Path class this time for learning purposes:
base = Path(base)
print(type(base)) # <class 'pathlib.PosixPath'> in this run; Path() picks the concrete class for the host OS

files_no = 5
pairs_per_file = 10
for i in range(files_no):
  # Consecutive, non-overlapping windows of pairs_per_file rows from the shuffled list
  start_index = i * pairs_per_file
  end_index = start_index + pairs_per_file
  file_path = base / f"file_path{i + 1}.txt"
  print(type(file_path))
  print(file_path)
  # Every split file starts with the header, followed by its share of the rows
  with file_path.open("w") as f:
    f.write(header)
    f.writelines(data_lines[start_index:end_index])

<class 'list'>
['1,1\n', '2,0\n', '3,1\n', '4,0\n', '5,1\n', '6,0\n', '7,1\n', '8,0\n', '9,1\n', '10,0\n', '11,1\n', '12,0\n', '13,1\n', '14,0\n', '15,1\n', '16,0\n', '17,1\n', '18,0\n', '19,1\n', '20,0\n', '21,1\n', '22,0\n', '23,1\n', '24,0\n', '25,1\n', '26,0\n', '27,1\n', '28,0\n', '29,1\n', '30,0\n', '31,1\n', '32,0\n', '33,1\n', '34,0\n', '35,1\n', '36,0\n', '37,1\n', '38,0\n', '39,1\n', '40,0\n', '41,1\n', '42,0\n', '43,1\n', '44,0\n', '45,1\n', '46,0\n', '47,1\n', '48,0\n', '49,1\n', '50,0\n']
['26,0\n', '24,0\n', '20,0\n', '12,0\n', '5,1\n', '46,0\n', '27,1\n', '10,0\n', '30,0\n', '17,1\n', '32,0\n', '22,0\n', '13,1\n', '4,0\n', '40,0\n', '39,1\n', '11,1\n', '25,1\n', '36,0\n', '1,1\n', '44,0\n', '19,1\n', '34,0\n', '49,1\n', '42,0\n', '31,1\n', '29,1\n', '21,1\n', '23,1\n', '43,1\n', '47,1\n', '37,1\n', '33,1\n', '45,1\n', '14,0\n', '50,0\n', '48,0\n', '3,1\n', '28,0\n', '38,0\n', '6,0\n', '35,1\n', '7,1\n', '9,1\n', '15,1\n', '16,0\n', '18,0\n', '2,0\n', '8,0\n', '41,1\n']
<

In [None]:
file_path = tf.data.Dataset.list_files("file_path*.txt", seed = 42)
print(file_path)
print(type(file_path))
for item in file_path:
  print(item)
  # print(type(item))

<_ShuffleDataset element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>
<class 'tensorflow.python.data.ops.shuffle_op._ShuffleDataset'>
tf.Tensor(b'./file_path4.txt', shape=(), dtype=string)
tf.Tensor(b'./file_path2.txt', shape=(), dtype=string)
tf.Tensor(b'./file_path3.txt', shape=(), dtype=string)
tf.Tensor(b'./file_path1.txt', shape=(), dtype=string)
tf.Tensor(b'./file_path5.txt', shape=(), dtype=string)


In [None]:
# With shuffle = False the matching file names come back in a fixed order.
# A seed only matters when shuffling, so none is passed here.
file_path = tf.data.Dataset.list_files("file_path*.txt", shuffle = False)
print(file_path)
print(type(file_path))
for item in file_path:
  print(item)
  # print(type(item))

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>
<class 'tensorflow.python.data.ops.from_tensor_slices_op._TensorSliceDataset'>
tf.Tensor(b'./file_path1.txt', shape=(), dtype=string)
tf.Tensor(b'./file_path2.txt', shape=(), dtype=string)
tf.Tensor(b'./file_path3.txt', shape=(), dtype=string)
tf.Tensor(b'./file_path4.txt', shape=(), dtype=string)
tf.Tensor(b'./file_path5.txt', shape=(), dtype=string)


# Let's learn about interleave() method in input pipeline

In [None]:
# Unshuffled list of the five split files, so the interleave order below is easy to follow
file_dataset = tf.data.Dataset.list_files("file_path*.txt", shuffle = False)
print(file_dataset)
print(type(file_dataset))
for item in file_dataset:
  print(item)
  # print(type(item))

# cycle_length = 5 reads from five files at a time; block_length = 1 takes one line from each in turn
ds = file_dataset.interleave(
    lambda file_path: tf.data.TextLineDataset(file_path).skip(1), # skipping the header line (Feature,Target)
    cycle_length = 5,
    block_length = 1,
    # num_parallel_calls = None
    # Parallel reading. Swap this line for the commented one above to compare: the printed dataset type changes.
    # NOTE(review): element order is presumably unchanged, since interleave is deterministic by default — confirm.
    num_parallel_calls = tf.data.AUTOTUNE
)
# To be clear, at this stage there will be seven datasets in all:
# the filepath dataset, the interleave dataset, and the five TextLineDatasets
# created internally by the interleave dataset.

print("\n")
print(ds)
print(type(ds))
for item in ds.take(10):
  print(item)
  # print(type(item))
print(ds.element_spec)

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>
<class 'tensorflow.python.data.ops.from_tensor_slices_op._TensorSliceDataset'>
tf.Tensor(b'./file_path1.txt', shape=(), dtype=string)
tf.Tensor(b'./file_path2.txt', shape=(), dtype=string)
tf.Tensor(b'./file_path3.txt', shape=(), dtype=string)
tf.Tensor(b'./file_path4.txt', shape=(), dtype=string)
tf.Tensor(b'./file_path5.txt', shape=(), dtype=string)


<_ParallelInterleaveDataset element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>
<class 'tensorflow.python.data.ops.interleave_op._ParallelInterleaveDataset'>
tf.Tensor(b'26,0', shape=(), dtype=string)
tf.Tensor(b'32,0', shape=(), dtype=string)
tf.Tensor(b'44,0', shape=(), dtype=string)
tf.Tensor(b'47,1', shape=(), dtype=string)
tf.Tensor(b'6,0', shape=(), dtype=string)
tf.Tensor(b'24,0', shape=(), dtype=string)
tf.Tensor(b'22,0', shape=(), dtype=string)
tf.Tensor(b'19,1', shape=(), dtype=string)
tf.Tensor(b'37,1', shape=(), dtype=string)
tf.Te

# Let's learn about tf.data.TextLineDataset

In [None]:
 import os
from pathlib import Path

# 1. Get the current working directory path
base = os.getcwd()

# 2. Create the Path object for the directory
base_path = Path(base)

# 3. Join the directory path with a NEW filename
file_path = base_path / "data.txt"

# 4. Open the NEW file for writing
with file_path.open(mode="w") as f:
    f.writelines(["Hello, World!\n", "This is line two.\n", "The final line.\n"])

print(f"Successfully wrote data to: {file_path}")

# Now using tf.data.TextLineDataset(

dataset = tf.data.TextLineDataset("data.txt")
print(dataset)
print(type(dataset))

print("\n")
for item in dataset:
  print(item)
  print(type(item))

print("\n")
for item in dataset:
  print(item.numpy())

print("\n")
for item in dataset:
  print(item.numpy().decode("utf-8"))

Successfully wrote data to: /content/data.txt
<TextLineDatasetV2 element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>
<class 'tensorflow.python.data.ops.readers.TextLineDatasetV2'>


tf.Tensor(b'Hello, World!', shape=(), dtype=string)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(b'This is line two.', shape=(), dtype=string)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(b'The final line.', shape=(), dtype=string)
<class 'tensorflow.python.framework.ops.EagerTensor'>


b'Hello, World!'
b'This is line two.'
b'The final line.'


Hello, World!
This is line two.
The final line.


In [None]:
def working():
  """Build and print a sample record_defaults-style list: eight floats plus an empty float32 tensor."""
  # Eight plain float defaults ...
  defs = [0.] * 8
  # ... followed by one empty float32 tensor as the last entry
  defs.append(tf.constant([], dtype = tf.float32))
  print(defs)
  print(type(defs))
working()

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, <tf.Tensor: shape=(0,), dtype=float32, numpy=array([], dtype=float32)>]
<class 'list'>


In [None]:
def working_element_spec():
  """Show the element_spec and the elements of a dataset sliced from a (features, labels) tuple."""
  # Both components must have the same size along the first dimension, e.g.
  # (tf.constant([5, 6], dtype = tf.float32), tf.constant([6])) fails with
  # ValueError: Dimensions 2 and 1 are not compatible
  features = tf.constant([[5, 6], [5, 5]], dtype = tf.float32)
  labels = tf.constant([1, 0])
  pairs = tf.data.Dataset.from_tensor_slices((features, labels))
  print(pairs.element_spec)
  # Every element is a (feature_row, label) tuple
  for pair in pairs:
    print(pair)
    print(type(pair))
working_element_spec()

(TensorSpec(shape=(2,), dtype=tf.float32, name=None), TensorSpec(shape=(), dtype=tf.int32, name=None))
(<tf.Tensor: shape=(2,), dtype=float32, numpy=array([5., 6.], dtype=float32)>, <tf.Tensor: shape=(), dtype=int32, numpy=1>)
<class 'tuple'>
(<tf.Tensor: shape=(2,), dtype=float32, numpy=array([5., 5.], dtype=float32)>, <tf.Tensor: shape=(), dtype=int32, numpy=0>)
<class 'tuple'>


# Let's understand the tf.io.decode_csv() function

In [None]:
file_names = tf.data.Dataset.list_files("file_path*.txt", shuffle = True, seed = 19)
print(file_names)
print(file_names.element_spec)
for item in file_names:
  print(item)

# Now using interleave method
dataset = file_names.interleave(
    lambda sam: tf.data.TextLineDataset(sam).skip(1),
    cycle_length = 5,
    block_length = 1,
    num_parallel_calls = None
)
print("\n")
for item in dataset.take(10):
  print(item)

def parse_csv_line(line):
  """Decode one "feature,target" CSV line into a pair of stacked tensors.

  Args:
    line: a single CSV record with two columns (string / bytes / scalar string tensor).

  Returns:
    A tuple (X, y): every column except the last stacked into X, and the last column stacked into y.
  """
  # One default per column: a float for the first one, an empty float32 tensor for the second
  record_defaults = [0., tf.constant([], dtype = tf.float32)]
  columns = tf.io.decode_csv(line, record_defaults = record_defaults)
  # Split off the last column as the target
  *feature_columns, target_column = columns
  return tf.stack(feature_columns), tf.stack([target_column])

def preprocess(line):
  """Turn one raw CSV line into an (X, y) tuple by delegating to parse_csv_line()."""
  features, target = parse_csv_line(line)
  parsed_pair = (features, target)
  return parsed_pair

preprocess(b"30,0")

<_ShuffleDataset element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>
TensorSpec(shape=(), dtype=tf.string, name=None)
tf.Tensor(b'./file_path1.txt', shape=(), dtype=string)
tf.Tensor(b'./file_path4.txt', shape=(), dtype=string)
tf.Tensor(b'./file_path5.txt', shape=(), dtype=string)
tf.Tensor(b'./file_path2.txt', shape=(), dtype=string)
tf.Tensor(b'./file_path3.txt', shape=(), dtype=string)


tf.Tensor(b'26,0', shape=(), dtype=string)
tf.Tensor(b'47,1', shape=(), dtype=string)
tf.Tensor(b'32,0', shape=(), dtype=string)
tf.Tensor(b'44,0', shape=(), dtype=string)
tf.Tensor(b'6,0', shape=(), dtype=string)
tf.Tensor(b'24,0', shape=(), dtype=string)
tf.Tensor(b'37,1', shape=(), dtype=string)
tf.Tensor(b'22,0', shape=(), dtype=string)
tf.Tensor(b'19,1', shape=(), dtype=string)
tf.Tensor(b'35,1', shape=(), dtype=string)


(<tf.Tensor: shape=(1,), dtype=float32, numpy=array([30.], dtype=float32)>,
 <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.], dtype=float32)>)

In [None]:
def working_abovecell(line):
  """Step-by-step version of parse_csv_line() that prints every intermediate value.

  Args:
    line: a single two-column CSV record.

  Returns:
    A tuple (X, y): the stacked leading column(s) and the stacked last column.
  """
  # Using 0 instead of 0. as the first default would make that column int32:
  # record_defaults = [0, tf.constant([], dtype = tf.float32)]
  # With 0. the column is decoded as float32
  record_defaults = [0., tf.constant([], dtype = tf.float32)]
  print(record_defaults)
  parsed = tf.io.decode_csv(line, record_defaults = record_defaults)
  print(parsed)
  print(type(parsed))
  # Everything but the last column -> features, the last column -> target
  feature_part, target_part = parsed[:-1], parsed[-1:]
  return tf.stack(feature_part), tf.stack(target_part)
hello = working_abovecell(b"31,1")
print(hello)
print(type(hello))

[0.0, <tf.Tensor: shape=(0,), dtype=float32, numpy=array([], dtype=float32)>]
[<tf.Tensor: shape=(), dtype=float32, numpy=31.0>, <tf.Tensor: shape=(), dtype=float32, numpy=1.0>]
<class 'list'>
(<tf.Tensor: shape=(1,), dtype=float32, numpy=array([31.], dtype=float32)>, <tf.Tensor: shape=(1,), dtype=float32, numpy=array([1.], dtype=float32)>)
<class 'tuple'>
