<a href="https://colab.research.google.com/github/CHRISJUMA-9/Tensor-Flow-Fundamentals/blob/main/03_All_about_Tensors_and_Variables.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# All about Tensors and Variables
Outline
- Basics
- Initialization and Casting
- Indexing
- Maths operations
- Linear Algebra operations
- Common TensorFlow functions
- Ragged Tensors
- Sparse Tensors
- String Tensors

#TENSORS

## Basics, initialization and casting

In [None]:
import tensorflow as tf
import numpy as np

In [None]:
# tf.random.set_seed fixes the global seed, so the sequence of random values produced below
# is the same every time the program is run from a fresh start.
# Note: the four calls use the same op-level seed (seed=10) yet print different tensors,
# because each call advances the random sequence; it is the sequence as a whole that repeats.
tf.random.set_seed(5)
print(tf.random.uniform(shape=[3,], maxval=5, dtype=tf.int32, seed=10))
print(tf.random.uniform(shape=[3,], maxval=5, dtype=tf.int32, seed=10))
print(tf.random.uniform(shape=[3,], maxval=5, dtype=tf.int32, seed=10))
print(tf.random.uniform(shape=[3,], maxval=5, dtype=tf.int32, seed=10))

tf.Tensor([4 3 1], shape=(3,), dtype=int32)
tf.Tensor([4 3 2], shape=(3,), dtype=int32)
tf.Tensor([1 1 1], shape=(3,), dtype=int32)
tf.Tensor([1 3 3], shape=(3,), dtype=int32)


In [None]:
tensor_zero_d = tf.constant(4)
print(tensor_zero_d)

tf.Tensor(4, shape=(), dtype=int32)


In [None]:
# Build a 1-D float tensor, then cast it to bool:
# 0.0 becomes False and every non-zero value (including negatives) becomes True.
tensor_one_d = tf.constant([2,0.,-3,8,90],dtype=tf.float32)
casted_tensor_one_d = tf.cast(tensor_one_d,dtype=tf.bool)
print(tensor_one_d)
print(casted_tensor_one_d)

tf.Tensor([ 2.  0. -3.  8. 90.], shape=(5,), dtype=float32)
tf.Tensor([ True False  True  True  True], shape=(5,), dtype=bool)


In [None]:
tensor_bool = tf.constant([True, True, False])
print(tensor_bool)

tf.Tensor([ True  True False], shape=(3,), dtype=bool)


In [None]:
tensor_string = tf.constant(["hello Chris, welcome to the AI world", "tschuss"])
print(tensor_string)

tf.Tensor([b'hello Chris, welcome to the AI world' b'tschuss'], shape=(2,), dtype=string)


In [None]:
tensor_two_d = tf.constant([
    [1,2,6],
    [3,5,-1],
    [1,5,6],
    [2,3,8]
])
print(tensor_two_d)

tf.Tensor(
[[ 1  2  6]
 [ 3  5 -1]
 [ 1  5  6]
 [ 2  3  8]], shape=(4, 3), dtype=int32)


In [None]:
tensor_three_d = tf.constant([
    [[1,2,0],
     [3,5,-1]],

    [[10,2,0],
     [1,0,2]],

    [[5,8,0],
     [2,7,0]],

    [[2,1,9],
     [4,-3,32]]

])
print(tensor_three_d)

tf.Tensor(
[[[ 1  2  0]
  [ 3  5 -1]]

 [[10  2  0]
  [ 1  0  2]]

 [[ 5  8  0]
  [ 2  7  0]]

 [[ 2  1  9]
  [ 4 -3 32]]], shape=(4, 2, 3), dtype=int32)


In [None]:
tensor_three_d.ndim

3

In [None]:
tensor_two_d.shape

TensorShape([4, 3])

In [None]:
tensor_four_d = tf.constant([
    [
     [[1,2,0],
     [3,5,-1]],

    [[10,2,0],
     [1,0,2]],

    [[5,8,0],
     [2,7,0]],

    [[2,1,9],
     [4,-3,32]]

],

      [[[13,26,0],
     [3,5,-1]],

    [[10,2,0],
     [1,0,23]],

    [[5,8,0],
     [2,73,0]],

    [[2,1,9],
     [4,-30,32]]

 ],
       [[[103,26,0],
     [3,5,-1]],

    [[10,2,0],
     [1,70,23]],

    [[5,8,0],
     [2,73,0]],

    [[28,1,9],
     [4,-30,32]]
],
])
print(tensor_four_d)

tf.Tensor(
[[[[  1   2   0]
   [  3   5  -1]]

  [[ 10   2   0]
   [  1   0   2]]

  [[  5   8   0]
   [  2   7   0]]

  [[  2   1   9]
   [  4  -3  32]]]


 [[[ 13  26   0]
   [  3   5  -1]]

  [[ 10   2   0]
   [  1   0  23]]

  [[  5   8   0]
   [  2  73   0]]

  [[  2   1   9]
   [  4 -30  32]]]


 [[[103  26   0]
   [  3   5  -1]]

  [[ 10   2   0]
   [  1  70  23]]

  [[  5   8   0]
   [  2  73   0]]

  [[ 28   1   9]
   [  4 -30  32]]]], shape=(3, 4, 2, 3), dtype=int32)


In [None]:
# How to convert numpy array into a tensor
import numpy as np

np_array = np.array([1,2,4])
print(np_array)

[1 2 4]


In [None]:
# tf.convert_to_tensor keeps the NumPy array's dtype, so the result here is int64,
# whereas tf.constant on plain Python ints (see the cells above) produced int32.
converted_tensor = tf.convert_to_tensor(np_array)
print(converted_tensor)

tf.Tensor([1 2 4], shape=(3,), dtype=int64)


In [None]:
# tf.eye builds an identity matrix, or a batch of identity matrices.
# num_columns=None means "same as num_rows", so each matrix is 6x6; batch_shape=[3,2]
# prepends those batch dimensions, giving a result of shape (3, 2, 6, 6).
# With dtype=bool the diagonal is True and every other entry is False.
eye_tensor = tf.eye(
    num_rows=6,
    num_columns=None,
    batch_shape=[3,2],
    dtype=tf.dtypes.bool,
    name=None
)
print(eye_tensor)

tf.Tensor(
[[[[ True False False False False False]
   [False  True False False False False]
   [False False  True False False False]
   [False False False  True False False]
   [False False False False  True False]
   [False False False False False  True]]

  [[ True False False False False False]
   [False  True False False False False]
   [False False  True False False False]
   [False False False  True False False]
   [False False False False  True False]
   [False False False False False  True]]]


 [[[ True False False False False False]
   [False  True False False False False]
   [False False  True False False False]
   [False False False  True False False]
   [False False False False  True False]
   [False False False False False  True]]

  [[ True False False False False False]
   [False  True False False False False]
   [False False  True False False False]
   [False False False  True False False]
   [False False False False  True False]
   [False False False False False  Tru

In [None]:
# tf.fill method which creates a tensor filled with a scalar value
# tf.fill(
   # dims, value, name=None, layout=None
#)
fill_tensor = tf.fill(
    [2,3,4],9, name=None
)
print(fill_tensor)

tf.Tensor(
[[[9 9 9 9]
  [9 9 9 9]
  [9 9 9 9]]

 [[9 9 9 9]
  [9 9 9 9]
  [9 9 9 9]]], shape=(2, 3, 4), dtype=int32)


In [None]:
# tf.ones which creates a tensor with all elements set to one
ones_tensor = tf.ones(
    [5,4],
    dtype=tf.dtypes.float32,
    name=None
)
print(ones_tensor)

tf.Tensor(
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]], shape=(5, 4), dtype=float32)


In [None]:
#tf.ones_like which creates tensor of all ones that has the same shape as input
ones_like_tensor = tf.ones_like(fill_tensor)
print(ones_like_tensor)

tf.Tensor(
[[[1 1 1 1]
  [1 1 1 1]
  [1 1 1 1]]

 [[1 1 1 1]
  [1 1 1 1]
  [1 1 1 1]]], shape=(2, 3, 4), dtype=int32)


In [None]:
# tf.zeros which creates a tensor with all eleents set to zero
zeros_tensor = tf.zeros(
    [3,6],
    dtype=tf.dtypes.float32,
    name=None
)
print(zeros_tensor)

tf.Tensor(
[[0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]], shape=(3, 6), dtype=float32)


In [None]:
# tf.shape returns a 1-D tensor holding the shape of its input.
# NOTE: the input here is the Python list [4,2,3], which TensorFlow treats as a 1-D tensor
# with three elements, so the result is [3] -- not [4 2 3].
# To get the shape of an existing tensor, pass the tensor itself, e.g. tf.shape(tensor_three_d).
tensor_shape = tf.shape(
    [4,2,3],
    out_type=tf.dtypes.int32,
    name=None
)
print(tensor_shape)

tf.Tensor([3], shape=(1,), dtype=int32)


In [None]:
# tf.rank which returns the rank of a tensor
t = tf.constant([[1,1,1], [2,2,2]])
tf.rank(t)

<tf.Tensor: shape=(), dtype=int32, numpy=2>

In [None]:
# tf.size returns the total number of elements in the tensor (3 here).
# out_type sets the dtype of the result; with tf.float32 the count comes back as 3.0.
tensor_size = tf.constant([2,3,4])
tf.size(tensor_size,out_type=tf.float32)

<tf.Tensor: shape=(), dtype=float32, numpy=3.0>

In [None]:
# Creating our own random tensor with tf.random.normal.
# The values are drawn from a normal distribution with the given mean (100.0)
# and standard deviation (1.0); seed=None means no op-level seed is set for this call.
random_tensor = tf.random.normal(
    [3,2],
    mean=100.0,
    stddev=1.0,
    dtype=tf.dtypes.float32,
    seed=None,
    name=None
)
print(random_tensor)

tf.Tensor(
[[ 99.819695  99.04971 ]
 [ 99.96036   99.25746 ]
 [101.32315   99.381454]], shape=(3, 2), dtype=float32)


In [None]:
# tf.random.uniform draws values from a uniform distribution over [minval, maxval):
# minval is inclusive and maxval is exclusive, so the integers here lie in 0..998.
# NOTE: this rebinds random_tensor, replacing the normal-distribution tensor from the previous cell.
random_tensor = tf.random.uniform(
    [5,4],
    minval=0,
    maxval=999,
    dtype=tf.dtypes.int32,
    seed=None,
    name=None
)
print(random_tensor)

tf.Tensor(
[[646  16 602 635]
 [ 69  91 647 141]
 [418 449 571 755]
 [899 180 816 655]
 [859 386 806 599]], shape=(5, 4), dtype=int32)


In [None]:
# Using the tf.random.set_seed function


## Indexing

In [None]:
# Slicing a 1-D tensor: tensor[start:stop:step] -- start is inclusive, stop is exclusive
tensor_indexed = tf.constant([3,6,2,4,6,66,7])
print(tensor_indexed)
print(tensor_indexed[0:4])
print(tensor_indexed[1:5])
print(tensor_indexed[1:5+1])
print(tensor_indexed[1:6:2]) # step 2 visits indices 1, 3, 5 -> values 6, 4, 66; the next index (7) is past the stop (6)
print(tensor_indexed[3:-1]) # from index 3 up to, but not including, the last element (whose value is 7)

tf.Tensor([ 3  6  2  4  6 66  7], shape=(7,), dtype=int32)
tf.Tensor([3 6 2 4], shape=(4,), dtype=int32)
tf.Tensor([6 2 4 6], shape=(4,), dtype=int32)
tf.Tensor([ 6  2  4  6 66], shape=(5,), dtype=int32)
tf.Tensor([ 6  4 66], shape=(3,), dtype=int32)
tf.Tensor([ 4  6 66], shape=(3,), dtype=int32)


In [None]:
# 2-D indexing takes one slice per axis: tensor[rows, columns]
tensor_two_d = tf.constant([
    [1,2,6], # From row 0 → columns 0 & 1 → [1, 2]
    [3,5,-1], # From row 1 → columns 0 & 1 → [3, 5]
    [1,5,6], # From row 2 → columns 0 & 1 → [1, 5]
    [2,3,8] # row 3 is left out: the row slice 0:3 stops before index 3
])
print(tensor_two_d[0:3,0:2]) # tensor[row_start:row_end, col_start:col_end]

tf.Tensor(
[[1 2]
 [3 5]
 [1 5]], shape=(3, 2), dtype=int32)


In [None]:
# Indexing in a 3-D tensor of shape (4, 2, 3)
tensor_three_d = tf.constant([
    [[1,2,0],
     [3,5,-1]],

    [[10,2,0],
     [1,0,2]],

    [[5,8,0],
     [2,7,0]],

    [[2,1,9],
     [4,-3,32]]

])
print(tensor_three_d[0:2, ... , 2 ]) # first two blocks, every row (the ellipsis), column index 2 only -> shape (2, 2)

tf.Tensor(
[[ 0 -1]
 [ 0  2]], shape=(2, 2), dtype=int32)


## Maths operations

###tf.abs (absolute value)

In [None]:
# tf.abs returns the absolute (non-negative) value of each element in the tensor
x_abs = tf.constant([-2.25, 3.25])
tf.abs(x_abs) # -2.25 -> 2.25, while 3.25 is unchanged

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([2.25, 3.25], dtype=float32)>

In [None]:
# For a complex number a + bj, tf.abs returns its magnitude sqrt(a^2 + b^2)
x_abs_complex = tf.constant([-2.25 + 4.75j])
tf.abs(x_abs_complex) # squares the real and imaginary parts, adds them, then takes the square root

<tf.Tensor: shape=(1,), dtype=float64, numpy=array([5.25594901])>

In [None]:
# Proof of the above: compute the same magnitude by hand, sqrt((-2.25)^2 + 4.75^2)
tf.sqrt(((-2.25)**2 + 4.75**2))

<tf.Tensor: shape=(), dtype=float32, numpy=5.255949020385742>

### tf.add, multiplication, division, subtraction

In [None]:
# Element-wise addition, multiplication, division, subtraction
x_1 = tf.constant([5,3,6,6,4,6], dtype = tf.int32)
x_2 = tf.constant([7,6,2,6,7,11], dtype = tf.int32)
print(tf.add(x_1, x_2)) # tf.subtract, tf.multiply and tf.divide can be used in place of tf.add

tf.Tensor([12  9  8 12 11 17], shape=(6,), dtype=int32)


###tf.divide_no_nan()

In [None]:
# tf.math.divide_no_nan divides element-wise, but returns 0 wherever the divisor is 0
# instead of producing inf or NaN.
# NOTE: x_2 below contains no zeros, so this example behaves like an ordinary division.
x_1 = tf.constant([5,3,6,6,4,6], dtype = tf.float32)
x_2 = tf.constant([7,6,2,6,7,11], dtype = tf.float32)
print(tf.math.divide_no_nan(x_1, x_2)) # e.g. 5/7 = 0.714...; any entry whose divisor is 0 would come back as 0.0

tf.Tensor([0.71428573 0.5        3.         1.         0.5714286  0.54545456], shape=(6,), dtype=float32)


In [None]:
# Broadcasting: when shapes differ, the smaller tensor is stretched to match the larger one.
# x_2 (shape (1,)) behaves exactly like x_2_stretched (shape (6,)), so both additions print the same result.
x_1 = tf.constant([5,3,6,6,4,6], dtype = tf.float32)
x_2 = tf.constant([7], dtype = tf.float32)
x_2_stretched = tf.constant([7,7,7,7,7,7], dtype = tf.float32)
print(tf.math.add(x_1, x_2))
print(tf.math.add(x_1, x_2_stretched))

tf.Tensor([12. 10. 13. 13. 11. 13.], shape=(6,), dtype=float32)
tf.Tensor([12. 10. 13. 13. 11. 13.], shape=(6,), dtype=float32)


###Different shapes multiplication

In [None]:
# Multiplying tensors of different shapes: (1, 6) * (3, 1) broadcasts to (3, 6).
# Every element of the row vector is multiplied by every element of the column vector.
x_1 = tf.constant([[5,3,6,6,4,6],], dtype = tf.float32)
x_2 = tf.constant([[7], [5], [3]], dtype = tf.float32)

print(x_1.shape)
print(x_2.shape)
print(tf.math.multiply(x_1, x_2))

(1, 6)
(3, 1)
tf.Tensor(
[[35. 21. 42. 42. 28. 42.]
 [25. 15. 30. 30. 20. 30.]
 [15.  9. 18. 18. 12. 18.]], shape=(3, 6), dtype=float32)


### Index of the maximum/minimum value (tf.math.argmax / tf.math.argmin)

In [None]:
# Index of the maximum / minimum value
# tf.math.argmax returns the index (position) of the largest value along an axis;
# tf.math.argmin does the same for the smallest value.
# (tf.math.maximum / tf.math.minimum are different ops: they compare two tensors element-wise.)
x_argmax = tf.constant([[2, 20, 30, 3, 6],
                        [3, 11, 16, 1, 8],
                        [14, 45, 23, 5, 27]])

print(x_argmax.shape) # (3, 5): 3 rows, 5 columns
print(tf.math.argmax(x_argmax, 0)) # axis=0 → find the index (row number) of the maximum value in each column.

(3, 5)
tf.Tensor([2 2 0 2 2], shape=(5,), dtype=int64)


### Element-wise power [tf.pow()]

In [None]:
# Let's look at power matrix
x = tf.constant([[2, 2], [3, 3]])
y = tf.constant([[3, 0], [1, 4]]) # basically its 2^3, 2^0, 3^1, 3^4
tf.pow(x, y)

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[ 8,  1],
       [ 3, 81]], dtype=int32)>

In [None]:
tf.pow(tf.constant(2) ,tf.constant(3))

<tf.Tensor: shape=(), dtype=int32, numpy=8>

### tf.math.reduce_sum()

In [None]:
# tf.math.reduce_sum computes the sum of elements across dimensions of a tensor
tensor_two_d = tf.constant([ # with axis=None every element is added together, giving 35
    [1,2,0], # reduce_max, reduce_min, etc. are used the same way
    [3,5,-1],
    [1,5,6],
    [2,3,8]
])

print(tf.math.reduce_sum(tensor_two_d, axis=None, keepdims=False, name=None))

tf.Tensor(35, shape=(), dtype=int32)


In [None]:
# tf.math.reduce_sum along a single axis
tensor_two_d = tf.constant([ # values differ from the previous cell; summing everything here would give 132
    [1,-2,0], # reduce_max, reduce_min, reduce_mean, reduce_std, etc. accept the same axis argument
    [3,5,100],
    [1,5,6],
    [2,3,8]
])
# With axis=0 we add down each column, leaving one sum per column: [7, 11, 114]
print(tensor_two_d.shape)
print(tf.math.reduce_sum(tensor_two_d, axis=0, keepdims=False, name=None))

(4, 3)
tf.Tensor([  7  11 114], shape=(3,), dtype=int32)


###tf.math.top_k()

In [None]:
# tf.math.top_k finds the values and indices of the k largest entries along the last dimension.
# For the 2-D tensor_two_d that means the k largest values of each row, largest first.
tf.math.top_k(tensor_two_d, k = 2) # k=2 -> the top two values per row and their column indices (k defaults to 1)

TopKV2(values=<tf.Tensor: shape=(4, 2), dtype=int32, numpy=
array([[  1,   0],
       [100,   5],
       [  6,   5],
       [  8,   3]], dtype=int32)>, indices=<tf.Tensor: shape=(4, 2), dtype=int32, numpy=
array([[0, 2],
       [2, 1],
       [2, 1],
       [2, 1]], dtype=int32)>)

## Linear Algebra Operations

###tf.linalg.matmul()

In [None]:
# Let's look at the tf.linalg.matmul, note that this is different from normal multiplication
# It multiplies matrix a by matrix b, producing a*b
x_1 = tf.constant([[1,2,0],
                   [3,5,-1]])
x_2 = tf.constant([[1,2,0,9],
                   [3,5,-1,6],
                  [4,5,6,3]])

print(x_1.shape, x_2.shape)
tf.linalg.matmul(
    x_1, x_2, transpose_a=False, transpose_b=False, adjoint_a=False,
    adjoint_b=False, a_is_sparse=False, b_is_sparse=False, output_type=None, name=None
)

(2, 3) (3, 4)


<tf.Tensor: shape=(2, 4), dtype=int32, numpy=
array([[  7,  12,  -2,  21],
       [ 14,  26, -11,  54]], dtype=int32)>

In [None]:
# Matrix multiplication can also be written with the @ operator
print(x_1 @ x_2) # matrix product; element-wise multiplication is written x_1 * x_2 instead

tf.Tensor(
[[  7  12  -2  21]
 [ 14  26 -11  54]], shape=(2, 4), dtype=int32)


###tf.transpose()

In [None]:
# Now lets look at matrix transpose which is obtained by flipping the original matrix over its main diagonal
x_1

<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[ 1,  2,  0],
       [ 3,  5, -1]], dtype=int32)>

In [None]:
tf.transpose(x_1) # i.e rows becomes columns and columns become rows

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[ 1,  3],
       [ 2,  5],
       [ 0, -1]], dtype=int32)>

###tf.linalg.band_part

In [None]:
# tf.linalg.band_part copies a tensor, setting everything outside a central band
# of each innermost matrix to zero
tensor_two_d = tf.constant([[1,-2,0],
                            [3,5,100],
                            [1,5,6],
                            [2,3,8]], dtype=tf.float32) # dtype set to tf.float32; this tensor is reused by the tf.linalg.svd cell further down


# An entry at row m, column n is kept when (m-n <= lower) and (n-m <= upper)
# m -- rows, n --- columns
# With lower = 0 and upper = 0 only the main diagonal (m == n) is kept
tf.linalg.band_part(tensor_two_d, 0, 0)

<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
array([[1., 0., 0.],
       [0., 5., 0.],
       [0., 0., 6.],
       [0., 0., 0.]], dtype=float32)>

###tf.linalg.inv() inverse of a matrix

In [None]:
# Now lets look at the inverse of a matrix
tensor_two_d

<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
array([[  1.,  -2.,   0.],
       [  3.,   5., 100.],
       [  1.,   5.,   6.],
       [  2.,   3.,   8.]], dtype=float32)>

In [None]:
# Inverse of a matrix.
# NOTE: tensor_two_d above is 4x3 and therefore has no inverse, so a separate square 3x3
# matrix is defined here. Despite its name, tensor_two_d_inv is the matrix being inverted,
# not the inverse itself.
tensor_two_d_inv = tf.constant([[1,21,3],
                           [9,11,6],
                           [7,8,9]], dtype=tf.float32)
tf.linalg.inv(tensor_two_d_inv) # only a square matrix (rows == columns) that is non-singular can be inverted

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[-0.06513409,  0.21072799, -0.11877395],
       [ 0.04980843,  0.01532567, -0.02681992],
       [ 0.00638569, -0.17752236,  0.22733079]], dtype=float32)>

#### Singular Value Decomposition (SVD)
For a matrix $A$, the SVD is:

$$A = U \cdot S \cdot V^T$$

Where:

- $U$ = left singular vectors (orthogonal)
- $S$ = singular values (diagonal matrix or vector)
- $V^T$ = transpose of the right singular vectors (orthogonal)



In [None]:
# Lets look at tf.linalg.svd
# Computes the singular value decompositions of one or more matrices
tensor_two_d

<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
array([[  1.,  -2.,   0.],
       [  3.,   5., 100.],
       [  1.,   5.,   6.],
       [  2.,   3.,   8.]], dtype=float32)>

In [None]:
# Singular Value Decomposition
# tf.linalg.svd returns the singular values first, then the singular vectors: (s, u, v).
# v holds the right singular vectors un-transposed, so A is approximately u @ diag(s) @ transpose(v).
s,u,v = tf.linalg.svd(tensor_two_d)
print(s) # singular values, shape (3,)
print(u) # left singular vectors, shape (4, 3)
print(v) # right singular vectors, shape (3, 3)

tf.Tensor([100.70144     5.8190866   1.8329673], shape=(3,), dtype=float32)
tf.Tensor(
[[-7.74652755e-04 -2.99280852e-01  7.63974667e-01]
 [ 9.94706869e-01 -9.16284993e-02 -4.46390249e-02]
 [ 6.25059605e-02  8.12258899e-01 -1.08075924e-01]
 [ 8.15519020e-02  4.92210150e-01  6.34563029e-01]], shape=(4, 3), dtype=float32)
tf.Tensor(
[[ 0.03186604  0.21008667  0.9771634 ]
 [ 0.05493734  0.97581375 -0.21158805]
 [ 0.99798125 -0.06042523 -0.01955372]], shape=(3, 3), dtype=float32)


##Einsum Operator
- lets you define element-wise, matrix, or batch operations using subscripts to describe the dimensions being used or reduced.
- A = shape 2,2 and B = shape 2,2
- Matrix multiplication A@B
- result = tf.einsum('ik,kj->ij', A, B)

In [None]:
# Let's see how Einsum Operator works
import numpy as np

In [None]:
# Matrix multiplication two ways: np.matmul and the equivalent einsum expression.
A = np.array([[2, 6, 5, 2],
              [2, -2, 2, 3],
              [1, 5, 4, 0]])

B = np.array([[2, 9, 0, 3, 0],
              [3, 6, 8, -2, 2],
              [1, 3, 5, 0, 1],
              [3, 0, 2, 0, 5]])

print(A.shape)
print(B.shape)

# (3, 4) @ (4, 5) -> (3, 5)
C_matmul = np.matmul(A, B)
print("Matmul C =: \n")
print(C_matmul, "\n")

# 'ij,jk -> ik': the shared index j is multiplied and summed away,
# leaving the rows i of A and the columns k of B.
C_einsum = np.einsum('ij,jk -> ik', A, B)
print("Einsum C =: \n")
print(C_einsum)

(3, 4)
(4, 5)
Matmul C = 1 

[[33 69 77 -6 27]
 [ 9 12  0 10 13]
 [21 51 60 -7 14]] 

Einsum C =: 

[[33 69 77 -6 27]
 [ 9 12  0 10 13]
 [21 51 60 -7 14]]


###Multiply matrix with same shape

In [None]:
# Two matrices of the same shape can also be multiplied element by element
# (the Hadamard product), as seen below.
A = np.array([[2, 6, 5, 2],
              [2, -2, 2, 3],
              [1, 5, 4, 0]])

B = np.array([[2, 9, 0, 3],
              [3, 6, 8, -2],
              [1, 3, 5, 0]])

print(A.shape, B.shape)

C_hadamard = A * B
print("Hadamard C =: \n")
print(C_hadamard, "\n")

# 'ij,ij -> ij': multiply entries with matching indices and keep both axes,
# so the output has the same shape as the inputs.
C_einsum = np.einsum('ij,ij -> ij', A, B)
print("Einsum C =:\n")
print(C_einsum)


(3, 4) (3, 4)
Hardamond C =: 

[[  4  54   0   6]
 [  6 -12  16  -6]
 [  1  15  20   0]] 

Einsum C =:

[[  4  54   0   6]
 [  6 -12  16  -6]
 [  1  15  20   0]]


###Transpose using einsum

In [None]:
# Transposing with einsum: swapping the index order ('ij -> ji') turns rows into columns.
A = np.array([
    [2, 6, 5, 2],
    [2, -2, 2, 3],
    [1, 5, 4, 0],
])

print("Transposed A =: \n")
print(np.transpose(A), "\n")

print("Einsum Transpose A =: \n")
print(np.einsum("ij -> ji", A))

Transposed A =: 

[[ 2  2  1]
 [ 6 -2  5]
 [ 5  2  4]
 [ 2  3  0]] 

Einsum Transpose A =: 

[[ 2  2  1]
 [ 6 -2  5]
 [ 5  2  4]
 [ 2  3  0]]


###Working with 3D arrays using einsum

####Batch multiplication

In [None]:
# Batch multiplication.
# A is a batch of two 3x4 matrices; B is a batch holding a single 4x5 matrix,
# which is multiplied with each of the two matrices in A.
A = np.array([
    [[2, 6, 5, 2], [2, -2, 2, 3], [1, 5, 4, 0]],
    [[1, 3, 1, 22], [0, 2, 2, 0], [1, 5, 4, 1]],
])

B = np.array([
    [[2, 9, 0, 3, 0], [3, 6, 8, -2, 2], [1, 3, 5, 0, 1], [3, 0, 1, 1, 0]],
])

print("Batch Multiplication C =: \n")
print(A @ B, "\n")

# 'bij,bjk -> bik': b is the batch axis, j is summed away within each batch entry
print("Einsum C =: \n")
print(np.einsum('bij,bjk -> bik', A, B))

Batch Multiplication C =: 

[[[33 69 75 -4 17]
  [ 9 12 -3 13 -2]
  [21 51 60 -7 14]]

 [[78 30 51 19  7]
  [ 8 18 26 -4  6]
  [24 51 61 -6 14]]] 

Einsum C =: 

[[[33 69 75 -4 17]
  [ 9 12 -3 13 -2]
  [21 51 60 -7 14]]

 [[78 30 51 19  7]
  [ 8 18 26 -4  6]
  [24 51 61 -6 14]]]


###summing

In [None]:
# Summing every element of a 3-D array
A = np.array([
    [[2, 6, 5, 2], [2, -2, 2, 3], [1, 5, 4, 0]],
    [[1, 3, 1, 22], [0, 2, 2, 0], [1, 5, 4, 1]],
])

print("Sum A =: \n")
print(A.sum(), "\n")

print("Einsum A =: \n")
# Nothing follows '->', so no index is kept: all three axes (b, i, j) are summed into one scalar.
# Naming an index after the arrow keeps that axis instead, e.g. 'bij -> j' gives one sum per column j.
print(np.einsum('bij -> ', A))

Sum A =: 

72 

Einsum A =: 

72


In [None]:
# Attention scores: the queries Q and keys K are combined with a (batched) matrix dot product,
#   Q . K^T / scaling
# (presumably softmax is then applied to this result -- this cell only sketches the formula)
# Q has shape (batch_size, s_q, model_size)
# K has shape (batch_size, s_k, model_size)

In [None]:
# Random stand-ins for the attention inputs, drawn from a seeded generator so the
# values are identical on every run of the notebook.
rng = np.random.default_rng(42)
Q = rng.standard_normal((32, 64, 512)) # Queries: (batch_size, s_q, model_size)
K = rng.standard_normal((32, 128, 512)) # Keys: (batch_size, s_k, model_size)

In [None]:
# Sum over the shared model axis m within each batch entry b:
# (32, 64, 512) with (32, 128, 512) -> (32, 64, 128), i.e. Q @ K^T per batch without an explicit transpose.
np.einsum("bqm,bkm -> bqk", Q, K).shape

(32, 64, 128)

###Transpose

In [None]:
# Seeded generator so the two einsum / matmul results shown below are reproducible.
rng = np.random.default_rng(42)
A = rng.standard_normal((2, 4, 4, 2)) # axes: b, c, i, j
B = rng.standard_normal((2, 4, 4, 1)) # axes: b, c, i, k

In [None]:
# Sum over the shared axis i: for each (b, c) pair this is transpose(B) @ A, giving shape (2, 4, 1, 2)
np.einsum("bcik,bcij -> bckj", B, A)

array([[[[ 0.05937029,  0.42758002]],

        [[ 1.23185766,  2.57193061]],

        [[-2.04515308, -1.41332738]],

        [[ 0.93346501,  1.15455629]]],


       [[[ 0.52232111, -3.30105119]],

        [[ 0.19183529,  1.0738897 ]],

        [[ 0.90255255, -0.19420842]],

        [[-0.45649071,  0.07310286]]]])

In [None]:
# Same result with an explicit transpose: swap the last two axes of B, then batch-matmul with A
np.matmul(np.transpose(B, (0,1,3,2)), A)

array([[[[ 0.05937029,  0.42758002]],

        [[ 1.23185766,  2.57193061]],

        [[-2.04515308, -1.41332738]],

        [[ 0.93346501,  1.15455629]]],


       [[[ 0.52232111, -3.30105119]],

        [[ 0.19183529,  1.0738897 ]],

        [[ 0.90255255, -0.19420842]],

        [[-0.45649071,  0.07310286]]]])

##Common Tensorflow Functions


###tf.expand_dims
- It returns a tensor with a length 1 axis inserted at index axis

In [None]:
tensor_three_d = tf.constant([
    [[1,2,0],
     [3,5,-1]],

    [[10,2,0],
     [1,0,2]],

    [[5,8,0],
     [2,7,0]],

    [[2,1,9],
     [4,-3,32]]

])
print(tensor_three_d.shape)
print(tf.expand_dims(tensor_three_d, axis = 0))

(4, 2, 3)
tf.Tensor(
[[[[ 1  2  0]
   [ 3  5 -1]]

  [[10  2  0]
   [ 1  0  2]]

  [[ 5  8  0]
   [ 2  7  0]]

  [[ 2  1  9]
   [ 4 -3 32]]]], shape=(1, 4, 2, 3), dtype=int32)


###tf.squeeze
- Removes dimensions of size 1 from the shape of a tensor

In [None]:
x = tf.constant([[[2,3,4,5]]])
print(x.shape)
x_expanded = tf.expand_dims(x, axis = 0)
print(x_expanded.shape)

(1, 1, 4)
(1, 1, 1, 4)


In [None]:
x_squeezed = tf.squeeze(x_expanded, axis = 0)
print(x_squeezed.shape)

(1, 1, 4)


###tf.reshape
- Basically reshapes a tensor

In [None]:
# Same squeeze as in the previous cell, followed by a reshape to a flat vector
x_squeezed = tf.squeeze(x_expanded, axis = 0)
print(x_squeezed.shape)
print(tf.reshape(x_squeezed, [4]).shape) # NOTE: the target shape must hold the same number of elements as the input tensor (1*1*4 = 4)


(1, 1, 4)
(4,)


In [None]:
# Reshape a (2, 4) tensor into a column of shape (8, 1)
x_reshape = tf.constant([[3,5,6,6],
                         [4,6,-1,2]])

tf.reshape(x_reshape, [8,1]) # the element counts must match: the (2, 4) input has 2*4 = 8 elements and so does the (8, 1) target

<tf.Tensor: shape=(8, 1), dtype=int32, numpy=
array([[ 3],
       [ 5],
       [ 6],
       [ 6],
       [ 4],
       [ 6],
       [-1],
       [ 2]], dtype=int32)>

###tf.concat
- It concatenates tensors along one dimension (the given axis)
- NOTE axis = 0 means across the rows, axis = 1 means across the columns

In [None]:
# Two 2x3 nested lists; tf.constant / tf.concat convert them to tensors
t1 = [[1,2,3],
      [4,5,6]]

t2 = [[7,8,9],
      [10,11,12]]

print(tf.constant(t1).shape)
print(tf.constant(t2).shape)

tf.concat([t1, t2], axis = 0) # like stacking the two 2x3 matrices on top of each other -> shape (4, 3)


(2, 3)
(2, 3)


<tf.Tensor: shape=(4, 3), dtype=int32, numpy=
array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]], dtype=int32)>

###tf.stack( )
- Adds a new dimension (axis) at the specified position

In [None]:
tf.stack([t1,t2, t1], axis = 1)

<tf.Tensor: shape=(2, 3, 3), dtype=int32, numpy=
array([[[ 1,  2,  3],
        [ 7,  8,  9],
        [ 1,  2,  3]],

       [[ 4,  5,  6],
        [10, 11, 12],
        [ 4,  5,  6]]], dtype=int32)>

### tf.stack(tensors, axis) is the same as tf.concat([tf.expand_dims(t, axis) for t in tensors], axis)

In [None]:
# tf.stack(tensors, axis) is equivalent to inserting a new length-1 axis into every tensor
# and then concatenating along that same axis. The axis given to tf.expand_dims must match
# the axis given to tf.concat (axis = 1 here, as in the tf.stack call above); the result then
# has shape (2, 3, 3), identical to tf.stack([t1, t2, t1], axis = 1).
tf.concat([tf.expand_dims(t, axis = 1) for t in [t1,t2,t1]], axis = 1)

<tf.Tensor: shape=(1, 6, 3), dtype=int32, numpy=
array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9],
        [10, 11, 12],
        [ 1,  2,  3],
        [ 4,  5,  6]]], dtype=int32)>

###tf.pad
- Used to add padding (extra values like zeros) to a tensor along one or more dimensions.

In [None]:
# paddings has one [before, after] pair per dimension of t
t = tf.constant([[1,2,3], [4,5,6]])
paddings = tf.constant([[1,1,], # pad 1 row on top, 1 on bottom (adjustable)
                        [2,2]]) # pad 2 columns left, 2 right (adjustable)

tf.pad(t, paddings, "CONSTANT", constant_values = 0)
# "REFLECT" or "SYMMETRIC" can be used in place of "CONSTANT"

<tf.Tensor: shape=(4, 7), dtype=int32, numpy=
array([[0, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 2, 3, 0, 0],
       [0, 0, 4, 5, 6, 0, 0],
       [0, 0, 0, 0, 0, 0, 0]], dtype=int32)>

###tf.gather
- Used to extract specific indices from a tensor along an axis (default is axis 0)
- Gather slices from params axis according to indices
- Indices must be an integer tensor of any dimension (often 1-D)

In [None]:
params = tf.constant(['p0', 'p1', 'p2', 'p3', 'p4', 'p5'])
params[1:3+1] # is shorthand for params[1:4]
# This slices from index 1 (inclusive) to index 4 (exclusive)

<tf.Tensor: shape=(3,), dtype=string, numpy=array([b'p1', b'p2', b'p3'], dtype=object)>

In [None]:
tf.gather(params, [1,2,3])

<tf.Tensor: shape=(3,), dtype=string, numpy=array([b'p1', b'p2', b'p3'], dtype=object)>

In [None]:
tf.gather(params, tf.range(1,4))

<tf.Tensor: shape=(3,), dtype=string, numpy=array([b'p1', b'p2', b'p3'], dtype=object)>

In [None]:
# Gathering along axis 1 picks whole columns
params = tf.constant([[0,1.0,2.0],
                      [10.0,11.0,12.0],
                      [20.0,21.0,22.0],
                      [30.0,31.0,32.0]])

print(params.shape)
tf.gather(params, [0, 2], axis = 1) # columns 0 and 2 of every row -> shape (4, 2)
# remember: axis = 0 signifies rows and axis = 1 signifies columns
# Note: nesting the indices in more brackets adds dimensions to the result

(4, 3)


<tf.Tensor: shape=(4, 2), dtype=float32, numpy=
array([[ 0.,  2.],
       [10., 12.],
       [20., 22.],
       [30., 32.]], dtype=float32)>

###tf.gather_nd
- Lets you gather values using multidimensional indices
- Like doing fancy indexing or advanced slicing in NumPy
- Gathers slices from params into a Tensor with shape specified by indices

In [None]:
# indices is a list of row indices, each wrapped in its own 1-element list:
indices = [[0], # refers to the 0th row
           [1]] # refers to the 1st row

# params is a 2-D nested list of shape (2, 2)
params = [['a', 'b'],
          ['c', 'd']]

tf.gather_nd(params, indices) # each 1-element index selects a whole row, so both rows are returned

<tf.Tensor: shape=(2, 2), dtype=string, numpy=
array([[b'a', b'b'],
       [b'c', b'd']], dtype=object)>

##Ragged Tensors
- Tensors with non-uniform shapes along one or more dimensions
- tf.ragged

In [None]:
tensor_two_d = tf.constant([
    [1,2,6],
    [3,5,-1],
    [1,5,6],
    [2,3,8]
])
print(tensor_two_d.shape)

(4, 3)


In [None]:
# Rows of different lengths are passed to tf.ragged.constant to build a RaggedTensor.
# NOTE: tensor_two_d is rebound here to a plain Python list (it was a tf.Tensor in the previous cell).
tensor_two_d = [[1,2,6],
                [3,],
                [1,5,6],
                [2,3]]

tensor_ragged = tf.ragged.constant(tensor_two_d)
print(tensor_ragged)
print(tensor_ragged.shape) # (4, None): 4 rows, with a row length that varies

<tf.RaggedTensor [[1, 2, 6], [3], [1, 5, 6], [2, 3]]>
(4, None)


###tf.ragged.boolean_mask
- Applies a boolean mask to data without flattening the mask dimensions
- Allows you to select elements based on a condition
- Same as tf.boolean_mask for dense tensors, but specifically designed to handle ragged (non-uniform) data


In [None]:
data = tf.ragged.constant([[1, 2, 3], [4, 5], [6]])
mask = tf.ragged.constant([[True, False, True], [False, True], [True]])

result = tf.ragged.boolean_mask(data, mask)
print(result) # Note, only values where mask is true are retained

<tf.RaggedTensor [[1, 3], [5], [6]]>


###Creating a ragged tensor

In [None]:
print(tf.RaggedTensor.from_row_lengths(
    values=[3, 1, 4, 1, 5, 9, 2, 6],
    row_lengths=[4, 0, 3, 1, 0] # Takes the first 4, then 0, then 3 etc
))

<tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>


##sparse tensors
- This is where most elements are zeros or missing values
- TensorFlow then stores only the non-zero values and their positions, which makes it more memory-efficient and faster

In [None]:
tensor_sparse = tf.sparse.SparseTensor(
    indices = [[1,1],[3,4]], values = [11,56], dense_shape = [5,6]
)

print(tensor_sparse)
# indices: locations of the non-zero elements
# values: actual non-zero values at those indices
# dense_shape: the full shape if all values were included (with zeros where not explicitly set)

SparseTensor(indices=tf.Tensor(
[[1 1]
 [3 4]], shape=(2, 2), dtype=int64), values=tf.Tensor([11 56], shape=(2,), dtype=int32), dense_shape=tf.Tensor([5 6], shape=(2,), dtype=int64))


In [None]:
tf.sparse.to_dense(tensor_sparse)

<tf.Tensor: shape=(5, 6), dtype=int32, numpy=
array([[ 0,  0,  0,  0,  0,  0],
       [ 0, 11,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0, 56,  0],
       [ 0,  0,  0,  0,  0,  0]], dtype=int32)>

##String Tensors
- They hold string values instead of numeric data
- Useful in natural language processing(NLP), text classification, tokenization, etc

In [None]:
tensor_string = tf.constant(["hello", "I am", "the best",  "AI Engineer", "to ever exist"])
print(tensor_string)
# The b prefix in Python stands for byte strings.
# It means that the strings are stored as raw bytes rather than Unicode objects.

tf.Tensor([b'hello' b'I am' b'the best' b'AI Engineer' b'to ever exist'], shape=(5,), dtype=string)


###Join method
- Used to concatenate strings along a specified axis or with a separator.
- Similar to Python’s "sep".join(list) but works on TensorFlow string tensors, including batches and RaggedTensors.
- tf.strings.join



In [None]:
tensor_string

<tf.Tensor: shape=(5,), dtype=string, numpy=
array([b'hello', b'I am', b'the best', b'AI Engineer', b'to ever exist'],
      dtype=object)>

In [None]:
# Passing the 1-D string tensor joins its five elements into one scalar string,
# with the separator placed between consecutive elements.
tf.strings.join(tensor_string, separator=" + ")

<tf.Tensor: shape=(), dtype=string, numpy=b'hello + I am + the best + AI Engineer + to ever exist'>

#VARIABLES
- tf.Variable is a special kind of tensor whose value can be changed during execution
- unlike tf.constant or tf.Tensor, which are immutable

Essential for:
- Storing weights in neural networks
- Keeping track of training parameters
- Building trainable models

In [None]:
# Wrap an existing tensor in a mutable tf.Variable.
# NOTE: x is the (1, 1, 4) tensor defined back in the tf.squeeze section, so that cell must have run first.
x_var = tf.Variable(x, name = 'var1')
print(x_var)

<tf.Variable 'var1:0' shape=(1, 1, 4) dtype=int32, numpy=array([[[2, 3, 4, 5]]], dtype=int32)>


###Assign subtraction
- assign_sub()
- Used to subtract a value from a tf.Variable and update the variable in place.

In [None]:
# The tensor being subtracted needs the same shape as the variable, [1,1,4].
# NOTE: assign_sub updates x_var in place, so every re-run of this cell subtracts again.
# Starting from [2,3,4,5], one run gives [-1,-3,4,5]; the saved output [-4,-9,4,5] is the result of running it twice.
x_var.assign_sub(tf.constant([[[3,6,0,0]]], dtype=tf.int32))

<tf.Variable 'UnreadVariable' shape=(1, 1, 4) dtype=int32, numpy=array([[[-4, -9,  4,  5]]], dtype=int32)>

In [None]:
with tf.device('CPU:0'): # Place any new tensors or variables created inside this block on the first CPU (CPU:0) if one is available
  x_var = tf.Variable(0.2)
  x_tensor = tf.constant(0.2)

print(x_var.device)
print(x_tensor.device)

/job:localhost/replica:0/task:0/device:CPU:0
/job:localhost/replica:0/task:0/device:CPU:0


In [None]:
# Everything created inside the outer block is placed on CPU:0.
# NOTE: the inner tf.device('CPU:0') names the same device as the outer block, so it changes nothing here;
# nesting only matters when the inner block selects a different device (e.g. 'GPU:0', if one is available).
with tf.device('CPU:0'):
  x_1 = tf.constant([1,3,4])
  x_2 = tf.constant([1])

  with tf.device('CPU:0'):
    x_3 = x_1 + x_2 # x_2 (shape (1,)) is broadcast across x_1

  print(x_1, x_1.device)
  print(x_2, x_2.device)
  print(x_3, x_3.device)

tf.Tensor([1 3 4], shape=(3,), dtype=int32) /job:localhost/replica:0/task:0/device:CPU:0
tf.Tensor([1], shape=(1,), dtype=int32) /job:localhost/replica:0/task:0/device:CPU:0
tf.Tensor([2 4 5], shape=(3,), dtype=int32) /job:localhost/replica:0/task:0/device:CPU:0
