In [1]:
import tensorflow as tf
import numpy as np

### Matrix Multiplication

In [2]:
# Operands for the product: `a` is 2x3 and `b` is 3x3, so the inner
# dimensions agree and the result is 2x3.
a = tf.constant([
    [1, 2, 3],
    [3, 4, 5],
])
b = tf.constant([
    [5, 6, 7],
    [7, 8, 9],
    [9, 10, 11],
])

print(a.shape, b.shape)

# No transpose/adjoint/sparse options are needed here, so every optional
# keyword of tf.linalg.matmul is left at its default.
tf.linalg.matmul(a, b)

(2, 3) (3, 3)


<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[ 46,  52,  58],
       [ 88, 100, 112]])>

In [3]:
# On tensors the '*' operator multiplies element-wise, while '@' performs
# matrix multiplication. Only '@' is demonstrated here: `a` (2x3) and `b`
# (3x3) have compatible inner dimensions for a matrix product.
product = a @ b
print(product)

tf.Tensor(
[[ 46  52  58]
 [ 88 100 112]], shape=(2, 3), dtype=int32)


### Matrix Transpose

In [4]:
# Print the transpose of each operand in turn (rows become columns).
for matrix in (a, b):
    print(tf.transpose(matrix))

tf.Tensor(
[[1 3]
 [2 4]
 [3 5]], shape=(3, 2), dtype=int32)
tf.Tensor(
[[ 5  7  9]
 [ 6  8 10]
 [ 7  9 11]], shape=(3, 3), dtype=int32)


In [5]:
a = tf.constant([[1, 2, 3]])

b = tf.constant([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 1, 2, 3],
])

c = tf.constant([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
])

# Multiply `c` (2x4) by the transpose of `b` (4x3) in two equivalent ways.
b_t = tf.transpose(b)
print(a.shape, b.shape, c.shape, b_t.shape)

# 1) Transpose explicitly, then use the '@' operator.
print(c @ b_t)

# 2) Let matmul transpose its second operand; all other options stay at
#    their defaults.
tf.linalg.matmul(c, b, transpose_b=True)

(1, 3) (3, 4) (2, 4) (4, 3)
tf.Tensor(
[[ 30  70  29]
 [ 70 174  89]], shape=(2, 3), dtype=int32)


<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[ 30,  70,  29],
       [ 70, 174,  89]])>

In [6]:
a = tf.constant([[1, 2, 3]])

b = tf.constant([
    [1, 2],
    [5, 6],
    [9, 1],
])

c = tf.constant([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
])

b_t = tf.transpose(b)
c_t = tf.transpose(c)
print(a.shape, b.shape, c.shape, b_t.shape)

# Multiply the transpose of `c` (4x2) by the transpose of `b` (2x3).
# 1) Transpose both operands explicitly, then use the '@' operator.
print(c_t @ b_t)

# 2) Ask matmul to transpose both operands itself.
tf.linalg.matmul(c, b, transpose_a=True, transpose_b=True)

(1, 3) (3, 2) (2, 4) (2, 3)
tf.Tensor(
[[11 35 14]
 [14 46 24]
 [17 57 34]
 [20 68 44]], shape=(4, 3), dtype=int32)


<tf.Tensor: shape=(4, 3), dtype=int32, numpy=
array([[11, 35, 14],
       [14, 46, 24],
       [17, 57, 34],
       [20, 68, 44]])>

### Adjoint Matrix

In [7]:
# A 2x2 matrix times a 2x3 matrix gives a 2x3 product.
a = tf.constant([
    [1, 2],
    [4, 5],
])

b = tf.constant([
    [1, 2, 3],
    [4, 5, 6],
])

product = a @ b
print(product)

tf.Tensor(
[[ 9 12 15]
 [24 33 42]], shape=(2, 3), dtype=int32)


In [8]:
A = tf.constant([
    [4, 7],
    [2, 6],
])

# The adjoint is the conjugate transpose. For a real-valued matrix such as
# this integer one, conjugation changes nothing, so the result is simply
# the transpose.
adjoint_A = tf.linalg.adjoint(A)

print(adjoint_A)


tf.Tensor(
[[4 2]
 [7 6]], shape=(2, 2), dtype=int32)


In [9]:
# A 3x3 adjacency matrix (not to be confused with the *adjoint* computed
# below): zeros on the diagonal, ones everywhere else.
a = tf.constant([
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 0],
])

# The matrix is real and symmetric, so its adjoint (conjugate transpose)
# has the same entries as the matrix itself.
adjoint_A = tf.linalg.adjoint(a)

# Example of feeding the adjacency matrix to an ordinary TensorFlow
# operation: multiply it by itself.
result = a @ a

print(a)
print(adjoint_A)
print("Matrix Multiplication Result:")
print(result.numpy())


tf.Tensor(
[[0 1 1]
 [1 0 1]
 [1 1 0]], shape=(3, 3), dtype=int32)
tf.Tensor(
[[0 1 1]
 [1 0 1]
 [1 1 0]], shape=(3, 3), dtype=int32)
Matrix Multiplication Result:
[[2 1 1]
 [1 2 1]
 [1 1 2]]


### Extracting the lower or upper triangular part of a matrix

In [10]:
x = tf.constant([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
    [13, 14, 15, 16]])

# tf.linalg.band_part keeps `num_lower` sub-diagonals and `num_upper`
# super-diagonals and zeroes everything else. A negative count means
# "keep the whole triangle", so -1 is used instead of a hard-coded 3
# (which only equals n-1 for this particular 4x4 matrix and would silently
# drop the outer diagonals of anything larger).

# Extract the lower triangular part including the main diagonal
lower_triangle = tf.linalg.band_part(x, num_lower=-1, num_upper=0)

# Extract the upper triangular part including the main diagonal
upper_triangle = tf.linalg.band_part(x, num_lower=0, num_upper=-1)

print("Original Matrix:")
print(x.numpy())

print("Lower Triangular Part (including main diagonal):")
print(lower_triangle.numpy())

print("Upper Triangular Part (including main diagonal):")
print(upper_triangle.numpy())


Original Matrix:
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]
 [13 14 15 16]]
Lower Triangular Part (including main diagonal):
[[ 1  0  0  0]
 [ 5  6  0  0]
 [ 9 10 11  0]
 [13 14 15 16]]
Upper Triangular Part (including main diagonal):
[[ 1  2  3  4]
 [ 0  6  7  8]
 [ 0  0 11 12]
 [ 0  0  0 16]]


In [11]:
x = tf.constant(
    [
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        [9, 10, 11, 12],
        [13, 14, 15, 16],
    ],
    dtype=tf.float32,
)

# Keeping zero sub-diagonals and zero super-diagonals leaves only the main
# diagonal; the result is still a 4x4 matrix with zeros off the diagonal.
tf.linalg.band_part(x, num_lower=0, num_upper=0)

<tf.Tensor: shape=(4, 4), dtype=float32, numpy=
array([[ 1.,  0.,  0.,  0.],
       [ 0.,  6.,  0.,  0.],
       [ 0.,  0., 11.,  0.],
       [ 0.,  0.,  0., 16.]], dtype=float32)>

### Inverse Matrix

In [12]:
x = tf.constant([[1.0, 2.0], [3.0, 4.0]], dtype=tf.float32)

# Invert the 2x2 matrix. The computation is done in float32, so the printed
# entries carry small rounding errors.
inverse_matrix = tf.linalg.inv(x)

for heading, values in (
    ("Original Matrix:", x),
    ("\nInverse Matrix:", inverse_matrix),
):
    print(heading)
    print(values.numpy())


Original Matrix:
[[1. 2.]
 [3. 4.]]

Inverse Matrix:
[[-2.0000002   1.0000001 ]
 [ 1.5000001  -0.50000006]]


### Singular Value Decomposition

In [13]:

a = tf.constant(
    [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    dtype=tf.float32,
)

# Decompose `a`. The call is unpacked as (s, u, v): the vector of singular
# values comes first, followed by the two square factor matrices.
s, u, v = tf.linalg.svd(a)

for factor in (s, u, v):
    print(factor)

tf.Tensor([1.6848103e+01 1.0683696e+00 2.8763120e-07], shape=(3,), dtype=float32)
tf.Tensor(
[[ 0.21483716  0.8872305  -0.40824857]
 [ 0.5205872   0.24964423  0.8164965 ]
 [ 0.8263376  -0.3879429  -0.4082481 ]], shape=(3, 3), dtype=float32)
tf.Tensor(
[[ 0.47967106 -0.77669096  0.40824836]
 [ 0.5723676  -0.07568647 -0.81649655]
 [ 0.6650643   0.62531805  0.40824822]], shape=(3, 3), dtype=float32)


### Einstein summation notation

In [14]:
A = tf.constant([[1, 2, 3], [3, 4, 5]])

B = tf.constant([[5, 6, 7, 8], [7, 8, 9, 10], [1, 2, 3, 4]])

# 'ij,jk->ik': the index j appears in both inputs but not in the output, so
# it is summed over, which is exactly the matrix product of A (2x3) and
# B (3x4).
C = tf.einsum('ij,jk->ik', A, B)

for heading, matrix in (
    ("Matrix A:", A),
    ("\nMatrix B:", B),
    ("\nMatrix C (result of A * B):", C),
):
    print(heading)
    print(matrix.numpy())

Matrix A:
[[1 2 3]
 [3 4 5]]

Matrix B:
[[ 5  6  7  8]
 [ 7  8  9 10]
 [ 1  2  3  4]]

Matrix C (result of A * B):
[[22 28 34 40]
 [48 60 72 84]]


In [15]:
tensor = tf.constant([[1, 2, 3], [4, 5, 6]])

# An index that is dropped from the output is summed over.
# 'ij->j' drops i: sum over axis 0, one total per column.
sum_axis_0 = tf.einsum('ij->j', tensor)
# 'ij->i' drops j: sum over axis 1, one total per row.
sum_axis_1 = tf.einsum('ij->i', tensor)

for heading, values in (
    ("Tensor:", tensor),
    ("\nSum over axis 0:", sum_axis_0),
    ("\nSum over axis 1:", sum_axis_1),
):
    print(heading)
    print(values.numpy())

Tensor:
[[1 2 3]
 [4 5 6]]

Sum over axis 0:
[5 7 9]

Sum over axis 1:
[ 6 15]


In [16]:
# Both batch entries of `a` hold the same 3x4 matrix, and both batch entries
# of `b` hold the same 4x5 matrix.
a_block = [
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [1, 2, 3, 4],
]
b_block = [
    [1, 2, 3, 4, 5],
    [5, 6, 7, 8, 9],
    [1, 2, 3, 4, 5],
    [6, 7, 8, 9, 1],
]

a = np.array([a_block, a_block])
b = np.array([b_block, b_block])

print(a.shape, b.shape)

# Batch matrix multiplication: `a` is (2, 3, 4) and `b` is (2, 4, 5), so
# np.matmul multiplies the trailing 2-D matrices batch entry by batch entry.
print("Batch Multiplication C =: \n")
matmul_product = np.matmul(a, b)
print(matmul_product, "\n")

# The same operation with einsum: b is the batch index that is carried
# through, j is the contracted index.
print("Einsum C =: \n")
einsum_product = np.einsum('bij,bjk->bik', a, b)
print(einsum_product)

(2, 3, 4) (2, 4, 5)
Batch Multiplication C =: 

[[[ 38  48  58  68  42]
  [ 90 116 142 168 122]
  [ 38  48  58  68  42]]

 [[ 38  48  58  68  42]
  [ 90 116 142 168 122]
  [ 38  48  58  68  42]]] 

Einsum C =: 

[[[ 38  48  58  68  42]
  [ 90 116 142 168 122]
  [ 38  48  58  68  42]]

 [[ 38  48  58  68  42]
  [ 90 116 142 168 122]
  [ 38  48  58  68  42]]]


In [17]:
# Two identical 3x4 blocks stacked into a (2, 3, 4) array.
a_block = [
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [1, 2, 3, 4],
]
a = np.array([a_block, a_block])

# Total of every element, computed with np.sum ...
print("Sum A =: \n")
total = np.sum(a)
print(total, "\n")

# ... and with einsum: an empty output ('->' with nothing after it) sums
# over every index.
print("Einsum A =: \n")

einsum_total = np.einsum('bij->', a)
print(einsum_total)

Sum A =: 

92 

Einsum A =: 

92


In [18]:
a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [1, 2, 3, 4]])

# 'ij -> j' drops the row index i: the rows are summed together, leaving
# one total per column.
column_totals = np.einsum('ij -> j', a)
print("Einsum A =: \n")
print(column_totals)

# 'ij->i' drops the column index j: one total per row.
row_totals = np.einsum('ij->i', a)
print("Einsum A =: \n")
print(row_totals)

Einsum A =: 

[ 7 10 13 16]
Einsum A =: 

[10 26 10]


In [19]:

# Dimensions of the random inputs.
batch_size, s_q, s_k, model_size = 32, 64, 128, 512

# Q and K are filled with standard-normal random values.
Q = np.random.randn(batch_size, s_q, model_size)   # (batchsize, s_q, modelsize)
K = np.random.randn(batch_size, s_k, model_size)   # (batchsize, s_k, modelsize)

# Dot every Q row with every K row inside each batch entry: the shared
# model-size index m is absent from the output and therefore summed over,
# turning (32, 64, 512) and (32, 128, 512) into (32, 64, 128).
scores = np.einsum("bqm,bkm -> bqk", Q, K)
scores.shape

(32, 64, 128)

In [20]:
# Random inputs. No seed is set in the cells shown, so the values (and the
# array displayed below) change on every run.
A = np.random.randn(2, 4, 4, 2)  # axes: b, c, i, j
B = np.random.randn(2, 4, 4, 1)  # axes: b, c, i, k

# Contract the shared i axis once:
#   out[b, c, k, j] = sum_i B[b, c, i, k] * A[b, c, i, j]
contracted = np.einsum("bcik,bcij -> bckj", B, A)

# Previously the shape was evaluated on a non-final line, where a notebook
# neither displays nor checks it, and the contraction was computed a second
# time just to show the values. Assert the shape instead and display the
# single result.
assert contracted.shape == (2, 4, 1, 2)
contracted

array([[[[ 0.61534821,  0.62813828]],

        [[ 0.34404419, -0.40600019]],

        [[-3.6637371 , -0.69914974]],

        [[-5.62717854,  0.52922271]]],


       [[[ 1.28550177, -4.16697973]],

        [[-0.36503349, -1.50640348]],

        [[-1.25987635,  3.12841559]],

        [[-1.79871216,  0.37094244]]]])

In [21]:
# Matmul formulation of the einsum "bcik,bcij -> bckj": swap the last two
# axes of B (i, k -> k, i) and batch-multiply by A over the leading axes.
# The product is computed once; its shape is asserted rather than evaluated
# on a non-final line, where a notebook would silently discard it.
transposed_product = np.matmul(np.transpose(B, (0, 1, 3, 2)), A)
assert transposed_product.shape == (2, 4, 1, 2)
transposed_product

array([[[[ 0.61534821,  0.62813828]],

        [[ 0.34404419, -0.40600019]],

        [[-3.6637371 , -0.69914974]],

        [[-5.62717854,  0.52922271]]],


       [[[ 1.28550177, -4.16697973]],

        [[-0.36503349, -1.50640348]],

        [[-1.25987635,  3.12841559]],

        [[-1.79871216,  0.37094244]]]])

In [22]:
# Five-axis variant of the contraction: an extra leading axis d is carried
# through unchanged alongside b and c.
A = np.random.randn(2, 4, 6, 4, 2)  # axes: b, c, d, i, j
B = np.random.randn(2, 4, 6, 4, 1)  # axes: b, c, d, i, k

# Summing over the shared i axis leaves axes (b, c, d, k, j).
contracted_5d = np.einsum("bcdik,bcdij -> bcdkj", B, A)
contracted_5d.shape


(2, 4, 6, 1, 2)

In [23]:
# Display the values of the contraction over the shared i axis of
# B ("bcdik") and A ("bcdij"); the output axes are "bcdkj".
np.einsum("bcdik,bcdij -> bcdkj", B, A)

array([[[[[-0.6160982 ,  1.08888309]],

         [[ 1.160282  ,  0.28960134]],

         [[ 0.47699666,  1.18982735]],

         [[-2.88001308,  0.43434562]],

         [[-1.38101628,  3.13622184]],

         [[-1.66754104, -0.61744906]]],


        [[[ 2.80192204, -1.09732722]],

         [[-0.01767265, -0.82277838]],

         [[-0.15735092, -2.17926713]],

         [[ 2.29969997, -3.03637005]],

         [[ 4.09074366,  1.66537965]],

         [[-0.74404539, -0.37202687]]],


        [[[ 0.78415972,  1.54503949]],

         [[ 1.93826178,  1.33135563]],

         [[-0.6624809 , -1.96889554]],

         [[-0.39731574, -3.01252033]],

         [[-3.7684737 , -3.63590288]],

         [[ 3.25570569, -2.40164579]]],


        [[[ 1.05106578, -0.25927642]],

         [[-1.08456052, -0.37261672]],

         [[ 0.60656744, -1.21147885]],

         [[ 0.36448934, -0.62183485]],

         [[-2.42376117, -0.28552063]],

         [[-3.8603699 ,  0.16298472]]]],



       [[[[-4.31327338,  2.924