## Linear Algebra Operations

### Matmul Method

* Multiplies matrix `a` by matrix `b`, producing the matrix product `a @ b` (a true matrix multiplication, not an element-wise product).
* [Document](https://www.tensorflow.org/api_docs/python/tf/linalg/matmul?hl=en)

#### The Number of Columns in First Tensor Should Match the Number of Rows in Second Tensor

In [None]:
# Both operands have shape (2, 3): the first has 3 columns but the second has
# only 2 rows, so the matrix product is undefined.
x_1 = tf.constant([[1, 2, 0], [3, 5, -1]])
x_2 = tf.constant([[1, 2, 0], [3, 5, -1]])

print(x_1.shape, x_2.shape)

# The failure is the point of this cell, so catch it and show the message
# instead of letting the exception abort a "Restart & Run All".
try:
    tf.linalg.matmul(
        x_1,
        x_2,
        transpose_a=False,
        transpose_b=False,
        adjoint_a=False,
        adjoint_b=False,
        a_is_sparse=False,
        b_is_sparse=False,
        output_type=None,
        name=None
    )
except tf.errors.InvalidArgumentError as err:
    print("InvalidArgumentError:", err)

(2, 3) (2, 3)


InvalidArgumentError: {{function_node __wrapped__MatMul_device_/job:localhost/replica:0/task:0/device:CPU:0}} Matrix size-incompatible: In[0]: [2,3], In[1]: [2,3] [Op:MatMul]

In [None]:
# A (2, 3) matrix times a (3, 3) matrix: the 3 columns of the first operand
# line up with the 3 rows of the second, so the product is defined.
x_1 = tf.constant([
    [1, 2, 0],
    [3, 5, -1],
])
x_2 = tf.constant([
    [1, 2, 0],
    [3, 5, -1],
    [4, 5, 6],
])

print(x_1.shape, x_2.shape)

# Every optional argument is written out explicitly to show the full call
# signature; the cell's last expression is displayed as its result.
tf.linalg.matmul(
    a=x_1,
    b=x_2,
    transpose_a=False, transpose_b=False,
    adjoint_a=False, adjoint_b=False,
    a_is_sparse=False, b_is_sparse=False,
    output_type=None,
    name=None,
)

(2, 3) (3, 3)


<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[  7,  12,  -2],
       [ 14,  26, -11]])>

In [None]:
# `@` is the matrix-multiplication operator; the printed values match the
# tf.linalg.matmul result shown for the same operands.
print(x_1 @ x_2)

tf.Tensor(
[[  7  12  -2]
 [ 14  26 -11]], shape=(2, 3), dtype=int32)


### Transpose Method

* Transposes a, where a is a Tensor.
* [Document](https://www.tensorflow.org/api_docs/python/tf/transpose?hl=en)

In [None]:
# Show x_1 followed by its transpose: rows become columns, so the (2, 3)
# tensor turns into a (3, 2) tensor.
print(x_1, tf.transpose(x_1), sep="\n")

tf.Tensor(
[[ 1  2  0]
 [ 3  5 -1]], shape=(2, 3), dtype=int32)
tf.Tensor(
[[ 1  3]
 [ 2  5]
 [ 0 -1]], shape=(3, 2), dtype=int32)


In [None]:
# Show x_2 followed by its transpose; a square (3, 3) tensor keeps its shape
# while its entries are mirrored across the main diagonal.
print(x_2, tf.transpose(x_2), sep="\n")

tf.Tensor(
[[ 1  2  0]
 [ 3  5 -1]
 [ 4  5  6]], shape=(3, 3), dtype=int32)
tf.Tensor(
[[ 1  3  4]
 [ 2  5  5]
 [ 0 -1  6]], shape=(3, 3), dtype=int32)


### Band Part Method

* Copy a tensor setting everything outside a central band in each innermost matrix to zero
* [Document](https://www.tensorflow.org/api_docs/python/tf/linalg/band_part?hl=en)

band[i, j, k, ..., m, n] = in_band(m, n) * input[i, j, k, ..., m, n].

in_band(m, n) = (num_lower < 0 || (m-n) <= num_lower) && (num_upper < 0 || (n-m) <= num_upper).

In [None]:
# A 4x3 float16 matrix used as the band_part input.
tensor_two_d = tf.constant(
    [
        [1, -2, 0],
        [3, 5, 100],
        [1, 5, 6],
        [2, 3, 8],
    ],
    dtype=tf.float16,
)

# An entry at row m, column n is kept when
#   (lower < 0 or m - n <= lower) and (upper < 0 or n - m <= upper).
# With lower = upper = 0 only the entries with m == n (the main diagonal)
# survive; everything else is set to zero.
diagonal_only = tf.linalg.band_part(tensor_two_d, num_lower=0, num_upper=0)
print(diagonal_only)

tf.Tensor(
[[1. 0. 0.]
 [0. 5. 0.]
 [0. 0. 6.]
 [0. 0. 0.]], shape=(4, 3), dtype=float16)


In [None]:
# Helper matrices for reasoning about band_part on a 4x3 input
# (m = row index, n = column index).

# Entry [m, n] holds m - n, the quantity compared against num_lower.
tensor_two_d_m_n = tf.constant(
    [[m - n for n in range(3)] for m in range(4)],
    dtype=tf.float16,
)

# Entry [m, n] holds n - m, the quantity compared against num_upper.
tensor_two_d_n_m = tf.constant(
    [[n - m for n in range(3)] for m in range(4)],
    dtype=tf.float16,
)

#### Useful Special Cases
* tf.linalg.band_part(input, 0, -1) ==> Upper triangular part.
* tf.linalg.band_part(input, -1, 0) ==> Lower triangular part.
* tf.linalg.band_part(input, 0, 0) ==> Diagonal.

### Inv Method

* Computes the inverse of one or more square invertible matrices or their adjoints (conjugate transposes).
* [Document](https://www.tensorflow.org/api_docs/python/tf/linalg/inv?hl=en)

#### The Matrix Must Be A Square Matrix

In [None]:
# tf.linalg.inv requires a square matrix, so a 3x3 float32 tensor is used.
tensor_two_d = tf.constant(
    value=[
        [1, -2, 0],
        [3, 5, 100],
        [1, 5, 6],
    ],
    dtype=tf.float32,
)

# Keep the inverse around: the next cell multiplies it back with the original.
tensor_two_d_inv = tf.linalg.inv(input=tensor_two_d)
print(tensor_two_d_inv)

tf.Tensor(
[[ 0.7413249  -0.01892742  0.3154574 ]
 [-0.12933755 -0.00946371  0.1577287 ]
 [-0.01577287  0.01104101 -0.01735016]], shape=(3, 3), dtype=float32)


In [None]:
# Sanity check: a matrix times its inverse should be the identity matrix.
# The displayed off-diagonal entries are tiny (around 1e-7 or smaller) rather
# than exactly zero.
tensor_two_d @ tensor_two_d_inv

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[ 1.0000000e+00,  7.4505806e-09,  0.0000000e+00],
       [ 1.6391277e-07,  1.0000000e+00,  4.4703484e-08],
       [-5.2154064e-08,  7.4505806e-08,  1.0000000e+00]], dtype=float32)>

### Svd Method

* Computes the singular value decompositions of one or more matrices.
* [Document](https://www.tensorflow.org/api_docs/python/tf/linalg/svd?hl=en)

In [None]:
# Same 3x3 float32 matrix as in the inverse example.
tensor_two_d = tf.constant(
    [
        [1, -2, 0],
        [3, 5, 100],
        [1, 5, 6],
    ],
    dtype=tf.float32,
)

# The decomposition is unpacked in the order (s, u, v):
#   s - tensor of singular values
#   u - tensor of left singular vectors
#   v - tensor of right singular vectors
s, u, v = tf.linalg.svd(tensor=tensor_two_d)

# One tensor per line, in the same order as above.
print(s, u, v, sep="\n")

tf.Tensor([100.3663      5.1059036   1.2371687], shape=(3,), dtype=float32)
tf.Tensor(
[[-7.4980810e-04 -3.7569830e-01  9.2674178e-01]
 [ 9.9803799e-01 -5.8300879e-02 -2.2827482e-02]
 [ 6.2606096e-02  9.2490643e-01  3.7500489e-01]], shape=(3, 3), dtype=float32)
tf.Tensor(
[[ 0.03044816  0.07330841  0.99684453]
 [ 0.0528536   0.9957936  -0.07484547]
 [ 0.998138   -0.05496572 -0.02644547]], shape=(3, 3), dtype=float32)


### Einsum Method

* Tensor contraction over specified indices and outer product.
* [Document](https://www.tensorflow.org/api_docs/python/tf/einsum?hl=en)

In [None]:
import numpy as np

In [None]:
# Left operand is 3x4 and right operand is 4x5, so the product is 3x5.
A = np.array([
    [2, 6, 5, 2],
    [2, -2, 2, 3],
    [3, 5, 4, 0],
])
B = np.array([
    [2, 9, 0, 3, 0],
    [3, 6, 8, -2, 2],
    [1, 3, 5, 0, 1],
    [3, 0, 2, 0, 5],
])

print(A.shape, B.shape, sep="\n")

# Reference result from the dedicated matrix-multiplication routine.
matmul_result = np.matmul(A, B)
print("Matmul C =: \n")
print(matmul_result, "\n")

# Same product in index notation: the repeated index j is summed over,
# leaving the free indices i and k.
einsum_result = np.einsum("ij, jk -> ik", A, B)
print("Einsum C =: \n")
print(einsum_result, "\n")

(3, 4)
(4, 5)
Matmul C =: 

[[33 69 77 -6 27]
 [ 9 12  0 10 13]
 [25 69 60 -1 14]] 

Einsum C =: 

[[33 69 77 -6 27]
 [ 9 12  0 10 13]
 [25 69 60 -1 14]] 



In [None]:
# Two matrices of identical shape (3x4) for an element-wise product.
A = np.array([
    [2, 6, 5, 2],
    [2, -2, 2, 3],
    [1, 5, 4, 0],
])
B = np.array([
    [2, 9, 0, 3],
    [3, 6, 8, -2],
    [1, 3, 5, 0],
])

print(A.shape, B.shape, sep="\n")

# Element-wise (Hadamard) product via the `*` operator.
hadamard_result = A * B
print("Hadamard C =: \n")
print(hadamard_result, "\n")

# Index notation: both indices appear in the output, so nothing is summed.
einsum_result = np.einsum("ij, ij -> ij", A, B)
print("Einsum C =: \n")
print(einsum_result, "\n")

(3, 4)
(3, 4)
Hadamard C =: 

[[  4  54   0   6]
 [  6 -12  16  -6]
 [  1  15  20   0]] 

Einsum C =: 

[[  4  54   0   6]
 [  6 -12  16  -6]
 [  1  15  20   0]] 



In [None]:
# 3x4 matrix whose transpose is computed two ways.
A = np.array([
    [2, 6, 5, 2],
    [2, -2, 2, 3],
    [1, 5, 4, 0],
])

# Built-in transpose attribute.
transposed = A.T
print("Transposed A =: \n")
print(transposed, "\n")

# Index notation: swapping the output indices swaps the axes.
einsum_transposed = np.einsum("ij -> ji", A)
print("Einsum Transposed A =: \n")
print(einsum_transposed)

Transposed A =: 

[[ 2  2  1]
 [ 6 -2  5]
 [ 5  2  4]
 [ 2  3  0]] 

Einsum Transposed A =: 

[[ 2  2  1]
 [ 6 -2  5]
 [ 5  2  4]
 [ 2  3  0]]


In [None]:
# A batch of two 3x4 matrices ...
A = np.array([
    [
        [2, 6, 5, 2],
        [2, -2, 2, 3],
        [1, 5, 4, 0],
    ],
    [
        [1, 3, 1, 22],
        [0, 2, 2, 0],
        [1, 5, 4, 1],
    ],
])

# ... and a batch of two 4x5 matrices.
B = np.array([
    [
        [2, 9, 0, 3, 0],
        [3, 6, 8, -2, 2],
        [1, 3, 5, 0, 1],
        [3, 0, 2, 0, 5],
    ],
    [
        [1, 0, 0, 3, 0],
        [3, 0, 4, -2, 2],
        [1, 0, 2, 0, 0],
        [3, 0, 1, 1, 0],
    ],
])

print(A.shape, B.shape)

# np.matmul multiplies the matrices pairwise along the leading (batch) axis.
batched_matmul = np.matmul(A, B)
print("Batch Multiplication C =: \n")
print(batched_matmul, "\n")

# Index notation: b is kept as the batch index, j is summed over.
batched_einsum = np.einsum("bij, bjk -> bik", A, B)
print("Einsum C =: \n")
print(batched_einsum)

(2, 3, 4) (2, 4, 5)
Batch Multiplication C =: 

[[[33 69 77 -6 27]
  [ 9 12  0 10 13]
  [21 51 60 -7 14]]

 [[77  0 36 19  6]
  [ 8  0 12 -4  4]
  [23  0 29 -6 10]]] 

Einsum C =: 

[[[33 69 77 -6 27]
  [ 9 12  0 10 13]
  [21 51 60 -7 14]]

 [[77  0 36 19  6]
  [ 8  0 12 -4  4]
  [23  0 29 -6 10]]]


In [None]:
# A (2, 3, 4) array whose entries are all added together.
A = np.array([
    [
        [2, 6, 5, 2],
        [2, -2, 2, 3],
        [1, 5, 4, 0],
    ],
    [
        [1, 3, 1, 22],
        [0, 2, 2, 0],
        [1, 5, 4, 1],
    ],
])

# Plain reduction over every axis.
total = np.sum(A)
print("Sum A =: \n")
print(total, "\n")

# Index notation: an empty output side means every index is summed over.
einsum_total = np.einsum("bij ->", A)
print("Einsum A =: \n")
print(einsum_total)

Sum A =: 

72 

Einsum A =: 

72


In [None]:
# 3x4 matrix reduced along each axis in turn.
A = np.array([
    [2, 6, 5, 2],
    [2, -2, 2, 3],
    [1, 5, 4, 0],
])

# Column sums: axis 0 (index i) is collapsed, index j remains.
column_sums = np.sum(A, axis=0)
print("Axis 0 Sum A =: \n")
print(column_sums, "\n")

einsum_column_sums = np.einsum("ij -> j", A)
print("Einsum A =: \n")
print(einsum_column_sums)
print("-----------------------------")

# Row sums: axis 1 (index j) is collapsed, index i remains.
row_sums = np.sum(A, axis=1)
print("Axis 1 Sum A =: \n")
print(row_sums, "\n")

einsum_row_sums = np.einsum("ij -> i", A)
print("Einsum A =: \n")
print(einsum_row_sums)

Axis 0 Sum A =: 

[ 5  9 11  5] 

Einsum A =: 

[ 5  9 11  5]
-----------------------------
Axis 1 Sum A =: 

[15  5 10] 

Einsum A =: 

[15  5 10]


#### Paper Example

In [None]:
# Named sizes for the example. Without these definitions the two assignments
# below raise NameError on a fresh kernel. The values match the concrete
# arrays created with np.random.randn in the following cell.
batchsize, s_q, s_k, modelsize = 32, 64, 128, 512

# Shape tuples describing the axis layout of each operand.
# NOTE(review): Q / K presumably denote the query and key tensors of the
# referenced paper, with s_q / s_k their sequence lengths; confirm.
Q = batchsize, s_q, modelsize
K = batchsize, s_k, modelsize

In [None]:
# Random stand-ins for the two operands. The axis letters (b, q, m) and
# (b, k, m) are the ones used in the einsum subscripts of the next cell.
q_shape, k_shape = (32, 64, 512), (32, 128, 512)
Q = np.random.randn(*q_shape)  # bqm
K = np.random.randn(*k_shape)  # bkm

In [None]:
# Sum over the shared axis m for every batch entry b:
# (b, q, m) combined with (b, k, m) gives (b, q, k).
qk_product = np.einsum("bqm, bkm -> bqk", Q, K)
qk_product.shape

(32, 64, 128)

In [None]:
# Two random 4-D operands that share the leading axes b, c and the axis i.
a_shape, b_shape = (2, 4, 4, 2), (2, 4, 4, 1)
A = np.random.randn(*a_shape)  # bcij
B = np.random.randn(*b_shape)  # bcik

In [None]:
# Sum over the shared axis i while keeping b and c:
# (b, c, i, k) combined with (b, c, i, j) gives (b, c, k, j).
contracted = np.einsum("bcik, bcij -> bckj", B, A)
contracted.shape

(2, 4, 1, 2)

In [None]:
# Same contraction without einsum: swap the last two axes of B so it becomes
# (b, c, k, i), then matrix-multiply with A (b, c, i, j) to get (b, c, k, j).
B_swapped = B.transpose(0, 1, 3, 2)
np.matmul(B_swapped, A).shape

(2, 4, 1, 2)