In [2]:
#@title Imports { display-mode: "form" }
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

try:
  tf.enable_eager_execution()
  print('Eager execution enabled')
except ValueError:
  print('Already running in Eager mode')

tfe = tf.contrib.eager
  

Eager execution enabled


## Matrix Multiplication

In [3]:
# Define matrix A
A = np.array(
  [[1.0, 3.0],
   [2.0, 1.0],
   [4.0, 2.0]]
)

# Define matrix B
B = np.array(
  [[6.0, 2.0, 1.0],
   [3.0, 4.0, 5.0]]
)

# Define vector x
x = np.array([3.0, 2.0])

print('A.shape is:', A.shape, 'B.shape is:', B.shape, 'x.shape is:', x.shape)
A

A.shape is: (3, 2) B.shape is: (2, 3) x.shape is: (2,)


array([[1., 3.],
       [2., 1.],
       [4., 2.]])

### Matrix-vector multiplication

In [4]:
# Using numpy dot
y = A.dot(x)

print('Using dot:\t y =', y, '\t y.shape =', y.shape)

# Using einsum
y = np.einsum('ij, j', A, x)

print('Using einsum:\t y =', y, '\t y.shape =', y.shape)

# Manual version 1
y = np.array([
    A[0,0] * x[0] + A[0,1] * x[1],
    A[1,0] * x[0] + A[1,1] * x[1],
    A[2,0] * x[0] + A[2,1] * x[1],
    ])
print('Manual 1:\t y =', y, '\t y.shape =', y.shape)

# Manual version 2: 
# Matrix-vector multiplication can be thought of as a linear combination of the columns of of the matrix
y = x[0] * A[:,0]  +  x[1] * A[:, 1]

print('Manual 2:\t y =', y, '\t y.shape =', y.shape)

Using dot:	 y = [ 9.  8. 16.] 	 y.shape = (3,)
Using einsum:	 y = [ 9.  8. 16.] 	 y.shape = (3,)
Manual 1:	 y = [ 9.  8. 16.] 	 y.shape = (3,)
Manual 2:	 y = [ 9.  8. 16.] 	 y.shape = (3,)


### Matrix-matrix multiplication

In [5]:
# Using numpy dot
C = A.dot(B)

print('Using DOT: C= \n\n', C, '\n\nC.shape =', C.shape)

# Using einsum
C = np.einsum('ik, kj', A, B)
print('\n\nUsing einsum: C= \n\n', C, '\n\nC.shape =', C.shape)

# Note, the above einsum notation is equivalent to the following
C = np.einsum('ik, kj -> ij', A, B)

# And in Tensorflow
C = tf.matmul(A, B)
print('\n\nUsing Tensorflow: C= \n\n', C, '\n\nC.shape =', C.shape)

Using DOT: C= 

 [[15. 14. 16.]
 [15.  8.  7.]
 [30. 16. 14.]] 

C.shape = (3, 3)


Using einsum: C= 

 [[15. 14. 16.]
 [15.  8.  7.]
 [30. 16. 14.]] 

C.shape = (3, 3)


Using Tensorflow: C= 

 tf.Tensor(
[[15. 14. 16.]
 [15.  8.  7.]
 [30. 16. 14.]], shape=(3, 3), dtype=float64) 

C.shape = (3, 3)


Matrix multiplication is not commutative

In [6]:
# Matrix multiplication is not commutative:
C = B.dot(A)
print('C: \n', C)
print()
print('C.shape:', C.shape)

C: 
 [[14. 22.]
 [31. 23.]]

C.shape: (2, 2)


## Computing gradients with TensorFlow
$y = Ax$

In the code below, we use Tensorflow to calculate the following derivatives:

$\frac{dy}{dx}$ 

and 

$\frac{\partial y}{\partial A}$ 

In [7]:
A_tensor = tfe.Variable(A)
x_tensor = tfe.Variable(x)

with tf.GradientTape() as tape:
  y = tf.einsum('ij,j', A_tensor, x_tensor)

dydx, dydA = tape.gradient(y, [x_tensor, A_tensor])

print('dy/dx =', dydx)
print()
print('dy/dA =', dydA)

dy/dx = tf.Tensor([7. 6.], shape=(2,), dtype=float64)

dy/dA = tf.Tensor(
[[3. 2.]
 [3. 2.]
 [3. 2.]], shape=(3, 2), dtype=float64)


# Neural Network Gradient Example
In the following example, we compute the output of a 1 layer neural network and the gradients with respect to its parameters. We define an example input vector and parameters, but keep the computation generic. You can change the values and shapes of x, A and b below and run the rest of the code to compute the output and gradients for your own example.

In [8]:
x = np.array([[-1.], [0.1], [2.1]])  # X has shape (3, 1)
A = np.array([              # A has shape (2, 3)
    [ 1.1, -2.5,  0.3],
    [-2.1,  0.2, -1.1]
])  
b = np.array([[-1.0], [2.0]])      # b has shape (2)

Compute the neural network output
$\mathbf{f} = \operatorname{tanh}(A\mathbf{x} + \mathbf{b})$

In [9]:
M, N = A.shape
z = A.dot(x) + b
f = np.tanh(z)

print('f =', f)

f = [[-0.93786303]
 [ 0.94783185]]


Compute the partial derivatives:

\begin{align}
\frac{d\mathbf{f}}{d\mathbf{z}} ; \frac{\partial\mathbf{z}}{\partial\mathbf{x}} ; \frac{\partial\mathbf{z}}{\partial\mathbf{b}} ; \frac{\partial\mathbf{z}}{\partial\mathbf{A}}
\end{align}

In [10]:
# partial derivatives
dfdz = 1-f**2       # (derivative of tanh is 1-tanh^2)
print('df/dz =', dfdz, '\nshape:', dfdz.shape)
print()

dzdx = A
print('dz/dx =\n', dzdx, '\n\nshape:', dzdx.shape)
print()

dzdb = np.eye(M)
print('dz/db =\n', dzdb, '\n\nshape:', dzdb.shape)
print()

dzdA = np.zeros((M, M, N))  # Start with a tensor of zeros of the correct shape
for i in range(M):          # Then set the diagonal elements of dzdA
  dzdA[i,i,:] = x.T  

print('dz/dA =\n', dzdA, '\n\nshape:', dzdA.shape)



df/dz = [[0.12041293]
 [0.10161478]] 
shape: (2, 1)

dz/dx =
 [[ 1.1 -2.5  0.3]
 [-2.1  0.2 -1.1]] 

shape: (2, 3)

dz/db =
 [[1. 0.]
 [0. 1.]] 

shape: (2, 2)

dz/dA =
 [[[-1.   0.1  2.1]
  [ 0.   0.   0. ]]

 [[ 0.   0.   0. ]
  [-1.   0.1  2.1]]] 

shape: (2, 2, 3)


Finally, we compute the gradients of the neural network output $f$ with respect to the parameters $A$ and $\mathbf{b}$ and the input $\mathbf{x}$ using the chain rule:

\begin{align}
\frac{\partial \mathbf{f}}{\partial \mathbf{x}} &= \frac{d \mathbf{f}}{d \mathbf{z}} \frac{\partial \mathbf{z}}{\partial \mathbf{x}} \ ; \ 
\frac{\partial \mathbf{f}}{\partial \mathbf{b}} = \frac{d \mathbf{f}}{d \mathbf{z}} \frac{\partial \mathbf{z}}{\partial \mathbf{b}} \ ; \ 
\frac{\partial \mathbf{f}}{\partial A} = \frac{d \mathbf{f}}{d \mathbf{z}} \frac{\partial \mathbf{z}}{\partial A} 
\end{align}

In [11]:
dfdx = np.einsum('il, lj', dfdz, dzdx)
print('df/dx =\n', dfdx, '\n\nshape:', dfdx.shape)
print()

dfdb = np.einsum('il, lj', dfdz, dzdb)
print('df/db =\n', dfdb, '\n\nshape:', dfdb.shape)
print()

dfdA = np.einsum('il, ljk', dfdz, dzdA)
print('df/dA =\n', dfdA, '\n\nshape:', dfdA.shape)

ValueError: ("Size of label '%s' for operand %d does not match previous terms.", 'l', 1)

# Numpy VS Tensorflow: speed on Matrix calculations

In [2]:
import numpy
import timeit
i = 2000
A = numpy.random.rand(i, i).astype(numpy.float32)
B = numpy.random.rand(i, i).astype(numpy.float32)

timer = timeit.Timer("numpy.dot(A, B)", "import numpy; from __main__ import A, B")
numpy_times_list = timer.repeat(num_repeats, 1)


NameError: name 'num_repeats' is not defined