In [1]:
import tensorflow as tf
from time import time

### Build a methods that implement a function:
`[⍺1,⍺2 + ⍺1·𝑝,⍺3 + (⍺2 + ⍺1·𝑝)·𝑝,⍺4 + (⍺3 + (⍺2 + ⍺1·𝑝)·𝑝)]`

#### Base method
Simple TensorArray.write each item from the function.

In [2]:
tf.function
def compute_tensor_base(T, p):

    result = tf.TensorArray(dtype=tf.float32, size=4)
    result = result.write(0, T[0])
    result = result.write(1, T[1] + T[0] * p)
    result = result.write(2, T[2] + (T[1] + T[0] * p) * p)
    result = result.write(3, T[3] + (T[2] + (T[1] + T[0] * p) * p))
    
    return result.stack()

#### Intermidate_0
There is common algebraic expression within a function. To optimize we are going to let it calculate only once and apply results into the rest of expressions.

In [3]:
tf.function
def compute_tensor_intermidate_0(T, p):

    result = tf.TensorArray(dtype=tf.float32, size=4)
    result = result.write(0, T[0])
    intermidate = T[1] + T[0] * p
    result = result.write(1, intermidate)
    result = result.write(2, T[2] + (intermidate) * p)
    result = result.write(3, T[3] + (T[2] + (intermidate) * p))

    return result.stack()

#### Intermidate_1
There is two common algebraic expressions within a function. To optimize we are going to let it calculate only once and apply results into the rest of expressions.

In [4]:
tf.function
def compute_tensor_intermidate_1(T, p):

    result = tf.TensorArray(dtype=tf.float32, size=4)
    result = result.write(0, T[0])
    intermidate = T[1] + T[0] * p
    result = result.write(1, intermidate)
    intermidate_1 = T[2] + (intermidate) * p
    result = result.write(2, intermidate_1)
    result = result.write(3, T[3] + (intermidate_1))

    return result.stack()

#### Optimized
The same as Intermidate_1 but wrote more programmar way.

In [5]:
tf.function
def compute_tensor_optimized(T, p):
    
    result_1 = T[1] + T[0] * p
    result_2 = T[2] + result_1 * p
    result_3 = T[3] + result_2

    return tf.stack([T[0], result_1, result_2, result_3])

#### Optimized v2
The same as optimized but used TensorArray.write that makes computation more structured.

In [6]:
@tf.function
def compute_tensor_optimized_v2(T, p):
    result = tf.TensorArray(dtype=tf.float32, size=4)
    
    result = result.write(0, T[0])
    result_1 = T[1] + T[0] * p
    result = result.write(1, result_1)
    result_2 = T[2] + result_1 * p
    result = result.write(2, result_2)
    result_3 = T[3] + result_2
    result = result.write(3, result_3)
    
    return result.stack()

Create tensor T = [⍺1,⍺2,⍺3,⍺4] and tensor p = [scalar] ~ (0,1)

In [7]:
T = tf.convert_to_tensor([1.0, 2.0, 3.0, 4.0], dtype=tf.float32)
p = tf.convert_to_tensor(0.5, dtype=tf.float32)

T, p

(<tf.Tensor: shape=(4,), dtype=float32, numpy=array([1., 2., 3., 4.], dtype=float32)>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.5>)

Compare time execution of four different computing methods for function:
* [⍺1,⍺2 + ⍺1·𝑝,⍺3 + (⍺2 + ⍺1·𝑝)·𝑝,⍺4 + (⍺3 + (⍺2 + ⍺1·𝑝)·𝑝)] 

In [8]:
list_of_times_base = []
list_of_times_intermidate_0 = []
list_of_times_intermidate_1 = []
list_of_times_optimized = []
list_of_times_optimized_v2 = []

for i in range(1000):

    start = time()
    output = compute_tensor_base(T, p)
    end = time()
    list_of_times_base.append(end - start)
    
    start = time()
    output = compute_tensor_intermidate_0(T, p)
    end = time()
    list_of_times_intermidate_0.append(end - start)

    start = time()
    output = compute_tensor_intermidate_1(T, p)
    end = time()
    list_of_times_intermidate_1.append(end - start)

    start = time()
    output = compute_tensor_optimized(T, p)
    end = time()
    list_of_times_optimized.append(end - start)

    start = time()
    output = compute_tensor_optimized_v2(T, p)
    end = time()
    list_of_times_optimized_v2.append(end - start)
    
print(f"Computing time base: {sum(list_of_times_base)/1000}")
print(f"Computing time intermidate_0: {sum(list_of_times_intermidate_0)/1000}")
print(f"Computing time intermidate_1: {sum(list_of_times_intermidate_1)/1000}")
print(f"Computing time optimized: {sum(list_of_times_optimized)/1000}")
print(f"Computing time optimized_v2: {sum(list_of_times_optimized_v2)/1000}")

Computing time base: 0.0019917376041412352
Computing time intermidate_0: 0.0012110278606414795
Computing time intermidate_1: 0.0009918577671051026
Computing time optimized: 0.0009197704792022705
Computing time optimized_v2: 0.00040234923362731935


Calculating the derivatives on both the tensor T and the scalar p. We are going to record tensors [T, p] all operations using tf.GradientTape.

After calling method compute_tensor_optimized we use computational graph to compute derivatives with tape.gradient.

In [9]:
with tf.GradientTape(persistent=True) as tape:
    tape.watch([T, p])
    result = compute_tensor_optimized_v2(T, p)

# Gradients with respect to T
grad_T = tape.gradient(result, T)
print("Gradient w.r.t. T:", grad_T, result)

# Gradients with respect to p
grad_p = tape.gradient(result, p)
print("Gradient w.r.t. p:", grad_p, result)

Gradient w.r.t. T: tf.Tensor([2. 2. 2. 1.], shape=(4,), dtype=float32) tf.Tensor([1.   2.5  4.25 8.25], shape=(4,), dtype=float32)
Gradient w.r.t. p: tf.Tensor(7.0, shape=(), dtype=float32) tf.Tensor([1.   2.5  4.25 8.25], shape=(4,), dtype=float32)
