In [1]:
#import some libraries
import os
import cv2
import numpy as np
import tensorflow as tf
import keras

## Using TensorFlow like Numpy
### A tensor is usally a multidimensonal array, but it can also hold a scalar (vô hướng) (a simple value, such as 42). These tensors will be important when we create custom cost functions, custom metrics, custom layers, ...

In [2]:
tf.constant([[1., 2., 3.], [4., 5., 6.]]) #Matrix  

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

**Notes**: These value above show us the shape of matrix with 2 rows and 3 columns (shape =(rows, columns)), and data types=float32 as well as the numpy version

In [4]:
#Indexing works much like in Numpy
t = tf.constant([[1., 2., 3.], [4., 5., 6.]])
print(t[0, :]) #[1. 2. 3.]
print(t[:, 1:]) #[[2. 3.], [5. 6.]] (columns 1 and 2)


tf.Tensor([1. 2. 3.], shape=(3,), dtype=float32)
tf.Tensor(
[[2. 3.]
 [5. 6.]], shape=(2, 2), dtype=float32)


**Notes**: We can see the difference betweem two shape of array. With the shape of (3,), it is a 1-ranked array not a dimensional array (e.g shape=(3,1))

In [5]:
# All sorts of tensor operations are available:
print(t + 10) #add 10 to each element
print(tf.square(t)) #square each element
print(tf.reduce_sum(t)) #sum all elements
print(tf.reduce_sum(t, 0)) #sum each column
print(tf.reduce_sum(t, 1)) #sum each row
print(tf.reduce_mean(t)) #mean of all elements
print(tf.reduce_mean(t, 0)) #mean of each column
print(tf.reduce_mean(t, 1)) #mean of each row
print(tf.transpose(t)) #swap rows and columns

tf.Tensor(
[[11. 12. 13.]
 [14. 15. 16.]], shape=(2, 3), dtype=float32)
tf.Tensor(
[[ 1.  4.  9.]
 [16. 25. 36.]], shape=(2, 3), dtype=float32)
tf.Tensor(21.0, shape=(), dtype=float32)
tf.Tensor([5. 7. 9.], shape=(3,), dtype=float32)
tf.Tensor([ 6. 15.], shape=(2,), dtype=float32)
tf.Tensor(3.5, shape=(), dtype=float32)
tf.Tensor([2.5 3.5 4.5], shape=(3,), dtype=float32)
tf.Tensor([2. 5.], shape=(2,), dtype=float32)
tf.Tensor(
[[1. 4.]
 [2. 5.]
 [3. 6.]], shape=(3, 2), dtype=float32)


## Tensors and Numpy
### You can create a tensor from a NumPy array, and vice versa

In [6]:
a = np.array([2., 4., 5.])
print(tf.constant(a)) #[2. 4. 5.]
print(t.numpy()) #[1. 2. 3.]

tf.Tensor([2. 4. 5.], shape=(3,), dtype=float64)
[[1. 2. 3.]
 [4. 5. 6.]]


In [7]:
print(tf.square(a)) #[4. 16. 25.]
print(np.square(t)) #[[1. 4. 9.], [16. 25. 36.]]

tf.Tensor([ 4. 16. 25.], shape=(3,), dtype=float64)
[[ 1.  4.  9.]
 [16. 25. 36.]]


**Warning**: Numpy uses 64-bit precision by default, while TensorFlow uses 32-bit precision (more than enough for neural networks, plus it faster and uses less RAM). So when we create a tensor from a NumPy array, make sure to set dtype=tf.float32

## Type conversions

In [8]:
# Using tf.cast() to convert between types
t2 = tf.constant(40., dtype=tf.float64)
tf.constant(2.0) + tf.cast(t2, tf.float32) #42

<tf.Tensor: shape=(), dtype=float32, numpy=42.0>

## Variables
### The tf.Tensor values we have seen so far ar immutable (you can not modify them). This means that we cannot use regular tensors to implement weights in a neural network, since they need to be tweaked by backpropagation. That's why we need tf.Variable

In [9]:
v = tf.Variable([[1., 2., 3.], [4., 5., 6.]])
v

<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

In [10]:
"""
A Tf.Variable acts much like a tf.Tensor: you can perform the same operation with it, it plays nicely with NumPy as well.
But it can also be modified in place using tf.assign().
"""
v.assign(2 *  v)    # v is now [[2., 4., 6.], [8., 10., 12.]]
v[0,1].assign(42.)  # v is now [[2., 42., 6.], [8., 10., 12.]]
v[:, 2].assign([0., 1.])    # v is now [[2., 42., 0.], [8., 10., 1.]]
v.scatter_nd_update([[0, 0], [1, 2]], updates=[100., 200.]) # v is now [[100., 42., 0.], [8., 10., 200.]]

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[100.,  42.,   0.],
       [  8.,  10., 200.]], dtype=float32)>

## Other Data Structures

In [5]:
#Sparse tensors
#A sparse tensor is a tensor that has a very small number of elements, and most of them are zero.
t = tf.SparseTensor([[0, 0], [1, 2]], [100., 200.], [3, 3])
print(t) #SparseTensor(indices=[[0 0], [1 2]], values=[100. 200.], dense_shape=[3 3])

SparseTensor(indices=tf.Tensor(
[[0 0]
 [1 2]], shape=(2, 2), dtype=int64), values=tf.Tensor([100. 200.], shape=(2,), dtype=float32), dense_shape=tf.Tensor([3 3], shape=(2,), dtype=int64))


In [11]:
#Tensor Arrays
#A tensor array is a tensor that can be dynamically resized.
ta = tf.TensorArray(tf.float32, size=3)
ta = ta.write(0, [[1., 2.], [3., 4.]])
print(ta.read) #TensorArray(size=3, dtype=float32, dynamic_size=False, clear_after_read=True)

<bound method TensorArray.read of <tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x0000022D86020488>>


In [17]:
#Ragged Tensor
#A ragged tensor is a tensor that has a variable number of dimensions.
rt = tf.RaggedTensor.from_row_splits(values=[1, 2, 3, 4, 5, 6], row_splits=[0, 2, 3, 5, 6])
print(rt) #RaggedTensor(values=Tensor(values, dtype=int32), row_splits=Tensor(row_splits, dtype=int64))



<tf.RaggedTensor [[1, 2], [3], [4, 5], [6]]>


In [18]:
#String Tensors
"""
A string tensor are regular tensors of type tf.string.
These represent byte strings, not Unicode strings. Alternatively, you can respresent Unicode strings a using tf.int32, where each item represents a Unicode code point (a 32-bit integer). 
"""
st = tf.constant(["Hello", "World"])
print(st) #Tensor("Const:0", shape=(2,), dtype=string)

tf.Tensor([b'Hello' b'World'], shape=(2,), dtype=string)


In [23]:
# Queues
"""
Store tensors across multiple steps in a computation.
First In, First Out (FIFO) queues.
"""
q = tf.queue.FIFOQueue(3, "float")
print(q.shapes) #FIFOQueue(3, dtype=float32, shapes=None, shared_name=None, name=None)

[TensorShape(None)]


# Customizing Models and Training Algorithms

## Custom Loss Function
Suppose you start by trying to clean up your dataset by removing or fixing the outliers, but that turns out to be unsufficient; the dataset is still noisy.Which loss function should you use?

    1. Mean Squared Error (MSE) --> might penalize large errors to much and cause the model to be imprecise
    
    2. Mean Absolute Error (MAE) --> would not penalize outliers as much, but training might take a while to converge, and the trained model might not be very precise

    3. Huber Loss --> a combination of MSE and MAE, is quadratic when the error is smaller than a threshold but linear when the error is larger than threshold. The linear part makes it less sensitive to outliers than the MSE, and the quadratic part allows it to converge faster and be more precise than the MAE

In [6]:
#Huber loss 
def create_huber(threshold=1.0):
    def huber_fn(y_true, y_pred):
        error = y_true - y_pred
        is_small_error = tf.abs(error) < threshold
        squared_loss = tf.square(error) / 2
        linear_loss = threshold * tf.abs(error) - threshold ** 2 / 2
        return tf.where(is_small_error, squared_loss, linear_loss)
    return huber_fn

t = create_huber()
t(1., 2.)

<tf.Tensor: shape=(), dtype=float32, numpy=0.5>

**Notes**: For better performance, you should use a vectorized implementation.

In [None]:
"""
When you save the model, the threshold will not be saved.
You have to set it again when loading the model.
To solve this problem, you create a subclass of the keras.losses.Loss class and then implementing its get_config() method.
"""

class HuberLoss(keras.losses.Loss):
    def __init__(self, threshold=1.0, **kwargs):
        super(HuberLoss, self).__init__(**kwargs)
        self.threshold = threshold
    def call(self, y_true, y_pred):
        error = y_true - y_pred
        is_small_error = tf.abs(error) < self.threshold
        squared_loss = tf.square(error) / 2
        linear_loss = self.threshold * tf.abs(error) - self.threshold ** 2 / 2
        return tf.where(is_small_error, squared_loss, linear_loss)
    def get_config(self):
        config = {'threshold': self.threshold}
        base_config = super(HuberLoss, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [None]:
"""
when you save the model, the threshold will be saved along with it; and when you load the model, you just need to map the class name to the class itself.
"""

model = keras.models.load_model('my_model_with_a_custom_class.h5', custom_objects={'HuberLoss': HuberLoss})

**Notes**: Brief notation about *args and **kwargs

    - args are used to pass *non-keyword* arguments, which are usually used as a measure to prevent the program from crashing if we don't know how many arguments will be passed to the function

    - **kwargs is a dicitonary of keyword arguments. The ** allows us to pass any number of keyword arguments. A keyword argument is basically a dictionary

## Custom Actovation Functions, Initializers, Regularizers and Constaints

In [None]:
#Custom Soft plus activation function
def my_softplus(x): # return value is just tf.nn.softplus(x)
    return tf.math.log(1. + tf.exp(x))

#A custom Glorot Initializer
def my_glorot_initializer(shape, dtype=tf.float32):
    stddev = tf.sqrt(2.0 / (shape[0] + shape[1]))
    return tf.random.normal(shape, stddev=stddev, dtype=dtype)

#A custom L1 regularizer
def my_l1_regularizer(weights):
    return tf.reduce_sum(tf.abs(weights * 0.01))

#A custom weight
def my_positive_weights(weights):
    return tf.where(weights < 0, tf.zeros_like(weights), weights)

## Custom Metrics

In [6]:
"""
Some differences between losses and metrics:

    1. Losses are used by Gradient Descent to train a model, so they must be differentiable, and their gradients should not be 0 everywhere.
    To recall, when we train the model, the model will perform backpropagation to update the weights and minimize the loss. So if the gradient is 0 everywhere, the model will not update the weights.

    2. Metrics are used to evaluate the model, so they are not differentiable or have 0 gradients everywhere.
"""
#An object that can keep track of the number of true positives and false positives and that can compute their ratio when requested.
precision = keras.metrics.Precision()
precision([0, 1, 1, 1, 0, 1, 0, 1], [1, 1, 0, 1, 0, 1, 0, 1]) # first batch

<tf.Tensor: shape=(), dtype=float32, numpy=0.8>

In [7]:
precision([0, 1, 0, 0, 1, 0, 1, 1], [1, 0, 1, 1, 0, 0, 0, 0]) # second batch

<tf.Tensor: shape=(), dtype=float32, numpy=0.5>

**Notes**: After the first batch, it returns a precision of 80%; then after the second batch, it return 50% (which is the overall precision so far, not the second batch's precision) --> a.k.a *streaming metric* (or *stateful metric*), as it is gradually updated, batch after batch

In [8]:
precision.result() # 0.5

<tf.Tensor: shape=(), dtype=float32, numpy=0.5>

In [9]:
precision.variables

[<tf.Variable 'true_positives:0' shape=(1,) dtype=float32, numpy=array([4.], dtype=float32)>,
 <tf.Variable 'false_positives:0' shape=(1,) dtype=float32, numpy=array([4.], dtype=float32)>]

## Custom Layers

In [None]:
#A simplified version of the custom Dense layer
class MyDense(keras.layers.Layer):
    def __init__(self, units, activation=None, **kwargs):
        super(MyDense, self).__init__(**kwargs)
        self.units = units
        self.activation = keras.activations.get(activation)
    def build(self, batch_input_shape):
        self.kernel = self.add_weight(name='kernel',
                                      shape=[batch_input_shape[-1], self.units],
                                      initializer=my_glorot_initializer)
        self.bias = self.add_weight(name='bias',
                                    shape=[self.units],
                                    initializer='zeros')
        super().build(batch_input_shape) # must be at the end
    def call(self, inputs):
        return self.activation(tf.matmul(inputs, self.kernel) + self.bias)
    def compute_output_shape(self, batch_input_shape):
        return tf.TensorShape([batch_input_shape[0], self.units])
    def get_config(self):
        base_config = super(MyDense, self).get_config()
        return dict(list(base_config.items()) + list({'units': self.units}.items()) + {"activation": keras.activations.serialize(self.activation)})


**Conclusion**: A custom layer class will have 5 main functions:

    1. The constructor (__init__) --> takes all the hyperparameters as arguments, and importantly it also takes a **kwargs argument

    2. The build() method --> Its role is to create the layer's variables by calling the add_weight() method for each weight.

    3. The call() method --> performs the desired operations. For example, we will compute the output of the layer by performing the linear equation (Wx + b) and applying the activation to it.

    4.The compute_output_shape() method --> simply returns the shape of this layer's outputs

    5. The get_config() method -- returns a dictionary mapping each hyperparameter name to its value. It first calls the parent class's get_config() method, then adds the new hyperparameters to this dictionary

## Custom models

Suppose we want to build a custome model represented in the below picture like this:

![Custom_models.png](Custom_models.png)

In [None]:
#Let's create a custom ResidualBlock layer
class ResidualBlock(keras.layers.Layer):
    def __init__(self, n_layers, n_neurons, **kwargs):
        super(ResidualBlock, self).__init__(**kwargs)
        self.hidden = [keras.layers.Dense(n_neurons, activation='elu',kernel_initializer=my_glorot_initializer, bias_initializer='zeros') for _ in range(n_layers)]
    
    def call(self, inputs):
        Z = inputs
        for layer in self.hidden:
            Z = layer(Z)
        return inputs + Z
    
    def  get_config(self):
        return super().get_config()

class ResidualRegressor(keras.Model):
    def __init__(self, output_dim, **kwargs):
        super(ResidualRegressor, self).__init__(**kwargs)
        self.hidden1 = keras.layers.Dense(30, activation='elu', kernel_initializer='he_normal')
        self.block1 = ResidualBlock(2, 30)
        self.block2 = ResidualBlock(2, 30)
        self.out = keras.layers.Dense(output_dim)

    def call(self, inputs):
        Z = self.hidden1(inputs)
        for _ in range(1 + 3):
            Z = self.block1(Z)
        Z = self.block2(Z)
        return self.out(Z)
    
    def get_config(self):
        return super().get_config()

**Notes**: The Model class is a subclass of the Layer class, so models can be defined and used exactly like layers

## Losses and Metrics Based on Model Internals

In [None]:
#A custom regression MLP model composed of a stack of five hidden layers plus an output layer
"""
This custom model will also have an auxiliary output on top of the upper hidden layer. The loss associated to this auxiliary output will be called reconstruction loss
Reconstruction loss is the mean squared difference between the reconstruction and the inputs.
"""

class ReconstructingRegressor(keras.Model):
    def __init__(self, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.hidden = [keras.layers.Dense(30, activation='selu', kernel_initializer='lecun_normal') for _ in range(5)]
        self.out = keras.layers.Dense(output_dim)

    def build(self, batch_input_shape):
        n_inputs = batch_input_shape[-1]
        self.reconstruct = keras.layers.Dense(n_inputs)
        super().build(batch_input_shape)
        super().build(batch_input_shape)
    
    def call (self, inputs):
        Z = inputs
        for layer in self.hidden:
            Z = layer(Z)
        reconstruction = self.reconstruct(Z)
        recon_loss = tf.reduce_mean(tf.square(reconstruction - inputs))
        self.add_loss(0.05 * recon_loss)
        return self.out(Z)

## Computing Gradients Using Autodiff

In neural network, the function would be much more complex, typically with tens thousands of parameters, and finding the partial derivatives anylytically by hand would be an almost impossible task. One solution could be to compute an approximation of each partial derivative by measuring how much the function's output changes when you tweak the corresponding parameter

In [5]:
def f(w1, w2):
    return 3 * w1 ** 2 + 2  * w1 * w2

w1, w2 =  5, 3
eps = 1e-6
print((f(w1 + eps, w2) - f(w1, w2)) / eps)

36.000003007075065


Needing to call f() at least once per parameter makes this approach intractable for large neural network --> we should use autodiff

In [6]:
w1, w2 = tf.Variable(5.), tf.Variable(3.)
with tf.GradientTape() as tape:
    z = f(w1, w2)

gradients = tape.gradient(z, [w1, w2])
print(gradients)

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>, <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]


The tape automatically erased immediately after gradient() method was called, so exception will throw if you call twice.

--> Solution: make the tape persistent with tf.GradientTape(persistent=True) and delete it each time you are done with it to free resources

In [7]:
with tf.GradientTape(persistent=True) as tape:
    z = f(w1, w2)

dz_dw1 = tape.gradient(z, w1)
dz_dw2 = tape.gradient(z, w2)
del tape

You can force the tape to watch any tensors you like, to record every operation that involves them

In [8]:
c1, c2 = tf.constant(5.), tf.constant(3.) 
with tf.GradientTape() as tape:
    tape.watch(c1)
    tape.watch(c2)
    z = f(c1, c2)

gradients = tape.gradient(z, [c1, c2])


**Notes**: 

    1. The tape's jabobian() method: it will perform reverse-mode autodiff once for each loss in the vector.

    2. Reverse-mode autodiff: it just needs to do one forward pass and one reverse pass to get all the gradients at once

    3. The Hessians method: the partial derivatives of the partial derivatives

## Custom Training Loops

In [18]:
#Loading and preparing the Fashion MNIST dataset
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()
X_train_full = X_train_full.astype(np.float32) / 255.
X_train, X_valid = X_train_full[5000:], X_train_full[:5000]
y_train, y_valid = y_train_full[5000:], y_train_full[:5000]
X_test = X_test.astype(np.float32) / 255.

In [14]:
l2_reg = keras.regularizers.l2(0.01)
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(100, activation='relu', kernel_initializer='he_normal', kernel_regularizer=l2_reg),
    keras.layers.Dense(10, activation='softmax')
])

In [15]:
# Random batch_size function
def random_batch(X, y, batch_size=32):
    idx = np.random.randint(len(X), size=batch_size)
    return X[idx], y[idx]


In [24]:
#Disply the training status function
def print_status_bar(iteration, total, loss, metrics=None):
    metrics = ' - '.join(["{}: {:.4f}".format(m.name, m.result()) for m in [loss] + (metrics or [])])
    end = "" if iteration < total else "\n"
    print("\r{}/{} - ".format(iteration + total) + metrics, end=end)

In [17]:
n_epochs = 5
batch_size = 32
n_steps = len(X_train) // batch_size
optimizer = keras.optimizers.Nadam(learning_rate=0.01)
loss_fn = keras.losses.sparse_categorical_crossentropy
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.SparseCategoricalAccuracy()]

In [25]:
for epoch in range(1 , n_epochs + 1):
    print("Epoch {}/{}".format(epoch, n_epochs))
    for step in range(1, n_steps + 1):
        X_batch, y_batch = random_batch(X_train, y_train)
        with tf.GradientTape() as tape:
            y_pred = model(X_batch)
            main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
            loss = tf.add_n([main_loss] + model.losses)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        for variable in model.variables:
            if variable.constraint is not None:
                variable.assign(variable.constraint(variable))
        mean_loss(loss)
        for metric in metrics:
            metric(y_batch, y_pred)
        print_status_bar(step * batch_size, len(y_train), mean_loss, metrics)
    print_status_bar(len(y_train), len(y_train), mean_loss, metrics)
    for metric in [mean_loss] + metrics:
        metric.reset_states()

Epoch 1/5


IndexError: tuple index out of range

## TensorFlow Functions and Graphs

In [4]:
#Creating simple function to visualize how to use tf.function()
#Simple function without tf.function()
def cube(x):
    return x ** 3 

In [5]:
cube(2)

8

In [6]:
#Let's try with tf.constant()
cube(tf.constant(2))

<tf.Tensor: shape=(), dtype=int32, numpy=8>

In [7]:
#Let's use tf.function() to convert this Python function to TensorFlow function
tf_cube = tf.function(cube)
print(tf_cube)

<tensorflow.python.eager.def_function.Function object at 0x0000021985152208>


In [8]:
tf_cube(2)

<tf.Tensor: shape=(), dtype=int32, numpy=8>

In [9]:
tf_cube(tf.constant(2.0))

<tf.Tensor: shape=(), dtype=float32, numpy=8.0>

**Notes**: TF function can then be used exactly like the original Python function, and it will return the same result (but as tensors)

In [10]:
#Using tf.function as a decorator
@tf.function
def tf_cube(x):
    return x ** 3

tf_cube(2)

<tf.Tensor: shape=(), dtype=int32, numpy=8>

**Notes**: As a result, a TF function will usually run much faster than the original Python function, especially if it perform complex computations.

In [11]:
tf_cube(tf.constant(10))

<tf.Tensor: shape=(), dtype=int32, numpy=1000>

In [12]:
tf_cube(tf.constant(20))

<tf.Tensor: shape=(), dtype=int32, numpy=8000>

In [13]:
tf_cube(tf.constant([10, 20]))

<tf.Tensor: shape=(2,), dtype=int32, numpy=array([1000, 8000])>

**Warning**: if a TF function was called many times with different numerical Python values, then any graph will be generated, slowing down the program and using up a lot of RAM

## AutoGraph and Tracing

*AutoGraph*: TensorFlow generates graphs by analyzing th Python function's source code to capture all the control flow statements (for, while loops, and if statements, break, continue, return statments).

**Question**: What is the reason TensorFlow has analyze this source code?

**Answer**: The reason TensorFlow has analyze the source code is that Python does not provide any other way to capture control flow statements

In [14]:
@tf.function
def sum_squares(n):
    s = 0
    for i in tf.range(n + 1):
        s += i ** 2
    return s

In [17]:
sum_squares(tf.constant(5))

<tf.Tensor: shape=(), dtype=int32, numpy=55>

In [18]:
sum_squares.get_concrete_function(tf.constant(5)).graph.get_operations()

[<tf.Operation 'n' type=Placeholder>,
 <tf.Operation 'add/y' type=Const>,
 <tf.Operation 'add' type=AddV2>,
 <tf.Operation 'range/start' type=Const>,
 <tf.Operation 'range/delta' type=Const>,
 <tf.Operation 'range' type=Range>,
 <tf.Operation 'sub' type=Sub>,
 <tf.Operation 'floordiv' type=FloorDiv>,
 <tf.Operation 'mod' type=FloorMod>,
 <tf.Operation 'zeros_like' type=Const>,
 <tf.Operation 'NotEqual' type=NotEqual>,
 <tf.Operation 'Cast' type=Cast>,
 <tf.Operation 'add_1' type=AddV2>,
 <tf.Operation 'zeros_like_1' type=Const>,
 <tf.Operation 'Maximum' type=Maximum>,
 <tf.Operation 'Const' type=Const>,
 <tf.Operation 'while/maximum_iterations' type=Const>,
 <tf.Operation 'while/loop_counter' type=Const>,
 <tf.Operation 'while' type=StatelessWhile>,
 <tf.Operation 'Identity' type=Identity>]

In [20]:
print(tf.autograph.to_code(sum_squares.python_function))

def tf__sum_squares(n):
    with ag__.FunctionScope('sum_squares', 'fscope', ag__.ConversionOptions(recursive=True, user_requested=True, optional_features=(), internal_convert_user_code=True)) as fscope:
        do_return = False
        retval_ = ag__.UndefinedReturnValue()
        s = 0

        def get_state():
            return (s,)

        def set_state(vars_):
            nonlocal s
            (s,) = vars_

        def loop_body(itr):
            nonlocal s
            i = itr
            s = ag__.ld(s)
            s += (i ** 2)
        i = ag__.Undefined('i')
        ag__.for_stmt(ag__.converted_call(ag__.ld(tf).range, ((ag__.ld(n) + 1),), None, fscope), None, loop_body, get_state, set_state, ('s',), {'iterate_names': 'i'})
        try:
            do_return = True
            retval_ = ag__.ld(s)
        except:
            do_return = False
            raise
        return fscope.ret(retval_, do_return)



## TF Function Rules

There are a few rules to respect:

    . A TensorFlow graph **can** only include TensorFlow constructs (tensors, operations, variables, datasets,...) --> Make sure use tf.reduce_sum() instead of np.sum()

    . Other Python functions or TF functions can be called, but they should follow the same rules, as TensorFlow will capture their operations in the computation graph

    . If the function creates a TensorFlow variable (or any other stateful TensorFLow object, such as a dataset or a queue), it must do so upon the very first call, and only then, or else an exception will be thrown.

    . TensorFlow will only capture for loops that iterate over a tensor or a dataset.So make so using *for i in tf.range(x)* rather than *for i in range(x)*, or else the loop will not be capture in the graph