##### Copyright 2019 The TensorFlow Authors.

In [2]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TensorFlow 2 quickstart for experts

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://www.tensorflow.org/tutorials/quickstart/advanced"><img src="https://www.tensorflow.org/images/tf_logo_32px.png" />View on TensorFlow.org</a>
  </td>
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/tutorials/quickstart/advanced.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/tensorflow/docs/blob/master/site/en/tutorials/quickstart/advanced.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
  <td>
    <a href="https://storage.googleapis.com/tensorflow_docs/docs/site/en/tutorials/quickstart/advanced.ipynb"><img src="https://www.tensorflow.org/images/download_logo_32px.png" />Download notebook</a>
  </td>
</table>

This is a [Google Colaboratory](https://colab.research.google.com/notebooks/welcome.ipynb) notebook file. Python programs are run directly in the browser—a great way to learn and use TensorFlow. To follow this tutorial, run the notebook in Google Colab by clicking the button at the top of this page.

1. In Colab, connect to a Python runtime: At the top-right of the menu bar, select *CONNECT*.
2. Run all the notebook code cells: Select *Runtime* > *Run all*.

Download and install TensorFlow 2.

Note: Upgrade `pip` to install the TensorFlow 2 package. See the [install guide](https://www.tensorflow.org/install) for details.

Import TensorFlow into your program:

In [3]:
import tensorflow as tf

from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model

Load and prepare the [MNIST dataset](http://yann.lecun.com/exdb/mnist/).

In [4]:
# Load the MNIST train/test splits and rescale the images by dividing by 255.0.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0

# Add a trailing channels dimension.
# Indexing with tf.newaxis has the same effect as tf.expand_dims(..., axis=-1),
# just with a different syntax; print the expand_dims shape for comparison.
print(tf.expand_dims(x_train, axis=-1).shape)

x_train = x_train[..., tf.newaxis].astype("float32")
x_test = x_test[..., tf.newaxis].astype("float32")

# Shape after indexing with tf.newaxis, to compare with the line printed above.
print(x_train.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
(60000, 28, 28, 1)
(60000, 28, 28, 1)


Use `tf.data` to batch and shuffle the dataset:

In [5]:
# tf.data.Dataset takes care of formatting the data for the input pipeline.
# from_tensor_slices yields one element per row of the input tensors
# (from_tensors would instead yield a single element holding everything,
# and hence one more dimension).
#
# shuffle(10000) keeps a buffer from which the next element is drawn at random:
#   > the buffer is first filled with the first 10,000 elements;
#   > each time one element is drawn, the next element of the dataset
#     (the 10,001st, ...) takes its place in the buffer;
#   > hence the very first element emitted always comes from the first 10,000.
# A perfect shuffle needs buffer size >= dataset size (60K here), but that is
# more expensive.
train_ds = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(10000)
    .batch(32)
)

# The test data is only batched; it is not shuffled.
test_ds = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(32)
)

In [6]:
# Number of batches in train_ds: 60000 training samples / 32 per batch = 1875
print(len(train_ds))

1875


In [7]:
# Iterating over the dataset yields one (images, labels) batch at a time;
# stop right after the first batch.
for image_batch, label_batch in train_ds:
  print(image_batch.shape, label_batch.shape)
  break

# take(n) creates a new dataset with fewer elements (here: only one batch),
# so no explicit break is needed.
for image_batch, label_batch in train_ds.take(1):
  print(image_batch.shape, label_batch.shape)

(32, 28, 28, 1) (32,)
(32, 28, 28, 1) (32,)


Build the `tf.keras` model using the Keras [model subclassing API](https://www.tensorflow.org/guide/keras#model_subclassing):

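The output size of a convolution along each spatial dimension, with input size $W$, kernel size $F$, padding $P$ and stride $S$, is:

$$\text{Out} = \frac{W - F + 2P}{S} + 1$$

For the 28×28 MNIST images and the 3×3 kernel used below (no padding, stride 1):

$$26 = \frac{28 - 3 + 2 \cdot 0}{1} + 1$$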

In [8]:

class MyModel(Model):
  """Small image classifier: Conv2D -> Flatten -> Dense(128) -> Dense(10)."""

  def __init__(self):
    """Create the four layers used by `call`."""
    super().__init__()

    # PyTorch counterpart: self.conv = nn.Conv2d(1, 32, kernel_size=3), i.e.
    # 1 input channel (3 for an RGB image) and 32 output channels.
    # > For 28x28 images the torch layer outputs (BS, 32, 26, 26).
    # `padding` is a string in TF ('valid' or 'same') but an int in PyTorch.
    # The TF version of that layer: 32 filters, kernel size 3.
    self.conv1 = Conv2D(filters=32, kernel_size=3, activation='relu')

    self.flatten = Flatten()  # > Output: (BS, 32*26*26)
    self.d1 = Dense(units=128, activation='relu')  # > Output: (BS, 128)
    self.d2 = Dense(units=10)  # > No activation; > Output: (BS, 10)

  def call(self, x):
    """Apply conv -> flatten -> dense -> dense and return the (BS, 10) output."""
    conv_out = self.conv1(x)
    flat = self.flatten(conv_out)
    hidden = self.d1(flat)
    return self.d2(hidden)

# Create the model instance that train_step and test_step call.
model = MyModel()

Choose an optimizer and loss function for training: 

In [9]:
# from_logits=True because the model's last layer (Dense(10)) has no
# activation, so its raw outputs are passed to the loss.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Adam optimizer constructed with its default arguments.
optimizer = tf.keras.optimizers.Adam()

Select metrics to measure the loss and the accuracy of the model. These metrics accumulate the values over the batches of an epoch; the overall result is printed at the end of each epoch, and the metrics are reset before the next one starts.

In [10]:
# Training metrics, updated once per batch inside train_step:
# the mean of the loss values and the accuracy of the predictions.
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

# The same two metrics for the test data, updated inside test_step.
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

Use `tf.GradientTape` to train the model:

## Eager vs Lazy Execution // Dynamic vs Static Comp. Graphs


Originally, Torch used dynamic computation graphs and TF only static ones. Dynamic graphs allow changing the input while running and are more flexible and Python-native, which also makes debugging a whole lot easier. Static graphs require you to define placeholders for everything that is expected to happen; they may need much less code for simpler symmetric models and provide accelerations that are not possible with dynamic graphs. <br>
With TF 2.0, there is now support for dynamic computation graphs in TF.
<br> <br>

### @tf.function

Introduced in TF 2.0, where dynamic (eager) execution is supported. `tf.function` turns a Python function into a graph. You can do so by adding the decorator or by calling it directly, via `function_that_uses_graph = tf.function(callable_func)`.

Once the function is called the first time it will be traced, i.e. via tf.autograph it is turned into a computation graph.

#### Peculiarities

Plain Python code is executed only once, while the function is being traced to construct the TF graph, so a Python `print` does not run on later calls > Use `tf.print` instead of `print`.


https://stackoverflow.com/questions/46154189/what-is-the-difference-of-static-computational-graphs-in-tensorflow-and-dynamic
https://www.machinelearningplus.com/deep-learning/how-use-tf-function-to-speed-up-python-code-tensorflow/#:~:text=Google%20Bookmarks%20Share-,tf.,to%20create%20portable%20Tensorflow%20models.



In [12]:
# The @tf.function decorator converts the Python function into a callable
# TF graph function.
#
# In eager mode a tf.GradientTape() is needed to record the operations that
# should be differentiated.

@tf.function
def train_step(images, labels):
  """Run one training step on a batch and update the training metrics."""
  with tf.GradientTape() as tape:
    # training=True is only needed if there are layers with different
    # behavior during training versus inference (e.g. Dropout).
    logits = model(images, training=True)
    loss = loss_object(labels, logits)

  # PyTorch equivalent of the forward pass above plus the update below:
  #   loss = compute_loss(model, x)
  #   optimizer.zero_grad()
  #   loss.backward()
  #   optimizer.step()
  variables = model.trainable_variables
  grads = tape.gradient(loss, variables)
  optimizer.apply_gradients(zip(grads, variables))

  # Feed this batch's results into the training metrics.
  train_loss(loss)
  train_accuracy(labels, logits)

Test the model:

In [13]:
@tf.function
def test_step(images, labels):
  """Evaluate one batch and update the test metrics; no weights are changed."""
  # No gradient tape here, as no gradients should be recorded. It is like
  # torch.no_grad() but the other way round: TF records only inside a tape,
  # whereas torch stops recording only inside no_grad().

  # training=False is only needed if there are layers with different
  # behavior during training versus inference (e.g. Dropout).
  logits = model(images, training=False)
  batch_loss = loss_object(labels, logits)

  # Feed this batch's results into the test metrics.
  test_loss(batch_loss)
  test_accuracy(labels, logits)


  

In [14]:
# Number of full passes over the training data.
EPOCHS = 5

for epoch in range(EPOCHS):
  # Reset the metrics at the start of every epoch so that the values printed
  # below describe this epoch only.
  # `reset_state()` is the current name of this method; the older
  # `reset_states()` spelling is deprecated and missing from newer Keras
  # releases.
  train_loss.reset_state()
  train_accuracy.reset_state()
  test_loss.reset_state()
  test_accuracy.reset_state()

  # One pass over the training batches: updates the weights and train metrics.
  for images, labels in train_ds:
    train_step(images, labels)

  # One pass over the test batches: updates the test metrics only.
  for test_images, test_labels in test_ds:
    test_step(test_images, test_labels)

  # Accuracies are fractions; multiply by 100 to report them as percentages.
  print(
    f'Epoch {epoch + 1}, '
    f'Loss: {train_loss.result()}, '
    f'Accuracy: {train_accuracy.result() * 100}, '
    f'Test Loss: {test_loss.result()}, '
    f'Test Accuracy: {test_accuracy.result() * 100}'
  )

Epoch 1, Loss: 0.13406850397586823, Accuracy: 95.97166442871094, Test Loss: 0.059919048100709915, Test Accuracy: 98.00999450683594
Epoch 2, Loss: 0.042148616164922714, Accuracy: 98.69166564941406, Test Loss: 0.0505760982632637, Test Accuracy: 98.43000030517578
Epoch 3, Loss: 0.021483521908521652, Accuracy: 99.27832794189453, Test Loss: 0.06270965188741684, Test Accuracy: 98.20999908447266
Epoch 4, Loss: 0.013704432174563408, Accuracy: 99.57666015625, Test Loss: 0.053873177617788315, Test Accuracy: 98.50999450683594
Epoch 5, Loss: 0.009670468047261238, Accuracy: 99.67832946777344, Test Loss: 0.06497903913259506, Test Accuracy: 98.33999633789062


The image classifier is now trained to ~98% accuracy on this dataset. To learn more, read the [TensorFlow tutorials](https://www.tensorflow.org/tutorials).