<a href="https://colab.research.google.com/github/Davidxswang/ML/blob/master/Note_2_TensorFlow_Keras_Basics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# An easy demo provided by the TensorFlow tutorial

## Import the packages

In [0]:
import tensorflow as tf

def println(*arg, **argm):
  """Print like the built-in ``print`` and then leave blank lines after it.

  All positional and keyword arguments are forwarded unchanged to ``print``.
  Afterwards a spacer (a newline character followed by ``print``'s default
  line ending) is written so that consecutive outputs are visually separated.

  The spacer is written to the same stream as the message: when the caller
  passes ``file=...`` (and optionally ``flush=...``) those are honoured for
  the spacer as well. ``sep`` and ``end`` only affect the message itself.
  """
  print(*arg, **argm)
  # Previously the spacer always went to sys.stdout, even when the message was
  # redirected with file=...; file=None makes print fall back to sys.stdout.
  print('\n', file=argm.get('file'), flush=argm.get('flush', False))

In [2]:
# Note 1 pulled its data from TensorFlow Datasets; here the MNIST dataset
# comes from tensorflow.keras instead.
mnist = tf.keras.datasets.mnist

# Inspect what kind of object mnist is and which attributes it exposes
println('Class of mnist: ', type(mnist))
println('Methods: ', dir(mnist))

# Load the data: load_data() yields a (train, test) pair of (x, y) pairs
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Inspect the container types and the shapes of the x and y data
println('Type of X:', type(x_train),'Type of Y:', type(y_train))
println('Shape of train data X and Y:', x_train.shape, y_train.shape)
println('Shape of test data X and Y:', x_test.shape, y_test.shape)

Class of mnist:  <class 'module'>


Methods:  ['__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '_sys', 'load_data']


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Type of X: <class 'numpy.ndarray'> Type of Y: <class 'numpy.ndarray'>


Shape of train data X and Y: (60000, 28, 28) (60000,)


Shape of test data X and Y: (10000, 28, 28) (10000,)




## Preprocess the data

In [0]:
# Normalize the data by dividing every value by 255.0.
# NOTE: the arrays are overwritten in place, so running this cell a second
# time without reloading the data would divide by 255.0 again.
x_train = x_train / 255.0
x_test = x_test / 255.0

## Build a simple model

In [4]:
# Build a sequential model by stacking the layers one after another
model = tf.keras.models.Sequential()
# Flatten each 28x28 input into a vector
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
# Hidden fully-connected layer
model.add(tf.keras.layers.Dense(128, activation='relu'))
# Dropout with rate 0.2
model.add(tf.keras.layers.Dropout(0.2))
# Output layer: 10 units without an activation, i.e. raw logits
model.add(tf.keras.layers.Dense(10))

# Print an overview of the layers and their parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               100480    
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290      
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________


In [5]:
# The model is callable, so it can be used directly to make predictions
predictions = model(x_train[:5])
println('Type of model output:',type(predictions))
# Make the eager tensor a numpy array
predictions = predictions.numpy()
println('Shape of output:', predictions.shape)
# Predictions in logits: one row per sample, one column per class
println('Predictions in logits:\n',predictions)

# Convert the logits to softmax scores (softmax is applied along the last axis)
softmax = tf.math.softmax(predictions)
println('Softmax scores:\n', softmax.numpy())

# To extract the final answer, take the argmax over the class axis.
# tf.math.argmax reduces over axis 0 by default, which would pick the best
# *sample* for each class; axis=-1 yields one predicted class per sample.
answers = tf.math.argmax(softmax, axis=-1)
println('answers:\n', answers.numpy())

Type of model output: <class 'tensorflow.python.framework.ops.EagerTensor'>


Shape of output: (5, 10)


Predictions in logits:
 [[-0.00230959  0.2214871  -0.4115647   0.2289016   0.26367307 -0.03993828
   0.64832723  0.43176267  0.3787859  -0.5045202 ]
 [-0.07647131  0.65674615 -0.44338775 -0.38423407  0.6609993  -0.27736318
   0.6365055   0.30101457 -0.00919661 -0.3983376 ]
 [ 0.00448105  0.2176928  -0.2755806  -0.42820334  0.32588345 -0.31509233
   0.23399732  0.30821893  0.19844379 -0.1295808 ]
 [ 0.12579493  0.07337835 -0.09894928  0.0238513  -0.00281115  0.18687044
   0.51239896  0.07370916 -0.07527938 -0.6443805 ]
 [-0.7268912   0.5398196  -0.5318779  -0.29351652 -0.34794202  0.02278585
   0.18510334 -0.11922857 -0.14946903  0.31888437]]


Softmax scores:
 [[0.08352656 0.10447641 0.05547373 0.10525393 0.10897814 0.08044197
  0.16010046 0.12892579 0.12227347 0.05054956]
 [0.07866941 0.16377147 0.05450749 0.05782908 0.1644695  0.06435166
  0.16048995 0.11474822 0.08414396 0.057019

In [6]:
# The loss can be computed either by comparing the logits with y
# or by comparing the softmax scores with y.
first_labels = y_train[:5]

# Variant 1: logits with y
sparselogitloss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
loss_1 = sparselogitloss(first_labels, predictions).numpy()
println('Loss calculated by logits and y:',loss_1)

# Variant 2: softmax with y
sparsesoftmaxloss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
loss_2 = sparsesoftmaxloss(first_labels, softmax).numpy()
println('Loss calculated by softmax and y:', loss_2)

# This kind of loss is a negative log probability, so a uniform guess over
# the 10 classes gives -log(1/10).
uniform_probability = 1/10
loss_3_guess = -tf.math.log(uniform_probability).numpy()
println('If we guess, the loss should be:', loss_3_guess)
# The untrained network is therefore very close to guessing randomly

Loss calculated by logits and y: 2.2622967


Loss calculated by softmax and y: 2.262297


If we guess, the loss should be: 2.3025851




In [7]:
# Designate the optimizer, loss function and metrics.
# `metrics` is documented as a list of metrics, so pass a list rather than a
# bare string.
model.compile(optimizer='adam', loss=sparselogitloss, metrics=['acc'])

# Train the network.
# The default batch_size is 32 (batch_size=None). Here the whole training set
# (60000 samples) is used as a single batch, i.e. one weight update per epoch.
history = model.fit(x_train, y_train, epochs=5, batch_size=60000)

# Let's take a look at history.
# The label announces the type, so print type(history) instead of the object.
println('Type of history:',type(history))
println('Method of history:', dir(history))
println('Commonly used history:', type(history.history),'keys of dict:',list(history.history.keys()))
println('Type of loss and acc:', type(history.history['loss']),type(history.history['acc']))
println('Type of epoch:', type(history.epoch))


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Type of history: <tensorflow.python.keras.callbacks.History object at 0x7fdba0785b00>


Method of history: ['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_chief_worker_only', '_implements_predict_batch_hooks', '_implements_test_batch_hooks', '_implements_train_batch_hooks', '_keras_api_names', '_keras_api_names_v1', 'epoch', 'history', 'model', 'on_batch_begin', 'on_batch_end', 'on_epoch_begin', 'on_epoch_end', 'on_predict_batch_begin', 'on_predict_batch_end', 'on_predict_begin', 'on_predict_end', 'on_test_batch_begin', 'on_test_batch_end', 'on_test_begin', 'on_test_end', 'on_train_batch_begin', 'on_train_batch_end', 'on_train_begin', 'on_trai

In [8]:
# Evaluate the trained model on the held-out test data
evaluate = model.evaluate(
  x_test,
  y_test,
)
println('Type of evaluate:', type(evaluate), 'content:', evaluate)

# The result holds the loss followed by the compiled metric
loss, acc = evaluate
println(f'Loss is: {loss}, accuracy is: {acc}')

Type of evaluate: <class 'list'> content: [1.6870594024658203, 0.6169000267982483]


Loss is: 1.6870594024658203, accuracy is: 0.6169000267982483




## A harder demo provided by the TensorFlow tutorial

In [0]:
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model

In [10]:
# We will continue using the mnist dataset.
# The arrays Keras provides have no channel dimension, so one is appended at the tail.
# Only rank-3 (sample, height, width) data is expanded, so re-running this
# cell does not keep stacking additional trailing dimensions.
if len(x_train.shape) == 3:
  x_train = tf.expand_dims(x_train, -1)
if len(x_test.shape) == 3:
  x_test = tf.expand_dims(x_test, -1)
println(f'Shape of x_train is {x_train.shape}, shape of x_test is {x_test.shape}')

Shape of x_train is (60000, 28, 28, 1), shape of x_test is (10000, 28, 28, 1)




In [22]:
# from_tensor_slices accepts the slices as a tuple or a dictionary and turns
# every slice into one element of the dataset.
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# Shuffle with a buffer of 60000, then put 60000 elements into each batch
train_ds = train_ds.shuffle(60000).batch(60000)

test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_ds = test_ds.batch(10000)

# Peek at a single batch to see what the dataset yields
for example in train_ds.take(1):
  println(f'this example is a {type(example)}, first element is a {type(example[0])}, second element is a {type(example[1])}')
  println(f'shape of first element is {example[0].shape}, shape of second element is {example[1].shape}')



this example is a <class 'tuple'>, first element is a <class 'tensorflow.python.framework.ops.EagerTensor'>, second element is a <class 'tensorflow.python.framework.ops.EagerTensor'>


shape of first element is (60000, 28, 28, 1), shape of second element is (60000,)




In [0]:
# The other extreme: subclassing the Keras Model. It is not used as commonly,
# but it is good to know.
# This approach separates the definition of the layers from the forward call.
class MyModel(Model):
  """Small convolutional classifier built by subclassing the Keras Model.

  The layers are created once in ``__init__`` and applied in order in
  ``call``: Conv2D -> Flatten -> Dense(128, relu) -> Dense(10).
  """

  def __init__(self):
    super().__init__()
    self.conv1 = Conv2D(32, 3, activation='relu')
    self.flatten = Flatten()
    self.d1 = Dense(128, activation='relu')
    self.d2 = Dense(10)

  def call(self, x, training=False):
    """Run the forward pass, used both in training and inference.

    Args:
      x: the input of the model.
      training: flag that could be used to let the model behave differently
        at training and inference time; it is not used by this model.

    Returns:
      The output of the final Dense(10) layer.
    """
    for layer in (self.conv1, self.flatten, self.d1, self.d2):
      x = layer(x)
    return x

# Create an instance of the model
model_1 = MyModel()


# This is 
