<a href="https://colab.research.google.com/github/Koruvika/Koruvika.github.io/blob/master/EXERCISES.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import tensorflow.keras as keras

In [2]:
import numpy as np
import matplotlib.pyplot as plt

plt.style.use('seaborn')

In [24]:
from tqdm.notebook import trange
from collections import OrderedDict

## 12

In [3]:
x = tf.random.normal(shape=(7,3))

In [4]:
layer = keras.layers.LayerNormalization()

In [5]:
y = layer(x)
y

<tf.Tensor: shape=(7, 3), dtype=float32, numpy=
array([[ 0.2096258 ,  1.1005968 , -1.3102226 ],
       [ 0.41919854,  0.9587953 , -1.3779937 ],
       [ 1.1641012 ,  0.11079092, -1.2748921 ],
       [ 0.9463024 ,  0.4295322 , -1.3758347 ],
       [-0.9405673 , -0.44391766,  1.3844848 ],
       [ 1.4109097 , -0.63248986, -0.7784198 ],
       [-0.58080405, -0.825213  ,  1.4060172 ]], dtype=float32)>

In [8]:
class MyLayerNormalization(keras.layers.Layer):
  def __init__(self):
    super().__init__()
    self.epsilon = 0.001
  
  def build(self, input_shape):
    self.alpha = self.add_weight(name='alpha', shape=input_shape[-1:], initializer='ones',  dtype=tf.float32)
    self.beta  = self.add_weight(name='beta',  shape=input_shape[-1:], initializer='zeros', dtype=tf.float32)
    super().build(input_shape)

  def call(self, X):
    mean, var = tf.nn.moments(X, axes=1, keepdims=True)
    std = tf.sqrt(var)
    return tf.multiply(self.alpha, X - mean)/(std + self.epsilon) + self.beta
  
  def compute_output_shape(self, batch_input_shape):
    return batch_input_shape
  def get_config(self):
    base_config = super().get_config()
    return {**base_config, 'epsilon' : self.epsilon}

In [9]:
mylayer = MyLayerNormalization()
mylayer(x)

<tf.Tensor: shape=(7, 3), dtype=float32, numpy=
array([[ 0.20996791,  1.102393  , -1.3123609 ],
       [ 0.41902763,  0.9584043 , -1.377432  ],
       [ 1.1636989 ,  0.11075262, -1.2744514 ],
       [ 0.94774354,  0.43018633, -1.3779298 ],
       [-0.9401393 , -0.44371563,  1.383855  ],
       [ 1.4102072 , -0.6321749 , -0.7780322 ],
       [-0.58053446, -0.82482994,  1.4053645 ]], dtype=float32)>

## 13

### a.

#### Data

In [10]:
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [15]:
X_train = X_train_full[:50000].reshape(50000, -1).astype(np.float32)
X_valid = X_train_full[50000:].reshape(10000, -1).astype(np.float32)
X_test = X_test.reshape(10000, -1).astype(np.float32)

In [16]:
y_train = y_train_full[:50000]
y_valid = y_train_full[50000:]

In [17]:
mean = np.mean(X_train, axis=0)
std = np.std(X_train, axis=0)

X_train_std = (X_train - mean)/std
X_test_std = (X_test - mean)/std
X_valid_std = (X_valid - mean)/std

#### Model

In [23]:
def random_batch(X, y, batch_size=256):
  idx = np.random.randint(len(X), size=batch_size)
  return X[idx], y[idx]

In [19]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

In [20]:
model = keras.models.Sequential([
  keras.layers.InputLayer(input_shape=(784,)),
  keras.layers.Dense(100, activation='relu'),
  keras.layers.Dense(10, activation='softmax')
])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 100)               78500     
                                                                 
 dense_1 (Dense)             (None, 10)                1010      
                                                                 
Total params: 79,510
Trainable params: 79,510
Non-trainable params: 0
_________________________________________________________________


In [27]:
n_epochs = 5
batch_size = 512
n_steps = len(X_train) // batch_size
optimizer = keras.optimizers.Nadam(learning_rate=0.01)
loss_fn = keras.losses.sparse_categorical_crossentropy
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.SparseCategoricalAccuracy()]

In [39]:
with trange(1, n_epochs + 1, desc='All epochs') as epochs: # tạo thanh tiến trình trên cùng
  for epoch in epochs:
    with trange(1, n_steps + 1, desc="Epoch {}/{}".format(epoch, n_epochs)) as steps: # tạo các thành tiến trình cho từng epochs
      for step in steps:
        X_batch, y_batch = random_batch(X_train_std, y_train) # lấy batch size
        with tf.GradientTape() as tape: # khai báo đạo hàm tự động
          y_pred = model(X_batch) # tính giá trị predict
          main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred)) # tính loss trên cả batch
          loss = tf.add_n([main_loss] + model.losses) # cộng loss mới tính được ở dòng trên vào loss của model
        gradients = tape.gradient(loss, model.trainable_variables) # tính đạo hàm của loss trên các biến trainables
        optimizer.apply_gradients(zip(gradients, model.trainable_variables)) # yêu cầu optimizer áp dụng vào các gradient đã xử lí
        for variable in model.variables: # áp dụng constaint lên các variables
          if variable.constraint is not None:
            variable.assign(variable.constraint(variable))
        status = OrderedDict() # khai báo một OrderdedDict
        mean_loss(loss) # lấy mean của các loss của model
        status['loss'] = mean_loss.result().numpy() # gán loss vào OrderdedDict
        for metric in metrics: # tính metric cho step
          metric(y_batch, y_pred)
          status[metric.name] = metric.result().numpy()
        steps.set_postfix(status) # in OrderdedDict ở cuối mỗi thanh tiến trình vào cuối mỗi step
      y_pred = model(X_valid_std) # predict validation
      status["val_loss"] = np.mean(loss_fn(y_valid, y_pred)) # tính val_loss vào cuối mỗi epoch
      status['val_accuracy'] = np.mean(keras.metrics.sparse_categorical_accuracy( # tính val_accuracy vào cuối mỗi epoch
          tf.constant(y_valid, dtype=np.float32), y_pred
      ))
      steps.set_postfix(status) # in val_loss, val_accuracy ở cuối mỗi epoch
    for metric in [mean_loss] + metrics: # cập nhật metric vào cuối mỗi epoch
      metric.reset_states()

All epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1/5:   0%|          | 0/97 [00:00<?, ?it/s]

Epoch 2/5:   0%|          | 0/97 [00:00<?, ?it/s]

Epoch 3/5:   0%|          | 0/97 [00:00<?, ?it/s]

Epoch 4/5:   0%|          | 0/97 [00:00<?, ?it/s]

Epoch 5/5:   0%|          | 0/97 [00:00<?, ?it/s]

### b.


In [40]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

In [42]:
lower_layers = keras.models.Sequential([
    keras.layers.InputLayer(input_shape=(784,)),
    keras.layers.Dense(100, activation='relu')
])

upper_layers = keras.models.Sequential([
    keras.layers.Dense(10, activation='softmax')
])

model = keras.models.Sequential([
    lower_layers, upper_layers
])

In [43]:
lower_optimizer = keras.optimizers.SGD(learning_rate=1e-4)
upper_optimizer = keras.optimizers.Nadam(learning_rate=1e-3)

In [44]:
n_epochs = 5
batch_size = 512
n_steps = len(X_train) // batch_size
loss_fn = keras.losses.sparse_categorical_crossentropy
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.SparseCategoricalAccuracy()]

In [81]:
with trange(1, n_epochs + 1, desc='All epochs') as epochs:
    for epoch in epochs:
        with trange(1, n_steps + 1, desc='Epoch {}/{}'.format(epoch, n_epochs)) as steps:
            for step in steps:
                X_batch, y_batch = random_batch(X_train_std, y_train)
                with tf.GradientTape(persistent=True) as tape:
                    y_pred = model(X_batch)
                    main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
                    loss = tf.add_n([main_loss] + model.losses)
                for layers, optimizer in ((lower_layers, lower_optimizer), (upper_layers, upper_optimizer)):
                    gradients = tape.gradient(loss, layers.trainable_variables)
                    optimizer.apply_gradients(zip(gradients, layers.trainable_variables))
                del tape
                for variable in model.trainable_variables:
                    if variable.constraint is not None:
                        variable.assign(variable.constraint(variable))
                status = OrderedDict()
                mean_loss(loss)
                status['loss'] = mean_loss.result().numpy()
                for metric in metrics:
                    metric(y_batch, y_pred)
                    status['accuracy'] = metric.result().numpy()
                steps.set_postfix(status)
            y_pred = model(X_valid_std)
            status['val_loss'] = tf.reduce_mean(loss_fn(y_valid, y_pred)).numpy()
            status['val_accuracy'] = tf.reduce_mean(keras.metrics.sparse_categorical_accuracy(
                tf.constant(y_valid, dtype=tf.float32), y_pred
            )).numpy()
            steps.set_postfix(status)
        for metric in [mean_loss] + metrics:
            metric.reset_states()
                

All epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1/5:   0%|          | 0/97 [00:00<?, ?it/s]

Epoch 2/5:   0%|          | 0/97 [00:00<?, ?it/s]

Epoch 3/5:   0%|          | 0/97 [00:00<?, ?it/s]

Epoch 4/5:   0%|          | 0/97 [00:00<?, ?it/s]

Epoch 5/5:   0%|          | 0/97 [00:00<?, ?it/s]

In [80]:
status

OrderedDict([('val_loss', 0.5069213), ('val_accuracy', 0.8225)])