# Chapter 11 - Training Deep Neural Networks Code Reproduction

In [1]:
# Impor umum
import numpy as np
import os
import matplotlib as mpl
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras

# Konfigurasi plot
%matplotlib inline
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

In [2]:
# Contoh penggunaan inisialisasi He
keras.layers.Dense(10, activation="relu", kernel_initializer="he_normal")

# Contoh penggunaan inisialisasi LeCun (untuk aktivasi SELU)
keras.layers.Dense(10, activation="selu", kernel_initializer="lecun_normal")

<Dense name=dense_1, built=False>

In [3]:
# Menggunakan LeakyReLU sebagai layer terpisah
model = keras.models.Sequential([
    # ... (layer sebelumnya)
    keras.layers.Dense(10),
    keras.layers.LeakyReLU(alpha=0.2), # alpha adalah hyperparameter
    # ... (layer selanjutnya)
])

# Menggunakan SELU
# Untuk self-normalization, gunakan aktivasi 'selu' dan inisialisasi 'lecun_normal'
layer_selu = keras.layers.Dense(10, activation="selu",
                                kernel_initializer="lecun_normal")



In [4]:
# Membangun model dengan Batch Normalization
model_bn = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(300, activation="relu"),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(100, activation="relu"),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(10, activation="softmax")
])

# Menampilkan ringkasan model untuk melihat layer BN
model_bn.summary()

  super().__init__(**kwargs)


In [5]:
# Mengatur clipvalue pada optimizer
optimizer_clipval = keras.optimizers.SGD(clipvalue=1.0)
# model.compile(loss="...", optimizer=optimizer_clipval)

# Mengatur clipnorm pada optimizer
optimizer_clipnorm = keras.optimizers.SGD(clipnorm=1.0)
# model.compile(loss="...", optimizer=optimizer_clipnorm)

In [6]:
# Memuat dataset Fashion MNIST
fashion_mnist = keras.datasets.fashion_mnist
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()

# Membagi dan melakukan scaling data
# Dataset A untuk model pre-trained, Dataset B untuk transfer learning
X_valid, X_train = X_train_full[:5000] / 255.0, X_train_full[5000:] / 255.0
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]
X_test = X_test / 255.0

# Membagi data menjadi dataset A (item 0-4) dan B (item 5-9)
def split_dataset(X, y):
    y_5_or_less = (y < 5)
    y_6_or_more = (y >= 5)
    return (X[y_5_or_less], y[y_5_or_less]), \
           (X[y_6_or_more], y[y_6_or_more] - 5)

(X_train_A, y_train_A), (X_train_B, y_train_B) = split_dataset(X_train, y_train)
(X_valid_A, y_valid_A), (X_valid_B, y_valid_B) = split_dataset(X_valid, y_valid)
(X_test_A, y_test_A), (X_test_B, y_test_B) = split_dataset(X_test, y_test)

# 1. Melatih model A pada dataset A
model_A = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation="selu", kernel_initializer="lecun_normal"),
    keras.layers.Dense(100, activation="selu", kernel_initializer="lecun_normal"),
    keras.layers.Dense(5, activation="softmax")
])
model_A.compile(loss="sparse_categorical_crossentropy", optimizer=keras.optimizers.SGD(learning_rate=1e-3), metrics=["accuracy"])
# history_A = model_A.fit(X_train_A, y_train_A, epochs=20, validation_data=(X_valid_A, y_valid_A))
# model_A.save("my_model_A.h5") # Simpan model A


# 2. Membuat model B yang menggunakan kembali layer dari model A
# model_A = keras.models.load_model("my_model_A.h5") # Muat kembali model A
model_A_clone = keras.models.clone_model(model_A)
model_A_clone.set_weights(model_A.get_weights())

model_B_on_A = keras.models.Sequential(model_A_clone.layers[:-1]) # Ambil semua layer kecuali output layer
model_B_on_A.add(keras.layers.Dense(5, activation="softmax")) # Tambahkan output layer baru untuk dataset B

# 3. Membekukan (freeze) layer yang digunakan kembali
for layer in model_B_on_A.layers[:-1]:
    layer.trainable = False

# 4. Melatih model B (hanya melatih output layer baru)
model_B_on_A.compile(loss="sparse_categorical_crossentropy",
                     optimizer=keras.optimizers.SGD(learning_rate=1e-3),
                     metrics=["accuracy"])
# history_B = model_B_on_A.fit(X_train_B, y_train_B, epochs=4, validation_data=(X_valid_B, y_valid_B))

# 5. Fine-tuning: membuka kembali layer dan melatih dengan learning rate rendah
for layer in model_B_on_A.layers[:-1]:
    layer.trainable = True

optimizer = keras.optimizers.SGD(learning_rate=1e-4) # learning rate sangat kecil
model_B_on_A.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
history_fine_tune = model_B_on_A.fit(X_train_B, y_train_B, epochs=4, validation_data=(X_valid_B, y_valid_B))

evaluasi = model_B_on_A.evaluate(X_test_B, y_test_B)
print("\nHasil evaluasi setelah fine-tuning:", evaluasi)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


  super().__init__(**kwargs)


Epoch 1/4
[1m860/860[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.4787 - loss: 1.3808 - val_accuracy: 0.7530 - val_loss: 0.9531
Epoch 2/4
[1m860/860[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7649 - loss: 0.8950 - val_accuracy: 0.8007 - val_loss: 0.7474
Epoch 3/4
[1m860/860[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7977 - loss: 0.7197 - val_accuracy: 0.8208 - val_loss: 0.6425
Epoch 4/4
[1m860/860[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8198 - loss: 0.6263 - val_accuracy: 0.8396 - val_loss: 0.5765
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8246 - loss: 0.5848

Hasil evaluasi setelah fine-tuning: [0.5801527500152588, 0.8276000022888184]


In [7]:
# Momentum optimization
optimizer_momentum = keras.optimizers.SGD(learning_rate=0.001, momentum=0.9)

# Nesterov Accelerated Gradient
optimizer_nesterov = keras.optimizers.SGD(learning_rate=0.001, momentum=0.9, nesterov=True)

# AdaGrad
optimizer_adagrad = keras.optimizers.Adagrad(learning_rate=0.001)

# RMSProp
optimizer_rmsprop = keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9)

# Adam
optimizer_adam = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)

In [8]:
# ### 7.1 Power Scheduling
# lr = lr0 / (1 + steps / s)**c. Di Keras, c=1 dan s = 1/decay.
optimizer_power = keras.optimizers.SGD(learning_rate=0.01, decay=1e-4)

# ### 7.2 Exponential Scheduling
def exponential_decay_fn(epoch):
    return 0.01 * 0.1**(epoch / 20)

# Cara 1: Menggunakan fungsi dan callback LearningRateScheduler
lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay_fn)
# history = model.fit(..., callbacks=[lr_scheduler])

# ### 7.3 Piecewise Constant Scheduling
def piecewise_constant_fn(epoch):
    if epoch < 5:
        return 0.01
    elif epoch < 15:
        return 0.005
    else:
        return 0.001

lr_scheduler_piecewise = keras.callbacks.LearningRateScheduler(piecewise_constant_fn)
# history = model.fit(..., callbacks=[lr_scheduler_piecewise])


# ### 7.4 Performance Scheduling
# Mengurangi learning rate ketika validation loss berhenti membaik
lr_scheduler_perf = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5)
# history = model.fit(..., callbacks=[lr_scheduler_perf])

