In [30]:
import tensorflow as tf
from tensorflow import keras

In [31]:
fmnist_data = keras.datasets.fashion_mnist

In [32]:
(x_train, y_train), (x_test, y_test) = fmnist_data.load_data()

In [33]:
x_train, x_test = x_train/255.0, x_test/255.0

In [34]:
init_1 = keras.initializers.VarianceScaling(scale=2.0, mode='fan_avg')

In [35]:
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(100, kernel_initializer=init_1, use_bias=False),
    keras.layers.BatchNormalization(),
    keras.layers.Activation("elu"),
    keras.layers.Dense(75, use_bias=False),
    keras.layers.BatchNormalization(),
    keras.layers.LeakyReLU(alpha=0.3),
    keras.layers.Dense(10, activation="softmax")
])

In [36]:
model.summary()

Power lr

In [37]:
sgd = keras.optimizers.SGD(decay=1e-3)

In [38]:
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=sgd,
              metrics=["accuracy"])

In [39]:
history_power = model.fit(x_train, y_train, epochs=30, validation_split=0.15)

Epoch 1/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.7129 - loss: 0.8828 - val_accuracy: 0.8277 - val_loss: 0.4821
Epoch 2/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.8283 - loss: 0.4968 - val_accuracy: 0.8441 - val_loss: 0.4352
Epoch 3/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.8410 - loss: 0.4522 - val_accuracy: 0.8528 - val_loss: 0.4131
Epoch 4/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8508 - loss: 0.4222 - val_accuracy: 0.8577 - val_loss: 0.4040
Epoch 5/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8581 - loss: 0.4036 - val_accuracy: 0.8622 - val_loss: 0.3852
Epoch 6/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.8616 - loss: 0.3858 - val_accuracy: 0.8666 - val_loss: 0.3778
Epoch 7/30
[1m1

Exp lr

In [41]:
def exp_lr_func(eta0, s):
  def exp_lr(epoch):
    return eta0*0.1**(epoch/s)
  return exp_lr
exp_lr_s = exp_lr_func(0.01, 10)

In [42]:
exp_lr_c = keras.callbacks.LearningRateScheduler(exp_lr_s)

In [43]:
model.compile(loss="sparse_categorical_crossentropy",
              optimizer="sgd",
              metrics=["accuracy"])

In [44]:
history_exp = model.fit(x_train, y_train, epochs=30,
                    validation_split=0.15,
                    callbacks=[exp_lr_c])

Epoch 1/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9117 - loss: 0.2420 - val_accuracy: 0.8848 - val_loss: 0.3320 - learning_rate: 0.0100
Epoch 2/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9125 - loss: 0.2416 - val_accuracy: 0.8857 - val_loss: 0.3268 - learning_rate: 0.0079
Epoch 3/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9138 - loss: 0.2379 - val_accuracy: 0.8872 - val_loss: 0.3276 - learning_rate: 0.0063
Epoch 4/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9152 - loss: 0.2333 - val_accuracy: 0.8862 - val_loss: 0.3276 - learning_rate: 0.0050
Epoch 5/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9181 - loss: 0.2264 - val_accuracy: 0.8870 - val_loss: 0.3288 - learning_rate: 0.0040
Epoch 6/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

Piecewise lr

In [45]:
def pw_lr(epoch):
  if epoch < 10:
    return 0.01
  elif epoch < 20:
    return 0.005
  elif epoch < 40:
    return 0.001
  else:
    return 0.0001

In [46]:
pw_lr_c = keras.callbacks.LearningRateScheduler(pw_lr)

In [47]:
model.compile(loss="sparse_categorical_crossentropy",
              optimizer="sgd",
              metrics=["accuracy"])

In [48]:
history_pw = model.fit(x_train, y_train, epochs=30,
                    validation_split=0.15,
                    callbacks=[pw_lr_c])

Epoch 1/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9167 - loss: 0.2276 - val_accuracy: 0.8809 - val_loss: 0.3462 - learning_rate: 0.0100
Epoch 2/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9178 - loss: 0.2296 - val_accuracy: 0.8853 - val_loss: 0.3350 - learning_rate: 0.0100
Epoch 3/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9172 - loss: 0.2299 - val_accuracy: 0.8853 - val_loss: 0.3373 - learning_rate: 0.0100
Epoch 4/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9170 - loss: 0.2275 - val_accuracy: 0.8817 - val_loss: 0.3469 - learning_rate: 0.0100
Epoch 5/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9205 - loss: 0.2212 - val_accuracy: 0.8870 - val_loss: 0.3346 - learning_rate: 0.0100
Epoch 6/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

Performance

In [49]:
per_lr_c = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.1, patience=10)

In [50]:
model.compile(loss="sparse_categorical_crossentropy",
              optimizer="sgd",
              metrics=["accuracy"])

In [51]:
history_per = model.fit(x_train, y_train, epochs=30,
                    validation_split=0.15,
                    callbacks=[per_lr_c])

Epoch 1/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9294 - loss: 0.1927 - val_accuracy: 0.8842 - val_loss: 0.3493 - learning_rate: 0.0100
Epoch 2/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9276 - loss: 0.1928 - val_accuracy: 0.8874 - val_loss: 0.3442 - learning_rate: 0.0100
Epoch 3/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9304 - loss: 0.1924 - val_accuracy: 0.8819 - val_loss: 0.3626 - learning_rate: 0.0100
Epoch 4/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9307 - loss: 0.1891 - val_accuracy: 0.8892 - val_loss: 0.3479 - learning_rate: 0.0100
Epoch 5/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9318 - loss: 0.1854 - val_accuracy: 0.8842 - val_loss: 0.3664 - learning_rate: 0.0100
Epoch 6/30
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[