In [10]:
import numpy as np
import tensorflow as tf

In [15]:
dataset = tf.keras.datasets.fashion_mnist.load_data()
(X_train_full, y_train_full), (X_test, y_test) = dataset

(X_train_full, y_train_full), (X_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

X_valid, X_train = X_train_full[:5000] / 255.0, X_train_full[5000:] / 255.0
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

In [24]:
class residual_Block(tf.keras.layers.Layer):
    def __init__(self, n_layers, n_neurons, **kwargs):
        super().__init__(**kwargs)
        self.hidden = [
            tf.keras.layers.Dense(n_neurons, activation="relu", kernel_initializer="he_normal")
            for _ in range(n_layers)
        ]

    def call(self, inputs):
        Z = inputs
        for layer in self.hidden:
            Z = layer(Z)
        return inputs + Z

In [25]:
class ResidualRegressor(tf.keras.Model):
    def __init__(self, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.hidden = tf.keras.layers.Dense(30, activation="relu", kernel_initializer="he_normal")
        self.block1 = residual_Block(2, 30)
        self.block2 = residual_Block(2, 30)
        self.out = tf.keras.layers.Dense(output_dim)

    def call(self, inputs):
        Z = self.hidden(inputs)

        for _ in range(1+3):  # Block1 is called 4 times (1 + 3)
            Z = self.block1(Z)
        Z = self.block2(Z)

        return self.out(Z)

In [26]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(30, activation="relu", kernel_initializer="he_normal"),
    residual_Block(2, 30),
    residual_Block(2, 30),
    tf.keras.layers.Dense(1)
])

model.compile(loss="mse", optimizer="sgd")

history = model.fit(X_train, y_train, epochs=5, validation_data=(X_valid, y_valid))

Epoch 1/5
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - loss: 8.8506 - val_loss: 8.2351
Epoch 2/5
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 5ms/step - loss: 8.3103 - val_loss: 8.2313
Epoch 3/5
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - loss: 8.3026 - val_loss: 8.2708
Epoch 4/5
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - loss: 8.2827 - val_loss: 8.2381
Epoch 5/5
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - loss: 8.2780 - val_loss: 8.2344


# Power Scheduling

In [30]:
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, decay=1e-4)



In [31]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(10, activation='softmax')
])

model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [35]:
# Power Scheduling
initial_lr = 0.1
lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=initial_lr,
    decay_steps=1000,
    end_learning_rate=0.01,
    power=1.0,  # Power factor, controls decay curve
    cycle=False  # True for cyclical decay
)

optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

# Exponential Learning

In [36]:
# Exponential Decay Scheduling
initial_lr = 0.1
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=initial_lr,
    decay_steps=1000,
    decay_rate=0.96,  # Decay factor
    staircase=True  # If True, it decays at discrete intervals
)

optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

# Example model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(10, activation='softmax')
])

model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Performance-Based Scheduling

In [37]:
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',  # Monitor validation loss
    factor=0.5,          # Reduce learning rate by this factor
    patience=5,          # Number of epochs with no improvement before reducing LR
    verbose=1,           # Print message when LR is reduced
    min_lr=0.0001        # Minimum learning rate
)

# 1Cycle

In [39]:
def one_cycle_lr_schedule(epoch, lr_max, lr_min, total_epochs):

    # Linear increase to max learning rate during the first half, then, it decreases  for the second half
    lr = lr_min + 0.5 * (lr_max - lr_min) * (1 + np.cos(np.pi * epoch / total_epochs))
    return lr

lr_max = 0.01  # Max
lr_min = 0.0001  # Min
total_epochs = 30  # Total epochs

# Learning rate scheduler
lr_scheduler = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch: one_cycle_lr_schedule(epoch, lr_max, lr_min, total_epochs),
    verbose=1
)