# SegmenterPolynomialDecay Schedule

In [None]:
class SegmenterPolynomialDecay(optimizers.schedules.LearningRateSchedule):
    """A LearningRateSchedule that uses the polynomial decay schedule described in the Segmenter paper.

    The learning rate is computed as follows:

    ```python
    def decayed_learning_rate(step):
        step = min(step, total_steps)
        return learning_rate * (1 - step / total_steps) ** power
    ```

    The step is clamped to `total_steps`, so the learning rate reaches 0 at
    `step == total_steps` and stays there for any later step.

    Example Usage:

    ```python
    learning_rate = 0.001
    lr_schedule = SegmenterPolynomialDecay(
        learning_rate,
        total_steps,
        power=0.9
    )

    model.compile(optimizer=optimizers.SGD(learning_rate=lr_schedule),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'])

    model.fit(data, labels, epochs=5)
    ```

    Returns:
      A 1-arg callable learning rate schedule that takes the current optimizer
      step and outputs the decayed learning rate, a scalar `Tensor` of the same
      type as `learning_rate`.
    """

    def __init__(
        self,
        learning_rate,
        total_steps,
        power=0.9,
        name=None,
    ):
        """Applies polynomial decay to the learning rate.

        Args:
          learning_rate: A scalar `float32` or `float64` `Tensor` or a
            Python number.  The initial learning rate.
          total_steps: A scalar `int32` or `int64` `Tensor` or a Python number.
            Must be positive.  See the decay computation above.
          power: A scalar `float32` or `float64` `Tensor` or a
            Python number.  See the decay computation above.
          name: String.  Optional name for the schedule.  Stored on the
            instance and returned by `get_config()`.  Defaults to `None`.
        """
        super().__init__()
        self.learning_rate = learning_rate
        self.total_steps = total_steps
        self.power = power
        self.name = name

    def __call__(self, step):
        """Return the decayed learning rate for the given optimizer step.

        Args:
          step: The current optimizer step (a Python number or scalar tensor).

        Returns:
          A scalar tensor with the same dtype as `learning_rate`.
        """
        learning_rate = tf.convert_to_tensor(self.learning_rate, name="learning_rate")
        dtype = learning_rate.dtype
        total_steps = tf.cast(self.total_steps, dtype)
        power = tf.cast(self.power, dtype)

        # Clamp the step so the base of the power never goes negative: a
        # negative base raised to a fractional power evaluates to NaN, which
        # would silently corrupt training once step exceeds total_steps.
        step = tf.minimum(tf.cast(step, dtype), total_steps)
        lr = learning_rate * ((1 - (step / total_steps)) ** power)

        return lr

    def get_config(self):
        """Return the constructor arguments as a dict for serialization."""
        return {
            "learning_rate": self.learning_rate,
            "total_steps": self.total_steps,
            "power": self.power,
            "name": self.name,
        }

# Demo: evaluate the schedule at every step of a short run and print the
# resulting learning rates, each formatted to 8 decimal places.
learning_rate = 0.001
total_steps = 20
lr_schedule = SegmenterPolynomialDecay(learning_rate, total_steps)
learning_rates = [
    f'{lr_schedule(step_idx):.8f}' for step_idx in range(total_steps)
]
print(f'Learning Rates: {learning_rates}')