In [1]:
import numpy as np

class Sigmoid(object):
    """Logistic sigmoid activation, ``1 / (1 + exp(-x))``, applied element-wise."""

    def __init__(self):
        pass

    def __call__(self, X):
        """Evaluate the sigmoid of ``X``.

        Args:
            X: scalar or array-like of real numbers.

        Returns:
            Values in ``[0, 1]`` with the same shape as ``X``.
        """
        X = np.asarray(X)
        # exp(-|x|) is always <= 1, so it can never overflow, unlike exp(-x)
        # for large negative x.
        decay = np.exp(-np.abs(X))
        # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)).
        # Both are the same function; only the numerator differs.
        return np.where(X >= 0, 1.0, decay) / (1.0 + decay)

    def gradient(self, X):
        """Return the element-wise derivative ``sigmoid(X) * (1 - sigmoid(X))``."""
        Q = self(X)
        return Q * (1 - Q)

In [2]:
import numpy as np

class Tanh:
    """Hyperbolic-tangent activation, applied element-wise via ``np.tanh``."""

    def __init__(self):
        pass

    def __call__(self, X):
        """Return ``tanh(X)``."""
        activated = np.tanh(X)
        return activated

    def gradient(self, X):
        """Return the element-wise derivative ``1 - tanh(X)**2``."""
        squashed = np.tanh(X)
        return 1 - squashed**2

In [3]:
import numpy as np

class ReLU(object):
    """Rectified linear unit, ``max(0, x)``, applied element-wise."""

    def __init__(self):
        pass

    def __call__(self, X):
        """Return ``X`` with every negative entry replaced by 0."""
        return np.maximum(0, X)

    def gradient(self, X):
        """Return the element-wise derivative of ReLU as floats.

        Args:
            X: scalar or array-like of real numbers.

        Returns:
            Float array shaped like ``X``: 1.0 where ``X > 0``, else 0.0.
        """
        # The comparison is strict, so the (mathematically undefined)
        # derivative at exactly x == 0 is taken to be 0.
        # np.asarray lets lists and plain scalars work, as they do in __call__.
        return (np.asarray(X) > 0).astype(float)

In [4]:
import numpy as np

class Softmax():
    """Numerically stable softmax activation.

    Inputs with two or more dimensions are normalised along axis 1 (one
    distribution per row of a 2-D array); a 1-D input is treated as a
    single score vector.
    """

    def __init__(self):
        super().__init__()

    def __call__(self, x):
        """Convert scores to probabilities.

        Args:
            x: array-like of scores, either 1-D or with at least 2 dimensions.

        Returns:
            Array of the same shape as ``x`` whose entries along the
            normalised axis are non-negative and sum to 1.
        """
        scores = np.atleast_1d(np.asarray(x))
        # A lone score vector has no axis 1, so normalise over its only axis.
        axis = 1 if scores.ndim >= 2 else 0
        # Subtracting the max makes the largest exponent 0, so exp() cannot
        # overflow; the result is unchanged because softmax is shift-invariant.
        exp_scores = np.exp(scores - np.max(scores, axis=axis, keepdims=True))
        probs = exp_scores / np.sum(exp_scores, axis=axis, keepdims=True)
        return probs

    def derivative(self, x):
        """Not implemented; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def gradient(self, x):
        """Alias of ``derivative`` under the method name ``gradient``.

        Raises:
            NotImplementedError: always, because ``derivative`` is not implemented.
        """
        return self.derivative(x)

The softmax function is prone to two numerical issues: overflow and underflow.

Overflow: It occurs when very large numbers are approximated as infinity

Underflow: It occurs when very small numbers (close to zero on the number line) are approximated as (i.e. rounded to) zero

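To combat these issues when computing softmax, a common trick is to shift the input vector by subtracting its maximum element from every element. This is valid because softmax is invariant to adding a constant to all inputs: for any scalar $c$, $\text{softmax}(x) = \text{softmax}(x + c)$.
For the input vector $x$, define $z = x - \max_i x_i$ (that is, choose $c = -\max_i x_i$), so that $\text{softmax}(z) = \text{softmax}(x)$. The largest exponent is then $0$, which rules out overflow, and the denominator is at least $1$, so underflow can never cause a division by zero.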

In [None]:
def stable_softmax(x, axis=-1):
    """Compute softmax with the max-subtraction trick for numerical stability.

    Args:
        x: array-like of scores. A 1-D input is treated as one score vector.
        axis: axis along which to normalise. The default, the last axis,
            matches the single-vector case and handles batches row by row.

    Returns:
        Array of the same shape as ``x`` whose entries along ``axis`` sum to 1.
    """
    scores = np.asarray(x)
    # np.max (not the builtin max) works on lists and multi-dimensional
    # arrays; keepdims lets the result broadcast back against ``scores``.
    z = scores - np.max(scores, axis=axis, keepdims=True)
    numerator = np.exp(z)
    denominator = np.sum(numerator, axis=axis, keepdims=True)
    return numerator / denominator