Basic MLP.

This is example code for training an MLP classifier.

Questions:
1. Implement the fprop and backprop for the LogSoftmax activation function.
2. There are a total of 4 bugs in the fprop / bprop of ReLU and Dense Layer. Figure out where those bugs are, and fix them.

HINT: There are no shape errors.

Below is the equation for Log Softmax:

$$ \large \textrm{LogSoftMax}(x_{ij}) = \log\left(\frac{\exp (x_{ij})}{\sum_{k} \exp (x_{ik})}\right)$$

In [None]:
import numpy as np

In [None]:
# Activation Functions

class LogSoftmax:
  """Row-wise log-softmax activation.

  For an input `x` of shape (batch_size, in_dim), each row is normalised
  independently:

    y[i, j] = x[i, j] - log(sum_k exp(x[i, k]))
  """

  def __call__(self, x: np.ndarray) -> np.ndarray:
    """Fprop: compute the log-softmax of every row of `x`.

    Args:
      x: Input of shape (batch_size, in_dim).

    Returns:
      Array of the same shape as `x` holding the log-probabilities.
    """
    assert len(x.shape) == 2, "x is shape (batch_size, in_dim)"

    # Subtract the per-row max (not the global max) so the largest exponent
    # in every row is exp(0) = 1. This keeps the row sum >= 1 and prevents
    # log(0) when rows have very different scales.
    shifted = x - x.max(axis=1, keepdims=True)
    # keepdims=True keeps the normaliser as (batch_size, 1) so that it
    # broadcasts across the columns of its own row.
    log_norm = np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    return shifted - log_norm

  def bprop(self, x: np.ndarray, dedy: np.ndarray) -> np.ndarray:
    """Backprop through the log-softmax.

    e: error (or loss)
    y: fprop output
    dedy: de / dy (derivative)
    x: input to this layer

    Since dy[i, j] / dx[i, k] = delta(j, k) - softmax(x)[i, k], the chain
    rule gives

      dedx[i, k] = dedy[i, k] - softmax(x)[i, k] * sum_j dedy[i, j]

    Args:
      x: Input that was fed to the fprop, shape (batch_size, in_dim).
      dedy: Gradient of the error w.r.t. the fprop output, same shape as `x`.

    Returns:
      Gradient of the error w.r.t. `x`, same shape as `x`.
    """
    # exp(log_softmax(x)) is the softmax, computed via the stable fprop.
    softmax = np.exp(self(x))
    return dedy - softmax * dedy.sum(axis=1, keepdims=True)


class ReLU:
  """Rectified linear unit activation: y = max(x, 0), element-wise."""

  def __call__(self, x: np.ndarray) -> np.ndarray:
    """Fprop: clamp negative entries of `x` to zero.

    Args:
      x: Input of shape (batch_size, in_dim).

    Returns:
      Array of the same shape as `x`.
    """
    assert len(x.shape) == 2, "x is shape (batch_size, in_dim)"
    return np.maximum(x, 0)

  def bprop(self, x: np.ndarray, dedy: np.ndarray) -> np.ndarray:
    """Backprop through the ReLU.

    The derivative is 1 where the input was positive and 0 elsewhere
    (0 is used as the subgradient at x == 0).

    Args:
      x: Input that was fed to the fprop.
      dedy: Gradient of the error w.r.t. the fprop output, same shape as `x`.

    Returns:
      Gradient of the error w.r.t. `x`. A new array is returned; `dedy`
      is left untouched so the caller can keep using it.
    """
    # Multiplying by the boolean mask builds a fresh array instead of
    # overwriting the caller's gradient buffer in place.
    return dedy * (x > 0)



In [None]:
# Dense Layer

class DenseLayer:
  """Basic dense layer: y = activation(x @ w + b)."""

  def __init__(
    self,
    in_dim: int,
    out_dim: int,
    # String annotation: it is not evaluated at definition time, so the class
    # does not depend on `typing` names being imported in this file.
    activation: "Optional[Union[LogSoftmax, ReLU]]" = None,
  ):
    """Create a dense layer.

    Initializes weights with Gaussian and bias zeros.

    Args:
      in_dim: input dim
      out_dim: output dim
      activation: if None, no activation (linear projection)
    """
    self.w = np.random.randn(in_dim, out_dim)
    self.b = np.zeros(shape=(out_dim,))
    self.activation = (lambda z: z) if activation is None else activation

  def __call__(self, x: np.ndarray) -> np.ndarray:
    """Fprop the layer.

    Args:
      x: The input of shape (batch_size, in_dim).

    Returns:
      The output of shape (batch_size, out_dim).
    """
    assert len(x.shape) == 2, "x is shape (batch_size, in_dim)"
    # The bias belongs to the pre-activation; the activation is applied to
    # the full affine transform, matching what bprop differentiates.
    return self.activation(np.dot(x, self.w) + self.b)

  def bprop(
    self,
    x: np.ndarray,
    dedy: np.ndarray
  ) -> tuple[np.ndarray, tuple[np.ndarray, np.ndarray]]:
    """Backprop the layer.

    e: error
    y: fprop output
    dedy = de/dy (derivative)

    da: short for deda, or de / da (derivative), where a = x @ w + b is
    the pre-activation.

    Args:
      x: The input that was fed to the fprop, shape (batch_size, in_dim).
      dedy: Gradient of the error w.r.t. the layer output,
        shape (batch_size, out_dim).

    Returns:
      A tuple (dx, (dw, db)): the gradients of the error w.r.t. the input,
      the weights and the bias, shaped like `x`, `self.w` and `self.b`.
    """
    pre_activation = np.dot(x, self.w) + self.b

    # The identity used for activation=None is a plain function without a
    # bprop method; its derivative is 1, so the gradient passes through.
    activation_bprop = getattr(self.activation, "bprop", None)
    da = dedy if activation_bprop is None else activation_bprop(pre_activation, dedy)

    dx = np.dot(da, np.transpose(self.w))
    # de/dw = x^T @ da -- the layer *input*, not the input gradient.
    dw = np.dot(np.transpose(x), da)
    db = np.sum(da, axis=0)

    return (dx, (dw, db))
