In [None]:
import numpy as np

class FCLayer:
    """Fully connected (dense) layer trained with a momentum + RMSprop update.

    The layer computes ``w @ x + b`` where ``w`` has shape
    ``(output_size, input_size)`` and ``b`` has shape ``(output_size, 1)``.
    Inputs are laid out column-wise: one column per sample.

    The parameter update in :meth:`backprop` combines an exponential moving
    average of the gradient (momentum) with an exponential moving average of
    the squared gradient (RMSprop), i.e. an Adam-style step without bias
    correction.
    """

    # Added to the RMSprop denominator so the division never hits zero.
    epsilon = 1e-8

    def __init__(self, input_size, output_size, l2_reg_lambda=0):
        """Create the layer with randomly initialised weights and zero biases.

        Args:
            input_size: Number of input features (columns of ``w``).
            output_size: Number of output units (rows of ``w``).
            l2_reg_lambda: L2 regularisation strength applied to the weights
                (not the biases) during :meth:`backprop`.
        """
        # Weights are drawn from N(0, 1 / input_size).
        self.w = np.random.normal(
            loc=0,
            scale=np.sqrt(1 / input_size),
            size=(output_size, input_size),
        )
        self.b = np.zeros((output_size, 1))
        self.l2_reg_lambda = l2_reg_lambda
        # Cached by forward() because backprop() needs it for the weight gradient.
        self.layer_input = None

        # Running averages; they start as scalars and broadcast to the
        # gradient shapes on the first update.
        self.sdw = 0  # RMSprop: average of squared weight gradients
        self.mdw = 0  # Momentum: average of weight gradients
        self.sdb = 0  # RMSprop: average of squared bias gradients
        self.mdb = 0  # Momentum: average of bias gradients

        self.rmsprop_decay_rate = 0.99
        self.momentum_rate = 0.9

    def forward(self, layer_input):
        """Compute the layer output and cache the input for :meth:`backprop`.

        Args:
            layer_input: Array of shape ``(input_size, m)``.

        Returns:
            Array of shape ``(output_size, m)`` equal to ``w @ layer_input + b``.
        """
        self.layer_input = layer_input
        return np.dot(self.w, layer_input) + self.b

    def backprop(self, layer_error, learning_rate=0.01):
        """Update the parameters and return the error for the previous layer.

        Must be called after :meth:`forward`, since it uses the cached input.

        Args:
            layer_error: Gradient of the loss with respect to this layer's
                output, shape ``(output_size, m)``.
            learning_rate: Step size applied to the update.

        Returns:
            Gradient of the loss with respect to this layer's input, shape
            ``(input_size, m)``.
        """
        m = layer_error.shape[1]

        # The error passed backwards must be computed with the weights that
        # produced the forward pass, i.e. before they are updated below.
        input_error = np.dot(self.w.T, layer_error)

        # Batch-averaged gradients; the L2 term only affects the weights.
        dw = (np.dot(layer_error, self.layer_input.T) + self.l2_reg_lambda * self.w) / m
        db = np.sum(layer_error, axis=1, keepdims=True) / m

        rms_decay = self.rmsprop_decay_rate
        self.sdb = rms_decay * self.sdb + (1 - rms_decay) * db * db
        self.sdw = rms_decay * self.sdw + (1 - rms_decay) * dw * dw

        momentum = self.momentum_rate
        self.mdb = momentum * self.mdb + (1 - momentum) * db
        self.mdw = momentum * self.mdw + (1 - momentum) * dw

        # A float epsilon is required here: np.power(10, -8) raises ValueError
        # because NumPy refuses integer bases with negative integer exponents.
        self.b -= learning_rate * self.mdb / (np.sqrt(self.sdb) + self.epsilon)
        self.w -= learning_rate * self.mdw / (np.sqrt(self.sdw) + self.epsilon)
        return input_error