In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
class MLP:
    """Fully connected network with two tanh hidden layers and a linear output.

    The network output is interpreted as a sequence ``h[0], ..., h[Tn-1]``
    indexed along axis 0 and is trained against a loss made of three terms:

    1. a terminal constraint ``(h[Tn-1] - 1)^2``;
    2. an exponential penalty on every positive drop ``h[t-1] - h[t]``;
    3. an exponential penalty on every positive change between two
       consecutive drops.

    When the output has more than one column, each column is treated as an
    independent sequence and the penalties are summed over all columns.
    Parameters are updated with Adam; the optimiser state is kept on the
    instance.
    """

    # Trainable parameters, in the order they are updated by ``train_step``.
    _PARAM_NAMES = ("W1", "b1", "W2", "b2", "W3", "b3")

    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, seed=None):
        """Create the weights (small Gaussian values) and zero biases.

        Args:
            input_size: number of input features.
            hidden_size1: width of the first hidden layer.
            hidden_size2: width of the second hidden layer.
            output_size: number of output columns.
            seed: optional seed; when given, NumPy's global legacy RNG is
                reseeded before the weights are drawn.
        """
        if seed is not None:
            np.random.seed(seed)

        self.W1 = np.random.randn(input_size, hidden_size1) * 0.01
        self.b1 = np.zeros((1, hidden_size1))

        self.W2 = np.random.randn(hidden_size1, hidden_size2) * 0.01
        self.b2 = np.zeros((1, hidden_size2))

        self.W3 = np.random.randn(hidden_size2, output_size) * 0.01
        self.b3 = np.zeros((1, output_size))

        # Adam state: step counter plus first/second moment per parameter.
        self._adam_step = 0
        self._adam_m = {name: np.zeros_like(getattr(self, name)) for name in self._PARAM_NAMES}
        self._adam_v = {name: np.zeros_like(getattr(self, name)) for name in self._PARAM_NAMES}

    def forward(self, X):
        """Run the network on ``X`` and return the linear output ``Z3``.

        The input and every intermediate activation are cached on the
        instance because ``backward`` needs them.
        """
        self.X = X  # used by backward
        self.Z1 = np.dot(X, self.W1) + self.b1
        self.A1 = self.tanh(self.Z1)

        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = self.tanh(self.Z2)

        self.Z3 = np.dot(self.A2, self.W3) + self.b3
        return self.Z3

    def tanh(self, Z):
        """Element-wise hyperbolic tangent."""
        return np.tanh(Z)

    def backward(self, grad_output):
        """Back-propagate ``dLoss/dZ3`` through the layers cached by ``forward``.

        Args:
            grad_output: gradient of the loss with respect to the value
                returned by the most recent ``forward`` call.

        Returns:
            dict mapping each parameter name (``"W1"``, ``"b1"``, ...) to the
            gradient of the loss with respect to that parameter.
        """
        grad_Z3 = np.asarray(grad_output, dtype=float).reshape(self.Z3.shape)
        inputs = np.asarray(self.X, dtype=float)

        grad_W3 = self.A2.T @ grad_Z3
        grad_b3 = grad_Z3.sum(axis=0, keepdims=True)

        # d tanh(z) / dz = 1 - tanh(z)^2, and A = tanh(Z) is already cached.
        grad_Z2 = (grad_Z3 @ self.W3.T) * (1.0 - self.A2 ** 2)
        grad_W2 = self.A1.T @ grad_Z2
        grad_b2 = grad_Z2.sum(axis=0, keepdims=True)

        grad_Z1 = (grad_Z2 @ self.W2.T) * (1.0 - self.A1 ** 2)
        grad_W1 = inputs.T @ grad_Z1
        grad_b1 = grad_Z1.sum(axis=0, keepdims=True)

        return {
            "W1": grad_W1, "b1": grad_b1,
            "W2": grad_W2, "b2": grad_b2,
            "W3": grad_W3, "b3": grad_b3,
        }

    def loss_function(self, h, Tn, lambda1, lambda2):
        """Evaluate the shape-constrained loss on the first ``Tn`` entries of ``h``.

        Args:
            h: sequence values indexed along axis 0 (1-D, or 2-D with one
                sequence per column).
            Tn: number of time steps to use; must be at least 1.
            lambda1: weight of the monotonicity penalty (averaged over the
                ``Tn - 1`` consecutive pairs).
            lambda2: weight of the convexity penalty (averaged over the
                ``Tn - 2`` consecutive triples).

        Returns:
            The loss as a Python float.

        Raises:
            ValueError: if ``Tn`` is smaller than 1.
            IndexError: if ``h`` has fewer than ``Tn`` entries along axis 0.
        """
        h = np.asarray(h, dtype=float)
        if Tn < 1:
            raise ValueError("Tn must be at least 1")

        # (1) terminal constraint
        loss = np.sum((h[Tn - 1] - 1.0) ** 2)

        # drops[i] = h[i] - h[i+1], i.e. d_t for t = i + 1
        seq = h[:Tn]
        drops = seq[:-1] - seq[1:]

        # (2) monotonicity penalty: exp(d) - 1 wherever d > 0.
        # expm1(max(d, 0)) is exactly 0 for d <= 0, so no mask is needed.
        # Skipped for Tn == 1, where there are no pairs to average over.
        if Tn > 1:
            loss += lambda1 / (Tn - 1) * np.sum(np.expm1(np.maximum(drops, 0.0)))

        # (3) convexity penalty on d_t - d_{t-1}; skipped when there are no triples.
        if Tn > 2:
            accel = drops[1:] - drops[:-1]
            loss += lambda2 / (Tn - 2) * np.sum(np.expm1(np.maximum(accel, 0.0)))

        return float(loss)

    @staticmethod
    def backward_loss(h, Tn, lambda1, lambda2):
        """Gradient of ``loss_function`` with respect to ``h``.

        Declared static so that both ``MLP.backward_loss(h, ...)`` and
        ``instance.backward_loss(h, ...)`` work.

        Args:
            h, Tn, lambda1, lambda2: same meaning as in ``loss_function``.

        Returns:
            Float array with the same shape as ``h``; entries past index
            ``Tn - 1`` are zero.

        Raises:
            ValueError: if ``Tn`` is smaller than 1.
            IndexError: if ``h`` has fewer than ``Tn`` entries along axis 0.
        """
        h = np.asarray(h, dtype=float)
        if Tn < 1:
            raise ValueError("Tn must be at least 1")

        grad_h = np.zeros_like(h)

        # ---------- (1) terminal constraint: (h[Tn-1] - 1)^2 ----------
        grad_h[Tn - 1] += 2.0 * (h[Tn - 1] - 1.0)

        seq = h[:Tn]
        drops = seq[:-1] - seq[1:]

        # ---------- (2) monotonicity penalty ----------
        # d_t = h[t-1] - h[t]  ->  d/dh[t-1] = +1, d/dh[t] = -1
        if Tn > 1:
            g_mono = lambda1 / (Tn - 1) * np.where(drops > 0, np.exp(drops), 0.0)
            grad_h[:Tn - 1] += g_mono
            grad_h[1:Tn] -= g_mono

        # ---------- (3) convexity penalty ----------
        # d_t - d_{t-1} = 2*h[t-1] - h[t] - h[t-2]
        #   ->  d/dh[t-2] = -1, d/dh[t-1] = +2, d/dh[t] = -1
        if Tn > 2:
            accel = drops[1:] - drops[:-1]
            g_conv = lambda2 / (Tn - 2) * np.where(accel > 0, np.exp(accel), 0.0)
            grad_h[:Tn - 2] -= g_conv
            grad_h[1:Tn - 1] += 2.0 * g_conv
            grad_h[2:Tn] -= g_conv

        return grad_h

    @staticmethod
    def update(theta, qk, m, v, k, alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
        """Perform one Adam step and return the new parameter and moments.

        Args:
            theta: parameter array.
            qk: gradient (or subgradient) of the loss with respect to ``theta``.
            m: first-moment estimate from the previous step.
            v: second-moment estimate from the previous step.
            k: iteration index (int, starts from 1); used for bias correction.
            alpha: step size.
            beta1: decay rate of the first moment.
            beta2: decay rate of the second moment.
            eps: small constant added to the denominator.

        Returns:
            Tuple ``(theta, m, v)`` with the updated values; the inputs are
            not modified in place.
        """
        # update moments
        m = beta1 * m + (1 - beta1) * qk
        v = beta2 * v + (1 - beta2) * (qk ** 2)

        # bias correction
        m_hat = m / (1 - beta1 ** k)
        v_hat = v / (1 - beta2 ** k)

        # parameter update
        theta = theta - alpha * m_hat / (np.sqrt(v_hat) + eps)

        return theta, m, v

    def train_step(self, X, Y, lambda1, lambda2, lr=0.1):
        """Run one forward/backward pass and one Adam update of every parameter.

        Args:
            X: network input.
            Y: only ``Y.shape[0]`` is used, as the sequence length ``Tn``.
            lambda1: weight of the monotonicity penalty.
            lambda2: weight of the convexity penalty.
            lr: Adam step size.

        Returns:
            The loss evaluated before the parameters were updated.
        """
        Y_hat = self.forward(X)
        Tn = Y.shape[0]
        loss = self.loss_function(Y_hat, Tn, lambda1, lambda2)

        grads = self.backward(self.backward_loss(Y_hat, Tn, lambda1, lambda2))

        self._adam_step += 1
        for name in self._PARAM_NAMES:
            theta, m, v = self.update(
                getattr(self, name),
                grads[name],
                self._adam_m[name],
                self._adam_v[name],
                self._adam_step,
                lr,
            )
            setattr(self, name, theta)
            self._adam_m[name] = m
            self._adam_v[name] = v

        return loss


In [None]:
# Training configuration.
# NOTE(review): the original call passed a single `hidden_size`, which the MLP
# constructor does not accept; both hidden layers are given that width here —
# confirm this is the intended architecture.
HIDDEN_SIZE = 32
LAMBDA1 = 1.0   # monotonicity-penalty weight — placeholder value, TODO tune
LAMBDA2 = 1.0   # convexity-penalty weight — placeholder value, TODO tune
N_EPOCHS = 1000

mlp = MLP(
    input_size=10,
    hidden_size1=HIDDEN_SIZE,
    hidden_size2=HIDDEN_SIZE,
    output_size=5,
    seed=0,
)

# X_train / y_train are not defined in the cells shown here; presumably they
# are created in another cell — make sure that cell runs before this one.
for epoch in range(N_EPOCHS):
    loss = mlp.train_step(X_train, y_train, LAMBDA1, LAMBDA2, lr=0.1)