In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Activation Functions
class TanhActivation:
    """Element-wise hyperbolic-tangent activation.

    The result of the latest ``forward`` call is cached in ``self.output``
    because the derivative of tanh can be expressed through its own value.
    """

    def forward(self, x):
        """Return ``tanh(x)`` and cache it for the backward pass."""
        activated = np.tanh(x)
        self.output = activated
        return activated

    def backward(self, grad_output):
        """Propagate ``grad_output`` through tanh.

        Uses ``d tanh(x) / dx = 1 - tanh(x)^2`` evaluated on the cached
        output, so ``forward`` must have been called beforehand.
        """
        local_slope = 1 - np.square(self.output)
        return grad_output * local_slope

class LinearActivation:
    """Identity activation: values and gradients pass through unchanged."""

    def forward(self, x):
        """Cache ``x`` as ``self.output`` and return it as-is."""
        self.output = x
        return x

    def backward(self, grad_output):
        """Return the upstream gradient untouched (the derivative is 1)."""
        return grad_output

# Dense Layer
class DenseLayer:
    """Fully connected affine layer computing ``x . W + b``.

    ``forward`` caches its input; ``backward`` uses that cache to fill
    ``grad_weights`` and ``grad_bias`` and returns the gradient with respect
    to the input.
    """

    def __init__(self, input_size, output_size):
        # Small Gaussian initial weights (standard normal scaled by 0.01)
        # and a zero bias row of shape (1, output_size).
        self.weights = 0.01 * np.random.randn(input_size, output_size)
        self.bias = np.zeros((1, output_size))
        # Filled in by backward().
        self.grad_weights = None
        self.grad_bias = None
        # Filled in by forward().
        self.input = None

    def forward(self, x):
        """Remember ``x`` and return the affine transform of it."""
        self.input = x
        projected = np.dot(x, self.weights)
        return projected + self.bias

    def backward(self, grad_output):
        """Store parameter gradients and return the gradient w.r.t. the input.

        Relies on the input cached by the most recent ``forward`` call.
        """
        cached_input = self.input
        self.grad_weights = np.dot(cached_input.T, grad_output)
        # The bias is broadcast over rows, so its gradient sums over axis 0.
        self.grad_bias = np.sum(grad_output, axis=0, keepdims=True)
        return np.dot(grad_output, self.weights.T)

# Neural Network Model
class NeuralDataFusionModel:
    """Three-layer perceptron: ``input_dim -> 5 -> 3 -> 1``.

    The two hidden layers use tanh, the output layer is linear.
    ``parameters`` and ``gradients`` are flat lists ordered as
    ``[W1, b1, W2, b2, W3, b3]``.
    """

    def __init__(self, input_dim):
        self.layer1 = DenseLayer(input_dim, 5)
        self.activation1 = TanhActivation()
        self.layer2 = DenseLayer(5, 3)
        self.activation2 = TanhActivation()
        self.layer3 = DenseLayer(3, 1)
        self.activation3 = LinearActivation()

        # The list holds the layers' own arrays (not copies), so in-place
        # updates applied through this list modify the layers directly.
        self.parameters = [
            tensor
            for dense in (self.layer1, self.layer2, self.layer3)
            for tensor in (dense.weights, dense.bias)
        ]
        # Populated by backward().
        self.gradients = []

    def forward(self, x):
        """Run ``x`` through every stage in order and return the output."""
        stages = (
            self.layer1, self.activation1,
            self.layer2, self.activation2,
            self.layer3, self.activation3,
        )
        out = x
        for stage in stages:
            out = stage.forward(out)
        return out

    def backward(self, grad_output):
        """Back-propagate ``grad_output`` and refresh ``self.gradients``.

        Stages are visited in reverse order of ``forward``. Returns the
        gradient with respect to the network input.
        """
        reversed_stages = (
            self.activation3, self.layer3,
            self.activation2, self.layer2,
            self.activation1, self.layer1,
        )
        grad = grad_output
        for stage in reversed_stages:
            grad = stage.backward(grad)

        # Same ordering as self.parameters.
        self.gradients = [
            tensor
            for dense in (self.layer1, self.layer2, self.layer3)
            for tensor in (dense.grad_weights, dense.grad_bias)
        ]
        return grad

# Delta function
def delta_function(x):
    """Return 1.0 where ``x`` is strictly positive and 0.0 elsewhere.

    This is the indicator used as the (sub)derivative of ``max(z, 0)``;
    at exactly zero it yields 0.0.

    Args:
        x: A NumPy array, NumPy scalar, plain Python number, or a nested
            sequence of numbers.

    Returns:
        A float array with the shape of ``x``, or a NumPy float scalar when
        ``x`` is a scalar.
    """
    # np.greater always yields a NumPy bool (scalar or array), so .astype is
    # available even when x is a plain Python int/float.
    return np.greater(x, 0).astype(float)

# Loss and Gradient Computation
def compute_loss_and_gradients(model, X_norm, K, lambda1=0.001, lambda2=0.001):
    """Evaluate the training loss and store averaged gradients on ``model``.

    ``X_norm`` (features on axis 0, samples on axis 1) is cut column-wise
    into ``N = total_samples // K`` consecutive windows of ``K`` samples,
    one per engine; trailing columns that do not fill a window are ignored.
    Each window is transposed to ``(K, num_features)`` and passed through
    ``model.forward``; column 0 of the result is the trajectory ``h`` (named
    HI in this file).

    For each engine the loss contributions are::

        term1 = (h[-1] - 1) ** 2
        term2 = 1/(K-1) * sum_t max(exp(h[t] - h[t-1]) - 1, 0)
        term3 = 1/(K-2) * sum_t max(exp(h[t] - 2*h[t-1] + h[t-2]) - 1, 0)

    and the returned value is
    ``sum(term1) + lambda1 * sum(term2) + lambda2 * sum(term3)``
    with the sums taken over engines.

    NOTE: the loss is *summed* over engines while the gradients written to
    ``model.gradients`` are *averaged* over the ``N`` engines, so they are
    the gradient of ``loss / N``.

    Args:
        model: Object exposing ``forward(x)``, ``backward(grad)`` and a
            ``gradients`` list that ``backward`` refreshes.
        X_norm: 2-D array of shape ``(num_features, total_samples)``.
        K: Number of samples per engine window (positive integer).
        lambda1: Weight of term2.
        lambda2: Weight of term3.

    Returns:
        The scalar total loss.

    Raises:
        ValueError: If ``K`` is not positive or ``X_norm`` holds fewer than
            ``K`` samples (no complete window).
    """
    _, total_samples = X_norm.shape
    if K <= 0:
        raise ValueError(f"K must be a positive integer, got {K}")
    N = total_samples // K
    if N == 0:
        raise ValueError(
            f"X_norm has {total_samples} samples, fewer than one window of K={K}"
        )

    loss_term1 = 0.0
    loss_term2 = 0.0
    loss_term3 = 0.0
    accumulated_grads = None

    for n in range(N):
        engine_data = X_norm[:, n * K:(n + 1) * K].T
        HI_n = model.forward(engine_data)
        h = HI_n[:, 0]

        grad_h = np.zeros_like(HI_n)
        grad_col = grad_h[:, 0]  # view: in-place writes land in grad_h

        # Term 1: the last value of the trajectory should equal 1.
        end_error = h[-1] - 1
        loss_term1 += end_error ** 2
        grad_col[-1] += 2 * end_error

        # Term 2: penalise positive first differences.
        if K > 1:
            first_diff = np.diff(h)
            exp_d = np.exp(first_diff)
            loss_term2 += np.sum(np.maximum(exp_d - 1, 0)) / (K - 1)
            # d/dd max(exp(d) - 1, 0) = exp(d) where exp(d) > 1, else 0.
            grad_d = (lambda1 / (K - 1)) * (exp_d > 1) * exp_d
            grad_col[1:] += grad_d
            grad_col[:-1] -= grad_d

        # Term 3: penalise positive second differences.
        if K > 2:
            exp_dd = np.exp(np.diff(first_diff))
            loss_term3 += np.sum(np.maximum(exp_dd - 1, 0)) / (K - 2)
            grad_dd = (lambda2 / (K - 2)) * (exp_dd > 1) * exp_dd
            # Second difference is h[t] - 2*h[t-1] + h[t-2].
            grad_col[2:] += grad_dd
            grad_col[1:-1] -= 2 * grad_dd
            grad_col[:-2] += grad_dd

        # Backward runs right after forward so the activations cached inside
        # the model belong to this engine's window.
        model.backward(grad_h)
        if accumulated_grads is None:
            accumulated_grads = [grad.copy() for grad in model.gradients]
        else:
            for acc, grad in zip(accumulated_grads, model.gradients):
                acc += grad

    for acc in accumulated_grads:
        acc /= N

    model.gradients = accumulated_grads
    return loss_term1 + lambda1 * loss_term2 + lambda2 * loss_term3

# Adam Optimizer
class AdamOptimizer:
    """Adam optimizer that updates a list of NumPy parameter arrays in place.

    ``m`` and ``v`` hold the running first- and second-moment estimates, one
    array per parameter; ``t`` counts the number of ``step`` calls.
    """

    def __init__(self, parameters, alpha=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.alpha, self.beta1 = alpha, beta1
        self.beta2, self.epsilon = beta2, epsilon
        self.m = [np.zeros_like(param) for param in parameters]
        self.v = [np.zeros_like(param) for param in parameters]
        self.t = 0

    def step(self, parameters, gradients):
        """Apply one Adam update to ``parameters`` using ``gradients``.

        Both lists must be index-aligned with the list passed to the
        constructor.
        """
        self.t += 1
        # Bias-correction denominators depend only on the step count.
        correction1 = 1 - self.beta1 ** self.t
        correction2 = 1 - self.beta2 ** self.t

        for idx in range(len(parameters)):
            grad = gradients[idx]
            first_moment = self.beta1 * self.m[idx] + (1 - self.beta1) * grad
            self.m[idx] = first_moment
            second_moment = self.beta2 * self.v[idx] + (1 - self.beta2) * (grad ** 2)
            self.v[idx] = second_moment

            m_hat = first_moment / correction1
            v_hat = second_moment / correction2
            parameters[idx] -= self.alpha * m_hat / (np.sqrt(v_hat) + self.epsilon)

# Training Function
def train_model(X_norm, K, epochs=1000, lambda1=0.001, lambda2=0.001,
                alpha=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8,
                verbose=True, print_every=100):
    """Train a fresh ``NeuralDataFusionModel`` with full-batch Adam.

    Args:
        X_norm: 2-D array whose first axis is the feature axis; its length
            sets the model's input dimension.
        K: Window length forwarded to ``compute_loss_and_gradients``.
        epochs: Number of optimisation steps.
        lambda1, lambda2: Loss-term weights forwarded to
            ``compute_loss_and_gradients``.
        alpha, beta1, beta2, epsilon: Adam hyperparameters.
        verbose: Print the loss every ``print_every`` epochs when true.
        print_every: Reporting interval in epochs.

    Returns:
        ``(model, loss_history)`` where ``loss_history`` holds the loss
        computed at each epoch before that epoch's parameter update.
    """
    model = NeuralDataFusionModel(input_dim=X_norm.shape[0])
    optimizer = AdamOptimizer(
        model.parameters, alpha=alpha, beta1=beta1, beta2=beta2, epsilon=epsilon
    )

    loss_history = []
    for epoch in range(epochs):
        # Fills model.gradients as a side effect.
        loss = compute_loss_and_gradients(model, X_norm, K, lambda1, lambda2)
        loss_history.append(loss)
        optimizer.step(model.parameters, model.gradients)

        if not verbose:
            continue
        if (epoch + 1) % print_every == 0:
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.6f}")

    return model, loss_history

# Prediction Function
def predict(model, X_norm, K):
    """Run ``model`` on each complete window of ``K`` columns of ``X_norm``.

    Args:
        model: Object exposing ``forward(x)``.
        X_norm: 2-D array of shape ``(num_features, total_samples)``.
        K: Number of samples (columns) per window.

    Returns:
        A list with one flattened model output per window, in column order.
        Trailing columns that do not fill a whole window are ignored.
    """
    _, total_samples = X_norm.shape
    window_count = total_samples // K
    return [
        model.forward(X_norm[:, w * K:(w + 1) * K].T).flatten()
        for w in range(window_count)
    ]

In [None]:
# Train the model. X_norm and K must already be defined by an earlier cell.
model, loss_history = train_model(
    X_norm, K,
    epochs=1000,
    lambda1=0.001, lambda2=0.001,
    alpha=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8,
    verbose=True, print_every=100,
)