In [1]:
import warnings

warnings.filterwarnings(action='ignore')

# Multi-Layer-Perceptron

In [2]:
import numpy as np

class MulLayer:
    def __init__(self, input_dim, hidden_dim):
        self.X = None
        self.W = np.random.uniform(-1, 1, (input_dim, hidden_dim))
        self.grad = None
    
    def forward(self, X):
        self.X = X
        return X.dot(self.W)
    
    def backward(self, dout):
        self.grad = np.dot(self.X.T, dout) / len(dout)
        return np.dot(dout, self.W.T)

class AddLayer:
    def __init__(self, hidden_dim):
        self.X = None
        self.b = np.random.uniform(-1, 1, (1, hidden_dim))
        self.grad = None
    
    def forward(self, X):
        self.X = X
        return X + self.b
    
    def backward(self, dout):
        self.grad = np.sum(dout, axis = 0) / len(dout)
        return dout

class SigmoidLayer:
    def __init__(self):
        self.X = None

    def sigmoid(self, X):
        return 1 / (1 + np.exp(-X))

    def forward(self, X):
        self.X = X
        return self.sigmoid(X)
    
    def backward(self, dout):
        grad = self.sigmoid(self.X) * (1 - self.sigmoid(self.X))
        return dout * grad

class MSELoss:
    def forward(self, X, Y):
        return np.mean((X - Y.reshape(-1, 1)) ** 2)
    def backward(self, X, Y):
        return X - Y.reshape(-1, 1)

class CrossEntropyLoss:
    def __init__(self, classse):
        self.classse = classse

    def softmax(self, X):
        X = np.exp(X - np.max(X))
        return X / np.sum(X, 1).reshape(-1, 1)

    def forward(self, X, Y):
        epsilon = 1e-10
        Y = np.eye(self.classse)[Y]
        X = self.softmax(X)
        return -np.mean(Y * np.log(X + epsilon))
    
    def backward(self, X, Y):
        Y = np.eye(self.classse)[Y]
        X = self.softmax(X)
        return X - Y

# Regression

- MSE를 손실함수로 사용하고, 경사하강법을 이용하여 모델의 가중치를 업데이트

- 경사하강법은 SGD가 아닌 전체 데이터를 사용하는 Batch GD를 사용함

- 활성화 함수는 Sigmoid를 사용함

- Loss = MSE = (Y - Y_hat)**2 / N

In [3]:
# 데이터 생성
X = np.random.randn(1000, 10)
W = np.random.rand(10)
b = np.random.rand()

Y = X.dot(W) + b

In [4]:
# 모델 정의
W1 = MulLayer(input_dim=10, hidden_dim = 128)
b1 = AddLayer(hidden_dim = 128)
A1 = SigmoidLayer()

W2 = MulLayer(input_dim=128, hidden_dim = 64)
b2 = AddLayer(hidden_dim = 64)
A2 = SigmoidLayer()

W3 = MulLayer(input_dim=64, hidden_dim = 1)
b3 = AddLayer(hidden_dim = 1)

creterion = MSELoss()

In [5]:
# 학습
lr = 0.01
epochs = 1000
layers = [W1, b1, A1, W2, b2, A2, W3, b3]

for epoch in range(1, epochs + 1):
    # 순전파
    output = X
    for layer in layers:
        output = layer.forward(output)
    
    # Loss 계산
    loss = creterion.forward(output, Y)
    if epoch % 100 == 0:
        print(f"Epoch: {epoch} | Loss : {loss}")

    # 역전파
    dout = creterion.backward(output, Y)
    for layer in layers[::-1]:
        dout = layer.backward(dout)

    # 업데이트
    for layer in layers:
        if layer.__class__.__name__ == 'MulLayer':
            layer.W -= lr * layer.grad
        if layer.__class__.__name__ == 'AddLayer':
            layer.b -= lr * layer.grad

Epoch: 100 | Loss : 0.6955597137341711
Epoch: 200 | Loss : 0.29999259860306926
Epoch: 300 | Loss : 0.24426497636646774
Epoch: 400 | Loss : 0.22722615241701571
Epoch: 500 | Loss : 0.2159969070529418
Epoch: 600 | Loss : 0.20650455402936926
Epoch: 700 | Loss : 0.19804849299560867
Epoch: 800 | Loss : 0.1903988349115784
Epoch: 900 | Loss : 0.18341942353260024
Epoch: 1000 | Loss : 0.17700912107396394


# Classification

- CE를 손실함수로 사용하고, 경사하강법을 이용하여 모델의 가중치를 업데이트

- 경사하강법은 SGD가 아닌 전체 데이터를 사용하는 Batch GD를 사용함

- 활성화 함수는 Sigmoid를 사용함

- Loss = CE = -(Y * log(Y_hat)) / N

In [12]:
# 데이터 생성
from sklearn.datasets import load_iris

data = load_iris()

X = data.data
Y = data.target

num_feature = len(data.feature_names)
classse = len(data.target_names)

In [13]:
# 모델 정의
W1 = MulLayer(input_dim=num_feature, hidden_dim = 128)
b1 = AddLayer(hidden_dim = 128)
A1 = SigmoidLayer()

W2 = MulLayer(input_dim=128, hidden_dim = 64)
b2 = AddLayer(hidden_dim = 64)
A2 = SigmoidLayer()

W3 = MulLayer(input_dim=64, hidden_dim = classse)
b3 = AddLayer(hidden_dim = classse)

creterion = CrossEntropyLoss(classse=classse)

In [14]:
# 학습
lr = 0.01
epochs = 1000
layers = [W1, b1, A1, W2, b2, A2, W3, b3]

for epoch in range(1, epochs + 1):
    # 순전파
    output = X
    for layer in layers:
        output = layer.forward(output)
    
    # Loss 계산
    loss = creterion.forward(output, Y)
    if epoch % 100 == 0:
        print(f"Epoch: {epoch} | Loss : {loss} | Acc : {sum(np.argmax(output, axis=1) == Y) / len(Y)}")

    # 역전파
    dout = creterion.backward(output, Y)
    for layer in layers[::-1]:
        dout = layer.backward(dout)

    # 업데이트
    for layer in layers:
        if layer.__class__.__name__ == 'MulLayer':
            layer.W -= lr * layer.grad
        if layer.__class__.__name__ == 'AddLayer':
            layer.b -= lr * layer.grad

Epoch: 100 | Loss : 0.19954912629044208 | Acc : 0.88
Epoch: 200 | Loss : 0.1574075774026065 | Acc : 0.9733333333333334
Epoch: 300 | Loss : 0.13434658445165368 | Acc : 0.9733333333333334
Epoch: 400 | Loss : 0.11858732987061522 | Acc : 0.98
Epoch: 500 | Loss : 0.10632626955306948 | Acc : 0.98
Epoch: 600 | Loss : 0.09610901291170415 | Acc : 0.9866666666666667
Epoch: 700 | Loss : 0.0873418788424845 | Acc : 0.9866666666666667
Epoch: 800 | Loss : 0.07982543850064526 | Acc : 0.9866666666666667
Epoch: 900 | Loss : 0.07347619696158178 | Acc : 0.9866666666666667
Epoch: 1000 | Loss : 0.06815821632355323 | Acc : 0.9866666666666667
