<h1> 오차 역전파를 이용한 XOR 실행 </h1>

In [30]:
import numpy as np
import random

In [31]:
data = [
    [[0, 0], [0]],
    [[0, 1], [1]],
    [[1, 0], [1]],
    [[1, 1], [0]]
]

# 실행 횟수, 학습률, 모멘텀 계수 설정
iterations = 5000
lr = 0.1
mo = 0.9

In [32]:
# 활성화 함수 설정
def sigmoid(x, derivative = False):
    if(derivative == True):
        return x * (1 - x)
    return 1 / (1 + np.exp(-x))

def tanh(x, derivative = False):
    if(derivative == True):
        return 1 - x ** 2
    return np.tanh(x)

def makeMatrix(i, j, fill=0.0): # 가중치 배열을 만드는 함수
    mat = []
    for i in range(i):
        mat.append([fill] * j)
    return mat

In [39]:
# 신경망의 실행
class Neural_network:
    
    #초깃값 설정
    def __init__(self, num_x, num_yh, num_yo, bias = 1):
        self.num_x = num_x + bias
        self.num_yh = num_yh
        self.num_yo = num_yo
        
        # 활성화 함수 초깃값
        self.activation_input = [1.0] * self.num_x
        self.activation_hidden = [1.0] * self.num_yh
        self.activation_output = [1.0] * self.num_yo
        
        # 가중치 입력 초깃값
        self.weight_in = makeMatrix(self.num_x, self.num_yh)
        for i in range(self.num_x):
            for j in range(self.num_yh):
                self.weight_in[i][j] = random.random()
        
        # 가중치 출력 초깃값
        self.weight_out = makeMatrix(self.num_yh, self.num_yo)
        for j in range(self.num_yh):
            for k in range(self.num_yo):
                self.weight_out[j][k] = random.random()
            
        # 모멘텀 SGD를 위한 이전 가중치 초깃값
        self.gradient_in = makeMatrix(self.num_x, self.num_yh)
        self.gradient_out = makeMatrix(self.num_yh, self.num_yo)
    
    # 업데이트 함수
    def update(self, inputs):

        # 입력층의 활성화 함수
        for i in range(self.num_x - 1):
            self.activation_input[i] = inputs[i]

        # 은닉층의 활성화 함수
        for j in range(self.num_yh):
            sum = 0.0
            for i in range(self.num_x):
                sum = sum + self.activation_input[i] * self.weight_in[i][j]

            self.activation_hidden[j] = tanh(sum, False)

        # 출력층의 활성화 함수
        for k in range(self.num_yo):
            sum = 0.0
            for j in range(self.num_yh):
                sum = sum + self.activation_hidden[j] * self.weight_out[j][k]

            self.activation_output[k] = tanh(sum, False)

        return self.activation_output[:]
    
    # 역전파 실행 함수
    def back_propagate(self, targets):

        # 델타 출력 계산
        output_delta = [0.0] * self.num_yo
        for k in range(self.num_yo):
            error = targets[k] - self.activation_output[k]

            output_delta[k] = tanh(self.activation_output[k], True) * error

        # 은닉 노드의 오차 함수
        hidden_delta = [0.0] * self.num_yh
        for j in range(self.num_yh):
            error = 0.0
            for k in range(self.num_yo):
                error = error + output_delta[k] * self.weight_out[j][k]

            hidden_delta[j] = tanh(self.activation_hidden[j], True) * error

        # 출력 가중치 업데이트
        for j in range(self.num_yh):
            for k in range(self.num_yo):
                gradient = output_delta[k] * self.activation_hidden[j]
                v = mo * self.gradient_out[j][k] - lr * gradient
                self.weight_out[j][k] += v
                self.gradient_out[j][k] = gradient

        # 입력 가중치 업데이트
            for i in range(self.num_x):
                for j in range(self.num_yh):
                    gradient = hidden_delta[j] * self.activation_input[i]
                    v = mo * self.gradient_in[i][j] - lr * gradient
                    self.weight_in[i][j] += v
                    self.gradient_in[i][j] = gradient

        # 오차 계산 - 최소 제곱법
        error = 0.0
        for k in range(len(targets)):
            error = error + 0.5 * (targets[k] - self.activation_output[k]) ** 2

        return error
    
    # 학습 실행 함수
    def train(self, patterns):
        for i in range(iterations):
            error = 0.0
            for p in patterns:
                inputs = p[0]
                targets = p[1]
                self.update(inputs)
                error = error + self.back_propagate(targets)
            if i % 500 == 0:
                print('error: %-.5f' % error)

    def result(self, patterns):
        for p in patterns:
            print('Input: %s, Predict: %s' % (p[0], self.update(p[0])))

if __name__ == '__main__':
    n = Neural_network(2, 2, 1) # 두 개의 입력 값, 두 개의 레이어, 하나의 출력 값을 갖도록 설정
    
    n.train(data)
    
    n.result(data)

error: 0.60867
error: 0.11098
error: 0.10852
error: 0.10784
error: 0.10752
error: 0.10734
error: 0.10723
error: 0.10714
error: 0.10708
error: 0.10704
Input: [0, 0], Predict: [-0.30166311882010477]
Input: [0, 1], Predict: [0.921478128572891]
Input: [1, 0], Predict: [0.9216086968020379]
Input: [1, 1], Predict: [0.5701262111611856]
