# 12_머신러닝 XOR 문제

구현코드

In [1]:
# [1] external function (sigmoid, numerical_derivative)
import numpy as np

# sigmoid 함수
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    ``x`` may be a scalar or a NumPy array; ``np.exp`` is applied
    element-wise, so the result has the same shape as the input.
    """
    exp_of_negated = np.exp(-x)
    return 1 / (1 + exp_of_negated)

# 수치미분 함수
def numerical_derivative(f, x):
    """Estimate the gradient of ``f`` at ``x`` with central differences.

    Every element of ``x`` is perturbed *in place* by ``+delta_x`` and then
    ``-delta_x``, and ``f(x)`` is evaluated after each perturbation. Because
    the perturbation happens in the caller's array, ``f`` may either use its
    argument or read that same array through a closure.

    Args:
        f: Callable invoked as ``f(x)``; its result is stored into one
            element of the gradient, so it should be a scalar.
        x: Writable NumPy array to differentiate with respect to. It should
            have a floating-point dtype: perturbed values are written back
            into ``x``, so an integer array would truncate them.

    Returns:
        An array created with ``np.zeros_like(x)`` whose elements are
        ``(f(x + delta_x) - f(x - delta_x)) / (2 * delta_x)``. An empty ``x``
        yields an empty gradient.
    """
    delta_x = 1e-4  # 0.0001
    grad = np.zeros_like(x)

    # np.ndindex yields every index tuple of x.shape (none for an empty array).
    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]
        try:
            x[idx] = float(tmp_val) + delta_x
            fx1 = f(x)  # f(x + delta_x)

            x[idx] = tmp_val - delta_x
            fx2 = f(x)  # f(x - delta_x)
        finally:
            # Restore the element even if f raises, so the caller's array
            # is never left in a perturbed state.
            x[idx] = tmp_val

        grad[idx] = (fx1 - fx2) / (2 * delta_x)

    return grad

In [5]:
# [2] LogicGate class (__init__, __loss_func, error_val)
# LogicGate Class

class LogicGate:
    """Logistic-regression model of a two-input logic gate.

    The model computes ``sigmoid(x . W + b)`` with a 2x1 weight matrix ``W``
    and a single bias ``b``, and is fitted by gradient descent on the
    cross-entropy loss using numerically estimated gradients.
    """

    def __init__(self, gate_name, xdata, tdata, learning_rate=1e-2):  # xdata, tdata => numpy.array(...)
        """Store the training data and randomly initialise the parameters.

        Args:
            gate_name: Label exposed as ``self.name``.
            xdata: NumPy array of inputs, reshaped to ``(n_samples, 2)``.
            tdata: NumPy array of targets, reshaped to ``(n_samples, 1)``.
            learning_rate: Gradient-descent step size (default ``1e-2``).

        Raises:
            ValueError: If ``xdata`` and ``tdata`` hold a different number
                of samples after reshaping.
        """
        self.name = gate_name

        # Input data and target data: one row per sample.
        self.__xdata = xdata.reshape(-1, 2)
        self.__tdata = tdata.reshape(-1, 1)
        if len(self.__xdata) != len(self.__tdata):
            raise ValueError("xdata and tdata must contain the same number of samples")

        # Weight W (2 x 1 matrix) and bias b, drawn uniformly from [0, 1).
        self.__W = np.random.rand(2, 1)
        self.__b = np.random.rand(1)

        # Learning rate used by train().
        self.__learning_rate = learning_rate

    # Loss function
    def __loss_func(self):
        """Return the summed cross-entropy loss over the training data."""
        delta = 1e-7  # keeps log() away from log(0) = -inf

        z = np.dot(self.__xdata, self.__W) + self.__b
        y = sigmoid(z)

        # cross-entropy
        return -np.sum(self.__tdata*np.log(y + delta) + (1-self.__tdata)*np.log((1-y) + delta))

    # Current loss value
    def error_val(self):
        """Return the current cross-entropy loss (public view of the loss)."""
        return self.__loss_func()

    # Train with numerical differentiation until the loss is minimised
    def train(self, steps=8001, report_every=400):
        """Run gradient descent on W and b, printing the loss as it goes.

        Args:
            steps: Number of update iterations (default ``8001``).
            report_every: Print the loss whenever ``step`` is a multiple of
                this value (default ``400``); a falsy value disables the
                per-step reports.
        """
        # numerical_derivative perturbs self.__W / self.__b in place, so the
        # loss re-reads them from self and can ignore the array it is given.
        def loss(_):
            return self.__loss_func()

        print("Initial error value = ", self.error_val())

        for step in range(steps):

            # In-place updates keep W and b the same array objects.
            self.__W -= self.__learning_rate * numerical_derivative(loss, self.__W)
            self.__b -= self.__learning_rate * numerical_derivative(loss, self.__b)

            if report_every and step % report_every == 0:
                print("step = ", step, "error value = ", self.error_val())

    # Prediction for new input
    def predict(self, input_data):
        """Predict the gate output for one input pair.

        Args:
            input_data: NumPy array holding a single two-element sample. The
                ``y > 0.5`` test below needs a single-element ``y``.

        Returns:
            Tuple ``(y, result)``: the sigmoid output and ``1`` if
            ``y > 0.5`` else ``0``.
        """
        z = np.dot(input_data, self.__W) + self.__b
        y = sigmoid(z)

        if y > 0.5:
            result = 1  # True
        else:
            result = 0  # False

        return y, result

검증코드

In [6]:
# [3] usage (data feeding, AND gate verification)
# All four two-bit input combinations, one per row.
xdata = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
# AND truth table: the target is 1 only for input [1, 1].
tdata = np.array([0, 0, 0, 1])

AND_obj = LogicGate("AND_GATE", xdata, tdata)

# Fit the gate; train() prints the loss before training and every 400 steps.
AND_obj.train()

Initial error value =  3.2005920174076743
step =  0 error value =  3.1795495775461147
step =  400 error value =  1.6118061534157442
step =  800 error value =  1.1805844240965708
step =  1200 error value =  0.9422860912389073
step =  1600 error value =  0.7872294147080747
step =  2000 error value =  0.6767403478919802
step =  2400 error value =  0.5934472969499076
step =  2800 error value =  0.5282002851992399
step =  3200 error value =  0.4756351829696816
step =  3600 error value =  0.4323617776561865
step =  4000 error value =  0.39611597365895834
step =  4400 error value =  0.36532081996567145
step =  4800 error value =  0.3388409381329489
step =  5200 error value =  0.31583658356294025
step =  5600 error value =  0.2956726291507431
step =  6000 error value =  0.27785952590858876
step =  6400 error value =  0.26201373589997595
step =  6800 error value =  0.24783047488525498
step =  7200 error value =  0.2350644869403086
step =  7600 error value =  0.223516203947052
step =  8000 error

In [7]:
# AND gate prediction
# Print the gate name, then the thresholded (0/1) prediction for each input pair.
print(AND_obj.name, "\n")

test_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

for input_data in test_data:
    # predict() returns (sigmoid output, 0/1 decision); only the decision is printed.
    (sigmoid_val, logical_val) = AND_obj.predict(input_data)
    print(input_data, " = ", logical_val, "\n")

AND_GATE 

[0 0]  =  0 

[0 1]  =  0 

[1 0]  =  0 

[1 1]  =  1 



In [8]:
# [3] usage (data feeding, OR gate verification)
# All four two-bit input combinations, one per row.
xdata = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
# OR truth table: the target is 0 only for input [0, 0].
tdata = np.array([0, 1, 1, 1])

OR_obj = LogicGate("OR_GATE", xdata, tdata)

# Fit the gate; train() prints the loss before training and every 400 steps.
OR_obj.train()

Initial error value =  1.5298190735159398
step =  0 error value =  1.5280184630646219
step =  400 error value =  1.020116135781697
step =  800 error value =  0.7534972806168816
step =  1200 error value =  0.5924774200763522
step =  1600 error value =  0.4855412501702685
step =  2000 error value =  0.409796123555399
step =  2400 error value =  0.35358306847701154
step =  2800 error value =  0.31035737225023685
step =  3200 error value =  0.27617334668870197
step =  3600 error value =  0.24851818265119963
step =  4000 error value =  0.22572013892007306
step =  4400 error value =  0.20662648415712512
step =  4800 error value =  0.1904181846554927
step =  5200 error value =  0.17649827687851222
step =  5600 error value =  0.1644219881874202
step =  6000 error value =  0.15385153008496816
step =  6400 error value =  0.14452600801593482
step =  6800 error value =  0.13624088716702337
step =  7200 error value =  0.12883366714959188
step =  7600 error value =  0.12217369000047978
step =  8000 

In [9]:
# OR gate prediction
# Print the gate name, then the thresholded (0/1) prediction for each input pair.
print(OR_obj.name, "\n")

test_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

for input_data in test_data:
    # predict() returns (sigmoid output, 0/1 decision); only the decision is printed.
    (sigmoid_val, logical_val) = OR_obj.predict(input_data)
    print(input_data, " = ", logical_val, "\n")

OR_GATE 

[0 0]  =  0 

[0 1]  =  1 

[1 0]  =  1 

[1 1]  =  1 



In [10]:
# [3] usage (data feeding, NAND gate verification)
# All four two-bit input combinations, one per row.
xdata = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
# NAND truth table: the target is 0 only for input [1, 1].
tdata = np.array([1, 1, 1, 0])

NAND_obj = LogicGate("NAND_GATE", xdata, tdata)

# Fit the gate; train() prints the loss before training and every 400 steps.
NAND_obj.train()

Initial error value =  3.054562866940045
step =  0 error value =  3.048958099014306
step =  400 error value =  1.7427817547520026
step =  800 error value =  1.2437208910185622
step =  1200 error value =  0.9804160091664695
step =  1600 error value =  0.8132360959448572
step =  2000 error value =  0.6957970552828365
step =  2400 error value =  0.6080799915551021
step =  2800 error value =  0.5398138795516486
step =  3200 error value =  0.4850845195907424
step =  3600 error value =  0.4402017229639499
step =  4000 error value =  0.4027245423903027
step =  4400 error value =  0.37096540083050283
step =  4800 error value =  0.34371644143450597
step =  5200 error value =  0.3200886388622848
step =  5600 error value =  0.29941232258515443
step =  6000 error value =  0.2811731057832151
step =  6400 error value =  0.2649692000215093
step =  6800 error value =  0.250482157498661
step =  7200 error value =  0.2374563240593705
step =  7600 error value =  0.22568410309363535
step =  8000 error val

In [12]:
# NAND gate prediction
# Print the gate name, then the thresholded (0/1) prediction for each input pair.
print(NAND_obj.name, "\n")

test_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

for input_data in test_data:
    # predict() returns (sigmoid output, 0/1 decision); only the decision is printed.
    (sigmoid_val, logical_val) = NAND_obj.predict(input_data)
    print(input_data, " = ", logical_val, "\n")

NAND_GATE 

[0 0]  =  1 

[0 1]  =  1 

[1 0]  =  1 

[1 1]  =  0 



In [13]:
# [3] usage (data feeding, XOR gate verification?)
# All four two-bit input combinations, one per row.
xdata = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
# XOR truth table: the target is 1 when exactly one input is 1.
tdata = np.array([0, 1, 1, 0])

XOR_obj = LogicGate("XOR_GATE", xdata, tdata)

# For the XOR gate the loss stalls near 2.77 and stops decreasing; that is the
# value the loss takes when all four sigmoid outputs are 0.5 (about 4 * ln 2).
XOR_obj.train()

Initial error value =  3.224797174702636
step =  0 error value =  3.2135144672252105
step =  400 error value =  2.7821118633231183
step =  800 error value =  2.7752786258966538
step =  1200 error value =  2.7733489267747036
step =  1600 error value =  2.772803349542253
step =  2000 error value =  2.7726489408525246
step =  2400 error value =  2.7726052108296138
step =  2800 error value =  2.772592821453552
step =  3200 error value =  2.7725893106818544
step =  3600 error value =  2.77258831574096
step =  4000 error value =  2.7725880337648783
step =  4400 error value =  2.7725879538482645
step =  4800 error value =  2.7725879311983483
step =  5200 error value =  2.77258792477889
step =  5600 error value =  2.772587922959478
step =  6000 error value =  2.772587922443816
step =  6400 error value =  2.7725879222976673
step =  6800 error value =  2.772587922256245
step =  7200 error value =  2.772587922244505
step =  7600 error value =  2.7725879222411773
step =  8000 error value =  2.7725

In [14]:
# XOR gate prediction => the single gate does not predict XOR correctly
# (the recorded output shows 0, 0, 0, 1 instead of 0, 1, 1, 0).
print(XOR_obj.name, "\n")

test_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

for input_data in test_data:
    # predict() returns (sigmoid output, 0/1 decision); only the decision is printed.
    (sigmoid_val, logical_val) = XOR_obj.predict(input_data)
    print(input_data, " = ", logical_val, "\n")

XOR_GATE 

[0 0]  =  0 

[0 1]  =  0 

[1 0]  =  0 

[1 1]  =  1 



XOR 구현 (1)

=> NAND, OR, AND 조합으로 XOR 구현

In [16]:
# Compute XOR by composing the trained gates:
# XOR(a, b) = AND(NAND(a, b), OR(a, b)).
input_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

final_output = []  # AND-gate outputs, i.e. the XOR result for each input row

for row in input_data:
    # predict() returns (sigmoid output, 0/1 decision); only the 0/1 decision
    # of the first-layer gates is fed into the AND gate.
    _, nand_out = NAND_obj.predict(row)  # NAND output
    _, or_out = OR_obj.predict(row)      # OR output

    _, xor_out = AND_obj.predict(np.array([nand_out, or_out]))
    final_output.append(xor_out)  # AND output == XOR output

for row, xor_out in zip(input_data, final_output):
    # Each result is followed by one blank line.
    print(row, " = ", xor_out, end="\n\n")

[0 0]  =  0

[0 1]  =  1

[1 0]  =  1

[1 1]  =  0



XOR 구현 (2)

- 머신러닝 XOR 문제는 다양한 Gate 조합인 Multi-layer로 해결 할 수 있음. (Layer : 데이터를 처리하거나 계산이 이루어지는 단위)
- 각각의 Gate(NAND, OR, AND)는 Logistic Regression 시스템으로 구성 됨.
- 이전 Gate의 모든 출력(previous output)은 다음 Gate의 입력(next input)으로 들어감

- => 신경망(Neural Network) 기반의 딥러닝(Deep Learning) 핵심 아이디어