In [2]:
import numpy as np
from datetime import datetime

# 수치미분 함수

def numerical_derivative(f, x):
    """Estimate the gradient of ``f`` with respect to every element of ``x``.

    Uses the central difference (f(x + h) - f(x - h)) / (2h) with h = 1e-4,
    one element at a time.

    ``x`` is perturbed *in place* while ``f`` is evaluated, so an ``f`` that
    reads the same array through a closure or global (rather than through its
    argument) still sees the perturbation. Every element is restored to its
    original value before moving on, even if ``f`` raises.

    Parameters
    ----------
    f : callable
        Called as ``f(x)``; must return a scalar loss.
    x : numpy.ndarray of floating dtype
        Point at which to differentiate. Any number of dimensions.

    Returns
    -------
    numpy.ndarray
        Array with the same shape and dtype as ``x`` holding the partial
        derivatives.

    Raises
    ------
    TypeError
        If ``x`` is not a floating-point ndarray. An integer array cannot
        store ``value + 1e-4``, so the result would silently be all zeros.
    """
    if not isinstance(x, np.ndarray) or not np.issubdtype(x.dtype, np.floating):
        raise TypeError("numerical_derivative requires a floating-point numpy.ndarray")

    delta_x = 1e-4 # 0.0001
    grad = np.zeros_like(x)

    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])

    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]  # scalar copy of the original value
        try:
            x[idx] = float(tmp_val) + delta_x
            fx1 = f(x) # f(x+delta_x)

            x[idx] = tmp_val - delta_x
            fx2 = f(x) # f(x-delta_x)
        finally:
            # Always undo the perturbation so a failing or interrupted f
            # cannot leave the caller's array modified.
            x[idx] = tmp_val

        grad[idx] = (fx1 - fx2) / (2*delta_x)
        it.iternext()

    return grad

# sigmoid 함수

def sigmoid(x):
    """Logistic function 1 / (1 + e**(-x)); element-wise for NumPy arrays."""
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)

In [3]:
# feed forward
def feed_forward(xdata, tdata):
    """Run the network forward and return the cross-entropy loss.

    Reads the module-level parameters W2, b2 (input -> hidden) and
    W3, b3 (hidden -> output); both layers use a sigmoid activation.

    Parameters
    ----------
    xdata : input samples, matrix-multiplied with W2.
    tdata : target values, combined element-wise with the network output.

    Returns
    -------
    Scalar: the binary cross-entropy summed over all elements.
    """
    eps = 1e-7  # keeps the log arguments away from zero

    hidden_out = sigmoid(np.dot(xdata, W2) + b2)   # hidden-layer activation
    y = sigmoid(np.dot(hidden_out, W3) + b3)       # output-layer activation

    # cross-entropy, split into the target=1 and target=0 terms
    pos_term = tdata * np.log(y + eps)
    neg_term = (1 - tdata) * np.log((1 - y) + eps)
    return -np.sum(pos_term + neg_term)



# loss val
def loss_val(xdata, tdata):
    """Return the current cross-entropy loss for reporting.

    This computes exactly the same quantity as ``feed_forward`` (forward pass
    through the module-level W2, b2, W3, b3 followed by summed binary
    cross-entropy), so it delegates instead of keeping a second copy of the
    formula that could drift out of sync.

    Parameters
    ----------
    xdata : input samples.
    tdata : target values.

    Returns
    -------
    Scalar cross-entropy loss.
    """
    return feed_forward(xdata, tdata)

In [4]:
# query, 즉 미래 값 예측 함수
def predict(xdata):
    """Run the network forward and threshold the output at 0.5.

    Reads the module-level parameters W2, b2, W3, b3.

    Parameters
    ----------
    xdata : a single input sample, or a batch of samples (one per row).

    Returns
    -------
    (y, result)
        ``y`` is the sigmoid output of the network. For a single output value
        ``result`` is the plain int 1 or 0 (as before). For a batch, where the
        original ``if y >= 0.5`` raised "truth value of an array is
        ambiguous", ``result`` is an int array of 1/0 with the shape of ``y``.
    """
    z2 = np.dot(xdata, W2) + b2         # hidden-layer pre-activation
    a2 = sigmoid(z2)                    # hidden-layer output

    z3 = np.dot(a2, W3) + b3            # output-layer pre-activation
    y = sigmoid(z3)                     # output-layer output

    if y.size == 1:
        # Single prediction: keep the original scalar 1/0 result.
        if y >= 0.5:
            result = 1  # True
        else:
            result = 0  # False
    else:
        # Batch of predictions: threshold element-wise.
        result = (y >= 0.5).astype(int)

    return y, result

#### 입력데이터, 정답데이터 정의 

In [5]:
# Truth-table inputs shared by the AND / OR / NAND / XOR experiments
xdata = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

# Target column vectors, shape (4, 1): one row per input pair above
and_tdata = np.array([[0], [0], [0], [1]])
or_tdata = np.array([[0], [1], [1], [1]])
nand_tdata = np.array([[1], [1], [1], [0]])
xor_tdata = np.array([[0], [1], [1], [0]])

# Inputs used when querying the trained network
test_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

#### AND 학습 (은닉노드 10개)

In [6]:
# Network shape: 2 inputs -> 10 hidden units -> 1 output
input_nodes = 2
hidden_nodes = 10
output_nodes = 1

# Parameters start as random draws from np.random.rand
W2 = np.random.rand(input_nodes, hidden_nodes)   # input -> hidden weights
b2 = np.random.rand(hidden_nodes)                # hidden-layer bias

W3 = np.random.rand(hidden_nodes, output_nodes)  # hidden -> output weights
b3 = np.random.rand(output_nodes)                # output-layer bias

learning_rate = 0.1


# Loss on the AND targets. The argument is ignored: feed_forward reads the
# module-level W2/b2/W3/b3, which numerical_derivative perturbs in place.
def f(x):
    return feed_forward(xdata, and_tdata)


print("Initial loss value = ", loss_val(xdata, and_tdata))

start_time = datetime.now()

for step in range(10001):
    # Gradient descent, one parameter array at a time (each update sees the
    # arrays already updated earlier in the same step)
    W2 -= numerical_derivative(f, W2) * learning_rate
    b2 -= numerical_derivative(f, b2) * learning_rate
    W3 -= numerical_derivative(f, W3) * learning_rate
    b3 -= numerical_derivative(f, b3) * learning_rate

    # Progress report every 500 steps
    if step % 500 == 0:
        print("step = ", step, "  , loss value = ", loss_val(xdata, and_tdata))

end_time = datetime.now()

print("")
print("Elapsed Time => ", end_time - start_time)

Initial loss value =  12.833106254404475
step =  0   , loss value =  8.21112605673274
step =  500   , loss value =  0.1622462725299347
step =  1000   , loss value =  0.04301540722379691
step =  1500   , loss value =  0.022507313595311686
step =  2000   , loss value =  0.014758247650568098
step =  2500   , loss value =  0.010809169939933484
step =  3000   , loss value =  0.00845115222772078
step =  3500   , loss value =  0.006897728448743354
step =  4000   , loss value =  0.005803472827317618
step =  4500   , loss value =  0.004994307866386929
step =  5000   , loss value =  0.004373518222593551
step =  5500   , loss value =  0.0038833009520438232
step =  6000   , loss value =  0.003487091980841234
step =  6500   , loss value =  0.003160687077731321
step =  7000   , loss value =  0.0028874563830037685
step =  7500   , loss value =  0.0026556138434931944
step =  8000   , loss value =  0.002456583756112827
step =  8500   , loss value =  0.0022839838983772787
step =  9000   , loss value =  

In [7]:
# Query the trained AND network once per test input

for data in test_data:
    real_val, logical_val = predict(data)
    print("real_val", real_val, ", logical_val = ", logical_val)

real_val [3.20010568e-07] , logical_val =  0
real_val [0.00048575] , logical_val =  0
real_val [0.00049221] , logical_val =  0
real_val [0.99909698] , logical_val =  1


#### AND 학습 (은닉노드 1개)

In [7]:
# Network shape: 2 inputs -> 1 hidden unit -> 1 output
input_nodes = 2
hidden_nodes = 1
output_nodes = 1

# Parameters start as random draws from np.random.rand
W2 = np.random.rand(input_nodes, hidden_nodes)   # input -> hidden weights
b2 = np.random.rand(hidden_nodes)                # hidden-layer bias

W3 = np.random.rand(hidden_nodes, output_nodes)  # hidden -> output weights
b3 = np.random.rand(output_nodes)                # output-layer bias

learning_rate = 0.1


# Loss on the AND targets. The argument is ignored: feed_forward reads the
# module-level W2/b2/W3/b3, which numerical_derivative perturbs in place.
def f(x):
    return feed_forward(xdata, and_tdata)


print("Initial loss value = ", loss_val(xdata, and_tdata))

start_time = datetime.now()

for step in range(10001):
    # Gradient descent, one parameter array at a time (each update sees the
    # arrays already updated earlier in the same step)
    W2 -= numerical_derivative(f, W2) * learning_rate
    b2 -= numerical_derivative(f, b2) * learning_rate
    W3 -= numerical_derivative(f, W3) * learning_rate
    b3 -= numerical_derivative(f, b3) * learning_rate

    # Progress report every 500 steps
    if step % 500 == 0:
        print("step = ", step, "  , loss value = ", loss_val(xdata, and_tdata))

end_time = datetime.now()

print("")
print("Elapsed Time => ", end_time - start_time)

Initial loss value =  2.98232154077495
step =  0   , loss value =  2.784312170808808
step =  500   , loss value =  1.6235042975040053
step =  1000   , loss value =  0.16597055202862454
step =  1500   , loss value =  0.07055178879263149
step =  2000   , loss value =  0.04372717373436524
step =  2500   , loss value =  0.03143983393338809
step =  3000   , loss value =  0.02445776867002747
step =  3500   , loss value =  0.01997507031336701
step =  4000   , loss value =  0.01686124870464301
step =  4500   , loss value =  0.014575933735953246
step =  5000   , loss value =  0.012829135803858402
step =  5500   , loss value =  0.011451608678252932
step =  6000   , loss value =  0.010338071064857839
step =  6500   , loss value =  0.009419662620357892
step =  7000   , loss value =  0.008649482005401485
step =  7500   , loss value =  0.007994498349322438
step =  8000   , loss value =  0.0074307872521148215
step =  8500   , loss value =  0.006940601169262827
step =  9000   , loss value =  0.0065105

In [8]:
# Query the trained AND network (1 hidden unit) once per test input

for data in test_data:
    real_val, logical_val = predict(data)
    print("real_val", real_val, ", logical_val = ", logical_val)

real_val [0.00054745] , logical_val =  0
real_val [0.00142787] , logical_val =  0
real_val [0.00142787] , logical_val =  0
real_val [0.99761647] , logical_val =  1


#### OR 검증 (은닉노드 1개)

In [9]:
# Network shape: 2 inputs -> 1 hidden unit -> 1 output
input_nodes = 2
hidden_nodes = 1   # vary this to see whether more hidden units improve accuracy
output_nodes = 1

# Parameters start as random draws from np.random.rand
W2 = np.random.rand(input_nodes, hidden_nodes)   # input -> hidden weights
b2 = np.random.rand(hidden_nodes)                # hidden-layer bias

W3 = np.random.rand(hidden_nodes, output_nodes)  # hidden -> output weights
b3 = np.random.rand(output_nodes)                # output-layer bias

learning_rate = 0.1


# Loss on the OR targets. The argument is ignored: feed_forward reads the
# module-level W2/b2/W3/b3, which numerical_derivative perturbs in place.
def f(x):
    return feed_forward(xdata, or_tdata)


print("Initial loss value = ", loss_val(xdata, or_tdata))

start_time = datetime.now()

for step in range(10001):
    # Gradient descent, one parameter array at a time (each update sees the
    # arrays already updated earlier in the same step)
    W2 -= numerical_derivative(f, W2) * learning_rate
    b2 -= numerical_derivative(f, b2) * learning_rate
    W3 -= numerical_derivative(f, W3) * learning_rate
    b3 -= numerical_derivative(f, b3) * learning_rate

    # Progress report every 400 steps
    if step % 400 == 0:
        print("step = ", step, "  , loss value = ", loss_val(xdata, or_tdata))

end_time = datetime.now()

print("")
print("Elapsed Time => ", end_time - start_time)

Initial loss value =  2.208734981258847
step =  0   , loss value =  2.201269706718813
step =  400   , loss value =  0.2773716809259913
step =  800   , loss value =  0.09762005581049799
step =  1200   , loss value =  0.057308126941807744
step =  1600   , loss value =  0.04022744757235701
step =  2000   , loss value =  0.030888016097815673
step =  2400   , loss value =  0.025025854908522924
step =  2800   , loss value =  0.021013264502166742
step =  3200   , loss value =  0.018098296962974967
step =  3600   , loss value =  0.015886790572545523
step =  4000   , loss value =  0.014152610024920804
step =  4400   , loss value =  0.012756905035894701
step =  4800   , loss value =  0.011609780502553834
step =  5200   , loss value =  0.010650507065168168
step =  5600   , loss value =  0.0098366005976263
step =  6000   , loss value =  0.009137460378972073
step =  6400   , loss value =  0.008530492381580367
step =  6800   , loss value =  0.007998655427764236
step =  7200   , loss value =  0.00752

In [10]:
# Query the trained OR network once per test input

for data in test_data:
    real_val, logical_val = predict(data)
    print("real_val", real_val, ", logical_val = ", logical_val)

real_val [0.00315881] , logical_val =  0
real_val [0.99914251] , logical_val =  1
real_val [0.99914229] , logical_val =  1
real_val [0.99954832] , logical_val =  1


#### NAND 검증 (은닉층 노드 1개)

In [11]:
# Network shape: 2 inputs -> 1 hidden unit -> 1 output
input_nodes = 2
hidden_nodes = 1
output_nodes = 1

# Parameters start as random draws from np.random.rand
W2 = np.random.rand(input_nodes, hidden_nodes)   # input -> hidden weights
b2 = np.random.rand(hidden_nodes)                # hidden-layer bias

W3 = np.random.rand(hidden_nodes, output_nodes)  # hidden -> output weights
b3 = np.random.rand(output_nodes)                # output-layer bias

learning_rate = 0.1


# Loss on the NAND targets. The argument is ignored: feed_forward reads the
# module-level W2/b2/W3/b3, which numerical_derivative perturbs in place.
def f(x):
    return feed_forward(xdata, nand_tdata)


print("Initial loss value = ", loss_val(xdata, nand_tdata))

start_time = datetime.now()

for step in range(10001):
    # Gradient descent, one parameter array at a time (each update sees the
    # arrays already updated earlier in the same step)
    W2 -= numerical_derivative(f, W2) * learning_rate
    b2 -= numerical_derivative(f, b2) * learning_rate
    W3 -= numerical_derivative(f, W3) * learning_rate
    b3 -= numerical_derivative(f, b3) * learning_rate

    # Progress report every 500 steps
    if step % 500 == 0:
        print("step = ", step, "  , loss value = ", loss_val(xdata, nand_tdata))

end_time = datetime.now()

print("")
print("Elapsed Time => ", end_time - start_time)

Initial loss value =  2.279742418390616
step =  0   , loss value =  2.2763508276400275
step =  500   , loss value =  0.3958394607888712
step =  1000   , loss value =  0.10066601881618137
step =  1500   , loss value =  0.054351867327712106
step =  2000   , loss value =  0.03674594526537822
step =  2500   , loss value =  0.02761403315465367
step =  3000   , loss value =  0.0220604711361652
step =  3500   , loss value =  0.018339012446125447
step =  4000   , loss value =  0.015676737652889877
step =  4500   , loss value =  0.013680390213568826
step =  5000   , loss value =  0.012129288511508263
step =  5500   , loss value =  0.010890234041108244
step =  6000   , loss value =  0.009878166706967435
step =  6500   , loss value =  0.009036267773286134
step =  7000   , loss value =  0.00832516260289261
step =  7500   , loss value =  0.00771671747531569
step =  8000   , loss value =  0.007190303664954802
step =  8500   , loss value =  0.006730456390347343
step =  9000   , loss value =  0.006325

In [12]:
# Query the trained NAND network once per test input

for data in test_data:
    real_val, logical_val = predict(data)
    print("real_val", real_val, ", logical_val = ", logical_val)

real_val [0.99946303] , logical_val =  1
real_val [0.99860956] , logical_val =  1
real_val [0.99860956] , logical_val =  1
real_val [0.00232237] , logical_val =  0


#### XOR 검증 (은닉층 노드 1개)

In [8]:
# Network shape: 2 inputs -> 1 hidden unit -> 1 output
input_nodes = 2
hidden_nodes = 1
output_nodes = 1

# Parameters start as random draws from np.random.rand
W2 = np.random.rand(input_nodes, hidden_nodes)   # input -> hidden weights
b2 = np.random.rand(hidden_nodes)                # hidden-layer bias

W3 = np.random.rand(hidden_nodes, output_nodes)  # hidden -> output weights
b3 = np.random.rand(output_nodes)                # output-layer bias

learning_rate = 0.1


# Loss on the XOR targets. The argument is ignored: feed_forward reads the
# module-level W2/b2/W3/b3, which numerical_derivative perturbs in place.
def f(x):
    return feed_forward(xdata, xor_tdata)


print("Initial loss value = ", loss_val(xdata, xor_tdata))

start_time = datetime.now()

for step in range(10001):
    # Gradient descent, one parameter array at a time (each update sees the
    # arrays already updated earlier in the same step)
    W2 -= numerical_derivative(f, W2) * learning_rate
    b2 -= numerical_derivative(f, b2) * learning_rate
    W3 -= numerical_derivative(f, W3) * learning_rate
    b3 -= numerical_derivative(f, b3) * learning_rate

    # Progress report every 500 steps
    if step % 500 == 0:
        print("step = ", step, "  , loss value = ", loss_val(xdata, xor_tdata))

end_time = datetime.now()

print("")
print("Elapsed Time => ", end_time - start_time)

Initial loss value =  3.5145127645602114
step =  0   , loss value =  3.3428168189945833
step =  500   , loss value =  2.77248007931859
step =  1000   , loss value =  2.771025385816299
step =  1500   , loss value =  2.7293876506744863
step =  2000   , loss value =  2.189850249596488
step =  2500   , loss value =  2.0071615387774013
step =  3000   , loss value =  1.9648084900683735
step =  3500   , loss value =  1.9474194329873993
step =  4000   , loss value =  1.938139386846235
step =  4500   , loss value =  1.9324195033297413
step =  5000   , loss value =  1.9285593716797589
step =  5500   , loss value =  1.9257870317846781
step =  6000   , loss value =  1.9237036003853087
step =  6500   , loss value =  1.9220829640362493
step =  7000   , loss value =  1.920787696522099
step =  7500   , loss value =  1.9197296065509046
step =  8000   , loss value =  1.9188495746832586
step =  8500   , loss value =  1.918106525481352
step =  9000   , loss value =  1.9174710537936943
step =  9500   , los

In [9]:
# Query the trained XOR network (1 hidden unit) once per test input

for data in test_data:
    real_val, logical_val = predict(data)
    print("real_val", real_val, ", logical_val = ", logical_val)

real_val [0.00358237] , logical_val =  0
real_val [0.66552041] , logical_val =  1
real_val [0.66552037] , logical_val =  1
real_val [0.66662315] , logical_val =  1


#### XOR 검증 (은닉층 노드 2개)

In [10]:
# Network shape: 2 inputs -> 2 hidden units -> 1 output
input_nodes = 2
hidden_nodes = 2
output_nodes = 1

# Parameters start as random draws from np.random.rand
W2 = np.random.rand(input_nodes, hidden_nodes)   # input -> hidden weights
b2 = np.random.rand(hidden_nodes)                # hidden-layer bias

W3 = np.random.rand(hidden_nodes, output_nodes)  # hidden -> output weights
b3 = np.random.rand(output_nodes)                # output-layer bias

learning_rate = 0.1


# Loss on the XOR targets. The argument is ignored: feed_forward reads the
# module-level W2/b2/W3/b3, which numerical_derivative perturbs in place.
def f(x):
    return feed_forward(xdata, xor_tdata)


print("Initial loss value = ", loss_val(xdata, xor_tdata))

start_time = datetime.now()

for step in range(10001):
    # Gradient descent, one parameter array at a time (each update sees the
    # arrays already updated earlier in the same step)
    W2 -= numerical_derivative(f, W2) * learning_rate
    b2 -= numerical_derivative(f, b2) * learning_rate
    W3 -= numerical_derivative(f, W3) * learning_rate
    b3 -= numerical_derivative(f, b3) * learning_rate

    # Progress report every 500 steps
    if step % 500 == 0:
        print("step = ", step, "  , loss value = ", loss_val(xdata, xor_tdata))

end_time = datetime.now()

print("")
print("Elapsed Time => ", end_time - start_time)

Initial loss value =  3.179683354172884
step =  0   , loss value =  3.058223070999122
step =  500   , loss value =  2.5916427941158613
step =  1000   , loss value =  1.3472084547073138
step =  1500   , loss value =  0.23293631731715067
step =  2000   , loss value =  0.11107913564823321
step =  2500   , loss value =  0.07171948280093633
step =  3000   , loss value =  0.05265015564623191
step =  3500   , loss value =  0.041476751233983755
step =  4000   , loss value =  0.03416225461510036
step =  4500   , loss value =  0.02901253814351057
step =  5000   , loss value =  0.02519547758731775
step =  5500   , loss value =  0.022255679400209127
step =  6000   , loss value =  0.01992339185589257
step =  6500   , loss value =  0.01802884788254184
step =  7000   , loss value =  0.016459960885409082
step =  7500   , loss value =  0.015139793359204222
step =  8000   , loss value =  0.014013806948557264
step =  8500   , loss value =  0.01304227933318645
step =  9000   , loss value =  0.012195602239

In [11]:
# Query the trained XOR network (2 hidden units) once per test input

for data in test_data:
    real_val, logical_val = predict(data)
    print("real_val", real_val, ", logical_val = ", logical_val)

real_val [0.00340643] , logical_val =  0
real_val [0.99757499] , logical_val =  1
real_val [0.99757593] , logical_val =  1
real_val [0.00252184] , logical_val =  0
