In [2]:
import numpy as np

# Training set: ten evenly spaced inputs 2, 4, ..., 20 stored as a column
# vector, labelled 0 for the first five samples and 1 for the last five.
x_data = np.arange(2, 21, 2).reshape(-1, 1)
t_data = np.array([0] * 5 + [1] * 5).reshape(-1, 1)

print("x_data.shape=", x_data.shape, ", t_data.shape=", t_data.shape)

x_data.shape= (10, 1) , t_data.shape= (10, 1)


In [4]:
# Model parameters: a (1, 1) weight matrix and a length-1 bias vector, both
# drawn with np.random.rand. No seed is set in this cell, so the starting
# values change from run to run.
W = np.random.rand(1, 1)
b = np.random.rand(1)
print(
    "W=", W,
    ", W.shape=", W.shape,
    ", b=", b,
    ", b.shape=", b.shape,
)

W= [[0.37122648]] , W.shape= (1, 1) , b= [0.53518048] , b.shape= (1,)


In [5]:
def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated without overflow.

    The direct formula computes ``exp(-x)``, which overflows (and emits a
    RuntimeWarning) once ``x`` is a large negative number. Here the
    exponential is always taken of ``-|x|``, and the sign of ``x`` selects
    the algebraically equivalent expression.

    Args:
        x: scalar or array-like of real numbers.

    Returns:
        The element-wise sigmoid: a NumPy scalar for scalar input, otherwise
        an array with the same shape as ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) lies in (0, 1], so it can never overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x)   (same expression as the direct formula)
    # x <  0: e^x / (1 + e^x)  (equal to it mathematically)
    out = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result into a NumPy scalar and leaves
    # arrays of higher rank unchanged.
    return out[()]

In [6]:
def loss_func(x,t):
    """Summed binary cross-entropy of the logistic model on ``(x, t)``.

    The prediction is ``sigmoid(np.dot(x, W) + b)``, where ``W`` and ``b`` are
    read from module scope rather than passed in.

    Args:
        x: inputs, multiplied with ``W`` via ``np.dot``.
        t: targets, combined element-wise with the predictions.

    Returns:
        ``-sum(t*log(y + eps) + (1 - t)*log((1 - y) + eps))`` with eps = 1e-7.
    """
    eps = 1e-7  # keeps both log arguments strictly positive

    prob = sigmoid(np.dot(x, W) + b)

    positive_term = t * np.log(prob + eps)
    negative_term = (1 - t) * np.log((1 - prob) + eps)
    return -np.sum(positive_term + negative_term)

In [7]:
def numerical_derivative(f, x, delta_x=1e-4):
    """Estimate the gradient of ``f`` with respect to ``x`` by central differences.

    Each element of ``x`` is shifted IN PLACE by ``+delta_x`` and then
    ``-delta_x``, ``f(x)`` is evaluated at both points, and the element is
    restored before moving on. The in-place update is deliberate: it lets a
    caller pass an ``f`` that ignores its argument and reads the same array
    through another reference.

    Args:
        f: callable taking ``x`` and returning a scalar.
        x: floating-point ``numpy.ndarray`` to differentiate with respect to.
           It is temporarily modified during the call and is back to its
           original contents when the call returns or raises.
        delta_x: half-width of the central difference (default ``1e-4``).

    Returns:
        Array with the same shape and dtype as ``x``, holding
        ``(f(x + delta_x) - f(x - delta_x)) / (2 * delta_x)`` per element.

    Raises:
        TypeError: if ``x`` does not have a floating-point dtype. Storing
            ``value +/- delta_x`` into an integer array would truncate the
            perturbation and silently yield a meaningless gradient.
    """
    if not np.issubdtype(x.dtype, np.floating):
        raise TypeError(
            "numerical_derivative requires a floating-point array, got dtype %s"
            % x.dtype
        )

    grad = np.zeros_like(x)

    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]
        center = float(tmp_val)
        try:
            x[idx] = center + delta_x
            fx1 = f(x)   # f(x+delta_x)

            x[idx] = center - delta_x
            fx2 = f(x)   # f(x-delta_x)
        finally:
            # Put the original value back even when f raises, so the caller's
            # array is never left perturbed.
            x[idx] = tmp_val

        grad[idx] = (fx1 - fx2) / (2 * delta_x)

    return grad

In [10]:
def error_val(x, t):
    """Cross-entropy error of the current model on ``(x, t)``.

    Evaluates ``sigmoid(np.dot(x, W) + b)`` with the module-level ``W`` and
    ``b`` and returns the negated sum of the per-element log-likelihood terms.
    """
    tiny = 1e-7    # keeps log() away from log(0), i.e. from diverging to -inf

    y = sigmoid(np.dot(x, W) + b)

    # cross-entropy
    log_likelihood = t*np.log(y + tiny) + (1-t)*np.log((1 - y)+tiny )
    return -np.sum(log_likelihood)

In [8]:
def predict(x):
    """Run the logistic model on ``x`` and threshold the output at 0.5.

    Uses the module-level ``W`` and ``b``. Accepts either a single sample or
    a batch: the original ``if y >= 0.5`` raised ``ValueError`` as soon as
    ``y`` held more than one element.

    Args:
        x: a scalar / single sample, or an array of samples accepted by
           ``np.dot(x, W)``.

    Returns:
        ``(y, result)`` where ``y`` is the sigmoid output and ``result`` is
        the plain int ``1`` (y >= 0.5) or ``0`` when ``y`` has exactly one
        element, otherwise an integer array of 0/1 labels shaped like ``y``.
    """
    z = np.dot(x, W) + b
    y = sigmoid(z)

    labels = (np.asarray(y) >= 0.5).astype(int)
    if labels.size == 1:
        # Single prediction: keep returning a plain Python int.
        result = int(labels.item())
    else:
        result = labels

    return y, result

In [11]:
learning_rate = 1e-2  # if training diverges, rerun with 1e-3 ~ 1e-6 instead


def f(x):
    """Loss over the full training set.

    The argument is ignored: loss_func reads the module-level W and b, and
    numerical_derivative perturbs those same arrays in place while it
    differentiates.
    """
    return loss_func(x_data, t_data)


print("Initial error value = ", error_val(x_data, t_data), "Initial W = ", W, "\n", ", b = ", b )

for step in range(30001):

    # W is updated first, so the gradient for b is taken at the new W.
    W -= learning_rate * numerical_derivative(f, W)
    b -= learning_rate * numerical_derivative(f, b)

    if step % 400:
        continue  # report only every 400th step

    print("step = ", step, "error value = ", error_val(x_data, t_data), "W = ", W, ", b = ",b )

Initial error value =  14.300741711898343 Initial W =  [[0.37122648]] 
 , b =  [0.53518048]
step =  0 error value =  7.437843574484569 W =  [[0.08928897]] , b =  [0.5043708]
step =  400 error value =  2.265867004268756 W =  [[0.36260028]] , b =  [-3.59908231]
step =  800 error value =  1.6481507029983413 W =  [[0.49628694]] , b =  [-5.1556612]
step =  1200 error value =  1.3770952921190607 W =  [[0.58689585]] , b =  [-6.19348418]
step =  1600 error value =  1.2152313346294812 W =  [[0.65761179]] , b =  [-6.99661631]
step =  2000 error value =  1.1037775438750075 W =  [[0.71662897]] , b =  [-7.66332614]
step =  2400 error value =  1.0204819977382427 W =  [[0.76784356]] , b =  [-8.23975753]
step =  2800 error value =  0.9548371763881565 W =  [[0.8134337]] , b =  [-8.75148373]
step =  3200 error value =  0.901147932193933 W =  [[0.85474943]] , b =  [-9.21425195]
step =  3600 error value =  0.8560205654514836 W =  [[0.89268983]] , b =  [-9.63849553]
step =  4000 error value =  0.8172875683

In [12]:
# Classify the single input x = 3; prints the sigmoid output followed by the
# thresholded 0/1 label.
real_val, logical_val = predict(3)
print(real_val, logical_val)

[[2.37983407e-07]] 0
