# Single Layer Neural Network

이번 시간에는 딥러닝 알고리즘의 가장 기본이 되는 인공신경망(artificial neural network, ANN), 그 중에서도 single-layer neural network 모델을 구현합니다. 오늘은 크게 세 가지 방식, 1) Random Search, 2) h-step Search, 3) Gradient Descent 로 모델을 학습하는 법을 배우며, 이 중에 어떤 것이 가장 좋고 어떤 것을 선택해야 하는지를 배웁니다.


In [2]:
import numpy as np

## Case 1 - 0.3 x X1 + 0.5 x X2 + 0.1

### Load Dataset

In [7]:
# First input feature: 100 samples drawn uniformly from [0, 1).
x1 = np.random.random_sample(size=100)

# Show the shape and the first ten samples as a sanity check.
print(x1.shape)
x1[:10]

(100,)


array([ 0.32292405,  0.02824925,  0.77935433,  0.51237155,  0.36911944,
        0.77536662,  0.52255182,  0.37368944,  0.03106781,  0.59215718])

In [8]:
# Second input feature: 100 samples drawn uniformly from [0, 1).
x2 = np.random.random_sample(size=100)

# Show the shape and the first ten samples as a sanity check.
print(x2.shape)
x2[:10]

(100,)


array([ 0.39483935,  0.67122232,  0.17666455,  0.18080412,  0.36707923,
        0.12604496,  0.65165734,  0.87850773,  0.98550692,  0.76684148])

In [9]:
# Noise-free target the models below try to recover:
# weight 0.3 on x1, weight 0.5 on x2, and a bias of 0.1.
y = x1 * 0.3 + x2 * 0.5 + 0.1

# Show the shape and the first ten targets as a sanity check.
print(y.shape)
y[:10]

(100,)


array([ 0.39429689,  0.44408594,  0.42213858,  0.34411353,  0.39427545,
        0.39563247,  0.58259422,  0.6513607 ,  0.6020738 ,  0.66106789])

### First idea: Random Search

In [16]:
# Random search: draw (w1, w2, b) uniformly from [-1, 1) num_epoch times and
# remember whichever draw gives the smallest mean absolute error against y.
num_epoch = 10000

# Best candidate seen so far. Starting the error at infinity means the first
# draw with a finite error is always recorded.
best_epoch = best_w1 = best_w2 = best_b = None
best_error = np.inf

for epoch in range(num_epoch):
    # One independent draw per parameter, in the order w1, w2, b.
    w1 = np.random.uniform(low=-1.0, high=1.0)
    w2 = np.random.uniform(low=-1.0, high=1.0)
    b = np.random.uniform(low=-1.0, high=1.0)

    # Score the candidate with the mean absolute error of its predictions.
    y_predict = x1 * w1 + x2 * w2 + b
    error = np.mean(np.abs(y_predict - y))

    # Nothing to record unless this draw strictly beats the best so far.
    if not error < best_error:
        continue

    best_epoch, best_error = epoch, error
    best_w1, best_w2, best_b = w1, w2, b

    print("{0:4} w1 = {1:.5f}, w2 = {2:.5f}, b = {3:.5f}, error = {4:.5f}".format(epoch, w1, w2, b, error))

# Final summary: the best candidate found over all epochs.
print("----" * 15)
print("{0:4} w1 = {1:.5f}, w2 = {2:.5f}, b = {3:.5f}, error = {4:.5f}".format(best_epoch, best_w1, best_w2, best_b, best_error))

   0 w1 = -0.16538, w2 = -0.64619, b = -0.07813, error = 0.92481
   2 w1 = 0.60732, w2 = -0.96803, b = 0.42562, error = 0.37903
   3 w1 = -0.29493, w2 = -0.65896, b = 0.64102, error = 0.35231
   5 w1 = 0.87899, w2 = -0.38937, b = 0.22045, error = 0.23904
   7 w1 = -0.24248, w2 = 0.50377, b = 0.38434, error = 0.14804
  10 w1 = 0.18702, w2 = 0.87537, b = 0.09112, error = 0.13302
  18 w1 = 0.68040, w2 = 0.19399, b = -0.00064, error = 0.13270
  42 w1 = 0.18314, w2 = 0.24889, b = 0.30489, error = 0.06987
  54 w1 = 0.35682, w2 = 0.37942, b = 0.10239, error = 0.03871
 442 w1 = 0.38362, w2 = 0.36644, b = 0.12395, error = 0.03505
3398 w1 = 0.28501, w2 = 0.62950, b = 0.05357, error = 0.03001
3612 w1 = 0.25198, w2 = 0.48754, b = 0.09858, error = 0.02786
8173 w1 = 0.35903, w2 = 0.48740, b = 0.07439, error = 0.01626
------------------------------------------------------------
8173 w1 = 0.35903, w2 = 0.48740, b = 0.07439, error = 0.01626


### Second Idea - h-step Search

In [17]:
# h-step search: start from random parameters and, once per epoch, try nudging
# each of w1, w2 and b by +h and then by -h. A nudge is kept only when it
# lowers the mean absolute error between the prediction and y.
num_epoch = 10000

w1 = np.random.uniform(low=-1.0, high=1.0)
w2 = np.random.uniform(low=-1.0, high=1.0)
b = np.random.uniform(low=-1.0, high=1.0)

# Fixed step size; exactly one parameter is moved by this amount per trial.
h = 0.01

for epoch in range(num_epoch):
    # Error of the parameters as they stand at the start of this epoch.
    y_predict = x1 * w1 + x2 * w2 + b
    current_error = np.abs(y_predict - y).mean()

    # Stop as soon as the fit is good enough.
    if current_error < 0.005:
        break

    # --- w1: try +h first, fall back to -h only if +h did not help ---
    y_predict = x1 * (w1 + h) + x2 * w2 + b
    h_plus_error = np.abs(y_predict - y).mean()
    if h_plus_error < current_error:
        w1 = w1 + h
        # Refresh the baseline: the trials below must beat the error of the
        # parameters we actually hold now, not the error from the top of the
        # epoch, or they could be accepted while making the fit worse.
        current_error = h_plus_error
    else:
        y_predict = x1 * (w1 - h) + x2 * w2 + b
        h_minus_error = np.abs(y_predict - y).mean()
        if h_minus_error < current_error:
            w1 = w1 - h
            current_error = h_minus_error

    # --- w2: same procedure, measured against the refreshed baseline ---
    y_predict = x1 * w1 + x2 * (w2 + h) + b
    h_plus_error = np.abs(y_predict - y).mean()
    if h_plus_error < current_error:
        w2 = w2 + h
        current_error = h_plus_error
    else:
        y_predict = x1 * w1 + x2 * (w2 - h) + b
        h_minus_error = np.abs(y_predict - y).mean()
        if h_minus_error < current_error:
            w2 = w2 - h
            current_error = h_minus_error

    # --- b: same procedure, measured against the refreshed baseline ---
    y_predict = x1 * w1 + x2 * w2 + (b + h)
    h_plus_error = np.abs(y_predict - y).mean()
    if h_plus_error < current_error:
        b = b + h
        current_error = h_plus_error
    else:
        y_predict = x1 * w1 + x2 * w2 + (b - h)
        h_minus_error = np.abs(y_predict - y).mean()
        if h_minus_error < current_error:
            b = b - h
            current_error = h_minus_error

# Because current_error is refreshed on every accepted step, the value printed
# here matches the printed parameters even when the loop runs out of epochs
# instead of hitting the early-stop threshold.
print("{0} w1 = {1:.5f}, w2 = {2:.5f} b = {3:.5f} error = {4:.5f}".format(epoch, w1, w2, b, current_error))

98 w1 = 0.28303, w2 = 0.49525 b = 0.10730 error = 0.00430


### Third Idea - Gradient Descent

In [22]:
# Gradient descent: start from random parameters and repeatedly move each one
# against the mean of (prediction - target) times that parameter's input,
# scaled by learning_rate. Progress is reported as mean absolute error.
num_epoch = 100
learning_rate = 1.1

w1 = np.random.uniform(low=-1.0, high=1.0)
w2 = np.random.uniform(low=-1.0, high=1.0)
b = np.random.uniform(low=-1.0, high=1.0)

for epoch in range(num_epoch):
    y_predict = x1 * w1 + x2 * w2 + b
    # Signed prediction error, shared by the error metric and all three updates.
    residual = y_predict - y

    error = np.mean(np.abs(residual))
    # Stop before updating once the fit is good enough, so the summary below
    # reports parameters that match the reported error.
    if error < 0.005:
        break

    # All three steps use the residual of the same (pre-update) prediction.
    w1 -= learning_rate * np.mean(residual * x1)
    w2 -= learning_rate * np.mean(residual * x2)
    b -= learning_rate * np.mean(residual)

    # Log every tenth epoch; the error shown was measured before this update.
    if epoch % 10 == 0:
        print("{0:2} w1 = {1:.5f}, w2 = {2:.5f} b = {3:.5f} error = {4:.5f}".format(epoch, w1, w2, b, error))

print("----" * 15)
print("{0:2} w1 = {1:.5f}, w2 = {2:.5f} b = {3:.5f} error = {4:.5f}".format(epoch, w1, w2, b, error))

 0 w1 = 0.56740, w2 = -0.44094 b = 0.25416 error = 0.36645
10 w1 = 0.39286, w2 = 0.07422 b = 0.27144 error = 0.10776
20 w1 = 0.31619, w2 = 0.28783 b = 0.19948 error = 0.05240
30 w1 = 0.29495, w2 = 0.39211 b = 0.15661 error = 0.02633
40 w1 = 0.29167, w2 = 0.44422 b = 0.13186 error = 0.01368
50 w1 = 0.29321, w2 = 0.47076 b = 0.11780 error = 0.00727
------------------------------------------------------------
57 w1 = 0.29455, w2 = 0.48005 b = 0.11252 error = 0.00471


In [37]:
# Second gradient-descent run with the same settings and a fresh random
# starting point for w1, w2 and b.
num_epoch = 100
learning_rate = 1.1

w1 = np.random.uniform(low=-1.0, high=1.0)
w2 = np.random.uniform(low=-1.0, high=1.0)
b = np.random.uniform(low=-1.0, high=1.0)

for epoch in range(num_epoch):
    y_predict = x1 * w1 + x2 * w2 + b
    # Signed prediction error for the current parameters.
    residual = y_predict - y

    # Mean absolute error; training stops early once it drops below 0.005.
    error = np.abs(residual).mean()
    if error < 0.005:
        break

    # Update all three parameters together from the same residual.
    w1, w2, b = (
        w1 - learning_rate * (residual * x1).mean(),
        w2 - learning_rate * (residual * x2).mean(),
        b - learning_rate * residual.mean(),
    )

    # Log every tenth epoch; the error shown was measured before this update.
    if epoch % 10 == 0:
        print("{0:2} w1 = {1:.5f}, w2 = {2:.5f} b = {3:.5f} error = {4:.5f}".format(epoch, w1, w2, b, error))

print("----" * 15)
print("{0:2} w1 = {1:.5f}, w2 = {2:.5f} b = {3:.5f} error = {4:.5f}".format(epoch, w1, w2, b, error))

 0 w1 = 0.52809, w2 = 0.37146 b = 0.61696 error = 0.93863
10 w1 = 0.30652, w2 = 0.34007 b = 0.17986 error = 0.03949
20 w1 = 0.29338, w2 = 0.41783 b = 0.14439 error = 0.02005
30 w1 = 0.29245, w2 = 0.45732 b = 0.12491 error = 0.01050
40 w1 = 0.29429, w2 = 0.47755 b = 0.11389 error = 0.00561
------------------------------------------------------------
42 w1 = 0.29450, w2 = 0.47893 b = 0.11310 error = 0.00496
