# 11_Logistic Regression

Simple logistic regression (classification) - example

In [90]:
# [1] 학습데이터 (training data) 준비
import numpy as np

x_data = np.array([2, 4, 6, 8, 10, 12, 14, 16, 18, 20]).reshape(10, 1)
t_data = np.array([0, 0, 0, 0,  0,  0,  1,  1,  1,  1]).reshape(10, 1)

print("x_data.shape = ", x_data.shape, ", t_data.shape = ", t_data.shape)

x_data.shape =  (10, 1) , t_data.shape =  (10, 1)


In [91]:
# [2] 임의의 직선 z = Wx + b 정의 (임의의 값으로 가중치 W, 바이어스 b 초기화)
W = np.random.rand(1, 1)
b = np.random.rand(1)
print("W = ", W, ", W.shape = ", W.shape, ", b = ", b, ", b.shape = ", b.shape)

W =  [[0.89044891]] , W.shape =  (1, 1) , b =  [0.80677837] , b.shape =  (1,)


In [92]:
# [3] 손실함수 E(W, b) 정의

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def loss_func(x, t):
    
    delta = 1e-7 # log 무한대 발산 방지
    
    z = np.dot(x, W) + b
    y = sigmoid(z)
    
    # cross-entropy
    return -np.sum(t * np.log(y + delta) + (1 - t) * np.log((1 - y) + delta))

In [93]:
# [4] 수치미분 numerical_derivative 및 utility 함수 정의
def numerical_derivative(f, x):
    delta_x = 1e-4 # 0.0001
    grad = np.zeros_like(x)
    
    it = np.nditer(x, flags = ['multi_index'], op_flags = ['readwrite'])
    
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + delta_x
        fx1 = f(x) # f(x + delta_x)
        
        x[idx] = tmp_val - delta_x
        fx2 = f(x) # f(x - delta_x)
        grad[idx] = (fx1 - fx2) / (2 * delta_x)
        
        x[idx] = tmp_val
        it.iternext()
        
    return grad

In [94]:
# 손실함수 값 계산 함수
# 입력변수 x, t : numpy type
def error_val(x, t):
    delta = 1e-7 # log 무한대 발산 방지
    
    z = np.dot(x, W) + b
    y = sigmoid(z)
    
    # cross-entropy
    return -np.sum(t * np.log(y + delta) + (1 - t)*np.log(1 - y) + delta)

# 학습을 마친 후, 임의 데이터에 대해 미래 값 예측 함수
# 입력변수 x : numpy
def predict(x):
    
    z = np.dot(x, W) + b
    y = sigmoid(z)
    
    if y > 0.5:
        result = 1 # True
    else:
        result = 0 # False
    
    return y, result

In [95]:
# [5] 학습율 (learning rate) 초기화 및 손실함수가 최소가 될 때까지 W, b 업데이트
learning_rate = 1e-2 # 발산하는 경우, 1e-3 ~ 1e-6 등으로 바꾸어서 실행

f = lambda x : loss_func(x_data, t_data) # f(x) = loss_func(x_data, t_data)

print("Initial error value = ", error_val(x_data, t_data), "Initial W = ", W, "\n", ", b = ", b)

for step in range(10001):
    
    W -= learning_rate * numerical_derivative(f, W)
    b -= learning_rate * numerical_derivative(f, b)
    
    if (step % 400 == 0):
        print("step = ", step, "error value = ", error_val(x_data, t_data), "W = ", W, ", b = ", b)

Initial error value =  42.3271792565909 Initial W =  [[0.89044891]] 
 , b =  [0.80677837]
step =  0 error value =  24.67952936471593 W =  [[0.47386629]] , b =  [0.7492991]
step =  400 error value =  2.8161069258081373 W =  [[0.27761678]] , b =  [-4.10152596]
step =  800 error value =  1.7838884755451239 W =  [[0.45331488]] , b =  [-5.64042455]
step =  1200 error value =  1.5180178433967753 W =  [[0.5306688]] , b =  [-6.67044374]
step =  1600 error value =  1.3524887523611018 W =  [[0.59193391]] , b =  [-7.48399899]
step =  2000 error value =  1.236002015941317 W =  [[0.64345523]] , b =  [-8.16666804]
step =  2400 error value =  1.1478327595874274 W =  [[0.68836114]] , b =  [-8.7606246]
step =  2800 error value =  1.077796808981543 W =  [[0.72844154]] , b =  [-9.28997746]
step =  3200 error value =  1.0202236237666589 W =  [[0.76482409]] , b =  [-9.76989876]
step =  3600 error value =  0.9716683003743014 W =  [[0.79826891]] , b =  [-10.21060447]
step =  4000 error value =  0.92989872737

In [96]:
# [6] 학습 결과 (오차 함수 값 감소 확인) 및 입력 3시간, 17시간에 대한 미래 값 Fail / Pass 예측
(real_val, logical_val) = predict(3)

print(real_val, logical_val)

[[1.12075526e-05]] 0


In [97]:
(real_val, logical_val) = predict(17)

print(real_val, logical_val)

[[0.99128495]] 1


Multi-variable logistic regression (classification) - overview

In [98]:
# [1] 학습데이터 (training data) 준비
import numpy as np

x_data = np.array([[2, 4], [4, 11], [6, 6], [8, 5], [10, 7], [12, 16], [14, 8], [16, 3], [18, 7]])
t_data = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1]).reshape(9, 1)

# 데이터 차원 및 shape 확인
print("x_data.ndim = ", x_data.ndim, ", x_data.shape = ", x_data.shape)
print("t_data.ndim = ", t_data.ndim, ", t_data.shape = ", t_data.shape)

x_data.ndim =  2 , x_data.shape =  (9, 2)
t_data.ndim =  2 , t_data.shape =  (9, 1)


In [99]:
# [2] 임의의 직선 z= W1x1 + W2x2 + b 정의 (가중치 W, 바이어스 b 초기화)
W = np.random.rand(2, 1) # 2 X 1 행렬
b = np.random.rand(1)
print("W = ", W, ", W.shape = ", W.shape, ", b = ", b, ", b.shape = ", b.shape)

W =  [[0.86802812]
 [0.21258245]] , W.shape =  (2, 1) , b =  [0.76576268] , b.shape =  (1,)


In [100]:
# [3] 손실함수 E(W, b) 정의
# classification 이므로 출력함수로 sigmoid 정의

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# 최종출력은 y = sigmoid(Wx + b)이며, 손실함수는 cross-entropy로 나타냄

def loss_func(x, t):
    
    delta = 1e-7 # log 무한대 발산 방지
    
    z = np.dot(x, W) + b
    y = sigmoid(z)
    
    # cross-entropy
    return -np.sum(t * np.log(y + delta) + (1 - t) * np.log((1 - y) + delta))

In [101]:
# [4] 수치미분 numerical_derivative 및 utility 함수 정의
def numerical_derivative(f, x):
    delta_x = 1e-4 # 0.0001
    grad = np.zeros_like(x)
    
    it = np.nditer(x, flags = ['multi_index'], op_flags = ['readwrite'])
    
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + delta_x
        fx1 = f(x) # f(x + delta_x)
        
        x[idx] = tmp_val - delta_x
        fx2 = f(x) # f(x - delta_x)
        grad[idx] = (fx1 - fx2) / (2 * delta_x)
        
        x[idx] = tmp_val
        it.iternext()
        
    return grad

In [102]:
def error_val(x, t):
    delta = 1e-7 # log 무한대 발산 방지
    
    z = np.dot(x, W) + b
    y = sigmoid(z)
    
    # cross-entropy
    return -np.sum(t * np.log(y + delta) + (1 - t)*np.log(1 - y) + delta)

def predict(x):
    
    z = np.dot(x, W) + b
    y = sigmoid(z)
    
    if y > 0.5:
        result = 1 # True
    else:
        result = 0 # False
    
    return y, result

In [103]:
# [5] 학습율 (learning rate) 초기화 및 손실함수가 최소가 될 때까지 W, b 업데이트
learning_rate = 1e-2 # 1e-2, 1e-3 은 손실함수 값 발산

f = lambda x : loss_func(x_data, t_data)

print("Initial error value = ", error_val(x_data, t_data), "Initial W = ", W, "\n", ", b = ", b)

for step in range(80001):
    
    W -= learning_rate * numerical_derivative(f, W)
    b -= learning_rate * numerical_derivative(f, b)
    
    if (step % 400 == 0):
        print("step = ", step, "error value = ", error_val(x_data, t_data), "W = ", W, ", b = ", b)

Initial error value =  25.98744173377466 Initial W =  [[0.86802812]
 [0.21258245]] 
 , b =  [0.76576268]
step =  0 error value =  15.30814350056577 W =  [[ 0.66888018]
 [-0.04581111]] , b =  [0.72769377]
step =  400 error value =  2.319783837942666 W =  [[ 0.41456134]
 [-0.09052032]] , b =  [-2.53479398]
step =  800 error value =  1.6091545856036256 W =  [[ 0.53213194]
 [-0.02824481]] , b =  [-4.20758883]
step =  1200 error value =  1.2895214191339162 W =  [[0.62015327]
 [0.00778712]] , b =  [-5.33425143]
step =  1600 error value =  1.104218097490032 W =  [[0.6910408 ]
 [0.03329607]] , b =  [-6.1930682]
step =  2000 error value =  0.9808970268411208 W =  [[0.75070232]
 [0.05353171]] , b =  [-6.89390948]
step =  2400 error value =  0.8914288933175842 W =  [[0.80238004]
 [0.07080307]] , b =  [-7.49090051]
step =  2800 error value =  0.8226006371671412 W =  [[0.84804696]
 [0.08629783]] , b =  [-8.01451671]
step =  3200 error value =  0.7673732440511035 W =  [[0.88899716]
 [0.10069005]] , 

step =  31600 error value =  0.17015342352555957 W =  [[1.78381405]
 [0.70418904]] , b =  [-20.19876645]
step =  32000 error value =  0.16832780555554278 W =  [[1.79013858]
 [0.70890843]] , b =  [-20.28391925]
step =  32400 error value =  0.16654042622993812 W =  [[1.79639892]
 [0.71357439]] , b =  [-20.36817497]
step =  32800 error value =  0.16479010619729728 W =  [[1.80259633]
 [0.71818811]] , b =  [-20.4515521]
step =  33200 error value =  0.1630757139061897 W =  [[1.80873205]
 [0.72275072]] , b =  [-20.53406854]
step =  33600 error value =  0.1613961632098672 W =  [[1.81480727]
 [0.72726332]] , b =  [-20.61574167]
step =  34000 error value =  0.15975041111393443 W =  [[1.82082316]
 [0.73172698]] , b =  [-20.69658833]
step =  34400 error value =  0.1581374556570462 W =  [[1.82678085]
 [0.73614274]] , b =  [-20.7766249]
step =  34800 error value =  0.1565563339155439 W =  [[1.83268143]
 [0.74051159]] , b =  [-20.85586724]
step =  35200 error value =  0.15500612012360845 W =  [[1.838

step =  63600 error value =  0.09077698010093695 W =  [[2.15416353]
 [0.97290774]] , b =  [-25.14005106]
step =  64000 error value =  0.09024795971387249 W =  [[2.15762361]
 [0.97536028]] , b =  [-25.1858753]
step =  64400 error value =  0.08972502548523373 W =  [[2.16106408]
 [0.9777981 ]] , b =  [-25.23143508]
step =  64800 error value =  0.08920807352967236 W =  [[2.16448516]
 [0.98022137]] , b =  [-25.27673342]
step =  65200 error value =  0.08869700230349632 W =  [[2.16788707]
 [0.98263026]] , b =  [-25.32177327]
step =  65600 error value =  0.08819171253929084 W =  [[2.17127003]
 [0.98502495]] , b =  [-25.36655754]
step =  66000 error value =  0.0876921071826855 W =  [[2.17463424]
 [0.98740559]] , b =  [-25.4110891]
step =  66400 error value =  0.08719809133120736 W =  [[2.1779799 ]
 [0.98977235]] , b =  [-25.45537076]
step =  66800 error value =  0.086709572175145 W =  [[2.18130722]
 [0.9921254 ]] , b =  [-25.49940529]
step =  67200 error value =  0.08622645894035737 W =  [[2.18

In [104]:
# [6] 미래 값 예측

In [105]:
test_data = np.array([3, 17]) # (예습, 복습) = (3, 17) => Fail(0)
predict(test_data)

(array([0.1286396]), 0)

In [106]:
test_data = np.array([5, 8]) # (예습, 복습) = (5, 8) => Fail (0)
predict(test_data)

(array([0.00099085]), 0)

In [107]:
test_data = np.array([7, 21]) # (예습, 복습) = (7, 21) => Pass (1)
predict(test_data)

(array([0.99998953]), 1)

In [108]:
test_data = np.array([12, 0]) #(예습, 복습) = (12, 0) => Pass (1)
predict(test_data)

(array([0.63505424]), 1)

미래 값을 예측해보면, 복습보다는 예습시간이 합격(Pass)에 미치는 영향이 크다는 것을 알 수 있음. (즉, 예습시간에 대한 가중치 W1 = 2.28, 복습시간에 대한 가중치 W2 = 1.06, 에서 보듯이 예습시간이 복습시간에 비해 최종결과에 미치는 영향이 2배 이상임)