### Logistic Regression (= Binary Classification)
:  당뇨병 진단 2진 분류 모델

In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# Load the diabetes dataset: each row holds the feature columns followed by
# the binary label (0/1) in the last column.
xy = np.loadtxt('data-03-diabetes.csv', delimiter=',', dtype=np.float32)

# Fail fast if the file is not the expected table of 8 features + 1 label
# (the model's weight matrix is created with 8 rows).
assert xy.ndim == 2 and xy.shape[1] == 9, f'unexpected data shape: {xy.shape}'

# Positional 70% / 30% split (no shuffling). The split index is derived from
# the data instead of being hard-coded; for the 759-row file it is 531.
TRAIN_RATIO = 0.7
n_train = int(len(xy) * TRAIN_RATIO)

# Training split: features are all columns but the last, labels the last one.
x_train = xy[:n_train, :-1]    # (n_train, 8)
y_train = xy[:n_train, [-1]]   # (n_train, 1) -- indexing with [-1] keeps the column axis

# Evaluation split: the remaining rows.
x_test = xy[n_train:, :-1]     # (n_test, 8)
y_test = xy[n_train:, [-1]]    # (n_test, 1)

# The two splits must partition the data set exactly.
assert len(x_train) + len(x_test) == len(xy)
assert x_train.shape[1] == x_test.shape[1] == 8
xy

array([[-0.294118 ,  0.487437 ,  0.180328 , ..., -0.53117  , -0.0333333,
         0.       ],
       [-0.882353 , -0.145729 ,  0.0819672, ..., -0.766866 , -0.666667 ,
         1.       ],
       [-0.0588235,  0.839196 ,  0.0491803, ..., -0.492741 , -0.633333 ,
         0.       ],
       ...,
       [-0.411765 ,  0.21608  ,  0.180328 , ..., -0.857387 , -0.7      ,
         1.       ],
       [-0.882353 ,  0.266332 , -0.0163934, ..., -0.768574 , -0.133333 ,
         0.       ],
       [-0.882353 , -0.0653266,  0.147541 , ..., -0.797609 , -0.933333 ,
         1.       ]], dtype=float32)

In [3]:
# Initialise the trainable parameters: weight matrix W and bias b.
# Shape rule for matrix multiplication: (m, n) @ (n, l) -> (m, l),
# so X (531, 8) @ W (8, 1) -> Y (531, 1).
#
# Seed TensorFlow's global RNG first so the random initial values (and hence
# the training log) are identical on every Restart & Run All.
# NOTE: outputs saved before the seed was introduced came from an unseeded
# run, so the early-step numbers will differ from them after a re-run.
SEED = 777
tf.random.set_seed(SEED)
W = tf.Variable(tf.random.normal([8, 1]), name='weight')
b = tf.Variable(tf.random.normal([1]), name='bias')

In [4]:
# Hypothesis: H(X) = sigmoid(X @ W + b)
def hypothesis(X):
    """Return the predicted probability for each row of X.

    Computes the linear score ``X @ W + b`` with the module-level variables
    ``W`` and ``b`` and squashes it through the sigmoid, so every output is a
    value between 0 and 1.
    """
    linear_score = tf.matmul(X, W) + b
    return tf.sigmoid(linear_score)

In [5]:
# Cost function: binary cross-entropy (log loss) for the two-class model
def cost_func():
    """Return the mean binary cross-entropy of the model on the training set.

    Reads the module-level ``x_train`` / ``y_train`` and evaluates
    ``hypothesis`` on the training features. Takes no arguments so it can be
    used directly as a zero-argument loss callable.

    Returns:
        Scalar tensor ``-mean(y * log(p) + (1 - y) * log(1 - p))``.
    """
    # Run the forward pass once and reuse it for both log terms.
    probs = hypothesis(x_train)
    # Keep probabilities strictly inside (0, 1). When the sigmoid saturates to
    # exactly 0 or 1, log(0) is -inf and 0 * log(0) is NaN, which would poison
    # the mean and every gradient derived from it.
    eps = 1e-7
    probs = tf.clip_by_value(probs, eps, 1.0 - eps)
    cost = -tf.reduce_mean(y_train * tf.math.log(probs) +
                           (1 - y_train) * tf.math.log(1 - probs))
    return cost

In [6]:
# Optimiser used for gradient-based minimisation of the cost: Adam
LEARNING_RATE = 0.01
optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)

In [7]:
# Training loop
NUM_STEPS = 10001    # steps 0..10000 inclusive, so the final step is logged too
LOG_EVERY = 1000     # print progress every this many steps
trainable_vars = [W, b]

print('***** Start Learning!!')
for step in range(NUM_STEPS):
    # One explicit optimisation step: record the forward pass, differentiate
    # the cost w.r.t. W and b, then let the optimizer apply the update.
    # This is the computation `optimizer.minimize(cost_func, var_list=[W, b])`
    # performs, written out so it does not depend on that convenience method.
    # NOTE(review): newer Keras optimizers appear to no longer provide
    # `minimize` -- confirm against the installed TF/Keras version.
    with tf.GradientTape() as tape:
        loss = cost_func()
    grads = tape.gradient(loss, trainable_vars)
    optimizer.apply_gradients(zip(grads, trainable_vars))

    if step % LOG_EVERY == 0:
        # Report the cost *after* this step's update, plus current parameters.
        print('%04d'%step,'cost:[',cost_func().numpy(),']',
             ' W:',W.numpy(),' b:',b.numpy())

print('***** Learning Finished!!')

***** Start Learning!!
0000 cost:[ 0.97325844 ]  W: [[ 0.81304073]
 [ 1.170205  ]
 [ 0.31772438]
 [-0.44147253]
 [-1.1904342 ]
 [-0.91492796]
 [-0.69127184]
 [ 1.2037741 ]]  b: [1.9922737]
1000 cost:[ 0.48724088 ]  W: [[-0.7444499 ]
 [-3.0180564 ]
 [ 0.13535836]
 [-0.5095887 ]
 [-0.26071465]
 [-2.5602207 ]
 [-0.8383961 ]
 [ 0.08903666]]  b: [0.32701328]
2000 cost:[ 0.48665875 ]  W: [[-0.7648438 ]
 [-3.3271089 ]
 [ 0.24500145]
 [-0.5484393 ]
 [-0.21684285]
 [-2.5529134 ]
 [-0.8670405 ]
 [ 0.11285454]]  b: [0.37881368]
3000 cost:[ 0.4866587 ]  W: [[-0.76468754]
 [-3.3311324 ]
 [ 0.24389823]
 [-0.54806757]
 [-0.21599998]
 [-2.5523198 ]
 [-0.8651107 ]
 [ 0.1138176 ]]  b: [0.3823942]
4000 cost:[ 0.4866587 ]  W: [[-0.7646867]
 [-3.3311386]
 [ 0.2438953]
 [-0.548068 ]
 [-0.2159985]
 [-2.5523171]
 [-0.8651079]
 [ 0.1138189]]  b: [0.3823997]
5000 cost:[ 0.4866587 ]  W: [[-0.76468664]
 [-3.331141  ]
 [ 0.24389535]
 [-0.54806775]
 [-0.21599798]
 [-2.5523167 ]
 [-0.8651065 ]
 [ 0.11381904]]  b: [0

In [8]:
# Show the learned regression coefficients: weight matrix and bias
weight_values = W.numpy()
bias_values = b.numpy()
print('Weight:', weight_values)
print('Bias:', bias_values)

Weight: [[-0.76468647]
 [-3.3311446 ]
 [ 0.24389578]
 [-0.5480685 ]
 [-0.21599725]
 [-2.5523152 ]
 [-0.86510605]
 [ 0.11381822]]
Bias: [0.3824026]


In [9]:
# Accuracy computation: turn predicted probabilities into hard class labels
def predict(X, threshold=0.5):
    """Return 0.0 / 1.0 class predictions for each row of X.

    Args:
        X: feature matrix accepted by ``hypothesis``.
        threshold: probability strictly above which a row is labelled 1.0.
            Defaults to 0.5, the previously hard-coded cut-off, so existing
            ``predict(X)`` calls behave exactly as before.

    Returns:
        float32 tensor of 0.0 / 1.0 values, one per output of
        ``hypothesis(X)``.
    """
    return tf.cast(hypothesis(X) > threshold, dtype=tf.float32)

# Evaluate on the held-out 30% split: accuracy is the fraction of rows whose
# predicted label equals the true label.
preds = predict(x_test)
is_correct = tf.equal(preds, y_test)
accuracy = tf.reduce_mean(tf.cast(is_correct, dtype=tf.float32))

print('Accuracy:', accuracy.numpy())

Accuracy: 0.78070176
