In [1]:
# 03_logistic_regression_diabetes
# Binary classification model for diabetes diagnosis

import tensorflow as tf
import numpy as np
# Fix TensorFlow's global random seed so the tf.random draws made below
# (the initial weight and bias values) are repeatable across runs.
tf.random.set_seed(5)

In [6]:
# Load the data set. Every column except the last is used as a feature and
# the last column is used as the label (see the slices below).
# Expected shape for the bundled file: (759, 9).
xy = np.loadtxt('data-03-diabetes.csv',delimiter=',',dtype=np.float32)

# Fraction of rows, taken from the top of the file, used for training.
# The split point is derived from the row count instead of being hard-coded,
# so it stays a 70/30 split if the file changes: 759 rows -> 531 training rows.
TRAIN_RATIO = 0.7
n_train = int(len(xy) * TRAIN_RATIO)

# Training split: first 70% of the rows (531 rows for the bundled file)
x_train = xy[:n_train,:-1]   # X: feature columns
y_train = xy[:n_train,[-1]]  # Y: label column, kept 2-D by indexing with [-1]
print(x_train.shape,y_train.shape)  # (531,8)  (531,1)

# Test split: remaining 30% of the rows (228 rows for the bundled file)
x_test = xy[n_train:,:-1]    # X
y_test = xy[n_train:,[-1]]   # Y
print(x_test.shape,y_test.shape)    # (228,8)  (228,1)

(531, 8) (531, 1)
(228, 8) (228, 1)


In [7]:
# Trainable parameters: weight matrix and bias, initialised from a normal
# distribution.
# Matrix product shape rule: (m,n) x (n,l) -> (m,l)
# Here: (531,8) x (8,1) -> (531,1)
initial_weight = tf.random.normal(shape=[8, 1])  # drawn first, as before
initial_bias = tf.random.normal(shape=[1])
W = tf.Variable(initial_weight, name='weight')
b = tf.Variable(initial_bias, name='bias')
for param in (W, b):
    print(param)

<tf.Variable 'weight:0' shape=(8, 1) dtype=float32, numpy=
array([[-0.18030666],
       [-0.95028627],
       [-0.03964049],
       [-0.7425406 ],
       [ 1.3231523 ],
       [-0.61854804],
       [ 0.8540664 ],
       [-0.08899953]], dtype=float32)>
<tf.Variable 'bias:0' shape=(1,) dtype=float32, numpy=array([0.22652863], dtype=float32)>


In [8]:
# Prediction function (hypothesis): H(X) = sigmoid(X*W + B)
def hypothesis(X):
    """Return sigmoid(X @ W + b) for the feature matrix X.

    Uses the module-level variables W and b. The sigmoid keeps every
    output value between 0 and 1.
    """
    logits = tf.matmul(X, W) + b
    return tf.sigmoid(logits)

In [9]:
# Cost function: log loss (binary cross-entropy) for the binary classifier
def cost_func():
    """Return the mean log loss of hypothesis() on the training split.

    Reads the module-level x_train and y_train. Takes no arguments so it can
    be called as a zero-argument loss function by the training loop.

    The predicted probabilities are clipped to [eps, 1 - eps] before the
    logarithm: a saturated sigmoid can return exactly 0.0 or 1.0 in float32,
    and log(0) = -inf (then 0 * -inf = NaN) would otherwise turn the cost and
    its gradients into NaN. Values inside the range are unaffected.
    """
    eps = 1e-7
    # Evaluate the model once and reuse the result for both log terms.
    probs = tf.clip_by_value(hypothesis(x_train), eps, 1.0 - eps)
    cost = -tf.reduce_mean(y_train*tf.math.log(probs) +
                          (1 - y_train)*tf.math.log(1 - probs))
    return cost

In [10]:
# Optimizer (gradient-based training)
# Create the optimizer object with learning_rate set to 0.01.
# Note: the optimizer constructed here is Adam, not plain gradient descent.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

In [11]:
# Start training
# Each step records the loss under a GradientTape and applies the gradients
# explicitly. This replaces optimizer.minimize(cost_func, var_list=[W,b]),
# which is not available on Keras 3 optimizers (the default tf.keras in recent
# TensorFlow releases); tape + apply_gradients performs the same update and
# works across TF2 versions.
print('***** Start Learning!!')
train_vars = [W, b]
for step in range(10001):
    with tf.GradientTape() as tape:
        loss = cost_func()
    grads = tape.gradient(loss, train_vars)
    optimizer.apply_gradients(zip(grads, train_vars))
    if step % 1000 == 0:
        # The cost is re-evaluated after the update so that it corresponds to
        # the W and b values printed next to it.
        print('%04d'%step,'cost:[',cost_func().numpy(),']',
             ' W:',W.numpy(),' b:',b.numpy())
print('***** Learning Finished!!')

***** Start Learning!!
0000 cost:[ 0.8006097 ]  W: [[-0.19030644]
 [-0.9402895 ]
 [-0.02964124]
 [-0.7525403 ]
 [ 1.3131526 ]
 [-0.6285471 ]
 [ 0.8440665 ]
 [-0.09899934]]  b: [0.23652852]
1000 cost:[ 0.48674083 ]  W: [[-0.7548764 ]
 [-3.3620791 ]
 [ 0.21074554]
 [-0.5814701 ]
 [-0.1694179 ]
 [-2.5226405 ]
 [-0.7677587 ]
 [ 0.12745763]]  b: [0.48014042]
2000 cost:[ 0.4866587 ]  W: [[-0.76455224]
 [-3.331515  ]
 [ 0.24341252]
 [-0.5476674 ]
 [-0.21579494]
 [-2.5522876 ]
 [-0.86375153]
 [ 0.11404339]]  b: [0.3838595]
3000 cost:[ 0.4866587 ]  W: [[-0.76468694]
 [-3.3311474 ]
 [ 0.2438957 ]
 [-0.54806787]
 [-0.21599682]
 [-2.5523124 ]
 [-0.8651051 ]
 [ 0.11381982]]  b: [0.38240486]
4000 cost:[ 0.4866587 ]  W: [[-0.7646867 ]
 [-3.3311465 ]
 [ 0.24389622]
 [-0.5480683 ]
 [-0.21599673]
 [-2.552314  ]
 [-0.8651056 ]
 [ 0.11381892]]  b: [0.38240364]
5000 cost:[ 0.4866587 ]  W: [[-0.76468647]
 [-3.3311458 ]
 [ 0.24389595]
 [-0.5480686 ]
 [-0.21599661]
 [-2.5523145 ]
 [-0.8651055 ]
 [ 0.11381891]

In [12]:
# Print the weight and bias values held by W and b
for label, variable in (('Weight:', W), ('Bias:', b)):
    print(label, variable.numpy())

Weight: [[-0.76468706]
 [-3.331145  ]
 [ 0.24389529]
 [-0.5480679 ]
 [-0.21599688]
 [-2.5523162 ]
 [-0.8651065 ]
 [ 0.11381915]]
Bias: [0.3824031]


In [14]:
# Evaluation: accuracy
def predict(X):
    """Return hard predictions for X as a float32 tensor.

    An element is 1.0 where hypothesis(X) is strictly greater than 0.5 and
    0.0 otherwise.
    """
    probabilities = hypothesis(X)
    above_threshold = probabilities > 0.5
    return tf.cast(above_threshold, dtype=tf.float32)

# Predict on the test split
preds = predict(x_test)

# Accuracy: fraction of predictions equal to the test labels
is_correct = tf.equal(preds, y_test)
accuracy = tf.reduce_mean(tf.cast(is_correct, dtype=tf.float32))
print('Accuracy:', accuracy.numpy())
# Optional debugging output (disabled):
# print('Hypothesis:\n',hypothesis(x_test).numpy())
# print('Predict:\n',preds.numpy())

Accuracy: 0.78070176
