## Logistic Regression: Diabetes
당뇨병 진단 예측 2진 분류(Binary Classification) 모델

In [1]:
import tensorflow as tf
import numpy as np
tf.random.set_seed(5)

In [2]:
# Load the dataset: one sample per row, label in the last column.
xy = np.loadtxt("data-03-diabetes.csv", delimiter=",", dtype=np.float32)
xy.shape  # (759, 9)

# Training split: the first 531 rows (70%).
# Indexing the column with the list [-1] keeps the labels 2-D, i.e. (rows, 1).
x_train, y_train = xy[:531, :-1], xy[:531, [-1]]  # X, Y
x_train.shape  # (531, 8)
y_train.shape  # (531, 1)

# Validation split: the remaining 228 rows (30%).
x_test, y_test = xy[531:, :-1], xy[531:, [-1]]
x_test.shape  # (228, 8)
y_test.shape  # (228, 1)

(228, 1)

In [3]:
# Trainable parameters: weight and bias, initialized from a normal distribution.
# Matrix product shape rule: (m, n) x (n, l) -> (m, l),
# so (531, 8) x (8, 1) -> (531, 1).
W = tf.Variable(
    tf.random.normal([8, 1]),
    name="weight",
)
b = tf.Variable(
    tf.random.normal([1]),
    name="bias",
)

In [4]:
# Hypothesis (prediction function): H(X) = sigmoid(X @ W + b)
# where sigmoid(z) = 1 / (1 + exp(-z)).
def hypothesis(X):
    """Return sigmoid(X @ W + b): a value between 0 and 1 for each row of X."""
    logits = tf.matmul(X, W) + b
    return tf.sigmoid(logits)

In [5]:
# Cost function: log loss (binary cross-entropy) for the binary classifier.
def cost_func():
    """Return the mean log loss of the model over the training data.

    Takes no arguments so it can be handed to ``optimizer.minimize`` as a
    loss callable; it reads ``x_train`` / ``y_train`` and evaluates the
    current parameters through ``hypothesis``.
    """
    # Run the forward pass once and reuse it for both terms of the loss.
    # Clip the probabilities away from exactly 0 and 1: when the float32
    # sigmoid saturates, log() returns -inf and 0 * -inf makes the cost NaN.
    eps = 1e-7
    probs = tf.clip_by_value(hypothesis(x_train), eps, 1.0 - eps)

    cost = -tf.reduce_mean(y_train * tf.math.log(probs) +
                           (1 - y_train) * tf.math.log(1 - probs))
    return cost

In [6]:
# Gradient-based optimization: create an Adam optimizer with a learning rate of 0.01.
# Use the `learning_rate` keyword; `lr` is a deprecated alias that newer
# Keras releases warn about or reject.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

In [7]:
# Start training
print('****** Start Learning!!')
for step in range(10001):
    # One optimization step: minimize the cost by updating W and b.
    optimizer.minimize(cost_func, var_list=[W, b])
    if step % 100 != 0:
        continue
    # Every 100th step, report the current cost and parameter values.
    print('%04d' % step, 'cost:[', cost_func().numpy(), ']',
          ' W:', W.numpy(), ' b:', b.numpy())

print('****** Learning Finished!!')

****** Start Learning!!
0000 cost:[ 0.8006098 ]  W: [[-0.19030644]
 [-0.9402895 ]
 [-0.02964124]
 [-0.7525403 ]
 [ 1.3131526 ]
 [-0.6285471 ]
 [ 0.8440665 ]
 [-0.09899934]]  b: [0.23652852]
0100 cost:[ 0.527278 ]  W: [[-0.6510297 ]
 [-1.9078269 ]
 [ 0.23280829]
 [-1.2235695 ]
 [ 0.70609117]
 [-1.4086562 ]
 [ 0.31652665]
 [-0.42911762]]  b: [0.64593804]
0200 cost:[ 0.50196886 ]  W: [[-0.700372  ]
 [-2.6286898 ]
 [ 0.16853546]
 [-1.2493113 ]
 [ 0.43074816]
 [-1.8856583 ]
 [ 0.07900825]
 [-0.26116478]]  b: [0.66892827]
0300 cost:[ 0.4933479 ]  W: [[-0.71150464]
 [-3.0237336 ]
 [ 0.19624372]
 [-1.170435  ]
 [ 0.25556955]
 [-2.166043  ]
 [-0.13201827]
 [-0.10620081]]  b: [0.6788835]
0400 cost:[ 0.48995656 ]  W: [[-7.1795523e-01]
 [-3.2340925e+00]
 [ 2.1906528e-01]
 [-1.0466374e+00]
 [ 1.3929121e-01]
 [-2.3197508e+00]
 [-3.0422199e-01]
 [ 1.3163153e-03]]  b: [0.67131805]
0500 cost:[ 0.48841515 ]  W: [[-0.7261077 ]
 [-3.3358545 ]
 [ 0.22019203]
 [-0.91839826]
 [ 0.05383499]
 [-2.401011  ]
 [-

5000 cost:[ 0.48665863 ]  W: [[-0.76468664]
 [-3.331145  ]
 [ 0.2438964 ]
 [-0.548069  ]
 [-0.21599677]
 [-2.5523148 ]
 [-0.8651059 ]
 [ 0.11381863]]  b: [0.38240263]
5100 cost:[ 0.48665863 ]  W: [[-0.76468664]
 [-3.331145  ]
 [ 0.24389648]
 [-0.5480691 ]
 [-0.21599674]
 [-2.5523148 ]
 [-0.8651061 ]
 [ 0.11381847]]  b: [0.38240236]
5200 cost:[ 0.48665863 ]  W: [[-0.76468664]
 [-3.331145  ]
 [ 0.24389648]
 [-0.5480691 ]
 [-0.21599679]
 [-2.5523148 ]
 [-0.865106  ]
 [ 0.11381835]]  b: [0.38240236]
5300 cost:[ 0.48665863 ]  W: [[-0.76468664]
 [-3.331145  ]
 [ 0.24389648]
 [-0.5480691 ]
 [-0.21599679]
 [-2.5523148 ]
 [-0.865106  ]
 [ 0.11381836]]  b: [0.38240236]
5400 cost:[ 0.4866587 ]  W: [[-0.76468664]
 [-3.3311448 ]
 [ 0.24389637]
 [-0.548069  ]
 [-0.21599686]
 [-2.552315  ]
 [-0.865106  ]
 [ 0.11381828]]  b: [0.38240236]
5500 cost:[ 0.4866587 ]  W: [[-0.7646866 ]
 [-3.3311448 ]
 [ 0.24389592]
 [-0.54806906]
 [-0.2159969 ]
 [-2.5523152 ]
 [-0.86510634]
 [ 0.11381844]]  b: [0.38240224]


10000 cost:[ 0.4866587 ]  W: [[-0.76468676]
 [-3.3311439 ]
 [ 0.24389665]
 [-0.54806906]
 [-0.21599697]
 [-2.5523157 ]
 [-0.8651073 ]
 [ 0.1138185 ]]  b: [0.38240126]
****** Learning Finished!!


In [8]:
# Print the learned weight and bias.
print("Weight:", W.numpy())
print("bias:", b.numpy())

Weight: [[-0.76468676]
 [-3.3311439 ]
 [ 0.24389665]
 [-0.54806906]
 [-0.21599697]
 [-2.5523157 ]
 [-0.8651073 ]
 [ 0.1138185 ]]
bias: [0.38240126]


In [9]:
# Accuracy computation: convert predicted probabilities into 0/1 class labels.
def predict(X):
    """Return 1.0 where hypothesis(X) is greater than 0.5, else 0.0 (float32)."""
    is_positive = tf.greater(hypothesis(X), 0.5)
    return tf.cast(is_positive, tf.float32)

# Predict on the validation data (70%:30% split).
preds = predict(x_test)
# Accuracy is the fraction of predictions that equal the true labels.
matches = tf.cast(tf.equal(preds, y_test), dtype=tf.float32)
accuracy = tf.reduce_mean(matches)

print('Accuracy:', accuracy.numpy())
# print('Hypothesis:\n',hypothesis(x_test).numpy())
# print('Predict:\n',preds.numpy())

Accuracy: 0.78070176
