# Logistic (Regression) Classification
Binary Classification에서 선형 Hypothesis로는 Outlier를 반영하기 매우 힘들어져
음의 무한대로 가는 경우 0, 양의 무한대로 가는 경우 1의 값을 갖는 방식의 Hypothesis인 **SIGMOID** 함수 채택
    
$$
H(X) = WX + b \quad \Rightarrow \quad H(X) = \frac{1}{1 + e^{-(WX + b)}}
$$

## Logistic Cost Function

$$
\begin{align*}
C(H(x), y) =&\ y\ ?\ -\log(H(x)) : -\log(1 - H(x))\\
           =&\ -y\log(H(x)) - (1-y)\log(1 - H(x))
\end{align*}
$$
                
## Minimize Cost - Gradient Descent Algorithm
$$
H(X) = \frac{1}{1 + e^{-W^{T}X}}\\
cost(W) = -\frac{1}{m}\sum \left[\, y\log(H(x)) + (1-y)\log(1-H(x)) \,\right] \\
W = W - \alpha\frac{\partial}{\partial W}cost(W)
$$

In [None]:
import tensorflow as tf

# Lab 5 - Logistic Regression

# Toy training set: six samples with two features each and one binary label.
x_data = [[1, 2], [2, 3], [3, 1], [4, 3], [5, 3], [6, 2]]
y_data = [[0], [0], [0], [1], [1], [1]]

# Placeholders that are fed on every sess.run call; `None` leaves the number
# of rows open so any batch size can be supplied.
X = tf.placeholder(tf.float32, shape=[None, 2])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([2, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Linear score fed into the sigmoid. It is kept as its own tensor so the loss
# can be computed from the logits instead of from the squashed probability.
logits = tf.matmul(X, W) + b

# Hypothesis using sigmoid: 1 / (1 + exp(-(XW + b)))
hypothesis = tf.sigmoid(logits)

# cost/loss function: mean binary cross-entropy,
#   -mean(Y * log(H) + (1 - Y) * log(1 - H)).
# It is evaluated from the logits because taking tf.log() of a sigmoid output
# that has saturated to exactly 0 or 1 yields -inf and turns the cost into NaN.
cost = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits))
train = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

# Accuracy computation
# predicted is 1.0 where hypothesis > 0.5, otherwise 0.0
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        # One gradient-descent step on the full data set; fetch the cost too.
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data, Y: y_data})
        if step % 200 == 0:
            print(step, cost_val)

    # Accuracy report
    # NOTE: `c` holds the thresholded predictions (`predicted`), even though
    # the label printed below reads "Correct (Y)".
    h, c, a = sess.run([hypothesis, predicted, accuracy],
                       feed_dict={X: x_data, Y: y_data})
    print("\nHypothesis: ", h, "\nCorrect (Y): ", c, "\nAccuracy: ", a)



In [1]:
# Example (using a CSV file): predict whether a patient has diabetes from several hormone measurements
import tensorflow as tf
import numpy as np
tf.set_random_seed(777)  # for reproducibility

# Load the data set: every column except the last is a feature and the last
# column is the label.
xy = np.loadtxt('data-03-diabetes.csv', delimiter=',', dtype=np.float32)
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]  # indexing with a list keeps the labels as a 2-D column

print(x_data.shape, y_data.shape)

# Placeholders that are fed on every sess.run call; `None` leaves the number
# of rows open so any batch size can be supplied.
X = tf.placeholder(tf.float32, shape=[None, 8])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([8, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Linear score fed into the sigmoid. It is kept as its own tensor so the loss
# can be computed from the logits instead of from the squashed probability.
logits = tf.matmul(X, W) + b

# Hypothesis using sigmoid: 1 / (1 + exp(-(XW + b)))
hypothesis = tf.sigmoid(logits)

# cost/loss function: mean binary cross-entropy,
#   -mean(Y * log(H) + (1 - Y) * log(1 - H)).
# It is evaluated from the logits because taking tf.log() of a sigmoid output
# that has saturated to exactly 0 or 1 yields -inf and turns the cost into NaN.
cost = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits))

train = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

# Accuracy computation
# predicted is 1.0 where hypothesis > 0.5, otherwise 0.0
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        # One gradient-descent step on the full data set; fetch the cost too.
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data, Y: y_data})
        if step % 200 == 0:
            print(step, cost_val)

    # Accuracy report
    # NOTE: `c` holds the thresholded predictions (`predicted`), even though
    # the label printed below reads "Correct (Y)".
    h, c, a = sess.run([hypothesis, predicted, accuracy],
                       feed_dict={X: x_data, Y: y_data})
    print("\nHypothesis: ", h, "\nCorrect (Y): ", c, "\nAccuracy: ", a)

(759, 8) (759, 1)
0 0.82794
200 0.755181
400 0.726355
600 0.705179
800 0.686631
1000 0.669853
1200 0.654603
1400 0.640737
1600 0.62813
1800 0.616668
2000 0.606246
2200 0.596764
2400 0.588133
2600 0.580271
2800 0.573101
3000 0.566555
3200 0.560571
3400 0.555095
3600 0.550075
3800 0.545466
4000 0.541229
4200 0.537327
4400 0.533729
4600 0.530405
4800 0.527329
5000 0.524479
5200 0.521835
5400 0.519377
5600 0.51709
5800 0.514958
6000 0.512969
6200 0.51111
6400 0.509371
6600 0.507741
6800 0.506213
7000 0.504778
7200 0.503429
7400 0.50216
7600 0.500964
7800 0.499836
8000 0.498771
8200 0.497765
8400 0.496813
8600 0.495912
8800 0.495058
9000 0.494249
9200 0.49348
9400 0.49275
9600 0.492056
9800 0.491396
10000 0.490767

Hypothesis:  [[ 0.44348493]
 [ 0.91536468]
 [ 0.22591157]
 [ 0.93583125]
 [ 0.33763626]
 [ 0.70926887]
 [ 0.94409138]
 [ 0.63417912]
 [ 0.2595304 ]
 [ 0.46434352]
 [ 0.64745134]
 [ 0.20137011]
 [ 0.25898224]
 [ 0.35072374]
 [ 0.74845016]
 [ 0.48230031]
 [ 0.70017725]
 [ 0.9126371