In [2]:
import numpy as np

import tensorflow as tf
tf.__version__

'2.2.0'

# How to minimize cost

__[Gradient descent algorithm]__

- minimize cost function : 비용함수를 최소화 시키는 방법
- It can be applied to more general function : _cost(W1, W2)_ : 변수가 여러개 있을 때 적용 가능

---

1) Start with initial guesses
    - Start at 0,0 (or any other value - random)      # W는 초기값으로 특정값을 줘도 되고 랜덤으로 할당해도 O.K
    - Keeping changing W and b a little bit to try and reduce cost(W, b)     # W, b를 계속해서 조금씩 줄여나가자.
2) Each time you change the parameters, you select the gradient which reduces cost(W, b) the most possible.
    - 기울기가 최소화 되는 것으로 선택하자.    
3) Has an interesting property
    - Where you start can determine which minimum you end up
    
---

alpha는 일반적으로 작은 수(ex. 0.01, 0.001, ...)<br>
가중치는 원래의 가중치에서 비용함수를 가중치에 의해 편미분한 후 alpha를 곱한 것을 빼면서 계속 업그레이드 시킨다.

In [4]:
## Cost function in pure Python

X = np.array([1, 2, 3])
Y = np.array([1, 2, 3])

def cost_func(W, X, Y):
    c = 0
    for i in range(len(X)):
        c += (W*X[i] - Y[i]) ** 2
        
    return c / len(X)

for feed_W in np.linspace(-3, 5, num = 15):
    curr_cost = cost_func(feed_W, X, Y)
    print("{:6.3f} | {:10.5f}".format(feed_W, curr_cost))

-3.000 |   74.66667
-2.429 |   54.85714
-1.857 |   38.09524
-1.286 |   24.38095
-0.714 |   13.71429
-0.143 |    6.09524
 0.429 |    1.52381
 1.000 |    0.00000
 1.571 |    1.52381
 2.143 |    6.09524
 2.714 |   13.71429
 3.286 |   24.38095
 3.857 |   38.09524
 4.429 |   54.85714
 5.000 |   74.66667


In [7]:
## Cost function in Tensorflow

X = np.array([1, 2, 3])
Y = np.array([1, 2, 3])

def cost_func2(W, X, Y):
    hypothesis = W * X
    return tf.reduce_mean(tf.square(hypothesis - y))

W_values = np.linspace(-3, 5, num =15)
cost_values = []

for feed_W in W_values:
    curr_costs = cost_func(feed_W, X, Y)
    cost_values.append(curr_costs)
    print("{:6.3f} | {:10.5f}".format(feed_W, curr_costs))

-3.000 |   74.66667
-2.429 |   54.85714
-1.857 |   38.09524
-1.286 |   24.38095
-0.714 |   13.71429
-0.143 |    6.09524
 0.429 |    1.52381
 1.000 |    0.00000
 1.571 |    1.52381
 2.143 |    6.09524
 2.714 |   13.71429
 3.286 |   24.38095
 3.857 |   38.09524
 4.429 |   54.85714
 5.000 |   74.66667


In [8]:
## Gradient Descent
alpha = 0.01
gradient = tf.reduce_mean(tf.multiply(tf.multiply(W, X)-Y, X))
descent = W - tf.multiply(alpha, gradient)
W.assign(descent)

NameError: name 'W' is not defined

In [26]:
# 실제 활용

tf.compat.v1.set_random_seed(0)     # for reproducibility : 다음에도 똑같이 재현하기 위해
                                    # 1 버전에서는 tf.set_random_seed(seed)

X = [1., 2., 3., 4.]
Y = [1., 2., 3., 4.]

W = tf.Variable(tf.random.normal([1], -100., 100.))   # 1 버전 : tf.random_normal

for step in range(300):
    hypothesis = W * X
    cost = tf.reduce_mean(tf.square(hypothesis - Y))
    
    alpha = 0.01
    gradient = tf.reduce_mean(tf.multiply(tf.multiply(W, X) - Y, X))
    descent = W - tf.multiply(alpha, gradient)
    W.assign(descent)
    
    if step % 10 == 0:
        print("{:5} | {:10.4f} | {:10.6f}".format(step, cost.numpy(), W.numpy()[0]))

    0 | 18829.7812 |  47.348293
   10 |  3959.8613 |  22.254509
   20 |   832.7499 |  10.746943
   30 |   175.1255 |   5.469776
   40 |    36.8285 |   3.049760
   50 |     7.7449 |   1.939984
   60 |     1.6287 |   1.431060
   70 |     0.3425 |   1.197676
   80 |     0.0720 |   1.090651
   90 |     0.0151 |   1.041571
  100 |     0.0032 |   1.019064
  110 |     0.0007 |   1.008742
  120 |     0.0001 |   1.004009
  130 |     0.0000 |   1.001839
  140 |     0.0000 |   1.000843
  150 |     0.0000 |   1.000387
  160 |     0.0000 |   1.000178
  170 |     0.0000 |   1.000081
  180 |     0.0000 |   1.000037
  190 |     0.0000 |   1.000017
  200 |     0.0000 |   1.000008
  210 |     0.0000 |   1.000004
  220 |     0.0000 |   1.000002
  230 |     0.0000 |   1.000001
  240 |     0.0000 |   1.000001
  250 |     0.0000 |   1.000001
  260 |     0.0000 |   1.000001
  270 |     0.0000 |   1.000001
  280 |     0.0000 |   1.000001
  290 |     0.0000 |   1.000001


In [28]:
# W를 특정한 값으로 설정했을 때

tf.compat.v1.set_random_seed(0)     # for reproducibility : 다음에도 똑같이 재현하기 위해
                                    # 1 버전에서는 tf.set_random_seed(seed)

X = [1., 2., 3., 4.]
Y = [1., 2., 3., 4.]

W = tf.Variable([5.0])   # 1 버전 : tf.random_normal

for step in range(300):
    hypothesis = W * X
    cost = tf.reduce_mean(tf.square(hypothesis - Y))
    
    alpha = 0.01
    gradient = tf.reduce_mean(tf.multiply(tf.multiply(W, X) - Y, X))
    descent = W - tf.multiply(alpha, gradient)
    W.assign(descent)
    
    if step % 10 == 0:
        print("{:5} | {:10.4f} | {:10.6f}".format(step, cost.numpy(), W.numpy()[0]))

    0 |   120.0000 |   4.700000
   10 |    25.2357 |   2.696754
   20 |     5.3070 |   1.778102
   30 |     1.1161 |   1.356824
   40 |     0.2347 |   1.163633
   50 |     0.0494 |   1.075039
   60 |     0.0104 |   1.034412
   70 |     0.0022 |   1.015781
   80 |     0.0005 |   1.007237
   90 |     0.0001 |   1.003319
  100 |     0.0000 |   1.001522
  110 |     0.0000 |   1.000698
  120 |     0.0000 |   1.000320
  130 |     0.0000 |   1.000147
  140 |     0.0000 |   1.000067
  150 |     0.0000 |   1.000031
  160 |     0.0000 |   1.000014
  170 |     0.0000 |   1.000006
  180 |     0.0000 |   1.000003
  190 |     0.0000 |   1.000001
  200 |     0.0000 |   1.000001
  210 |     0.0000 |   1.000001
  220 |     0.0000 |   1.000001
  230 |     0.0000 |   1.000001
  240 |     0.0000 |   1.000001
  250 |     0.0000 |   1.000001
  260 |     0.0000 |   1.000001
  270 |     0.0000 |   1.000001
  280 |     0.0000 |   1.000001
  290 |     0.0000 |   1.000001
