In [1]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


# Cost function in pure Python

In [39]:
import numpy as np

X = np.array([1,2,3])
Y = np.array([1,2,3])

def cost_func(W, X, Y):
    cost = 0
    for i in range(len(X)):
        cost += (W * X[i] - Y[i]) ** 2
    
    return cost / len(X)

print("W | cost")
for feed_W in np.linspace(-3, 5, num=15):
    curr_cost = cost_func(feed_W, X, Y)
    print("{:6.3f} | {:10.5f}".format(feed_W, curr_cost))

W | cost
-3.000 |   74.66667
-2.429 |   54.85714
-1.857 |   38.09524
-1.286 |   24.38095
-0.714 |   13.71429
-0.143 |    6.09524
 0.429 |    1.52381
 1.000 |    0.00000
 1.571 |    1.52381
 2.143 |    6.09524
 2.714 |   13.71429
 3.286 |   24.38095
 3.857 |   38.09524
 4.429 |   54.85714
 5.000 |   74.66667


# Cost function in Tensorflow

In [40]:
import tensorflow as tf

X = np.array([1,2,3])
Y = np.array([1,2,3])

def cost_func(W, X, Y):
    hypothesis = W * X

    return tf.reduce_mean(tf.square(hypothesis - Y))

W_values = np.linspace(-3, 5, num=15)
cost_values = []

print("W | cost")
for feed_W in W_values:
    curr_cost = cost_func(feed_W, X, Y)
    cost_values.append(curr_cost)
    print("{:6.3f} | {:10.5f}".format(feed_W, curr_cost))

W | cost
-3.000 |   74.66667
-2.429 |   54.85714
-1.857 |   38.09524
-1.286 |   24.38095
-0.714 |   13.71429
-0.143 |    6.09524
 0.429 |    1.52381
 1.000 |    0.00000
 1.571 |    1.52381
 2.143 |    6.09524
 2.714 |   13.71429
 3.286 |   24.38095
 3.857 |   38.09524
 4.429 |   54.85714
 5.000 |   74.66667


![image](https://user-images.githubusercontent.com/64063767/116809156-ad301700-ab77-11eb-9e7f-7a456208eea0.png)


# Gradient Descent

![image](https://user-images.githubusercontent.com/64063767/116809185-c933b880-ab77-11eb-8d38-e03c9167bc01.png)

In [42]:
# for reproducibility
tf.random.set_seed(0) # tf_ver_1.x: tf.set_random_seed(0)

X = [1., 2., 3., 4.]
Y = [1., 2., 3., 4.]

# 정규분포를 따르는 random number 1개로 W 초기화
W = tf.Variable(tf.random.normal([1], -100., 100.)) # tf_ver_1.x: tf.random_normal([1], -100., 100.))

print("epoch | cost | W")
for epoch in range(301):
    hypothesis = W * X
    cost = tf.reduce_mean(tf.square(hypothesis - Y))

    alpha = 0.01
    gradient = tf.reduce_mean(tf.multiply(tf.multiply(W, X) - Y, X))
    descent = W - tf.multiply(alpha, gradient)
    W.assign(descent)

    if step % 10 == 0:
        print("{:5} | {:10.4f} | {:10.6f}".format(epoch, cost.numpy(), W.numpy()[0]))

epoch | cost | W
    0 | 18829.7812 |  47.348293
    1 | 16111.2324 |  43.872173
    2 | 13785.1738 |  40.656761
    3 | 11794.9404 |  37.682503
    4 | 10092.0449 |  34.931316
    5 |  8635.0059 |  32.386467
    6 |  7388.3271 |  30.032482
    7 |  6321.6372 |  27.855045
    8 |  5408.9512 |  25.840918
    9 |  4628.0337 |  23.977848
   10 |  3959.8613 |  22.254509
   11 |  3388.1562 |  20.660421
   12 |  2898.9912 |  19.185890
   13 |  2480.4495 |  17.821949
   14 |  2122.3347 |  16.560303
   15 |  1815.9226 |  15.393280
   16 |  1553.7488 |  14.313784
   17 |  1329.4263 |  13.315249
   18 |  1137.4902 |  12.391605
   19 |   973.2651 |  11.537235
   20 |   832.7499 |  10.746943
   21 |   712.5217 |  10.015922
   22 |   609.6513 |   9.339727
   23 |   521.6329 |   8.714248
   24 |   446.3221 |   8.135679
   25 |   381.8844 |   7.600503
   26 |   326.7498 |   7.105466
   27 |   279.5754 |   6.647556
   28 |   239.2116 |   6.223989
   29 |   204.6755 |   5.832190
   30 |   175.1255 |   