# Linear Regression

## Hypothesis
$$H(x) = Wx + b$$  

$x$: input (training data)  
$W$: Weight  
$b$: bias

## Cost
$$cost(W) = \frac{1}{m} \sum_{i=1}^m (Wx_i - y_i)^2$$
$$cost(W, b) = \frac{1}{m} \sum_{i=1}^m (H(x_i) - y_i)^2$$  

$Wx_i$, $H(x_i)$: predicted value  
$y_i$: actual value  
$(H(x_i) - y_i)^2$: loss, error  
$\frac{1}{m} \sum_{i=1}^m ()$: mean value of loss

## Goal

find $W$ and $b$ which minimize $cost(W,b)$

## How to find W and b: Gradient Descent Algorithm 

$$W := W - \alpha \frac{\partial cost(W,b)}{\partial W}$$
$$b := b - \alpha \frac{\partial cost(W,b)}{\partial b}$$  

$\alpha$: learning rate  
$\frac{\partial z}{\partial x}$: gradient

## Exercise1

In [1]:
import tensorflow as tf
# TensorFlow 1.x API: switch to eager mode so ops run immediately and tensors
# expose .numpy(), which the training loops in this notebook rely on.
# It has to be called once, before any other TensorFlow op is created.
tf.enable_eager_execution()  # important

### $H(x) = Wx + b$

In [2]:
# Toy data set in which every label equals its input, so the ideal fit is W = 1, b = 0.
x_data = [1., 2., 3., 4., 5.]
y_data = [1., 2., 3., 4., 5.]

# Trainable parameters, started from arbitrary values.
W, b = tf.Variable(2.9), tf.Variable(0.5)

# hypothesis = W * x + b, evaluated for all samples at once
hypothesis = W * x_data + b

### $cost(W, b) = \frac{1}{m} \sum_{i=1}^m (H(x_i) - y_i)^2$

In [3]:
# Mean of the squared differences between predictions and labels.
squared_error = tf.square(hypothesis - y_data)
cost = tf.reduce_mean(squared_error)

* tf.reduce_mean()

In [63]:
v = [1., 2., 3., 4.]
tf.reduce_mean(v)  # mean of all elements: (1 + 2 + 3 + 4) / 4 = 2.5

2.5


* tf.square()

In [64]:
tem = tf.square(3)  # element-wise square: 3 * 3 = 9

9


### minimize $cost(W,b)$

In [4]:
# Step size applied to every gradient-descent update.
learning_rate = 0.01

# Gradient descent: 100 updates, logging every 10th step.
for step in range(100):
    # Record the forward pass so the tape can differentiate cost w.r.t. W and b.
    with tf.GradientTape() as tape:
        hypothesis = W * x_data + b
        cost = tf.reduce_mean(tf.square(hypothesis - y_data))

    W_grad, b_grad = tape.gradient(cost, [W, b])

    # In-place updates: W <- W - learning_rate * dcost/dW, b <- b - learning_rate * dcost/db
    W.assign_sub(learning_rate * W_grad)
    b.assign_sub(learning_rate * b_grad)

    if step % 10 == 0:
        # W and b are printed after the update; cost is the value computed before it.
        print("{:5}|{:10.4}|{:10.4}|{:10.6}".format(step, W.numpy(), b.numpy(), cost.numpy()))

    0|     2.452|     0.376|     45.66
   10|     1.104|  0.003398|  0.206336
   20|     1.013|  -0.02091|0.00102611
   30|     1.007|  -0.02184|9.26298e-05
   40|     1.006|  -0.02123|8.26522e-05
   50|     1.006|  -0.02053|7.72211e-05
   60|     1.005|  -0.01984|7.2163e-05
   70|     1.005|  -0.01918|6.74368e-05
   80|     1.005|  -0.01854|6.30191e-05
   90|     1.005|  -0.01793|5.88925e-05


* tf.GradientTape()

In [45]:
x = tf.constant(3.)

with tf.GradientTape() as tape:
    tape.watch(x)  # x is a constant, so the tape is told explicitly to track it
    y = x * x

dy_dx = tape.gradient(y, x)  # d(x*x)/dx = 2x = 6.0 at x = 3

tf.Tensor(6.0, shape=(), dtype=float32)


In [46]:
z = tf.Variable(3.0)

with tf.GradientTape() as tape:
    y = z * z  # z is watched automatically because a tf.Variable is trainable by default
    
dy_dz = tape.gradient(y, z)  # d(z*z)/dz = 2z = 6.0 at z = 3

6.0


In [54]:
a = tf.constant(2.)
d = tf.constant(5.)

with tf.GradientTape() as tape:
    tape.watch([a, d])  # several constants can be watched at once by passing a list
    c = a*a + d*d
    
# One gradient is returned per source, in order: dc/da = 2a = 4, dc/dd = 2d = 10
dc_da, dc_dd = tape.gradient(c, [a, d])  # 4, 10

### Predict

In [67]:
# The training labels equal the inputs, so each prediction should be close to its input.
for x_new in (5., 2.5):
    print(W * x_new + b)  # expected value: x_new

tf.Tensor(5.001219, shape=(), dtype=float32)
tf.Tensor(2.4990265, shape=(), dtype=float32)


## Exercise2

In [13]:
# Same TensorFlow setup as Exercise 1, repeated here -- presumably so that
# Exercise 2 can be run on its own; NumPy is the only new dependency.
import tensorflow as tf
tf.enable_eager_execution()
import numpy as np

### numpy training

In [103]:
# A 1-D array has a single axis, so transposing it leaves it unchanged.
vector_form = np.array([1, 2])
vector_transposed = vector_form.T
print(vector_transposed)
print(vector_form == vector_transposed)

# A 2-D row of shape (1, 2) becomes a column of shape (2, 1);
# comparing the two broadcasts to a (2, 2) boolean array.
array_form = np.array([[1, 2]])
array_transposed = array_form.T
print(array_transposed)
print(array_form == array_transposed)

[1 2]
[ True  True]
[[1]
 [2]]
[[ True False]
 [False  True]]


In [104]:
array = np.array([[1, 2], [3, 4]])
vector = np.array([1, 1])
arr = np.array([[1, 1]])

# Dot products (note: NumPy's own `*` operator is element-wise, not a dot product).
# matrix . vector and vector . matrix each produce a 1-D vector
print(np.dot(array, vector))
print(np.dot(vector, array))
print(np.dot(vector, arr.T))

# matrix . matrix produces a 2-D matrix
print(np.dot(array, arr.T))

[3 7]
[3 7]
[4 6]
[2]
[[3]
 [7]]


In [113]:
vector_t = np.array([1, 2, 3])
array_t = np.array([[0, 1], [2, 3]])
num = np.array([1])  # or 1

# Broadcasting: the one-element array is added to every entry of each operand.
for operand in (vector_t, array_t):
    print(operand + num)

# TensorFlow ops accept NumPy arrays directly and return a tf.Tensor.
print(tf.square(vector_t))

[2 3 4]
[[1 2]
 [3 4]]
tf.Tensor([1 4 9], shape=(3,), dtype=int64)


In [26]:
# Python lists: a nested list stores references to the same inner list object.
# (Named base_list rather than `list` so the builtin list type is not shadowed
# for the rest of the kernel session.)
base_list = [1, 2, 3]
list2 = [base_list, base_list]
print(list2)

l = list2[0]  # the very same object as base_list, not a copy
print(l)

# NumPy arrays: np.array copies the rows into a new 2-D array,
# so a row of array2 is not the original `array` object.
array = np.array([1, 2, 3])
array2 = np.array([array, array])
print(array2)
print(array is array2[0])
print()

# Indexing the column with a list ([-1]) keeps the result 2-D: shape (2, 1).
ar = array2[:, [-1]]
print(ar)

[[1, 2, 3], [1, 2, 3]]
[1, 2, 3]
[[1 2 3]
 [1 2 3]]
False
[[3]
 [3]]


In [73]:
# Small float32 matrix of shape (2, 3), used as the left operand of the matmul below.
t_data = np.array([[1., 1., 1.], [2., 2., 2.]], dtype=np.float32)
print(t_data)

t = tf.Variable(tf.random_normal([3, 1]))  # wrapped in tf.Variable, so t is a trainable variable
t2 = tf.random_normal([3, 1])  # plain tensor, not a tf.Variable, so t2 is not trainable
print(t.numpy())
print(t2.numpy())

# A tf.Variable can also be initialized from a NumPy array.
t3 = tf.Variable(np.array([[3, 2, 1], [1, 2, 3]]))
print(t3.numpy())

# (2, 3) x (3, 1) -> (2, 1); tf.matmul requires matrix arguments (rank >= 2), not 1-D vectors
print(tf.matmul(t_data, t))

[[1. 1. 1.]
 [2. 2. 2.]]
[[ 0.40577587]
 [-1.0215772 ]
 [-1.0294142 ]]
[[ 0.17296557]
 [-0.1539809 ]
 [ 1.1171927 ]]
[[3 2 1]
 [1 2 3]]
tf.Tensor(
[[-1.6452155]
 [-3.290431 ]], shape=(2, 1), dtype=float32)


### $H(X) = XW + b$

In [62]:
# Features: one row per sample, three feature values per row -> shape (5, 3).
X1 = np.array([73., 80., 75.])
X2 = np.array([93., 88., 93.])
X3 = np.array([89., 91., 90.])
X4 = np.array([96., 98., 100.])
X5 = np.array([73., 66., 70.])
# float32 is set explicitly; NumPy would otherwise default to float64.
X_data = np.array([X1, X2, X3, X4, X5], dtype=np.float32)

# Labels: one target value per sample -> shape (5, 1).
y1 = np.array([152.])
y2 = np.array([185.])
y3 = np.array([180.])
y4 = np.array([196.])
y5 = np.array([142.])
# Use float32 like X_data so features and labels share one dtype, instead of
# leaving the labels as float64 and relying on an implicit cast during training.
Y_data = np.array([y1, y2, y3, y4, y5], dtype=np.float32)

# weights and bias
# (An earlier draft kept one scalar tf.Variable per feature -- w1, w2, w3 -- plus b_2;
# a single [3, 1] weight matrix replaces them so that tf.matmul covers all features at once.)

W_2 = tf.Variable(tf.random_normal([3, 1]))
b_2 = tf.Variable(tf.random_normal([1]))

# (5, 3) x (3, 1) -> (5, 1) predictions; the single bias value is added to every row
hypothesis_2 = tf.matmul(X_data, W_2) + b_2

### $cost(W,b) = \frac{1}{m} \sum_{i=1}^m (H(X_i) - Y_i)^2$

In [41]:
# Mean of the squared differences between the matrix predictions and the labels.
squared_error_2 = tf.square(hypothesis_2 - Y_data)
cost_2 = tf.reduce_mean(squared_error_2)

### minimize $cost(W,b)$

In [65]:
# Step size; presumably kept this small because the unscaled inputs (values near 100)
# lead to large gradients.
learning_rate_2 = 0.000001

# Gradient descent: 1000 updates, logging every 50th step.
for step in range(1000):
    # Record the forward pass so the tape can differentiate cost_2 w.r.t. W_2 and b_2.
    with tf.GradientTape() as tape:
        hypothesis_2 = tf.matmul(X_data, W_2) + b_2
        cost_2 = tf.reduce_mean(tf.square(hypothesis_2 - Y_data))

    W_2_grad, b_2_grad = tape.gradient(cost_2, [W_2, b_2])

    W_2.assign_sub(learning_rate_2 * W_2_grad)
    b_2.assign_sub(learning_rate_2 * b_2_grad)

    # W_2 and b_2 are updated in place, so re-running this cell continues training
    # from the current values; the printed costs therefore differ from run to run.
    if step % 50 == 0:
        print("{:5}|{:12.4f}".format(step, cost_2.numpy()))

    0|      0.7928
   50|      0.7914
  100|      0.7899
  150|      0.7885
  200|      0.7870
  250|      0.7856
  300|      0.7842
  350|      0.7827
  400|      0.7813
  450|      0.7798
  500|      0.7784
  550|      0.7770
  600|      0.7756
  650|      0.7742
  700|      0.7728
  750|      0.7714
  800|      0.7700
  850|      0.7686
  900|      0.7672
  950|      0.7658


### Predict

In [67]:
# Inputs to score with the trained model: one row per sample, three features each.
test_data = np.array(
    [
        [73., 80., 75.],
        [77., 75., 90.],
        [96., 98., 100.],
        [73., 66., 70.],
        [10., 24., 15.],
    ],
    dtype=np.float32,
)

# Same hypothesis as in training: XW + b, giving one prediction per row.
predict_data = tf.matmul(test_data, W_2) + b_2
print(predict_data.numpy())

[[150.50476 ]
 [156.96814 ]
 [195.58366 ]
 [142.9451  ]
 [ 25.828838]]
