# Linear Regression

## Hypothesis
$$H(x) = W(x) + b$$  

$x$: training data, actual data  
$W$: Weight  
$b$: bias

## Cost
$$cost(W) = \frac{1}{m} \sum_{i=1}^m (Wx_i - y_i)^2$$
$$cost(W, b) = \frac{1}{m} \sum_{i=1}^m (H(x_i) - y_i)^2$$  

$Wx_i$, $H(x_i)$: expected value  
$y_i$: actual value  
$(H(x_i) - y_i)^2$: loss, error  
$\frac{1}{m} \sum_{i=1}^m ()$: mean value of loss

## Goal

find $W$ and $b$ which minimize $cost(W,b)$

## How to find W and b: Gradient Descent Algorithm 

$$W := W - \alpha \frac{\partial cost(W,b)}{\partial W}$$
$$b := b - \alpha \frac{\partial cost(W,b)}{\partial b}$$  

$\alpha$: learning rate  
$\frac{\partial z}{\partial x}$: gradient

## Exercise1

In [7]:
import tensorflow as tf

### $H(x) = Wx + b$

In [53]:
x_data = [1., 2., 3., 4., 5.]
y_data = [1., 2., 3., 4., 5.]

W = tf.Variable(2.9)  # random value
b = tf.Variable(0.5)  # random value

# hypothesis = W * x + b
hypothesis = W * x_data + b

### $cost(W, b) = \frac{1}{m} \sum_{i=1}^m (H(x_i) - y_i)^2$

In [17]:
cost = tf.reduce_mean(tf.square(hypothesis - y_data))

* tf.reduce_mean()

In [63]:
v = [1., 2., 3., 4.]
tf.reduce_mean(v)  # 2.5

2.5


* tf.square()

In [64]:
tem = tf.square(3)  # 9

9


### minimize $cost(W,b)$

In [66]:
# learning_rate initialize
learning_rate = 0.01

# Gradient descent
for i in range(100):    
    with tf.GradientTape() as tape:
        hypothesis = W * x_data + b
        cost = tf.reduce_mean(tf.square(hypothesis - y_data))
        
    W_grad, b_grad = tape.gradient(cost, [W, b])
    
    W.assign_sub(learning_rate * W_grad)  # W = W - learning_rate*dcost_dW
    b.assign_sub(learning_rate * b_grad)  # b = b - learning_rate*dcost_db
    
    if i%10 == 0:
        print("{:5}|{:10.4}|{:10.4}|{:10.6}".format(i, W.numpy(), b.numpy(), cost.numpy()))

    0|     1.001| -0.004427|3.59126e-06
   10|     1.001| -0.004279|3.35605e-06
   20|     1.001| -0.004137|3.13639e-06
   30|     1.001| -0.003999|2.9308e-06
   40|     1.001| -0.003866|2.73909e-06
   50|     1.001| -0.003737|2.55945e-06
   60|     1.001| -0.003613|2.39215e-06
   70|     1.001| -0.003493|2.23554e-06
   80|     1.001| -0.003376|2.08921e-06
   90|     1.001| -0.003264|1.95242e-06


* tf.GradientTape()

In [45]:
x = tf.constant(3.)

with tf.GradientTape() as tape:
    tape.watch(x)
    y = x * x

dy_dx = tape.gradient(y, x)  # 6.0

tf.Tensor(6.0, shape=(), dtype=float32)


In [46]:
z = tf.Variable(3.0)

with tf.GradientTape() as tape:
    y = z * z  # z is automatically watched since tf.Variable is trainable variable
    
dy_dz = tape.gradient(y, z)  # 6.0

6.0


In [54]:
a = tf.constant(2.)
d = tf.constant(5.)

with tf.GradientTape() as tape:
    tape.watch([a, d])
    c = a*a + d*d
    
dc_da, dc_dd = tape.gradient(c, [a, d])  # 4, 10

### Predict

In [67]:
print(W * 5. + b)  # expected value: 5
print(W * 2.5 + b)  # expected value: 2.5

tf.Tensor(5.001219, shape=(), dtype=float32)
tf.Tensor(2.4990265, shape=(), dtype=float32)


## Exercise2

In [68]:
import numpy as np

### numpy training

In [103]:
vector_form = np.array([1, 2])
print(vector_form.T)
print(vector_form == vector_form.T)

array_form = np.array([[1, 2]])
print(array_form.T)
print(array_form == array_form.T)

[1 2]
[ True  True]
[[1]
 [2]]
[[ True False]
 [False  True]]


In [104]:
array = np.array([[1, 2], [3, 4]])
vector = np.array([1, 1])
arr = np.array([[1, 1]])

# *: dot product
# array * vector = vector * array = vector
print(array.dot(vector))
print(vector.dot(array))
print(vector.dot(arr.T))

# array * array = array
print(array.dot(arr.T))

[3 7]
[3 7]
[4 6]
[2]
[[3]
 [7]]


In [113]:
vector_t = np.array([1, 2, 3])
array_t = np.array([[0, 1], [2, 3]])
num = np.array([1])  # or 1

print(vector_t + num)
print(array_t + num)
print(tf.square(vector_t))

[2 3 4]
[[1 2]
 [3 4]]
tf.Tensor([1 4 9], shape=(3,), dtype=int64)


### $H(X) = XW + b$

In [121]:
# data and label
X1 = np.array([73, 80, 75])
X2 = np.array([93, 88, 93])
X3 = np.array([89, 91, 90])
X4 = np.array([96, 98, 100])
X5 = np.array([73, 66, 70])
X_data = np.array([X1, X2, X3, X4, X5])  # array

y1 = 152
y2 = 185
y3 = 180
y4 = 196
y5 = 142
Y_data = np.array([y1, y2, y3, y4, y5])  # vector

# # weights and bias
# w1 = tf.Variable(10.)
# w2 = tf.Variable(10.)
# w3 = tf.Variable(10.)
# b_2 = tf.Variable(10.)
# W_2 = np.array([w1, w2, w3])  # vector

W_2 = tf.Variable(tf.random_normal([3, 1]))
b_2 = tf.Variable(tf.random_normal([1]))

hypothesis_2 = X_data.dot(W_2) + b_2  # vector

ValueError: setting an array element with a sequence.

### $cost(W,b) = \frac{1}{m} \sum_{i=1}^m (H(X_i) - Y_i)^2$

In [116]:
cost_2 = tf.reduce_mean(tf.square(hypothesis_2 - Y_data))

tf.Tensor(5793889.5, shape=(), dtype=float32)


### minimize $cost(W,b)$

In [120]:
# learning_rate
learning_rate_2 = 0.000001

# gradient descent
for i in range(1000):
    with tf.GradientTape() as tape:
        hypothesis_2 = X1.dot() + b_2
        cost_2 = tf.reduce_mean(tf.square(hypothesis_2 - Y_data))
    
    w1_grad, w2_grad, w3_grad, b_2_grad = tape.gradient(cost_2, [w1, w2, w3, b_2])
    
    w1.assign_sub(learning_rate_2 * w1_grad)
    w2.assign_sub(learning_rate_2 * w2_grad)
    w3.assign_sub(learning_rate_2 * w3_grad)
    b_2.assign_sub(learning_rate_2 * b_2_grad)
    
    if i%50 == 0:
        print("{:5}|{:12.4f}".format(i, cost.numpy()))

AttributeError: 'numpy.ndarray' object has no attribute '_id'