# 04 - Multi-variable Linear Regression

<img width="200" src="https://i.imgur.com/hbPVe1T.png">


In [1]:
import tensorflow as tf
import numpy as np

tf.__version__

'2.12.0'

# Multi-variable linear regression
Predicting exam score - regression using three inputs (x1, x2, x3)

x1 (quiz 1) | x2 (quiz 2) | x3 (mid 1) | Y (final)
---- | ---- | ----| ----
73 | 80 | 75 | 152
93 | 88 | 93 | 185
89 | 91 | 90 | 180
96 | 98 | 100 | 196
73 | 66 | 70 | 142


Test Scores for General Psychology ( https://goo.gl/g2T8Kp )

# Matrix multiplication

## dot product(=scalar product, 내적)
<img src="https://www.mathsisfun.com/algebra/images/matrix-multiply-a.svg" >


https://www.mathsisfun.com/algebra/matrix-multiplying.html

# Multi-feature regression

### Hypothesis

$$ H(x) = w x + b $$

$$ H(x_1, x_2, x_3) = w_1 x_1 + w_2 x_2 + w_3 x_3 + b $$

# Hypothesis using matrix

$$ H(x_1, x_2, x_3) = \underline{w_1 x_1 + w_2 x_2 + w_3 x_3} + b $$

$$ w_1 x_1 + w_2 x_2 + w_3 x_3 $$ 

$$ \begin{pmatrix} w_{ 1 } & w_{ 2 } & w_{ 3 } \end{pmatrix}\cdot \begin{pmatrix} x_{ 1 } \\ x_{ 2 } \\ x_{ 3 } \end{pmatrix} $$

$$ WX $$ (W, X 는 matrix)

# Hypothesis without b

$$ H(x_1, x_2, x_3) = w_1 x_1 + w_2 x_2 + w_3 x_3 + b$$

$$ = b + w_1 x_1 + w_2 x_2 + w_3 x_3 $$

$$ = \begin{pmatrix} b & x_{ 1 } & x_{ 2 } & x_{ 3 } \end{pmatrix}\cdot \begin{pmatrix} 1 \\ w_{ 1 } \\ w_{ 2 } \\ w_{ 3 } \end{pmatrix} $$

$$ = XW $$



# Hypothesis using matrix 

### Many x instances

$$ \begin{pmatrix} x_{ 11 } & x_{ 12 } & x_{ 13 } \\ x_{ 21 } & x_{ 22 } & x_{ 23 } \\ x_{ 31 } & x_{ 32 } & x_{ 33 }\\ x_{ 41 } & x_{ 42 } & x_{ 43 }\\ x_{ 51 } & x_{ 52 } & x_{ 53 }\end{pmatrix} \cdot \begin{pmatrix} w_{ 1 } \\ w_{ 2 } \\ w_{ 3 } \end{pmatrix}=\begin{pmatrix} x_{ 11 }w_{ 1 }+x_{ 12 }w_{ 2 }+x_{ 13 }w_{ 3 } \\ x_{ 21 }w_{ 1 }+x_{ 22 }w_{ 2 }+x_{ 23 }w_{ 3 }\\ x_{ 31 }w_{ 1 }+x_{ 32 }w_{ 2 }+x_{ 33 }w_{ 3 } \\ x_{ 41 }w_{ 1 }+x_{ 42 }w_{ 2 }+x_{ 43 }w_{ 3 } \\ x_{ 51 }w_{ 1 }+x_{ 52 }w_{ 2 }+x_{ 53 }w_{ 3 } \end{pmatrix} $$

$$ [5, 3] \cdot [3, 1] = [5, 1] $$

$$ H(X) = XW $$

5는 데이터(instance)의 수, 3은 변수(feature)의 수, 1은 결과

# Hypothesis using matrix (n output)

$$ [n, 3] \cdot [?, ?] = [n, 2] $$

$$ H(X) = XW $$

* n은 데이터(instance)의 개수, 2는 결과 값의 개수로 주어진다.
* 이때, W [?, ?] ⇒ [3, 2]

# WX vs XW

### Theory (Lecture) :
 $$ H(x) = Wx + b  $$

### TensorFlow (Implementation) :

$$ H(X) = XW $$

# Simple Example (2 variables)

x1 | x2 | y
---- | ---- | ----
1  |  0  |  1
0  |  2  |  2
3  |  0  |  3
0  |  4  |  4
5  |  0  |  5

In [2]:
tf.random.set_seed(0)  # for reproducibility

In [20]:
x1_data = [1, 0, 3, 0, 5]
x2_data = [0, 2, 0, 4, 0]
y_data  = [1, 2, 3, 4, 5]

W1 = tf.Variable(tf.random.uniform([1], -1.0, 1.0))
W2 = tf.Variable(tf.random.uniform([1], -1.0, 1.0))
b  = tf.Variable(tf.random.uniform([1], -1.0, 1.0))

learning_rate = tf.Variable(0.001)

for i in range(1000+1):
    with tf.GradientTape() as tape:
        hypothesis = W1 * x1_data + W2 * x2_data + b
        cost = tf.reduce_mean(tf.square(hypothesis - y_data))
    W1_grad, W2_grad, b_grad = tape.gradient(cost, [W1, W2, b])
    W1.assign_sub(learning_rate * W1_grad)
    W2.assign_sub(learning_rate * W2_grad)
    b.assign_sub(learning_rate * b_grad)

    if i % 50 == 0:
        print("{:5} | {:10.6f} | {:10.4f} | {:10.4f} | {:10.6f}".format(
          i, cost.numpy(), W1.numpy()[0], W2.numpy()[0], b.numpy()[0]))


    0 |  15.836255 |    -0.3235 |     0.9638 |  -0.570880
   50 |   3.479142 |     0.4034 |     1.0201 |  -0.359012
  100 |   0.768021 |     0.7440 |     1.0433 |  -0.259107
  150 |   0.172895 |     0.9034 |     1.0519 |  -0.211398
  200 |   0.042070 |     0.9779 |     1.0544 |  -0.187986
  250 |   0.013180 |     1.0126 |     1.0544 |  -0.175865
  300 |   0.006694 |     1.0286 |     1.0534 |  -0.168982
  350 |   0.005144 |     1.0358 |     1.0523 |  -0.164528
  400 |   0.004688 |     1.0388 |     1.0511 |  -0.161200
  450 |   0.004478 |     1.0400 |     1.0501 |  -0.158401
  500 |   0.004328 |     1.0402 |     1.0491 |  -0.155857
  550 |   0.004194 |     1.0399 |     1.0482 |  -0.153443
  600 |   0.004067 |     1.0395 |     1.0474 |  -0.151103
  650 |   0.003944 |     1.0390 |     1.0466 |  -0.148813
  700 |   0.003826 |     1.0385 |     1.0458 |  -0.146563
  750 |   0.003711 |     1.0379 |     1.0451 |  -0.144348
  800 |   0.003599 |     1.0374 |     1.0444 |  -0.142165
  850 |   0.00

# Simple Example (2 variables with Matrix)

In [21]:
x_data = [
    [1., 0., 3., 0., 5.],
    [0., 2., 0., 4., 0.]
]
y_data  = [1, 2, 3, 4, 5]

W = tf.Variable(tf.random.uniform([1, 2], -1.0, 1.0))
b = tf.Variable(tf.random.uniform([1], -1.0, 1.0))

learning_rate = tf.Variable(0.001)

for i in range(1000+1):
    with tf.GradientTape() as tape:
        hypothesis = tf.matmul(W, x_data) + b # (1, 2) * (2, 5) = (1, 5)
        cost = tf.reduce_mean(tf.square(hypothesis - y_data))

        W_grad, b_grad = tape.gradient(cost, [W, b])
        W.assign_sub(learning_rate * W_grad)
        b.assign_sub(learning_rate * b_grad)
    
    if i % 50 == 0:
        print("{:5} | {:10.6f} | {:10.4f} | {:10.4f} | {:10.6f}".format(
            i, cost.numpy(), W.numpy()[0][0], W.numpy()[0][1], b.numpy()[0]))

    0 |   5.365101 |     0.0477 |     0.9469 |   0.360085
   50 |   1.251087 |     0.4749 |     0.9231 |   0.455467
  100 |   0.326902 |     0.6782 |     0.9011 |   0.492705
  150 |   0.114357 |     0.7759 |     0.8842 |   0.503913
  200 |   0.062980 |     0.8237 |     0.8724 |   0.503729
  250 |   0.049111 |     0.8477 |     0.8649 |   0.498749
  300 |   0.044394 |     0.8603 |     0.8604 |   0.491892
  350 |   0.042091 |     0.8675 |     0.8582 |   0.484416
  400 |   0.040505 |     0.8721 |     0.8574 |   0.476840
  450 |   0.039172 |     0.8753 |     0.8576 |   0.469358
  500 |   0.037950 |     0.8778 |     0.8585 |   0.462033
  550 |   0.036791 |     0.8801 |     0.8599 |   0.454872
  600 |   0.035678 |     0.8821 |     0.8614 |   0.447867
  650 |   0.034603 |     0.8840 |     0.8632 |   0.441004
  700 |   0.033562 |     0.8858 |     0.8650 |   0.434271
  750 |   0.032553 |     0.8875 |     0.8669 |   0.427659
  800 |   0.031574 |     0.8893 |     0.8688 |   0.421159
  850 |   0.03

# Hypothesis without b

In [22]:
import tensorflow as tf

# 앞의 코드에서 bias(b)를 행렬에 추가
x_data = [
    [1., 1., 1., 1., 1.], # bias(b)
    [1., 0., 3., 0., 5.], 
    [0., 2., 0., 4., 0.]
]
y_data  = [1, 2, 3, 4, 5]

W = tf.Variable(tf.random.uniform([1, 3], -1.0, 1.0)) # [1, 3]으로 변경하고, b 삭제

learning_rate = 0.001
optimizer = tf.keras.optimizers.experimental.SGD(learning_rate)

for i in range(1000+1):
    with tf.GradientTape() as tape:
        hypothesis = tf.matmul(W, x_data) # b가 없다
        cost = tf.reduce_mean(tf.square(hypothesis - y_data))

    grads = tape.gradient(cost, [W])
    optimizer.apply_gradients(grads_and_vars=zip(grads,[W]))
    if i % 50 == 0:
        print("{:5} | {:10.6f} | {:10.4f} | {:10.4f} | {:10.4f}".format(
            i, cost.numpy(), W.numpy()[0][0], W.numpy()[0][1], W.numpy()[0][2]))

    0 |   5.061083 |    -0.1876 |     0.7384 |     0.0218
   50 |   1.884164 |    -0.0478 |     0.8843 |     0.3560
  100 |   0.740304 |     0.0314 |     0.9429 |     0.5693
  150 |   0.301569 |     0.0769 |     0.9642 |     0.7061
  200 |   0.126378 |     0.1033 |     0.9703 |     0.7942
  250 |   0.054731 |     0.1185 |     0.9707 |     0.8511
  300 |   0.025011 |     0.1271 |     0.9694 |     0.8881
  350 |   0.012559 |     0.1317 |     0.9680 |     0.9123
  400 |   0.007287 |     0.1338 |     0.9669 |     0.9281
  450 |   0.005016 |     0.1344 |     0.9662 |     0.9386
  500 |   0.004005 |     0.1340 |     0.9658 |     0.9456
  550 |   0.003523 |     0.1330 |     0.9658 |     0.9503
  600 |   0.003266 |     0.1317 |     0.9659 |     0.9536
  650 |   0.003104 |     0.1301 |     0.9661 |     0.9560
  700 |   0.002984 |     0.1285 |     0.9664 |     0.9577
  750 |   0.002883 |     0.1267 |     0.9668 |     0.9590
  800 |   0.002791 |     0.1249 |     0.9673 |     0.9601
  850 |   0.00

# Custom Gradient
* f.keras.optimizers.experimental.XXX(): optimizer
* optimizer.apply_gradients(): update

In [23]:
# Multi-variable linear regression (1)

X = tf.constant([[1., 2.], 
                 [3., 4.]])
y = tf.constant([[1.5], [3.5]])

W = tf.Variable(tf.random.normal([2, 1]))
b = tf.Variable(tf.random.normal([1]))

# Create an optimizer
optimizer = tf.keras.optimizers.experimental.SGD(learning_rate=0.01)

n_epoch = 1000+1
print("epoch | cost")
for i in range(n_epoch):
    # Use tf.GradientTape() to record the gradient of the cost function
    with tf.GradientTape() as tape:
        y_pred = tf.matmul(X, W) + b
        cost = tf.reduce_mean(tf.square(y_pred - y))

    # calculates the gradients of the loss
    grads = tape.gradient(cost, [W, b])
    
    # updates parameters (W and b)
    optimizer.apply_gradients(grads_and_vars=zip(grads, [W, b]))
    if i % 50 == 0:
        print("{:5} | {:10.6f}".format(i, cost.numpy()))


epoch | cost
    0 |   6.079908
   50 |   0.024227
  100 |   0.016564
  150 |   0.011325
  200 |   0.007743
  250 |   0.005294
  300 |   0.003620
  350 |   0.002475
  400 |   0.001692
  450 |   0.001157
  500 |   0.000791
  550 |   0.000541
  600 |   0.000370
  650 |   0.000253
  700 |   0.000173
  750 |   0.000118
  800 |   0.000081
  850 |   0.000055
  900 |   0.000038
  950 |   0.000026
 1000 |   0.000018


# Predicting exam score
regression using three inputs (x1, x2, x3)

x1 (quiz 1) | x2 (quiz 2) | x3 (mid 1) | Y (final)
---- | ---- | ----| ----
73 | 80 | 75 | 152
93 | 88 | 93 | 185
89 | 91 | 90 | 180
96 | 98 | 100 | 196
73 | 66 | 70 | 142

In [24]:
tf.random.set_seed(1)  # for reproducibility


```python
x1 = [ 73.,  93.,  89.,  96.,  73.]
x2 = [ 80.,  88.,  91.,  98.,  66.]
x3 = [ 75.,  93.,  90., 100.,  70.]
Y  = [152., 185., 180., 196., 142.]

# weights
w1 = tf.Variable(10.)
w2 = tf.Variable(10.)
w3 = tf.Variable(10.)
b  = tf.Variable(10.)

hypothesis = w1 * x1 +  w2 * x2 + w3 * x3 + b        
```

In [25]:
# data and label
x1 = [ 73.,  93.,  89.,  96.,  73.]
x2 = [ 80.,  88.,  91.,  98.,  66.]
x3 = [ 75.,  93.,  90., 100.,  70.]
Y  = [152., 185., 180., 196., 142.]

# weights
w1 = tf.Variable(10.)
w2 = tf.Variable(10.)
w3 = tf.Variable(10.)
b  = tf.Variable(10.)

learning_rate = 0.000001

for i in range(1000+1):
    # tf.GradientTape() to record the gradient of the cost function
    with tf.GradientTape() as tape:
        hypothesis = w1 * x1 +  w2 * x2 + w3 * x3 + b
        cost = tf.reduce_mean(tf.square(hypothesis - Y))
    # calculates the gradients of the cost
    w1_grad, w2_grad, w3_grad, b_grad = tape.gradient(cost, [w1, w2, w3, b])
    
    # update w1,w2,w3 and b
    w1.assign_sub(learning_rate * w1_grad)
    w2.assign_sub(learning_rate * w2_grad)
    w3.assign_sub(learning_rate * w3_grad)
    b.assign_sub(learning_rate * b_grad)

    if i % 50 == 0:
      print("{:5} | {:12.4f}".format(i, cost.numpy()))

    0 | 5793889.5000
   50 |   64291.1562
  100 |     715.2903
  150 |       9.8461
  200 |       2.0152
  250 |       1.9252
  300 |       1.9210
  350 |       1.9177
  400 |       1.9145
  450 |       1.9114
  500 |       1.9081
  550 |       1.9050
  600 |       1.9018
  650 |       1.8986
  700 |       1.8955
  750 |       1.8923
  800 |       1.8892
  850 |       1.8861
  900 |       1.8829
  950 |       1.8798
 1000 |       1.8767


## Multi-variable linear regression (1)
*  random  초기화: tf.random.normal()


In [26]:
# data and label
x1 = [ 73.,  93.,  89.,  96.,  73.]
x2 = [ 80.,  88.,  91.,  98.,  66.]
x3 = [ 75.,  93.,  90., 100.,  70.]
Y  = [152., 185., 180., 196., 142.]

# random weights
w1 = tf.Variable(tf.random.normal([1]))
w2 = tf.Variable(tf.random.normal([1]))
w3 = tf.Variable(tf.random.normal([1]))
b  = tf.Variable(tf.random.normal([1]))

learning_rate = 0.000001

for i in range(1000+1):
    # tf.GradientTape() to record the gradient of the cost function
    with tf.GradientTape() as tape:
        hypothesis = w1 * x1 +  w2 * x2 + w3 * x3 + b
        cost = tf.reduce_mean(tf.square(hypothesis - Y))
    # calculates the gradients of the cost
    w1_grad, w2_grad, w3_grad, b_grad = tape.gradient(cost, [w1, w2, w3, b])
    
    # update w1,w2,w3 and b
    w1.assign_sub(learning_rate * w1_grad)
    w2.assign_sub(learning_rate * w2_grad)
    w3.assign_sub(learning_rate * w3_grad)
    b.assign_sub(learning_rate * b_grad)

    if i % 50 == 0:
      print("{:5} | {:12.4f}".format(i, cost.numpy()))


    0 |   72715.3281
   50 |     831.9130
  100 |      34.2250
  150 |      25.3068
  200 |      25.1411
  250 |      25.0727
  300 |      25.0054
  350 |      24.9385
  400 |      24.8717
  450 |      24.8050
  500 |      24.7385
  550 |      24.6723
  600 |      24.6062
  650 |      24.5403
  700 |      24.4745
  750 |      24.4090
  800 |      24.3435
  850 |      24.2784
  900 |      24.2134
  950 |      24.1485
 1000 |      24.0838


## Multi-variable linear regression (2)

* Matrix 사용

In [27]:
data = np.array([
    # X1,   X2,    X3,   y
    [ 73.,  80.,  75., 152. ],
    [ 93.,  88.,  93., 185. ],
    [ 89.,  91.,  90., 180. ],
    [ 96.,  98., 100., 196. ],
    [ 73.,  66.,  70., 142. ]
], dtype=np.float32)

# slice data
X = data[:, :-1]
y = data[:, [-1]]

W = tf.Variable(tf.random.normal([3, 1]))
b = tf.Variable(tf.random.normal([1]))

learning_rate = 0.000001

# hypothesis, prediction function
def predict(X):
    return tf.matmul(X, W) + b

print("epoch | cost")

n_epochs = 2000
for i in range(n_epochs+1):
    # tf.GradientTape() to record the gradient of the cost function
    with tf.GradientTape() as tape:
        cost = tf.reduce_mean((tf.square(predict(X) - y)))

    # calculates the gradients of the loss
    W_grad, b_grad = tape.gradient(cost, [W, b])

    # updates parameters (W and b)
    W.assign_sub(learning_rate * W_grad)
    b.assign_sub(learning_rate * b_grad)
    
    if i % 100 == 0:
        print("{:5} | {:10.4f}".format(i, cost.numpy()))

epoch | cost
    0 | 11871.5801
  100 |     5.3927
  200 |     3.9299
  300 |     3.9279
  400 |     3.9260
  500 |     3.9242
  600 |     3.9224
  700 |     3.9205
  800 |     3.9187
  900 |     3.9169
 1000 |     3.9151
 1100 |     3.9132
 1200 |     3.9114
 1300 |     3.9095
 1400 |     3.9077
 1500 |     3.9059
 1600 |     3.9041
 1700 |     3.9022
 1800 |     3.9004
 1900 |     3.8986
 2000 |     3.8967


In [28]:
W.numpy()

array([[0.19330743],
       [0.07296816],
       [1.7452403 ]], dtype=float32)

In [29]:
b.numpy()

array([-1.1811429], dtype=float32)

In [30]:
tf.matmul(X, W) + b

<tf.Tensor: shape=(5, 1), dtype=float32, numpy=
array([[149.66078],
       [185.52501],
       [179.73495],
       [199.05128],
       [139.91302]], dtype=float32)>

## predict

In [31]:
Y # labels, 실제값

[152.0, 185.0, 180.0, 196.0, 142.0]

In [32]:
predict(X).numpy() # prediction, 예측값

array([[149.66078],
       [185.52501],
       [179.73495],
       [199.05128],
       [139.91302]], dtype=float32)

In [33]:
# 새로운 데이터에 대한 예측

predict([[ 89.,  95.,  92.],[ 84.,  92.,  85.]]).numpy() 

array([[183.5173 ],
       [170.11519]], dtype=float32)

# TensorFlow2 Style

In [40]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Input
import seaborn as sns

tf.random.set_seed(2)

In [41]:
data = np.array([
    # X1,   X2,    X3,   y
    [ 73.,  80.,  75., 152. ],
    [ 93.,  88.,  93., 185. ],
    [ 89.,  91.,  90., 180. ],
    [ 96.,  98., 100., 196. ],
    [ 73.,  66.,  70., 142. ]
], dtype=np.float32)

# slice data
X = data[:, :-1]
y = data[:, [-1]]

In [42]:
linear_regression_model = tf.keras.Sequential([
    Input(shape=([3,])),
    layers.Dense(units=1)
])

linear_regression_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1 (Dense)             (None, 1)                 4         
                                                                 
Total params: 4
Trainable params: 4
Non-trainable params: 0
_________________________________________________________________


In [43]:
linear_regression_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.000001),
    loss='mean_absolute_error')

In [45]:
history = linear_regression_model.fit(
    X, y, epochs=2000, verbose=2)

Epoch 1/2000
1/1 - 0s - loss: 170.3459 - 7ms/epoch - 7ms/step
Epoch 2/2000
1/1 - 0s - loss: 170.3457 - 7ms/epoch - 7ms/step
Epoch 3/2000
1/1 - 0s - loss: 170.3454 - 7ms/epoch - 7ms/step
Epoch 4/2000
1/1 - 0s - loss: 170.3452 - 6ms/epoch - 6ms/step
Epoch 5/2000
1/1 - 0s - loss: 170.3449 - 6ms/epoch - 6ms/step
Epoch 6/2000
1/1 - 0s - loss: 170.3447 - 6ms/epoch - 6ms/step
Epoch 7/2000
1/1 - 0s - loss: 170.3444 - 9ms/epoch - 9ms/step
Epoch 8/2000
1/1 - 0s - loss: 170.3441 - 6ms/epoch - 6ms/step
Epoch 9/2000
1/1 - 0s - loss: 170.3439 - 5ms/epoch - 5ms/step
Epoch 10/2000
1/1 - 0s - loss: 170.3436 - 5ms/epoch - 5ms/step
Epoch 11/2000
1/1 - 0s - loss: 170.3434 - 6ms/epoch - 6ms/step
Epoch 12/2000
1/1 - 0s - loss: 170.3431 - 6ms/epoch - 6ms/step
Epoch 13/2000
1/1 - 0s - loss: 170.3428 - 6ms/epoch - 6ms/step
Epoch 14/2000
1/1 - 0s - loss: 170.3426 - 6ms/epoch - 6ms/step
Epoch 15/2000
1/1 - 0s - loss: 170.3423 - 6ms/epoch - 6ms/step
Epoch 16/2000
1/1 - 0s - loss: 170.3421 - 6ms/epoch - 6ms/step
E

In [46]:
linear_regression_model.predict(X)



array([[ 2.7335303 ],
       [ 0.13092245],
       [ 1.6102964 ],
       [ 3.5104887 ],
       [-2.1336992 ]], dtype=float32)

Load Data

# Load Data with Multi-variable-Linear-Regression

### Load Data

In [47]:
xy = np.loadtxt('data-01-test-score.csv', delimiter=',', dtype=np.float32)
x_train = xy[:, 0:-1]
y_train = xy[:, [-1]]

print(x_train.shape, y_train.shape)
print(xy)

(25, 3) (25, 1)
[[ 73.  80.  75. 152.]
 [ 93.  88.  93. 185.]
 [ 89.  91.  90. 180.]
 [ 96.  98. 100. 196.]
 [ 73.  66.  70. 142.]
 [ 53.  46.  55. 101.]
 [ 69.  74.  77. 149.]
 [ 47.  56.  60. 115.]
 [ 87.  79.  90. 175.]
 [ 79.  70.  88. 164.]
 [ 69.  70.  73. 141.]
 [ 70.  65.  74. 141.]
 [ 93.  95.  91. 184.]
 [ 79.  80.  73. 152.]
 [ 70.  73.  78. 148.]
 [ 93.  89.  96. 192.]
 [ 78.  75.  68. 147.]
 [ 81.  90.  93. 183.]
 [ 88.  92.  86. 177.]
 [ 78.  83.  77. 159.]
 [ 82.  86.  90. 177.]
 [ 86.  82.  89. 175.]
 [ 78.  83.  85. 175.]
 [ 76.  83.  71. 149.]
 [ 96.  93.  95. 192.]]


In [72]:
W = tf.Variable(tf.random.normal([3, 1]))
b = tf.Variable(tf.random.normal([1]))

learning_rate = 1e-5

# hypothesis, prediction function
def predict(X):
    return tf.matmul(X, W) + b

print("epoch | cost")

n_epochs = 2000
for i in range(n_epochs+1):
    # tf.GradientTape() to record the gradient of the cost function
    with tf.GradientTape() as tape:
        cost = tf.reduce_mean((tf.square(predict(x_train) - y_train)))

    # calculates the gradients of the loss
    W_grad, b_grad = tape.gradient(cost, [W, b])

    # updates parameters (W and b)
    W.assign_sub(learning_rate * W_grad)
    b.assign_sub(learning_rate * b_grad)
    
    if i % 100 == 0:
        print("{:5} | {:10.4f}".format(i, cost.numpy()))

epoch | cost
    0 | 37285.6172
  100 |    43.6932
  200 |    40.6736
  300 |    37.9030
  400 |    35.3604
  500 |    33.0271
  600 |    30.8854
  700 |    28.9194
  800 |    27.1145
  900 |    25.4572
 1000 |    23.9353
 1100 |    22.5376
 1200 |    21.2536
 1300 |    20.0741
 1400 |    18.9904
 1500 |    17.9945
 1600 |    17.0791
 1700 |    16.2377
 1800 |    15.4642
 1900 |    14.7528
 2000 |    14.0986


In [79]:
def predict(X):
    return tf.matmul(X, W) + b

test = tf.expand_dims(x_train[0], 0)
predict(test).numpy() # prediction, 예측값

array([[156.51265]], dtype=float32)

In [63]:
linear_regression_model = tf.keras.Sequential([
    Input(shape=([3,])),
    layers.Dense(units=1)
])

linear_regression_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='mean_absolute_error')

history = linear_regression_model.fit(x_train, y_train, epochs=2000, verbose=2)

Epoch 1/2000
1/1 - 1s - loss: 17.8235 - 644ms/epoch - 644ms/step
Epoch 2/2000
1/1 - 0s - loss: 17.5825 - 13ms/epoch - 13ms/step
Epoch 3/2000
1/1 - 0s - loss: 17.3415 - 6ms/epoch - 6ms/step
Epoch 4/2000
1/1 - 0s - loss: 17.1005 - 6ms/epoch - 6ms/step
Epoch 5/2000
1/1 - 0s - loss: 16.8595 - 7ms/epoch - 7ms/step
Epoch 6/2000
1/1 - 0s - loss: 16.6185 - 7ms/epoch - 7ms/step
Epoch 7/2000
1/1 - 0s - loss: 16.3775 - 6ms/epoch - 6ms/step
Epoch 8/2000
1/1 - 0s - loss: 16.1365 - 7ms/epoch - 7ms/step
Epoch 9/2000
1/1 - 0s - loss: 15.8955 - 7ms/epoch - 7ms/step
Epoch 10/2000
1/1 - 0s - loss: 15.6545 - 9ms/epoch - 9ms/step
Epoch 11/2000
1/1 - 0s - loss: 15.4135 - 6ms/epoch - 6ms/step
Epoch 12/2000
1/1 - 0s - loss: 15.1725 - 6ms/epoch - 6ms/step
Epoch 13/2000
1/1 - 0s - loss: 14.9315 - 6ms/epoch - 6ms/step
Epoch 14/2000
1/1 - 0s - loss: 14.6905 - 6ms/epoch - 6ms/step
Epoch 15/2000
1/1 - 0s - loss: 14.4495 - 6ms/epoch - 6ms/step
Epoch 16/2000
1/1 - 0s - loss: 14.2085 - 7ms/epoch - 7ms/step
Epoch 17/20

In [64]:
linear_regression_model.predict(x_train[0:1])



array([[152.79922]], dtype=float32)