# 5. 딥러닝에 필요한 최적화 이론

#### Scikit-Learn을 이용한 선형회귀

In [1]:
# Linear-regression example using scikit-learn
from sklearn import linear_model

# Training samples: one single-feature row per observation, with matching targets
x_train = [[1], [2], [3], [4]]
y_train = [[0], [-1], [-2], [-3]]

# fit() hands back the estimator itself, so construction and fitting are chained
reg = linear_model.LinearRegression().fit(x_train, y_train)

# Print, in order: the training-set score, the fitted coefficient,
# the fitted intercept, and the prediction for the unseen input x = 5
for value in (
    reg.score(x_train, y_train),
    reg.coef_,
    reg.intercept_,
    reg.predict([[5]]),
):
    print(value)

1.0
[[-1.]]
[1.]
[[-4.]]


In [2]:
# TensorFlow의 선형회귀 예제
import tensorflow as tf

# Trainable parameters of the line W * x + b, each a one-element float32
# variable with a deliberately poor starting value
W = tf.Variable([0.3], dtype=tf.float32)
b = tf.Variable([-0.3], dtype=tf.float32)

@tf.function
def linear_model(x):
    """Evaluate the affine model ``W * x + b``.

    ``W`` and ``b`` are the module-level ``tf.Variable`` objects, so the
    result always reflects their current values.
    """
    scaled = W * x
    return scaled + b

@tf.function
def loss(y, predict):
    """Return the sum of squared differences between ``y`` and ``predict``."""
    residual = y - predict
    return tf.reduce_sum(tf.square(residual))

# Training data as float32 tensors, so the @tf.function helpers are called
# with tensors rather than nested Python lists of ints
x_train = tf.constant([[1], [2], [3], [4]], dtype=tf.float32)
y_train = tf.constant([[0], [-1], [-2], [-3]], dtype=tf.float32)

MaxIter = 1000  # number of gradient-descent steps
lr = 0.01       # step size

for i in range(MaxIter):
    # Only the forward pass belongs under the tape
    with tf.GradientTape() as tape:
        current_loss = loss(y_train, linear_model(x_train))

    # Differentiate, log and update outside the recording context
    dw, db = tape.gradient(current_loss, [W, b])

    # Report progress every 100 steps, before this step's update is applied
    if i % 100 == 0:
        print(i, W.numpy(), b.numpy(), current_loss.numpy())

    # Plain gradient-descent step on both parameters
    W.assign_sub(lr * dw)
    b.assign_sub(lr * db)

0 [0.3] [-0.3] 23.66
100 [-0.84079814] [0.53192717] 0.14636436
200 [-0.95227844] [0.8596927] 0.01315132
300 [-0.98569524] [0.9579423] 0.0011816786
400 [-0.99571204] [0.98739296] 0.000106178064
500 [-0.99871475] [0.99622124] 9.539404e-06
600 [-0.9996148] [0.9988674] 8.568733e-07
700 [-0.99988455] [0.99966055] 7.6948666e-08
800 [-0.99996537] [0.99989825] 6.908479e-09
900 [-0.99998957] [0.99996936] 6.2447114e-10


## 5.1 딥러닝에 나타나는 최적화 문제

In [3]:
import tensorflow as tf
from tensorflow.keras import layers

# Training data for the Keras examples, stored as float32 tensors of shape (4, 1).
# Tensors are used instead of nested Python lists because newer Keras releases
# reject plain lists passed to Model.fit ("Unrecognized data type").
x_train = tf.constant([[1.], [2.], [3.], [4.]])
y_train = tf.constant([[0.], [-1.], [-2.], [-3.]])

In [4]:
# Functional API: one Dense unit on a single-feature input.
# `shape` must be a tuple; `(1)` is just the integer 1, so the trailing comma matters.
x = layers.Input(shape=(1,))
y = layers.Dense(1)(x)
linear_model = tf.keras.Model(inputs=x, outputs=y)

# Keep handles to the model's two weight tensors for later inspection
W, b = linear_model.weights
loss = tf.keras.losses.MeanSquaredError()

linear_model.compile(loss=loss, optimizer=tf.keras.optimizers.SGD(0.01))
# Bind the return value to `_` so it is not echoed as the cell's output
_ = linear_model.fit(x_train, y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [5]:
# Sequential API: the same single-unit linear model.
# `input_shape` must be a tuple; `(1)` is just the integer 1, so the trailing comma matters.
linear_model = tf.keras.Sequential([
    layers.InputLayer(input_shape=(1,)),
    layers.Dense(1)
])

# Keep handles to the model's two weight tensors for later inspection
W, b = linear_model.weights
loss = tf.keras.losses.MeanSquaredError()

linear_model.compile(loss=loss, optimizer=tf.keras.optimizers.SGD(0.01))
# Bind the return value to `_` so it is not echoed as the cell's output
_ = linear_model.fit(x_train, y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [6]:
class MyLinearModel(tf.keras.Model):
    """Subclassed Keras model that wraps a single ``Dense(1)`` layer."""

    def __init__(self):
        """Create the model's only layer."""
        super().__init__()
        self.y = layers.Dense(1)

    def call(self, x):
        """Forward pass: apply the dense layer to ``x`` and return its output."""
        return self.y(x)

# Subclassing API: instantiate the custom model and train it with the same
# mean-squared-error loss and SGD settings as the previous examples
linear_model = MyLinearModel()
loss = tf.keras.losses.MeanSquaredError()
linear_model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
    loss=loss,
)
# Bind the return value to `_` so it is not echoed as the cell's output
_ = linear_model.fit(x_train, y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
