#### 텐서와 연산

In [3]:
import tensorflow as tf

In [4]:
tf.constant([[1,2,3],[4,5,6]]) # matrix

<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[1, 2, 3],
       [4, 5, 6]])>

In [5]:
tf.constant(42) # scalar

<tf.Tensor: shape=(), dtype=int32, numpy=42>

In [6]:
t = tf.constant([[1.,2.,3.],[4.,5.,6.]])
t.shape, t.dtype

(TensorShape([2, 3]), tf.float32)

In [7]:
t[:, 1:]

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[2., 3.],
       [5., 6.]], dtype=float32)>

In [8]:
t[..., 1]

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([2., 5.], dtype=float32)>

In [9]:
t[..., 1, tf.newaxis] # 차원 추가

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[2.],
       [5.]], dtype=float32)>

In [10]:
t + 10 # tf.add(t, 10)  / t.__add__(10)

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[11., 12., 13.],
       [14., 15., 16.]], dtype=float32)>

In [11]:
tf.square(t)

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[ 1.,  4.,  9.],
       [16., 25., 36.]], dtype=float32)>

In [12]:
t @ tf.transpose(t) # tf.matmul() ; 행렬곱

# GPU커널이 원소가 추가된 순서를 보장하지 않는 리듀스 알고리즘 사용
# tf.reduce_mean() = np.mean()
# tf.reduce_sum() = np.sum()
# tf.reduce_max() = np.max()
# tf.math.log() = np.log()
# tf.transpose(t) = t.T

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[14., 32.],
       [32., 77.]], dtype=float32)>

In [13]:
# 저수준 API
from tensorflow import keras
K = keras.backend
K.square(K.transpose(t)) + 10

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[11., 26.],
       [14., 35.],
       [19., 46.]], dtype=float32)>

#### 텐서와 넘파이

넘파이는 기본 64비트 사용  
텐서플로는 기본 32비트 사용  
넘파이 배열로 텐서 만들 때, dtype=tf.float32로 지정해야함

In [14]:
import numpy as np
a = np.array([2., 4., 5.])
tf.constant(a)

<tf.Tensor: shape=(3,), dtype=float64, numpy=array([2., 4., 5.])>

In [15]:
t.numpy() # np.array(t)

array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)

In [16]:
tf.square(a)

<tf.Tensor: shape=(3,), dtype=float64, numpy=array([ 4., 16., 25.])>

In [17]:
np.square(t)

array([[ 1.,  4.,  9.],
       [16., 25., 36.]], dtype=float32)

#### 타입 변환 : 성능 감소 시킴  
-> 텐서플로는 어떤 타입 변환도 자동으로 수행하지 않음  
- 호환되지 않은 타입의 텐서로 연산 실행 => 예외발생 !  
ex. 정수 + 실수 -> 예외  
    32비트 + 64비트 -> 예외

In [18]:
tf.constant(2.) + tf.constant(40)

InvalidArgumentError: cannot compute AddV2 as input #1(zero-based) was expected to be a float tensor but is a int32 tensor [Op:AddV2] name: 

In [19]:
tf.constant(2.) + tf.constant(40., dtype=tf.float64)

InvalidArgumentError: cannot compute AddV2 as input #1(zero-based) was expected to be a float tensor but is a double tensor [Op:AddV2] name: 

In [20]:
t2 = tf.constant(40., dtype=tf.float64)
tf.constant(2.) + tf.cast(t2, tf.float32) # tf.cast() : 타입변환

<tf.Tensor: shape=(), dtype=float32, numpy=42.0>

#### 변수  
tf.Variable : 원소값 바꾸면 새로운 텐서 만들어짐

In [21]:
v = tf.Variable([[1., 2., 3.], [4., 5., 6.]])
v

<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

In [22]:
# assign() : 변수값 변경 -> inplace됨 
v.assign(2 * v)

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[ 2.,  4.,  6.],
       [ 8., 10., 12.]], dtype=float32)>

In [23]:
v[0, 1].assign(42)

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[ 2., 42.,  6.],
       [ 8., 10., 12.]], dtype=float32)>

In [24]:
v[:, 2].assign([0., 1.])

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[ 2., 42.,  0.],
       [ 8., 10.,  1.]], dtype=float32)>

In [25]:
# scatter_nd_update, scatter_update() : 개별 원소 수정
v.scatter_nd_update(indices = [[0,0], [1,2]], updates=[100., 200.])

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[100.,  42.,   0.],
       [  8.,  10., 200.]], dtype=float32)>

사용자 정의 모델과 훈련 알고리즘

#### 사용자 정의 손실 함수

In [26]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

housing = fetch_california_housing()
X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target.reshape(-1, 1), random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_valid_scaled = scaler.transform(X_valid)
X_test_scaled = scaler.transform(X_test)

In [27]:
# 손실함수 : huber
# 평균제곱오차 : 큰 오차에 너무 과한 벌칙 가해 정확하지 않은 모델
# 평균절댓값오차 : 이상치에 관대해 훈련이 수렴되기까지 시간 걸림, 정밀하게 훈련되지 않음
def huber_fn(y_true, y_pred) :
    error = y_true - y_pred
    is_small_error = tf.abs(error) < 1
    squared_loss = tf.square(error) / 2
    linear_loss = tf.abs(error) - 0.5
    return tf.where(is_small_error, squared_loss, linear_loss)

In [28]:
input_shape = X_train.shape[1:]

model = keras.models.Sequential([
    keras.layers.Dense(30, activation="selu", kernel_initializer="lecun_normal",
                       input_shape=input_shape),
    keras.layers.Dense(1),
])

In [29]:
model.compile(loss=huber_fn, optimizer='nadam', metrics=['mae'])

In [30]:
model.fit(X_train_scaled, y_train, epochs=2, validation_data=(X_valid_scaled, y_valid))

Epoch 1/2
Epoch 2/2


<keras.src.callbacks.History at 0x15ee7d14b10>

#### 사용자 정의 요소를 가진 모델 저장하고 로드

In [31]:
def create_huber(threshold=1.0):
    def huber_fn(y_true, y_pred):
        error = y_true - y_pred
        is_small_error = tf.abs(error) < threshold
        squared_loss = tf.square(error) / 2
        linear_loss  = threshold * tf.abs(error) - threshold**2 / 2
        return tf.where(is_small_error, squared_loss, linear_loss)
    return huber_fn

In [32]:
model.compile(loss=create_huber(2.0), optimizer="nadam", metrics=["mae"])

In [33]:
model.fit(X_train_scaled, y_train, epochs=2,
          validation_data=(X_valid_scaled, y_valid))

Epoch 1/2
Epoch 2/2


<keras.src.callbacks.History at 0x15ee8fa5e90>

In [34]:
class HuberLoss(keras.losses.Loss):
    def __init__(self, threshold=1.0, **kwargs):
        self.threshold = threshold
        super().__init__(**kwargs)
    def call(self, y_true, y_pred):
        error = y_true - y_pred
        is_small_error = tf.abs(error) < self.threshold
        squared_loss = tf.square(error) / 2
        linear_loss  = self.threshold * tf.abs(error) - self.threshold**2 / 2
        return tf.where(is_small_error, squared_loss, linear_loss)
    def get_config(self):
        base_config = super().get_config()
        return {**base_config, "threshold": self.threshold}

In [35]:
model = keras.models.Sequential([
    keras.layers.Dense(30, activation="selu", kernel_initializer="lecun_normal",
                       input_shape=input_shape),
    keras.layers.Dense(1),
])

model.compile(loss=HuberLoss(2.), optimizer="nadam", metrics=["mae"])

model.fit(X_train_scaled, y_train, epochs=2,
          validation_data=(X_valid_scaled, y_valid))

Epoch 1/2
Epoch 2/2


<keras.src.callbacks.History at 0x15eea23dcd0>

#### 활성화함수, 초기화, 규제, 제한

In [36]:
def my_softplus(z) :  # = keras.activations.softplus() = tf.nn.softplus()
    return tf.math.log(tf.exp(z) + 1.0)

def my_glorot_initializer(shape, dtype=tf.float32) :  # = keras.initializers.glorot_normal()
    stddev = tf.sqrt(2. / (shape[0] + shape[1]))
    return tf.random_normal(shape, stddev = stddev, dtype=dtype)

def my_l1_regularizer(weights) :  # = keras.regularizers.l1(0.01)
    return tf.reduce_sum(tf.abs(0.01 * weights))

def my_positive_weights(weights) :  # 양수의 가중치만 남김 = keras.constraints.nonneg() = tf.nn.relu()
    return tf.where(weights < 0., tf.zeros_like(weights), weights)

In [37]:
layer = keras.layers.Dense(30, activation=my_softplus,
                          kernel_initializer = my_glorot_initializer,
                          kernel_regularizer=  my_l1_regularizer,
                          kernel_constraint = my_positive_weights)

#### 사용자 정의 지표  

지표는 미분이 가능하지 않거나 모든 곳에서 그레이디언트가 0이어도 괜찮다

In [38]:
model.compile(loss='mse', optimizer='nadam', metrics=[create_huber(2.0)])

In [39]:
# 진짜 양성 개수와 거짓 양성 개수를 기록하고, 필요할 때 정밀도 계산할 수 있는 객체 필요 => keras.metrics.Precision
precision = keras.metrics.Precision()
precision([0, 1, 1, 1, 0, 1, 0, 1], [1, 1, 0, 1, 0, 1, 0, 1])

<tf.Tensor: shape=(), dtype=float32, numpy=0.8>

In [40]:
precision([0, 1, 0, 0, 1, 0, 1, 1], [1, 0, 1, 1, 0, 0, 0, 0]) # 위에것까지 전체 정밀도
# 배치마다 점진적으로 업데이트 됨 -> 스트리밍 지표

<tf.Tensor: shape=(), dtype=float32, numpy=0.5>

In [41]:
precision.result() # 현재 지표값

<tf.Tensor: shape=(), dtype=float32, numpy=0.5>

In [42]:
precision.variables # 진짜 양성과 거짓 양성 기록

[<tf.Variable 'true_positives:0' shape=(1,) dtype=float32, numpy=array([4.], dtype=float32)>,
 <tf.Variable 'false_positives:0' shape=(1,) dtype=float32, numpy=array([4.], dtype=float32)>]

In [43]:
precision.reset_states() # 두 변수가 0.0으로 초기화(리셋)

#### 사용자 정의 층

keras.layers.Flatten, keras.layers.ReLU : 가중치 없는 층  
-> keras.layers.Lambda 층으로 가중치 필요없는 사용자 정의 층을 만들기 위해 감쌈

In [44]:
# 입력에 지수함수 적용하는 층
exponential_layer = keras.layers.Lambda(lambda x:tf.exp(x))

In [45]:
class MyDense(keras.layers.Layer):
    def __init__(self, units, activation=None, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = keras.activations.get(activation)

    def build(self, batch_input_shape):
        self.kernel = self.add_weight(
            name="kernel", shape=[batch_input_shape[-1], self.units],
            initializer="glorot_normal")
        self.bias = self.add_weight(
            name="bias", shape=[self.units], initializer="zeros")
        super().build(batch_input_shape) # 마지막에 호출해야 함
        
    def call(self, X):
        return self.activation(X @ self.kernel + self.bias)

    def compute_output_shape(self, batch_input_shape): # 층의 출력 크기 반환
        return tf.TensorShape(batch_input_shape.as_list()[:-1] + [self.units])

    def get_config(self):
        base_config = super().get_config()
        return {**base_config, "units": self.units,
                "activation": keras.activations.serialize(self.activation)}

In [46]:
# 훈련하는 동안 가우스 잡음 추가(규제 목적), 테스트 시 아무것도 하지 않는 층
class MyGaussianNoise(keras.layers.Layer) :
    def __init__(self, stddev, **kwargs) :
        super().__init__(**kwargs)
        self.stddev = stddev
        
    def call(self, X, training=None) :
        if training :
            noise = tf.random.normal(tf.shape(X), stddev=self.stddev)
            return X + noise
        else :
            return X
    
    def compute_output_shape(self, batch_input_shape) :
        return batch_input_shape

#### 사용자 정의 모델

In [47]:
# 스킵 연결이 있는 잔차 블록 층
# 다른 층 포함
class ResidualBlock(keras.layers.Layer):
    def __init__(self, n_layers, n_neurons, **kwargs):
        super().__init__(**kwargs)
        self.hidden = [keras.layers.Dense(n_neurons, activation="elu",
                                          kernel_initializer="he_normal")
                       for _ in range(n_layers)]

    def call(self, inputs):
        Z = inputs
        for layer in self.hidden:
            Z = layer(Z)
        return inputs + Z

In [48]:
class ResidualRegressor(keras.models.Model):
    def __init__(self, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.hidden1 = keras.layers.Dense(30, activation="elu",
                                          kernel_initializer="he_normal")
        self.block1 = ResidualBlock(2, 30)
        self.block2 = ResidualBlock(2, 30)
        self.out = keras.layers.Dense(output_dim)

    def call(self, inputs):
        Z = self.hidden1(inputs)
        for _ in range(1 + 3):
            Z = self.block1(Z)
        Z = self.block2(Z)
        return self.out(Z)

In [49]:
X_new_scaled = X_test_scaled

model = ResidualRegressor(1)
model.compile(loss="mse", optimizer="nadam")
history = model.fit(X_train_scaled, y_train, epochs=5)
score = model.evaluate(X_test_scaled, y_test)
y_pred = model.predict(X_new_scaled)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [50]:
block1 = ResidualBlock(2, 30)
model = keras.models.Sequential([
    keras.layers.Dense(30, activation="elu", kernel_initializer="he_normal"),
    block1, block1, block1, block1,
    ResidualBlock(2, 30),
    keras.layers.Dense(1)
])

model.compile(loss="mse", optimizer="nadam")
history = model.fit(X_train_scaled, y_train, epochs=5)
score = model.evaluate(X_test_scaled, y_test)
y_pred = model.predict(X_new_scaled)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


#### 모델 구성 요소에 기반한 손실과 지표

In [51]:
# 맨 위의 은닉층에 보조 출력 가짐 -> 보조 출력에 연결된 손실 : 재구성 손실
# 재구성 손실은 가능한 많은 정보 유지하도록 유도
class ReconstructingRegressor(keras.Model):
    def __init__(self, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.hidden = [keras.layers.Dense(30, activation="selu",
                                          kernel_initializer="lecun_normal")
                       for _ in range(5)]
        self.out = keras.layers.Dense(output_dim)
        self.reconstruction_mean = keras.metrics.Mean(name="reconstruction_error")

    def build(self, batch_input_shape):
        n_inputs = batch_input_shape[-1]
        self.reconstruct = keras.layers.Dense(n_inputs)
        #super().build(batch_input_shape)

    def call(self, inputs, training=None):
        Z = inputs
        for layer in self.hidden:
            Z = layer(Z)
        reconstruction = self.reconstruct(Z)
        recon_loss = tf.reduce_mean(tf.square(reconstruction - inputs))
        self.add_loss(0.05 * recon_loss)
        if training:
            result = self.reconstruction_mean(recon_loss)
            self.add_metric(result)
        return self.out(Z)

#### 자동 미분을 사용해 그레이디언트 계산

In [69]:
def f(w1, w2) :
    return 3 * w1 ** 2 + 2 * w1 * w2

In [70]:
w1, w2 = 5, 3
eps = 1e-6
(f(w1 + eps, w2) - f(w1, w2)) / eps # 도함수

36.000003007075065

In [71]:
(f(w1, w2 + eps) - f(w1, w2)) / eps # 도함수

10.000000003174137

In [72]:
# 자동 미분
w1, w2 = tf.Variable(5.), tf.Variable(3.)
with tf.GradientTape() as tape :
    z = f(w1, w2)
    
gradients = tape.gradient(z, [w1, w2])

In [73]:
gradients

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

In [57]:
with tf.GradientTape() as tape :
    z = f(w1, w2)
    
dz_dw1 = tape.gradient(z, w1) # 36텐서
dz_dw2 = tape.gradient(z, w2) # gradient() 호출되면 자동으로 테이프 즉시 지워짐 -> 두번 호출하면 예외발생

RuntimeError: A non-persistent GradientTape can only be used to compute one set of gradients (or jacobians)

In [74]:
with tf.GradientTape(persistent=True) as tape :
    z = f(w1, w2)

dz_dw1 = tape.gradient(z, w1) # 36텐서
dz_dw2 = tape.gradient(z, w2) # 10텐서
del tape # 사용이 끝난 후 테이브 삭제해 리소스 해체

In [75]:
# 테이프는 변수(Variable)가 포함된 연산만을 기록 -> 아니면 None 반환
c1, c2 = tf.constant(5.), tf.constant(3.)
with tf.GradientTape() as tape :
    z = f(c1, c2)
    
gradients = tape.gradient(z, [c1, c2])
gradients # None 반환

[None, None]

In [76]:
with tf.GradientTape() as tape :
    tape.watch(c1) # 감시하여 모든 연산 기록하도록 강제
    tape.watch(c2)
    z = f(c1, c2)

gradients = tape.gradient(z, [c1, c2])
gradients

# 입력이 작을 때 변동 폭이 큰 활성화 함수에 대한 규제 손실 구현할 때 유용
# 입력은 변수가 아니므로 테이프에 기록을 명시적으로 알려주어야

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

In [77]:
with tf.GradientTape() as tape:
    z1 = f(w1, w2 + 2.)
    z2 = f(w1, w2 + 5.)
    z3 = f(w1, w2 + 7.)

tape.gradient([z1, z2, z3], [w1, w2])

[<tf.Tensor: shape=(), dtype=float32, numpy=136.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=30.0>]

In [79]:
# 한 번의 정방향 계산, 역방향 계산으로 모든 그레이디언트 동시에 계산 가능
# 여러 손실이 포함된 벡터의 합의 그레이디언트 계산
# 개별 그레이디언트 계산 -> 테이프의 jacobian() : 벡터에 있는 각 손실마다 후진 자동 미분 수행
with tf.GradientTape(persistent=True) as hessian_tape:
    with tf.GradientTape() as jacobian_tape:
        z = f(w1, w2)
    jacobians = jacobian_tape.gradient(z, [w1, w2])
hessians = [hessian_tape.gradient(jacobian, [w1, w2])
            for jacobian in jacobians]
del hessian_tape

print(jacobians)
print()
print(hessians)

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>, <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

[[<tf.Tensor: shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: shape=(), dtype=float32, numpy=2.0>], [<tf.Tensor: shape=(), dtype=float32, numpy=2.0>, None]]


In [63]:
# 신경망 일부분에 그레이디언트가 역전파되지 않도록 막기 -> tf.stop_gardient() ; 정방향 계산할 때 입력 반환(= tf.identity())
# 역전파 시에는 그레이디언트 전파하지 않고, 상수처럼 동작
def f(w1, w2) :
    return 3 * w1 ** 2 + tf.stop_gradient(2 * w1 * w2)

with tf.GradientTape() as tape :
    z = f(w1, w2) # stop_gradient() 없을 때와 결과 같다.

gradients = tape.gradient(z, [w1, w2])
gradients

[<tf.Tensor: shape=(), dtype=float32, numpy=30.0>, None]

In [80]:
# 큰 입력에 대한 그레이디언트 -> NaN 반환 (부동소수점 정밀도 오류)
x = tf.Variable([100.])
with tf.GradientTape() as tape :
    z = my_softplus(x)
    
tape.gradient(z, [x])

[<tf.Tensor: shape=(1,), dtype=float32, numpy=array([nan], dtype=float32)>]

In [81]:
# 큰 입력에서도 그레이디언트 올바르게 계산
@tf.custom_gradient
def my_better_softplus(z) :
    exp = tf.exp(z)
    def my_softplus_gradients(grad) :
        return grad / (1 + 1/exp)
    return tf.math.log(exp + 1), my_softplus_gradients

In [82]:
# tf.where() 사용해 값이 클 때 입력 그대로 반환
def my_better_softplus(z):
    return tf.where(z > 30., z, tf.math.log(tf.exp(z) + 1.))

In [83]:
x = tf.Variable([1000.])
with tf.GradientTape() as tape:
    z = my_better_softplus(x)

z, tape.gradient(z, [x])

(<tf.Tensor: shape=(1,), dtype=float32, numpy=array([1000.], dtype=float32)>,
 [<tf.Tensor: shape=(1,), dtype=float32, numpy=array([nan], dtype=float32)>])

#### 사용자 정의 훈련 반복

In [84]:
# fit() 메서드는 하나의 옵티마이저만 사용 -> compile()에 하나의 옵티마이저만 지정 가능
l2_reg = keras.regularizers.l2(0.05)
model = keras.models.Sequential([
    keras.layers.Dense(30, activation='elu', kernel_initializer='he_normal',
                      kernel_regularizer=l2_reg),
    keras.layers.Dense(1, kernel_regularizer=l2_reg)
])
# 훈련 반복 직접 다루기에 컴파일할 필요 없다.

In [85]:
# 훈련셋에서 샘플 배치 랜덤하게 추출(중복 허용)
def random_batch(X, y, batch_size=32) :
    idx = np.random.randint(len(X), size=batch_size)
    return X[idx], y[idx]

In [86]:
# 훈련상태 출력
def print_status_bar(iteration, total, loss, metrics=None):
    metrics = " - ".join(["{}: {:.4f}".format(m.name, m.result())
                         for m in [loss] + (metrics or [])])
    end = "" if iteration < total else "\n"
    print("\r{}/{} - ".format(iteration, total) + metrics,
          end=end)
    
# 이 대신 tqdm 라이브러리 사용할 수 있다.

In [87]:
n_epochs = 5
batch_size = 32
n_steps = len(X_train) // batch_size
optimizer = keras.optimizers.Nadam(learning_rate=0.01)
loss_fn = keras.losses.mean_squared_error
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.MeanAbsoluteError()]

In [88]:
# 훈련 반복
# 훈련 반복이 훈련과 테스트 시에 다르게 동작하는 층(배치정규화, 드롭아웃)을 다루지 못함
# -> training=True 호출해 필요한 모든 층에 매개변수 전파되도록 해야
for epoch in range(1, n_epochs + 1):
    print("Epoch {}/{}".format(epoch, n_epochs))
    for step in range(1, n_steps + 1):
        X_batch, y_batch = random_batch(X_train_scaled, y_train)
        with tf.GradientTape() as tape:
            y_pred = model(X_batch)
            main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
            loss = tf.add_n([main_loss] + model.losses)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables)) # 가중치에 다른 변환 적용하려면 apply_gradients() 전에 호출
        for variable in model.variables:  # 모델에 가중치 제한 추가하여 훈련 반복 수정
            if variable.constraint is not None:
                variable.assign(variable.constraint(variable))
        mean_loss(loss)
        for metric in metrics:
            metric(y_batch, y_pred)
        print_status_bar(step * batch_size, len(y_train), mean_loss, metrics)
    print_status_bar(len(y_train), len(y_train), mean_loss, metrics)
    for metric in [mean_loss] + metrics:
        metric.reset_states() # 평균손실과 지푯값 초기화

Epoch 1/5
11610/11610 - mean: 1.3807 - mean_absolute_error: 0.5897
Epoch 2/5
11610/11610 - mean: 0.6811 - mean_absolute_error: 0.5269
Epoch 3/5
11610/11610 - mean: 0.6477 - mean_absolute_error: 0.5160
Epoch 4/5
11610/11610 - mean: 0.6701 - mean_absolute_error: 0.5265
Epoch 5/5
11610/11610 - mean: 0.6517 - mean_absolute_error: 0.5259


텐서플로 함수와 그래프

In [89]:
def cube(x) :
    return x ** 3

In [90]:
cube(2)

8

In [91]:
cube(tf.constant(2.0))

<tf.Tensor: shape=(), dtype=float32, numpy=8.0>

In [92]:
# 텐서플로 함수 : tf.function() -> 빠름
tf_cube = tf.function(cube)
tf_cube

<tensorflow.python.eager.polymorphic_function.polymorphic_function.Function at 0x15ef3b27710>

In [93]:
tf_cube(2)

<tf.Tensor: shape=(), dtype=int32, numpy=8>

In [94]:
tf_cube(tf.constant(2.0))

<tf.Tensor: shape=(), dtype=float32, numpy=8.0>

In [95]:
@tf.function
def tf_cube(x) :
    return x ** 3

In [97]:
tf_cube.python_function(2) # 원본 파이썬 함수

8

케라스가 파이썬 함수 -> 텐서플로 함수로 바꾸지 못하게 하려면 dynamic=True로 지정  
혹은 compile()시, run_eagerly=True로 지정  

텐서플로 함수는 입력 크기와 데이터 타입에 맞춰 그래프 생성 -> 다형성

#### 오토그래프와 트레이싱

오토그래프는 모든 제어문을 텐서플로 연산으로 바꿈  
while -> tf.while_loop()  
if -> tf.cond()  
for -> loop_body()

In [98]:
@tf.function
def add_10(x) :
    for i in range(10) :
        x += 1
    return x

In [99]:
add_10(tf.constant(5))

<tf.Tensor: shape=(), dtype=int32, numpy=15>

In [100]:
@tf.function
def add_10(x) :
    condition = lambda i, x : tf.less(i, 10)
    body = lambda i, x : (tf.add(i, 1), tf.add(x, 1))
    final_i, final_x = tf.while_loop(condition, body, [tf.constant(0), x])
    return final_x

In [103]:
add_10(tf.constant(5))

<tf.Tensor: shape=(), dtype=int32, numpy=15>

In [104]:
@tf.function
def add_10(x) :
    for i in tf.range(10) :
        x = x + 1
    return x

# 연습문제

In [105]:
# 12) 층 정규화 수행하는 사용자 정의 층 구현
# b. call() 메서드는 샘플의 특성마다 평균과 표준편차 계산해야 한다. 전체 샘플의 평균과 분산을 반환하는 tf.nn.moments(inputs, axes=-1, keepdims=True) 사용
class LayerNormalization(keras.layers.Layer):
    def __init__(self, eps=0.001, **kwargs):
        super().__init__(**kwargs)
        self.eps = eps

    def build(self, batch_input_shape):
        self.alpha = self.add_weight(
            name="alpha", shape=batch_input_shape[-1:],
            initializer="ones")
        self.beta = self.add_weight(
            name="beta", shape=batch_input_shape[-1:],
            initializer="zeros")
        super().build(batch_input_shape) # must be at the end

    def call(self, X):
        mean, variance = tf.nn.moments(X, axes=-1, keepdims=True)
        return self.alpha * (X - mean) / (tf.sqrt(variance + self.eps)) + self.beta

    def compute_output_shape(self, batch_input_shape):
        return batch_input_shape

    def get_config(self):
        base_config = super().get_config()
        return {**base_config, "eps": self.eps}

In [106]:
# c. 사용자 정의 층이 keras.layers.LayerNormalization층과 동일한 출력을 만드는지 확인
X = X_train.astype(np.float32)

custom_layer_norm = LayerNormalization()
keras_layer_norm = keras.layers.LayerNormalization()

tf.reduce_mean(keras.losses.mean_absolute_error(
    keras_layer_norm(X), custom_layer_norm(X)))

<tf.Tensor: shape=(), dtype=float32, numpy=3.9782837e-08>

In [107]:
random_alpha = np.random.rand(X.shape[-1])
random_beta = np.random.rand(X.shape[-1])

custom_layer_norm.set_weights([random_alpha, random_beta])
keras_layer_norm.set_weights([random_alpha, random_beta])

tf.reduce_mean(keras.losses.mean_absolute_error(
    keras_layer_norm(X), custom_layer_norm(X)))

<tf.Tensor: shape=(), dtype=float32, numpy=2.2977115e-08>

In [108]:
# 13) 사용자 정의 훈련 반복 사용해 MNIST 모델 훈련
# a. 에포크, 반복, 평균 훈련 손실, 에포크의 평균 정확도, 에포크 끝에서 검증 손실과 정확도 출력
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()
X_train_full = X_train_full.astype(np.float32) / 255.
X_valid, X_train = X_train_full[:5000], X_train_full[5000:]
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]
X_test = X_test.astype(np.float32) / 255.

In [109]:
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(100, activation="relu"),
    keras.layers.Dense(10, activation="softmax"),
])

In [110]:
n_epochs = 5
batch_size = 32
n_steps = len(X_train) // batch_size
optimizer = keras.optimizers.Nadam(learning_rate=0.01)
loss_fn = keras.losses.sparse_categorical_crossentropy
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.SparseCategoricalAccuracy()]

In [113]:
from tqdm.notebook import trange
from collections import OrderedDict

with trange(1, n_epochs + 1, desc="All epochs") as epochs:
    for epoch in epochs:
        with trange(1, n_steps + 1, desc="Epoch {}/{}".format(epoch, n_epochs)) as steps:
            for step in steps:
                X_batch, y_batch = random_batch(X_train, y_train)
                with tf.GradientTape() as tape:
                    y_pred = model(X_batch)
                    main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
                    loss = tf.add_n([main_loss] + model.losses)
                gradients = tape.gradient(loss, model.trainable_variables)
                optimizer.apply_gradients(zip(gradients, model.trainable_variables))
                for variable in model.variables:
                    if variable.constraint is not None:
                        variable.assign(variable.constraint(variable))                    
                status = OrderedDict()
                mean_loss(loss)
                status["loss"] = mean_loss.result().numpy()
                for metric in metrics:
                    metric(y_batch, y_pred)
                    status[metric.name] = metric.result().numpy()
                steps.set_postfix(status)
            y_pred = model(X_valid)
            status["val_loss"] = np.mean(loss_fn(y_valid, y_pred))
            status["val_accuracy"] = np.mean(keras.metrics.sparse_categorical_accuracy(
                tf.constant(y_valid, dtype=np.float32), y_pred))
            steps.set_postfix(status)
        for metric in [mean_loss] + metrics:
            metric.reset_states()

All epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 2/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 3/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 4/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 5/5:   0%|          | 0/1718 [00:00<?, ?it/s]

In [114]:
# b. 상위 층과 하위 층에 학습률이 다른 옵티마이저 사용
lower_layers = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(100, activation="relu"),
])
upper_layers = keras.models.Sequential([
    keras.layers.Dense(10, activation="softmax"),
])
model = keras.models.Sequential([
    lower_layers, upper_layers
])

In [115]:
lower_optimizer = keras.optimizers.SGD(learning_rate=1e-4)
upper_optimizer = keras.optimizers.Nadam(learning_rate=1e-3)

In [116]:
n_epochs = 5
batch_size = 32
n_steps = len(X_train) // batch_size
loss_fn = keras.losses.sparse_categorical_crossentropy
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.SparseCategoricalAccuracy()]

In [117]:
with trange(1, n_epochs + 1, desc="All epochs") as epochs:
    for epoch in epochs:
        with trange(1, n_steps + 1, desc="Epoch {}/{}".format(epoch, n_epochs)) as steps:
            for step in steps:
                X_batch, y_batch = random_batch(X_train, y_train)
                with tf.GradientTape(persistent=True) as tape:
                    y_pred = model(X_batch)
                    main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
                    loss = tf.add_n([main_loss] + model.losses)
                for layers, optimizer in ((lower_layers, lower_optimizer),
                                          (upper_layers, upper_optimizer)):
                    gradients = tape.gradient(loss, layers.trainable_variables)
                    optimizer.apply_gradients(zip(gradients, layers.trainable_variables))
                del tape
                for variable in model.variables:
                    if variable.constraint is not None:
                        variable.assign(variable.constraint(variable))                    
                status = OrderedDict()
                mean_loss(loss)
                status["loss"] = mean_loss.result().numpy()
                for metric in metrics:
                    metric(y_batch, y_pred)
                    status[metric.name] = metric.result().numpy()
                steps.set_postfix(status)
            y_pred = model(X_valid)
            status["val_loss"] = np.mean(loss_fn(y_valid, y_pred))
            status["val_accuracy"] = np.mean(keras.metrics.sparse_categorical_accuracy(
                tf.constant(y_valid, dtype=np.float32), y_pred))
            steps.set_postfix(status)
        for metric in [mean_loss] + metrics:
            metric.reset_states()

All epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 2/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 3/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 4/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 5/5:   0%|          | 0/1718 [00:00<?, ?it/s]