In [None]:
import tensorflow as tf
import numpy as np
from tensorflow import keras

## 뭐뭐있나 확인하기

In [None]:
# Probes that list the public (non-underscore) names of a Keras namespace.
# Only the activations probe is active; uncomment another line to inspect it.

# keras (top-level namespace)
#[name for name in dir(keras) if not name.startswith("_")]

# activation functions
[name for name in dir(keras.activations) if not name.startswith("_")]

# weight initializers
#[name for name in dir(keras.initializers) if not name.startswith("_")]

# learning-rate schedules
#[name for name in dir(keras.optimizers.schedules) if not name.startswith("_")]

['deserialize',
 'elu',
 'exponential',
 'get',
 'hard_sigmoid',
 'linear',
 'relu',
 'selu',
 'serialize',
 'sigmoid',
 'softmax',
 'softplus',
 'softsign',
 'swish',
 'tanh']

## [DNN Model]

*   가중치 초기화
*   L1, L2 정규화
*   맥스노름
*   배치정규화
*   드롭아웃


In [None]:
# Fully connected classifier for 28x28 inputs that showcases, layer by layer,
# the regularization tools listed above.
model = keras.models.Sequential([

    # Flatten the 28x28 input into a 784-element vector.
    keras.layers.Flatten(input_shape=[28, 28]),

    # Dense layer carrying every per-layer option. All keyword arguments must sit
    # inside the same Dense(...) call; closing the parenthesis after
    # kernel_initializer (as before) is a syntax error.
    keras.layers.Dense(300,
                       activation="relu",
                       kernel_initializer="he_normal",  # weight initialization
                       kernel_regularizer=keras.regularizers.l1(0.1),  # L1 penalty (regularizers.l2 / l1_l2 for L2 / both)
                       kernel_constraint=keras.constraints.max_norm(1.)  # max-norm constraint
                       ),

    # Dropout + batch normalization placed BEFORE the activation
    # (the original note says this variant reportedly works better).
    # use_bias=False because BatchNormalization supplies its own offset term.
    keras.layers.Dense(128, use_bias=False),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('relu'),
    keras.layers.Dropout(rate=0.2),

    # Dropout + batch normalization placed AFTER the activation.
    keras.layers.Dense(128, activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(rate=0.2),

    # Output layer: 10-way softmax.
    keras.layers.Dense(10, activation="softmax")
])

## [CNN Model]



CBAMD: Conv → BatchNorm → Activation → MaxPool → Dropout 순서로 쌓는 블록

In [None]:
# padding='same' uses zero padding; padding='valid' uses none.
# An integer pool_size applies to both height and width; a tuple of two
# integers sets height and width separately.

model = keras.models.Sequential([

    # Stem: strided convolution followed by max pooling.
    keras.layers.Conv2D(filters=16, kernel_size=3, strides=2, padding='same',
                        activation='relu', input_shape=(150, 150, 3)),
    keras.layers.MaxPooling2D(pool_size=2),

    # CBAMD (Conv-BatchNorm-Activation-MaxPool-Dropout)
    keras.layers.Conv2D(filters=32, kernel_size=3, padding='same'),
    keras.layers.BatchNormalization(),
    keras.layers.Activation(activation='relu'),
    keras.layers.MaxPooling2D(pool_size=2),
    keras.layers.Dropout(rate=0.2),

    # Plain Conv + MaxPool stage (pool_size=2 is also the layer's default).
    keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', activation='relu'),
    keras.layers.MaxPooling2D(pool_size=2),

    # Flatten + DNN head ending in a 10-way softmax.
    keras.layers.Flatten(),
    keras.layers.Dense(units=128, activation='relu'),
    keras.layers.Dropout(rate=0.5),
    keras.layers.Dense(units=64, activation='relu'),
    keras.layers.Dropout(rate=0.5),
    keras.layers.Dense(units=10, activation='softmax'),
])

## [Compile]


In [None]:
# The models above end in a 10-unit softmax, i.e. one mutually exclusive class
# per sample, so a categorical loss is required. binary_crossentropy would treat
# the 10 outputs as independent yes/no problems and make 'accuracy' resolve to
# binary accuracy.
#   integer labels -> 'sparse_categorical_crossentropy'
#   one-hot labels -> 'categorical_crossentropy'
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

### - optimizer

In [None]:
# Catalogue of optimizer constructors. Every line rebinds `optimizer`, so after
# running the cell it holds the last one (Nadam); copy the line you need.
# `learning_rate` is the supported argument name (`lr` is a deprecated alias).

# SGD with gradient clipping. Set clipvalue OR clipnorm, not both:
#   clipvalue clips each gradient component to [-1, 1];
#   clipnorm rescales each gradient whose L2 norm exceeds 1.
optimizer = keras.optimizers.SGD(learning_rate=1e-3, clipvalue=1.0)
optimizer = keras.optimizers.SGD(learning_rate=1e-3, clipnorm=1.0)

# SGD with momentum / Nesterov momentum
optimizer = keras.optimizers.SGD(learning_rate=0.001, momentum=0.9)
optimizer = keras.optimizers.SGD(learning_rate=0.001, momentum=0.9, nesterov=True)

# AdaGrad
optimizer = keras.optimizers.Adagrad(learning_rate=0.001)

# RMSProp
optimizer = keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9)

# Adam / Adamax / Nadam
optimizer = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)
optimizer = keras.optimizers.Adamax(learning_rate=0.001, beta_1=0.9, beta_2=0.999)
optimizer = keras.optimizers.Nadam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)

## [Early Stopping]

In [None]:
##### Early Stopping #####

# Stop once `val_accuracy` has gone `patience` epochs without improving, then
# restore the weights from the best epoch.
early_stopping_cb = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=20, restore_best_weights = True)

###### Pass the callback to fit()
# `val_accuracy` is only reported when validation data is supplied, so hold out
# part of the training set. The epoch budget must exceed `patience`, otherwise
# the callback can never trigger.
model.fit(x_train, y_train, epochs=100, validation_split=0.2, callbacks=[early_stopping_cb])

## [학습률 스케쥴링]


1. optimizer의 decay 파라미터 이용

2. optimizer의 learning_rate에 schedule을 전달

3. 콜백

4. 사용자정의 콜백


### 1. optimizer의 decay 파라미터 이용

In [None]:
# Power (inverse-time) scheduling, updated every step (batch) rather than every epoch.
# The legacy `decay=1e-4` optimizer argument computed lr = lr0 / (1 + decay * step);
# InverseTimeDecay with decay_steps=1 is the same formula and also works on Keras
# versions that no longer accept `lr=` / `decay=`.
# The loss is categorical because the models above end in a 10-way softmax
# (use 'categorical_crossentropy' for one-hot labels).
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=keras.optimizers.SGD(
                  learning_rate=keras.optimizers.schedules.InverseTimeDecay(
                      initial_learning_rate=0.01, decay_steps=1, decay_rate=1e-4)),
              metrics=['accuracy'])

### 2. optimizer의 learning_rate에 schedule을 전달

In [None]:
# Exponential scheduling: with staircase=True the rate is multiplied by
# `decay_rate` once every `decay_steps` optimizer steps (batches).
initial_learning_rate = 0.1
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=100000, decay_rate=0.96, staircase=True
)

# A schedule object can be passed wherever a float learning rate is accepted.
# The loss is categorical because the models above end in a 10-way softmax
# (use 'categorical_crossentropy' for one-hot labels).
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=keras.optimizers.RMSprop(learning_rate=lr_schedule),
              metrics=['accuracy'])

### 3. 학습률 스케쥴링 - 콜백함수

In [None]:
 ###### LearningRateScheduler, 에포크마다 학습률 스케쥴링
 
def scheduler(epoch): # variant that uses only the epoch index
    """Exponential schedule: start at 0.01 and shrink tenfold every 20 epochs.

    One of several alternative `scheduler` definitions in this cell; a later
    definition with the same name replaces this one.

    Args:
        epoch: zero-based epoch index.

    Returns:
        The learning rate for that epoch, 0.01 * 0.1**(epoch / 20).
    """
    base_rate = 0.01
    tenfold_drops = epoch / 20
    return base_rate * 0.1**tenfold_drops
 
def scheduler(epoch, lr): # variant that uses the epoch index and the current rate
    """Exponential schedule expressed as a per-epoch multiplier.

    One of several alternative `scheduler` definitions in this cell; a later
    definition with the same name replaces this one.

    Args:
        epoch: zero-based epoch index (not used by the computation).
        lr: learning rate currently in effect.

    Returns:
        `lr` multiplied by 0.1**(1 / 20), so 20 applications give a tenfold drop.
    """
    per_epoch_factor = 0.1**(1 / 20)
    return lr * per_epoch_factor
 
def scheduler(epoch):
    """Piecewise-constant schedule.

    This is the last `scheduler` definition in the cell, so it is the one the
    name refers to afterwards.

    Args:
        epoch: zero-based epoch index.

    Returns:
        0.01 for epochs below 5, 0.005 for epochs below 15, 0.001 otherwise.
    """
    # (exclusive upper epoch bound, learning rate), checked in order.
    stages = ((5, 0.01), (15, 0.005))
    for upper_bound, rate in stages:
        if epoch < upper_bound:
            return rate
    return 0.001
 
# Option A - per-epoch schedule driven by the `scheduler` function defined above.
lr_scheduler_cb = keras.callbacks.LearningRateScheduler(scheduler)


###### ReduceLROnPlateau

# Option B - performance-based scheduling: multiply the learning rate by 0.5 when
# the monitored metric has not improved for 5 consecutive epochs.
# This assignment replaces option A; keep only the one you want.
lr_scheduler_cb = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5)


###### Pass the callback to fit()
# ReduceLROnPlateau watches `val_loss` by default, which is only reported when
# validation data is supplied, so hold out part of the training set.
model.fit(x_train, y_train, epochs=10, validation_split=0.2, callbacks=[lr_scheduler_cb])

### 4. 학습률 스케쥴링 - 사용자 정의 콜백

https://www.tensorflow.org/guide/keras/custom_callback

In [None]:
###### 스텝마다 지수기반 학습률 스케쥴링
K = keras.backend
class ExponentialDecay(keras.callbacks.Callback):
    """Exponential learning-rate decay applied at every training step (batch).

    Before each batch the optimizer's learning rate is multiplied by
    0.1**(1 / s), so it shrinks tenfold every `s` steps.

    Args:
        s: number of steps over which the learning rate drops by a factor of 10.
    """

    def __init__(self, s=40000):
        super().__init__()
        self.s = s

    def on_batch_begin(self, batch, logs=None):
        # Note: `batch` restarts at every epoch, so the rate is decayed
        # incrementally from its current value instead of being recomputed
        # from the batch index.
        lr = K.get_value(self.model.optimizer.lr)
        # Use the instance's own `self.s`; a bare `s` would silently depend on a
        # module-level variable of the same name existing at training time.
        K.set_value(self.model.optimizer.lr, lr * 0.1**(1 / self.s))

    def on_epoch_end(self, epoch, logs=None):
        # Record the current learning rate in the epoch logs.
        logs = logs or {}
        logs['lr'] = K.get_value(self.model.optimizer.lr)
         
         
n_epochs = 25
# Number of steps in 20 epochs (batch size = 32). The training array is named
# `x_train` throughout this notebook; `X_train` is never defined.
s = 20 * len(x_train) // 32
exp_decay = ExponentialDecay(s)

# Train for the epoch budget declared above so `n_epochs` is actually used.
model.fit(x_train, y_train, epochs=n_epochs, callbacks=[exp_decay])

###### Grow the learning rate exponentially after every batch & record learning rate and loss
K = keras.backend
class ExponentialLearningRate(keras.callbacks.Callback):
    """Multiply the learning rate by `factor` after each batch, logging rate and loss.

    Attributes:
        factor: multiplier applied to the learning rate after every batch.
        rates: learning rate that was in effect for each finished batch.
        losses: loss reported for each finished batch (None if it was not reported).
    """

    def __init__(self, factor):
        super().__init__()  # let the base Callback set up its own state
        self.factor = factor
        self.rates = []
        self.losses = []

    def on_batch_end(self, batch, logs=None):
        logs = logs or {}
        current_lr = K.get_value(self.model.optimizer.lr)
        self.rates.append(current_lr)
        # .get keeps `rates` and `losses` the same length even if the loss is missing.
        self.losses.append(logs.get("loss"))
        # Compute the new rate from the plain value that was just read, rather
        # than multiplying the optimizer's variable object.
        K.set_value(self.model.optimizer.lr, current_lr * self.factor)
         
# Grow the learning rate by 0.5% per batch while recording (rate, loss) pairs.
expon_lr = ExponentialLearningRate(factor=1.005) # after fit: plt.plot(expon_lr.rates, expon_lr.losses)

model.fit(x_train, y_train, epochs=10, callbacks=[expon_lr])

## [사용자 정의 콜백]

In [None]:
# Stop training once the target accuracy has been reached.
class myCallback(tf.keras.callbacks.Callback):
    """Stops training as soon as the training accuracy exceeds 99.8%."""

    def on_epoch_end(self, epoch, logs=None):
        # `logs=None` replaces a shared mutable `{}` default.
        logs = logs or {}
        accuracy = logs.get('accuracy')
        # The metric is absent when the model was compiled without 'accuracy';
        # comparing None with a float would raise TypeError.
        if accuracy is not None and accuracy > 0.998:
            print("\nReached 99.8% accuracy so cancelling training!")
            self.model.stop_training = True

callbacks = myCallback()

# Print the validation/training loss ratio (a quick over-fitting indicator).
class PrintValTrainRatioCallback(keras.callbacks.Callback):
    """Prints val_loss / loss at the end of every epoch."""

    def on_epoch_end(self, epoch, logs):
        # Both keys are read directly from the epoch logs.
        validation_loss = logs["val_loss"]
        training_loss = logs["loss"]
        message = "\nval/train: {:.2f}".format(validation_loss / training_loss)
        print(message)

val_train_ratio_cb = PrintValTrainRatioCallback()
 

# Save checkpoints (in case the machine fails in the middle of training).

# Unless this is combined with early stopping, roll back to the best model after fit
# with model = keras.models.load_model("my_cifar10_model.h5") (the file saved below).
model_checkpoint_cb = keras.callbacks.ModelCheckpoint("my_cifar10_model.h5", save_best_only=True)

## [레이블 전처리] one-hot encoding  (scalar -> one-hot vector)




In [None]:
import tensorflow as tf

# data load : Fashion MNIST
mnist = tf.keras.datasets.fashion_mnist
(x_train, y_train),(x_test, y_test) = mnist.load_data()

# preprocess: scale pixel values to [0, 1]
x_train, x_test = x_train / 255.0, x_test / 255.0

######################################################
##### one-hot encoding (scalar -> one-hot vector) ####
######################################################
# Derive the class count once, from the training labels, and reuse it for the
# test labels so both encodings have the same width even if the test split
# happens to lack a class.
num_classes = len(set(y_train))
y_train = tf.keras.utils.to_categorical(y_train, num_classes=num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=num_classes)

# modeling
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)), # input_shape may be omitted
  tf.keras.layers.Dense(128, activation=tf.nn.relu),
  tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])

# compile
model.compile(optimizer='adam',
              loss='categorical_crossentropy', # one-hot labels: NOT sparse_categorical_crossentropy
              metrics=['accuracy'])

# fit
model.fit(x_train, y_train, epochs=5)

# evaluate
test_loss = model.evaluate(x_test, y_test)

## [레이블 전처리] one-hot encoding 반대  (one-hot vector -> scalar)


In [None]:
import tensorflow as tf

# data load : Fashion MNIST
mnist = tf.keras.datasets.fashion_mnist
(x_train, y_train),(x_test, y_test) = mnist.load_data()

# preprocess: scale pixel values to [0, 1]
x_train, x_test = x_train / 255.0, x_test / 255.0

# one-hot encoding (scalar -> one-hot vector): pretend the labels arrived one-hot encoded.
# Derive the class count once, from the training labels, and reuse it for the
# test labels so both encodings have the same width.
num_classes = len(set(y_train))
y_train = tf.keras.utils.to_categorical(y_train, num_classes=num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=num_classes)

#####################################################
###### sparse label (one-hot vector -> scalar) ######
######################################################
# argmax over the class axis recovers the integer label from each one-hot row.
y_train = np.argmax(y_train, axis=1)
y_test = np.argmax(y_test, axis=1)


# modeling
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(), # input_shape may be omitted
  tf.keras.layers.Dense(128, activation=tf.nn.relu),
  tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])

# compile
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy', # integer labels: NOT plain categorical_crossentropy
              metrics=['accuracy'])

# fit
model.fit(x_train, y_train, epochs=5)

# evaluate
test_loss = model.evaluate(x_test, y_test)

## [CNN Output Shape 계산기]

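Output shape 계산식 (padding은 한쪽에 추가되는 픽셀 수): $$\text{output\_size} = \left\lfloor \frac{\text{input\_size} - \text{kernel\_size} + 2 \cdot \text{padding}}{\text{stride}} \right\rfloor + 1$$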



input_size, kernel_size, padding, stride가 주어졌을 때 output_size 구하기

In [None]:
import tensorflow as tf
import numpy as np
from tensorflow import keras

def get_output_size(input_size, kernel_size, stride = 1, padding = 'valid'):
    """Return the spatial output size of a 2-D convolution over a square input.

    The result is computed in closed form instead of building a throwaway Keras
    model on every call:

        'valid': floor((input_size - kernel_size) / stride) + 1
        'same' : ceil(input_size / stride)

    Args:
        input_size: height (= width) of the input feature map.
        kernel_size: kernel size as an int, or a (height, width) pair.
        stride: stride as an int, or a (height, width) pair.
        padding: 'valid' (no zero padding) or 'same' (zero padding), case-insensitive.

    Returns:
        The output height as an int.

    Raises:
        ValueError: if `padding` is not 'valid'/'same', if the kernel or stride
            is not positive, or if the configuration yields no output positions.
    """
    def _height_component(value):
        # An (h, w) pair is accepted like Conv2D does; only the height matters here.
        return value[0] if isinstance(value, (tuple, list)) else value

    kernel = _height_component(kernel_size)
    step = _height_component(stride)
    if kernel < 1 or step < 1:
        raise ValueError("kernel_size and stride must be positive, got %r and %r"
                         % (kernel_size, stride))

    mode = padding.lower()
    if mode == 'same':
        output_size = -(-input_size // step)  # ceiling division
    elif mode == 'valid':
        output_size = (input_size - kernel) // step + 1
    else:
        raise ValueError("padding must be 'valid' or 'same', got %r" % (padding,))

    if output_size <= 0:
        raise ValueError("configuration produces an empty output (size %d)" % output_size)
    return output_size

In [None]:
# Defaults apply: stride=1 and padding='valid'.
get_output_size(input_size = 32, kernel_size = 3)

30

In [None]:
# Same input and kernel with stride 2 (padding stays at the default 'valid').
get_output_size(input_size = 32, kernel_size = 3, stride = 2)

15

In [None]:
# Stride 2 with zero padding ('same').
get_output_size(input_size = 32, kernel_size = 3, stride = 2, padding = 'same')

16

In [None]:
# Stride 2 with padding='valid' spelled out explicitly.
get_output_size(input_size = 32, kernel_size = 3, stride = 2, padding = 'valid')

15

## [CNN 상황에 맞는 kernel_size, stride, padding 값 구하기] 

(input, output사이즈 알고있을때)

In [None]:
def find_kernel_size_and_stride(input_size, output_size, kernel_size = 0, stride = 0):
    """Print every (kernel_size, stride, padding) combination that maps input_size to output_size.

    Args:
        input_size: input size passed on to `get_output_size`.
        output_size: output size being searched for.
        kernel_size: known kernel size, or 0 to search the range 1..9.
        stride: known stride, or 0 to search the range 1..9.

    Returns:
        None; each match is printed on its own line.
    """
    # A value of 0 means "unknown": try every candidate from 1 to 9.
    kernel_candidates = [kernel_size] if kernel_size != 0 else range(1, 10)
    stride_candidates = [stride] if stride != 0 else range(1, 10)

    # Padding modes in the order they are tried, each with its report line.
    reports = (
        ('valid', "kernel_size : %d, stride : %d, padding = 'valid'"),
        ('same', "kernel_size : %d, stride : %d, padding = 'same'"),
    )

    for kernel in kernel_candidates:
        for step in stride_candidates:
            for mode, template in reports:
                if get_output_size(input_size, kernel, step, padding = mode) == output_size:
                    print(template%(kernel, step))

In [None]:
 # Input size is 32 and the target output size is 15: which kernel_size and stride achieve that?
 find_kernel_size_and_stride(input_size = 32, output_size = 15)

kernel_size : 3, stride : 2, padding = 'valid'
kernel_size : 4, stride : 2, padding = 'valid'


In [None]:
 # Input size is 32, target output size is 15 and kernel_size is fixed at 3: which stride is needed?
find_kernel_size_and_stride(input_size = 32, output_size = 15, kernel_size = 3)

kernel_size : 3, stride : 2, padding = 'valid'


## [Transfer Learning]


### 1. 가져와서 그거 그대로 쓰기


In [None]:
# A model that was built and trained earlier (`my_model` must already exist)
my_model.save("my_model_A.h5")

# Load the existing model
model_A = keras.models.load_model("my_model_A.h5")


# Reuse every layer except the last one (the output layer).
# NOTE: these are the same layer objects as in model_A, so training
# model_B_on_A also changes model_A; clone model_A first to avoid that.
model_B_on_A = keras.models.Sequential(model_A.layers[:-1])

# Attach a new output layer
model_B_on_A.add(keras.layers.Dense(1, activation="sigmoid"))

# Freeze the reused layers (everything except the new output layer)
for layer in model_B_on_A.layers[:-1]:
    layer.trainable = False

# Compile (must be done after changing `trainable`).
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model_B_on_A.compile(loss="binary_crossentropy",
                     optimizer=keras.optimizers.SGD(learning_rate=1e-3),
                     metrics=["accuracy"])


# TODO: train

# TODO: unfreeze the reused layers

# TODO: train again with a lower learning rate

### 2. 복사해서 가져오기

In [None]:
# A model that was built and trained earlier (`my_model` must already exist)
my_model.save("my_model_A.h5")
 
# Load the existing model
model_A = keras.models.load_model("my_model_A.h5")
  
# Copy the model architecture (clone_model is followed by an explicit weight copy below)
model_A_clone = keras.models.clone_model(model_A)
 
# Copy the weights
model_A_clone.set_weights(model_A.get_weights())

### 3. inception_v3

In [None]:
# Import all the necessary files!
import os
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import Model
 
 
# Download the inception v3 weights
!wget --no-check-certificate \
    https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5 \
    -O /tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
 
# Import the inception model 
from tensorflow.keras.applications.inception_v3 import InceptionV3
 
# Create an instance of the inception model from the local pre-trained weights
local_weights_file = '/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'
 
 
# pre-trained 모델 로드
pre_trained_model = InceptionV3(input_shape = (150, 150, 3),
                                include_top = False,
                                weights = None)
 
# pre-trained 가중치 로드                               
pre_trained_model.load_weights(local_weights_file)
 
# 전부 Freezing
for layer in pre_trained_model.layers:
  layer.trainable = False
 
# pre-trained 모델 output layer 정보
last_layer = pre_trained_model.get_layer('mixed7')
print('last layer output shape: ', last_layer.output_shape)
last_output = last_layer.output
 
# pre-trained 모델 뒤에 붙일 새로운 네트워크 구성
x = keras.layers.Flatten()(last_output)
x = keras.layers.Dense(1024, activation='relu')(x)
x = keras.layers.Dropout(0.2)(x)                 
x = keras.layers.Dense(1, activation='sigmoid')(x)          
 
# 최종 모델 구성 (input, output 지정하기)
model = keras.Model( pre_trained_model.input, x)
 
# 컴파일
model.compile(optimizer = RMSprop(lr=0.0001),
              loss = 'binary_crossentropy',
              metrics = ['accuracy'])

### 4. Xception

In [None]:
# NOTE: n_classes, train_set, valid_set, dataset_size and batch_size must be
# defined before running this cell.
base_model = keras.applications.xception.Xception(weights="imagenet", include_top=False)

# New classification head: global average pooling + softmax.
avg = keras.layers.GlobalAveragePooling2D()(base_model.output)
output = keras.layers.Dense(n_classes, activation="softmax")(avg)
model = keras.models.Model(inputs=base_model.input, outputs=output)

# Early in training, freeze the weights of the pre-trained layers
for layer in base_model.layers:
    layer.trainable = False

# Compile. InverseTimeDecay with decay_steps=1 reproduces the legacy `decay=`
# argument (lr = lr0 / (1 + decay * step)) using arguments that current Keras
# versions accept; `learning_rate` replaces the deprecated `lr`.
optimizer = keras.optimizers.SGD(
    learning_rate=keras.optimizers.schedules.InverseTimeDecay(
        initial_learning_rate=0.2, decay_steps=1, decay_rate=0.01),
    momentum=0.9)
model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Train the new head
history = model.fit(train_set,
                    steps_per_epoch=int(0.75 * dataset_size / batch_size),
                    validation_data=valid_set,
                    validation_steps=int(0.15 * dataset_size / batch_size),
                    epochs=5)

# The new top layers are reasonably trained now: unfreeze and train all layers
for layer in base_model.layers:
    layer.trainable = True

# Compile again (required after changing `trainable`), with a much lower learning rate
optimizer = keras.optimizers.SGD(
    learning_rate=keras.optimizers.schedules.InverseTimeDecay(
        initial_learning_rate=0.01, decay_steps=1, decay_rate=0.001),
    momentum=0.9, nesterov=True)
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer,
              metrics=["accuracy"])
# Train again
history = model.fit(train_set,
                    steps_per_epoch=int(0.75 * dataset_size / batch_size),
                    validation_data=valid_set,
                    validation_steps=int(0.15 * dataset_size / batch_size),
                    epochs=40)