程式B-1

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Lambda
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import MaxPooling2D
import numpy as np
import logging
import copy
tf.get_logger().setLevel(logging.ERROR)

# Search budget and size limit.
MAX_MODEL_SIZE = 500000          # upper bound on the estimated weight count
CANDIDATE_EVALUATIONS = 500      # iterations of each search loop
EVAL_EPOCHS = 3                  # epochs used to score a candidate
FINAL_EPOCHS = 20                # epochs used to train the winning model


# Search space: the available layer types, every value each hyperparameter
# may take, and which hyperparameters apply to which layer type.
layer_types = ['DENSE', 'CONV2D', 'MAXPOOL2D']

param_values = {
    'size': [16, 64, 256, 1024, 4096],
    'activation': ['relu', 'tanh', 'elu'],
    'kernel_size': [(1, 1), (2, 2), (3, 3), (4, 4)],
    'stride': [(1, 1), (2, 2), (3, 3), (4, 4)],
    'dropout': [0.0, 0.4, 0.7, 0.9],
}

layer_params = {
    'DENSE': ['size', 'activation', 'dropout'],
    'CONV2D': ['size', 'activation', 'kernel_size', 'stride', 'dropout'],
    'MAXPOOL2D': ['kernel_size', 'stride', 'dropout'],
}

# Load the CIFAR-10 dataset.
cifar_dataset = keras.datasets.cifar10
(train_images, train_labels), (test_images, test_labels) = \
    cifar_dataset.load_data()

# Standardize both splits with the mean and standard deviation computed
# over the whole training set (one scalar each, not per sample or channel).
mean = np.mean(train_images)
stddev = np.std(train_images)
train_images, test_images = [
    (images - mean) / stddev for images in (train_images, test_images)]

# Convert the integer class labels to one-hot vectors.
train_labels, test_labels = [
    to_categorical(labels, num_classes=10)
    for labels in (train_labels, test_labels)]


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


程式B-2

In [None]:
# 生成模型架構草案的相關函式

def generate_random_layer(layer_type):
    """Build a layer description with randomly chosen hyperparameters.

    Args:
        layer_type: Key into ``layer_params`` naming the kind of layer.

    Returns:
        A dict holding ``'layer_type'`` plus one entry per hyperparameter
        listed in ``layer_params[layer_type]``; each value is drawn
        uniformly (via ``np.random.randint``) from ``param_values``.
    """
    def pick(name):
        # One uniform draw from the allowed values of this hyperparameter.
        choices = param_values[name]
        return choices[np.random.randint(0, len(choices))]

    description = {'layer_type': layer_type}
    description.update(
        {name: pick(name) for name in layer_params[layer_type]})
    return description

def generate_model_definition():
    """Generate a random model definition as a list of layer dicts.

    The model gets between 2 and 8 layers. The first 1..(layer_count - 1)
    of them are picked from ``layer_types[1]`` / ``layer_types[2]``; every
    remaining layer is 'DENSE', so a definition always ends with at least
    one dense layer.

    Returns:
        List of layer description dicts, in network order.
    """
    layer_count = np.random.randint(2, 9)
    non_dense_count = np.random.randint(1, layer_count)
    definition = []
    for position in range(layer_count):
        if position >= non_dense_count:
            kind = 'DENSE'
        else:
            kind = layer_types[np.random.randint(1, 3)]
        definition.append(generate_random_layer(kind))
    return definition

def compute_weight_count(layers):
    """Estimate the number of trainable weights of a model definition.

    The estimate assumes a (32, 32, 3) input, 'same' padding for the
    convolution and pooling layers (output length = ceil(input / stride)),
    and a final 10-unit dense output layer appended after ``layers``.
    Dropout contributes no weights.

    Args:
        layers: List of layer description dicts. Each has a 'layer_type' of
            'DENSE', 'CONV2D' or 'MAXPOOL2D' plus that type's hyperparameters
            ('size', 'kernel_size', 'stride'). Convolution and pooling layers
            are expected to come before any dense layer.

    Returns:
        The total weight count (including biases) as a Python ``int``.

    Raises:
        ValueError: If a layer has an unknown 'layer_type'.
    """
    def ceil_div(length, step):
        # Integer ceiling division; avoids the float results of np.ceil.
        return -(-length // step)

    def element_count(shape):
        # Number of values in a tensor of this shape, as an exact int.
        return int(np.prod(shape, dtype=np.int64))

    last_shape = (32, 32, 3)  # height, width, channels of the input
    total_weights = 0
    for layer in layers:
        layer_type = layer['layer_type']
        if layer_type == 'DENSE':
            size = layer['size']
            # One weight per input value plus one bias, for every unit.
            weights = size * (element_count(last_shape) + 1)
            last_shape = (size,)
        elif layer_type == 'CONV2D':
            size = layer['size']
            kernel_size = layer['kernel_size']
            stride = layer['stride']
            # Each filter has kernel_h * kernel_w * in_channels weights
            # plus one bias.
            weights = size * (kernel_size[0] * kernel_size[1] *
                              last_shape[2] + 1)
            last_shape = (ceil_div(last_shape[0], stride[0]),
                          ceil_div(last_shape[1], stride[1]),
                          size)
        elif layer_type == 'MAXPOOL2D':
            stride = layer['stride']
            weights = 0  # pooling only shrinks the spatial dimensions
            last_shape = (ceil_div(last_shape[0], stride[0]),
                          ceil_div(last_shape[1], stride[1]),
                          last_shape[2])
        else:
            raise ValueError('Unknown layer type: %r' % (layer_type,))
        total_weights += weights
    # Weights of the final 10-unit output layer.
    total_weights += (element_count(last_shape) + 1) * 10
    return int(total_weights)


程式B-3

In [None]:
# 根據架構草案來建構模型、然後評估的函式

def add_layer(model, params, prior_type):
    """Append the Keras layer(s) described by ``params`` to ``model``.

    Args:
        model: Model exposing ``add()``; it is modified in place.
        params: Layer description dict with a 'layer_type' of 'DENSE',
            'CONV2D' or 'MAXPOOL2D', that type's hyperparameters and a
            'dropout' rate.
        prior_type: 'layer_type' of the previously added layer. A Flatten
            layer is inserted before a dense layer unless this is 'DENSE'.
    """
    kind = params['layer_type']
    if kind == 'DENSE':
        if prior_type != 'DENSE':
            # The first dense layer needs a flat input.
            model.add(Flatten())
        model.add(Dense(params['size'], activation=params['activation']))
    elif kind == 'CONV2D':
        model.add(Conv2D(params['size'], params['kernel_size'],
                         activation=params['activation'],
                         strides=params['stride'], padding='same'))
    elif kind == 'MAXPOOL2D':
        model.add(MaxPooling2D(pool_size=params['kernel_size'],
                               strides=params['stride'], padding='same'))

    # A dropout layer follows only when the rate is non-zero.
    rate = params['dropout']
    if rate > 0.0:
        model.add(Dropout(rate))

def create_model(layers):
    """Build and compile a Keras model from a model definition.

    The Keras session is cleared first. The model starts with an identity
    Lambda layer that only fixes the input shape to (32, 32, 3), then gets
    the layers described by ``layers``, and ends with a 10-unit softmax
    layer.

    Args:
        layers: List of layer description dicts, in network order.

    Returns:
        The model, compiled with categorical cross-entropy loss, the
        'adam' optimizer and the accuracy metric.
    """
    tf.keras.backend.clear_session()
    model = Sequential()
    # Dummy identity layer whose only job is to set input_shape.
    model.add(Lambda(lambda x: x, input_shape=(32, 32, 3)))

    previous_type = 'LAMBDA'
    for description in layers:
        add_layer(model, description, previous_type)
        previous_type = description['layer_type']

    model.add(Dense(10, activation='softmax'))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

def create_and_evaluate_model(model_definition):
    """Train a candidate briefly and return its final validation accuracy.

    Args:
        model_definition: List of layer description dicts.

    Returns:
        0.0 without any training when the estimated weight count exceeds
        MAX_MODEL_SIZE; otherwise the 'val_accuracy' of the last of
        EVAL_EPOCHS epochs, measured on the test images and labels.
    """
    weight_count = compute_weight_count(model_definition)
    if weight_count > MAX_MODEL_SIZE:
        # Too large: reject the candidate before building it.
        return 0.0

    candidate = create_model(model_definition)
    fit_log = candidate.fit(
        train_images, train_labels,
        validation_data=(test_images, test_labels),
        epochs=EVAL_EPOCHS,
        batch_size=64,
        verbose=2,
        shuffle=False)
    final_accuracy = fit_log.history['val_accuracy'][-1]
    print('Size: ', weight_count)
    print('Accuracy: %5.2f' %final_accuracy)
    return final_accuracy


程式B-4

In [None]:
# Random search: evaluate CANDIDATE_EVALUATIONS random architectures and
# remember the one with the highest validation accuracy.
np.random.seed(7)
val_accuracy = 0.0
for i in range(CANDIDATE_EVALUATIONS):
    # Keep drawing until a candidate is actually evaluated; an accuracy of
    # 0.0 is what create_and_evaluate_model() returns for oversized models.
    valid_model = False
    while not valid_model:
        model_definition = generate_model_definition()
        acc = create_and_evaluate_model(model_definition)
        if acc > 0.0:
            valid_model = True
    if acc > val_accuracy:
        best_model, val_accuracy = model_definition, acc
    print('Random search, best accuracy: %5.2f' %val_accuracy)


Epoch 1/3
782/782 - 11s - loss: 3.0953 - accuracy: 0.1094 - val_loss: 2.1659 - val_accuracy: 0.1682 - 11s/epoch - 14ms/step
Epoch 2/3
782/782 - 10s - loss: 2.3533 - accuracy: 0.1322 - val_loss: 2.1156 - val_accuracy: 0.1740 - 10s/epoch - 13ms/step
Epoch 3/3
782/782 - 9s - loss: 2.3027 - accuracy: 0.1477 - val_loss: 2.0820 - val_accuracy: 0.1964 - 9s/epoch - 11ms/step
Size:  306570.0
Accuracy:  0.20
Random search, best accuracy:  0.20
Epoch 1/3
782/782 - 33s - loss: 2.4531 - accuracy: 0.1010 - val_loss: 2.3265 - val_accuracy: 0.1071 - 33s/epoch - 42ms/step
Epoch 2/3
782/782 - 31s - loss: 2.4032 - accuracy: 0.1008 - val_loss: 2.3277 - val_accuracy: 0.1000 - 31s/epoch - 40ms/step
Epoch 3/3
782/782 - 33s - loss: 2.4113 - accuracy: 0.1022 - val_loss: 2.3164 - val_accuracy: 0.0998 - 33s/epoch - 42ms/step
Size:  154650.0
Accuracy:  0.10
Random search, best accuracy:  0.20
Epoch 1/3
782/782 - 12s - loss: 2.5023 - accuracy: 0.1015 - val_loss: 2.2976 - val_accuracy: 0.1366 - 12s/epoch - 15ms/ste

KeyboardInterrupt: ignored

程式B-5

In [None]:
# 登山法輔助函式
def tweak_model(model_definition):
    """Randomly modify a model definition in place.

    With equal probability the function either changes the depth (inserts
    or removes one layer) or moves one hyperparameter of one layer to the
    neighbouring value in ``param_values`` (wrapping around at the ends).

    Args:
        model_definition: List of layer description dicts; mutated in
            place. Nothing is returned.
    """
    target = np.random.randint(0, len(model_definition))
    final_index = len(model_definition) - 1

    # Index of the first DENSE layer (the last index if there is none).
    for first_dense, description in enumerate(model_definition):
        if description['layer_type'] == 'DENSE':
            break

    # Direction of the change: +1 (add / next value) or -1 (remove /
    # previous value).
    delta = 1 if np.random.randint(0, 2) == 1 else -1
    change_depth = np.random.randint(0, 2) == 1

    if not change_depth:
        # Adjust one hyperparameter of the selected layer.
        description = model_definition[target]
        names = layer_params[description['layer_type']]
        name = names[np.random.randint(0, len(names))]
        choices = param_values[name]
        position = choices.index(description[name])
        description[name] = choices[(position + delta) % len(choices)]
        return

    if len(model_definition) < 3:
        # Models this small may only grow.
        delta = 1

    if delta == 1:
        # Insert a layer; positions before the first dense layer get a
        # non-dense layer, all others a dense layer.
        if target < first_dense:
            kind = layer_types[np.random.randint(1, 3)]
        else:
            kind = 'DENSE'
        model_definition.insert(target, generate_random_layer(kind))
        return

    # Remove a layer, keeping at least one non-dense and one dense layer.
    if target == 0 and first_dense == 1:
        target += 1  # layer 0 is the only non-dense layer
    if target == first_dense and target == final_index:
        target -= 1  # the last layer is the only dense layer
    del model_definition[target]


# Hill climbing, starting from the best architecture found by the random
# search. The starting point must be a deep copy: tweak_model() mutates its
# argument in place, so aliasing best_model here would let the very first
# tweak overwrite the best model even when that tweak is rejected.
model_definition = copy.deepcopy(best_model)

for i in range(CANDIDATE_EVALUATIONS):
    valid_model = False
    while not valid_model:
        # Snapshot the current model so a bad tweak can be undone.
        old_model_definition = copy.deepcopy(model_definition)
        tweak_model(model_definition)
        acc = create_and_evaluate_model(model_definition)
        if acc > 0.0:
            valid_model = True
        else:
            # Accuracy 0.0 marks an oversized model: undo and tweak again.
            model_definition = old_model_definition
    if acc > val_accuracy:
        # Improvement: record it and keep climbing from the tweaked model.
        best_model = copy.deepcopy(model_definition)
        val_accuracy = acc
    else:
        # No improvement: fall back to the model from before the tweak.
        model_definition = old_model_definition
    print('Hill climbing, best accuracy: %5.2f' %val_accuracy)


Epoch 1/3
782/782 - 8s - loss: 2.1973 - accuracy: 0.2593 - val_loss: 1.8195 - val_accuracy: 0.3574 - 8s/epoch - 10ms/step
Epoch 2/3
782/782 - 8s - loss: 1.9063 - accuracy: 0.3214 - val_loss: 1.7451 - val_accuracy: 0.3820 - 8s/epoch - 10ms/step
Epoch 3/3
782/782 - 9s - loss: 1.8439 - accuracy: 0.3432 - val_loss: 1.7031 - val_accuracy: 0.3962 - 9s/epoch - 11ms/step
Size:  23946.0
Accuracy:  0.40
Hill climbing, best accuracy:  0.40
Epoch 1/3
782/782 - 11s - loss: 3.0463 - accuracy: 0.1513 - val_loss: 2.0062 - val_accuracy: 0.2990 - 11s/epoch - 14ms/step
Epoch 2/3
782/782 - 9s - loss: 2.1347 - accuracy: 0.2019 - val_loss: 1.9466 - val_accuracy: 0.3153 - 9s/epoch - 11ms/step
Epoch 3/3
782/782 - 10s - loss: 2.1082 - accuracy: 0.2121 - val_loss: 1.9244 - val_accuracy: 0.3176 - 10s/epoch - 13ms/step
Size:  438986.0
Accuracy:  0.32
Hill climbing, best accuracy:  0.40
Epoch 1/3
782/782 - 9s - loss: 2.0624 - accuracy: 0.2476 - val_loss: 1.7930 - val_accuracy: 0.3558 - 9s/epoch - 12ms/step
Epoch 2

程式B-6

In [None]:
# Rebuild the winning architecture, train it for the full FINAL_EPOCHS and
# report its accuracy on the test split after every epoch.
model = create_model(best_model)
model.summary()
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
history = model.fit(train_images, train_labels,
                    validation_data=(test_images, test_labels),
                    epochs=FINAL_EPOCHS,
                    batch_size=64,
                    verbose=2,
                    shuffle=True)
