# Chapter 5 Code

Covers code for Chapter 5, "Automating Model Design with Meta-Optimization", of *Modern Deep Learning Design and Application*.

---

## Installing & Importing Libraries

In [None]:
# installing packages
!pip install hyperopt
!pip install hyperas
!pip install autokeras

# array processing
import numpy as np

# deep learning staple libraries
import tensorflow as tf
from tensorflow import keras

# meta-optimization
import hyperopt
import hyperas

# neural architecture search
import autokeras as ak

Collecting hyperas
  Downloading hyperas-0.4.1-py3-none-any.whl (9.1 kB)
Installing collected packages: hyperas
Successfully installed hyperas-0.4.1
Collecting autokeras
  Downloading autokeras-1.0.16-py3-none-any.whl (166 kB)
[K     |████████████████████████████████| 166 kB 599 kB/s 
[?25hCollecting tensorflow<=2.5.0,>=2.3.0
  Downloading tensorflow-2.5.0-cp37-cp37m-manylinux2010_x86_64.whl (454.3 MB)
[K     |████████████████████████████████| 454.3 MB 14 kB/s 
Collecting gast==0.4.0
  Downloading gast-0.4.0-py3-none-any.whl (9.8 kB)
Collecting h5py~=3.1.0
  Downloading h5py-3.1.0-cp37-cp37m-manylinux1_x86_64.whl (4.0 MB)
[K     |████████████████████████████████| 4.0 MB 49.7 MB/s 
Collecting grpcio~=1.34.0
  Downloading grpcio-1.34.1-cp37-cp37m-manylinux2014_x86_64.whl (4.0 MB)
[K     |████████████████████████████████| 4.0 MB 19.3 MB/s 
[?25hCollecting tensorflow-estimator<2.6.0,>=2.5.0rc0
  Downloading tensorflow_estimator-2.5.0-py2.py3-none-any.whl (462 kB)


---

## Loading Data

For data, we'll use the CIFAR-10 dataset, with a small adaptation to decrease the size of the dataset for faster meta-optimization training - only data instances with a label of 0 or 1 are included.

In [None]:
# load cifar-10 data
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

# boolean masks marking the instances whose label is 0 or 1
# (labels are flattened to 1-D first so each mask indexes whole samples)
valid_train_indices = np.isin(y_train.ravel(), (0, 1))
valid_test_indices = np.isin(y_test.ravel(), (0, 1))

# keep only the selected training / testing instances
x_train, y_train = x_train[valid_train_indices], y_train[valid_train_indices]
x_test, y_test = x_test[valid_test_indices], y_test[valid_test_indices]

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


---

## HyperOpt

### Finding Minimum of a Continuous Function

Code that uses Bayesian optimization, via the TPE algorithm, to find the minimum of the function $f(x)=(x-1)^2$.

In [None]:
# search space: a single parameter 'x' with a normal prior (mean 0, std. dev. 10)
from hyperopt import hp
space = dict(x=hp.normal('x', 0, 10))

# define objective function
def obj_func(params):
    """Return (x - 1)^2 for the sampled point.

    params: dict of sampled search-space values; only the 'x' entry is read.
    """
    offset = params['x'] - 1
    return offset ** 2

# minimize the objective with the TPE algorithm over 500 evaluations
from hyperopt import fmin, tpe
best = fmin(fn=obj_func,
            space=space,
            algo=tpe.suggest,
            max_evals=500)

100%|██████████| 500/500 [00:02<00:00, 220.81trial/s, best loss: 1.1111692817169986e-05]


### Finding Minimum of a Non-continuous Function Using Statuses

Code to find the minimum of the function $\left|\frac{1}{x}\right| + x^2$, a function that is undefined at $x=0$, to demonstrate the usage of `ok` and `fail` statuses.

In [None]:
# search space: 'x' is drawn from a normal prior centred on 0 with std. dev. 10
from hyperopt import hp
space = dict(x=hp.normal('x', 0, 10))

# define objective function
def obj_func(params):
    """Evaluate |1/x| + x^2 and report the result with a status flag.

    params: dict of sampled search-space values; only the 'x' entry is read.

    Returns {'loss': value, 'status': 'ok'} when x is nonzero, and
    {'status': 'fail'} (no loss) when x is exactly 0, where the function
    is undefined.
    """
    x = params['x']
    if x != 0:
        return {'loss': np.abs(1 / x) + x ** 2,
                'status': 'ok'}
    return {'status': 'fail'}

# minimize the objective with the TPE algorithm over 500 evaluations
from hyperopt import fmin, tpe
best = fmin(fn=obj_func,
            space=space,
            algo=tpe.suggest,
            max_evals=500)

100%|██████████| 500/500 [00:02<00:00, 205.73trial/s, best loss: 1.8898816910206948]


### Finding Optimal Optimizer and Learning Rate in CNN

In [None]:
# define search space
# import from `tensorflow.keras` so the optimizer, model and layer classes come from
# the same Keras implementation as the `keras` name bound by `from tensorflow import keras`
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
optimizers = [Adam, RMSprop, SGD]

# hp.lognormal draws exp(normal(mu, sigma)), so mu and sigma are expressed in log-space:
#   mu = log(0.005) centres the sampled learning rate on 0.005
#   sigma = 0.2     gives a spread of roughly 0.001 around that centre
# (passing mu=0.005 directly would sample learning rates of about exp(0.005) ~= 1.005)
space = {'optimizer':hp.choice('optimizer',optimizers),
         'lr':hp.lognormal('lr', mu=np.log(0.005), sigma=0.2)}

# import necessary model and layers
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers as L

# build objective function
def objective(params):
    """Train and score a fixed CNN with the sampled optimizer and learning rate.

    params: dict of sampled search-space values with
        'optimizer' - an optimizer class (not an instance)
        'lr'        - the learning rate used to instantiate that class

    Returns the negated test accuracy, so that a smaller value means a
    better candidate for the minimizer.
    """

    # build model
    model = Sequential()
    model.add(L.Input((32,32,3)))
    for i in range(4):
        model.add(L.Conv2D(32, (3,3), activation='relu'))
    model.add(L.Flatten())
    model.add(L.Dense(64, activation='relu'))
    model.add(L.Dense(1, activation='sigmoid'))

    # compile
    # `learning_rate` replaces the deprecated `lr` keyword
    optimizer = params['optimizer'](learning_rate=params['lr'])
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

    # fit
    model.fit(x_train, y_train, epochs=1, verbose=0) # increase epochs for better performance

    # evaluate accuracy (second elem. w/ .evaluate())
    acc = model.evaluate(x_test, y_test, verbose=0)[1]

    # return negative of acc such that smaller = better
    return -acc

# run the TPE search over the optimizer / learning-rate space
best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=1) # increase evals for better performance

  0%|          | 0/1 [00:00<?, ?trial/s, best loss=?]

  "The `lr` argument is deprecated, use `learning_rate` instead.")



100%|██████████| 1/1 [00:53<00:00, 53.81s/trial, best loss: -0.5]


### Finding Optimal Model Architecture

In [None]:
# bounds on how many convolutional / dense layers a candidate may have
min_num_convs, max_num_convs = 3, 8
min_num_dense, max_num_dense = 2, 5

# one dropout-rate prior per potential layer, labelled 'c0', 'c1', ... and 'd0', 'd1', ...
conv_drs = [hp.normal(f'c{layer}', 0.15, 0.1) for layer in range(max_num_convs)]
dense_drs = [hp.normal(f'd{layer}', 0.2, 0.1) for layer in range(max_num_dense)]

# define search space: layer counts (quantized to steps of 1) plus the dropout-rate lists
space = {
    '#convs': hp.quniform('#convs', min_num_convs, max_num_convs, q=1),
    '#dense': hp.quniform('#dense', min_num_dense, max_num_dense, q=1),
    'conv_dr': conv_drs,
    'dense_dr': dense_drs,
}

# define objective function
def objective(params):
    """Build, train and score one sampled CNN architecture.

    params: dict of sampled search-space values with
        '#convs'   - number of convolutional layers (converted with int())
        '#dense'   - number of dense layers (converted with int())
        'conv_dr'  - sequence of dropout rates, one per potential conv layer
        'dense_dr' - sequence of dropout rates, one per potential dense layer

    Returns the negated test accuracy, so that a smaller value means a
    better candidate for the minimizer.
    """

    # clamp every sampled dropout rate into the range [0, 0.9]
    conv_rates = [min(max(rate, 0), 0.9) for rate in params['conv_dr']]
    dense_rates = [min(max(rate, 0), 0.9) for rate in params['dense_dr']]

    # number of layers of each kind for this candidate
    n_convs = int(params['#convs'])
    n_dense = int(params['#dense'])

    # model template + input
    model = Sequential()
    model.add(L.Input((32,32,3)))

    # convolutional component: each conv layer is followed by its dropout
    for layer_idx in range(n_convs):
        model.add(L.Conv2D(32, (3,3), activation='relu'))
        model.add(L.Dropout(conv_rates[layer_idx]))

    # flatten feature maps for the dense component
    model.add(L.Flatten())

    # dense component: each dense layer is followed by its dropout
    for layer_idx in range(n_dense):
        model.add(L.Dense(32, activation='relu'))
        model.add(L.Dropout(dense_rates[layer_idx]))

    # single sigmoid unit as output
    model.add(L.Dense(1, activation='sigmoid'))

    # compile and train
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.fit(x_train, y_train, epochs=1, verbose=0) # increase epochs for better performance

    # accuracy is the second element returned by .evaluate();
    # negate it such that smaller = better
    return -model.evaluate(x_test, y_test, verbose=0)[1]

# run the TPE search over the architecture space
best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=1) # increase evals for better performance

100%|██████████| 1/1 [01:08<00:00, 68.88s/trial, best loss: -0.5]


---

## Hyperas

### Optimizing Training Procedure

In [None]:
from hyperas.distributions import choice, lognormal
from keras.optimizers import Adam, RMSprop, SGD

# define data feeder function
# takes no arguments and returns (x_train, y_train, x_test, y_test) for the
# CIFAR-10 instances labelled 0 or 1; it is passed as `data=` to optim.minimize()
# NOTE(review): hyperas presumably works from this function's source text, so keep
# the four returned variable names in sync with the objective function's parameters - confirm
def data():
    # load cifar-10 data
    (x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

    # get training data
    # (labels are flattened to 1-D so the boolean mask selects whole instances)
    valid_train_indices = (y_train.reshape(len(y_train))==0)|(y_train.reshape(len(y_train))==1)
    x_train = x_train[valid_train_indices]
    y_train = y_train[valid_train_indices]

    # get testing data
    valid_test_indices = (y_test.reshape(len(y_test))==0)|(y_test.reshape(len(y_test))==1)
    x_test = x_test[valid_test_indices]
    y_test = y_test[valid_test_indices]
    
    return x_train, y_train, x_test, y_test

# define objective function
# receives the four arrays produced by data(); trains a fixed CNN with a sampled
# optimizer class and learning rate, and returns a dict holding the negated test
# accuracy ('loss'), the 'ok' status, and the trained model
def obj_func(x_train, y_train, x_test, y_test):

    # import keras layers and sequential model
    from keras.models import Sequential
    import keras.layers as L
    
    # define model
    model = Sequential()
    model.add(L.Input((32,32,3)))
    for i in range(4):
        model.add(L.Conv2D(32, (3,3), activation='relu'))
    model.add(L.Flatten())
    model.add(L.Dense(64, activation='relu'))
    model.add(L.Dense(1, activation='sigmoid'))
    
    # sample lr and optimizer (not instantiated yet)
    # lognormal draws exp(normal(mu, sigma)), so mu and sigma are in log-space:
    # mu=-5.298 is log(0.005), centring the learning rate on 0.005, and sigma=0.2
    # gives a spread of roughly 0.001 around it (mu=0.005 would sample rates near 1.005)
    lr = {{lognormal(mu=-5.298, sigma=0.2)}}
    optimizer_obj = {{choice([Adam, RMSprop, SGD])}}

    # instantiate sampled optimizer with sampled lr
    # (`learning_rate` replaces the deprecated `lr` keyword)
    optimizer = optimizer_obj(learning_rate=lr)
    
    # compile with sampled parameters
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    
    # fit and evaluate
    model.fit(x_train, y_train, epochs=1, verbose=0) # increase epochs for better performance
    acc = model.evaluate(x_test, y_test, verbose=0)[1]

    # return loss, OK status, and trained candidate model
    return {'loss':-acc, 'status':'ok', 'model':model}

# run minimization procedure
from hyperas import optim
from hyperopt import tpe, Trials

# collect the search settings first, then hand them to hyperas in one call
minimize_kwargs = dict(
    model=obj_func,
    data=data,
    algo=tpe.suggest,
    max_evals=1, # increase evals for better performance
    trials=Trials(),
    notebook_name='name', # change this to reflect your notebook location and name
)
best_pms, best_model = optim.minimize(**minimize_kwargs)

Add or remove the `notebook_name` parameter of `optim.minimize()` as appropriate: include it (set to this notebook's name) when running inside a Jupyter Notebook, and omit it when running from a plain Python script.

### Optimizing Architecture

In [None]:
# define objective function
# receives the four arrays produced by data(); builds a CNN whose layer counts and
# dropout rates are written as double-braced hyperas templates, trains it for one
# epoch, and returns a dict holding the negated test accuracy ('loss'), the 'ok'
# status, and the trained model
def obj_func(x_train, y_train, x_test, y_test):
    
    # keras layers namespace used throughout this function
    import keras.layers as L
    # clamping helper: maps a sampled dropout rate into the range [0, 0.9]
    # NOTE(review): hyperas may derive parameter names from the call wrapped around
    # each template, so renaming `r` could change the keys of best_pms - confirm
    r = lambda x_: 0 if x_<0 else (0.9 if x_>0.9 else x_)
    
    # import keras layers and sequential model
    # NOTE(review): `L` is already imported above and `Sequential` is not used
    # below (the model is created through keras.models.Sequential)
    from keras.models import Sequential
    import keras.layers as L

    # create model template and input
    model = keras.models.Sequential()
    model.add(L.Input((32,32,3)))

    # build convolutional component
    # (layer count template: quniform(3,8,1), converted with int())
    for ind in range(int({{quniform(3,8,1)}})):
        model.add(L.Conv2D(32, (3,3), activation='relu'))
        # dropout rate template: normal(0.2,0.1), clamped by r
        model.add(L.Dropout(r({{normal(0.2,0.1)}})))
    
    # add flattening layer for FC component
    model.add(L.Flatten())
    
    # build FC component
    # (layer count template: quniform(2,5,1), converted with int())
    for ind in range(int({{quniform(2,5,1)}})):
        model.add(L.Dense(32, activation='relu'))
        # dropout rate template: normal(0.2,0.1), clamped by r
        model.add(L.Dropout(r({{normal(0.2,0.1)}})))
    
    # add output layer
    model.add(L.Dense(1, activation='sigmoid'))
     
    # compile, fit, evaluate, and return
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.fit(x_train, y_train, epochs=1, verbose=0) # increase epochs for better performance
    # accuracy is the second element returned by .evaluate()
    acc = model.evaluate(x_test, y_test, verbose=0)[1]
    # negate accuracy such that smaller = better
    return {'loss':-acc, 'status':'ok', 'model':model}

# run minimization procedure
# collect the search settings first, then hand them to hyperas in one call
minimize_kwargs = dict(
    model=obj_func,
    data=data,
    algo=tpe.suggest,
    max_evals=1, # increase evals for better performance
    trials=Trials(),
    notebook_name='name', # change this to reflect your notebook location and name
)
best_pms, best_model = optim.minimize(**minimize_kwargs)

---

## Auto-Keras

Note that Auto-Keras is quite memory-intensive. If you run multiple meta-optimization campaigns in one session, expect memory problems.

### Simple Image Block

In [None]:
# define architecture: image input -> image block -> classification head
inp = ak.ImageInput()
imageblock = ak.ImageBlock()(inp)
output = ak.ClassificationHead()(imageblock)

# aggregate into model
search = ak.AutoModel(inputs=inp,
                      outputs=output,
                      max_trials=1) # increase max trials for better performance

# fit
search.fit(x=x_train, y=y_train, epochs=1) # increase epochs for better performance

# export the best model found by the search
best_model = search.export_model()

### Custom Search Space

In [None]:
# define architecture: input -> augmentation -> ResNet block -> dense block -> classification head
inp = ak.ImageInput()
aug = ak.ImageAugmentation(
    translation_factor=0.1,
    vertical_flip=False,
    horizontal_flip=True,
)(inp)
resnetblock = ak.ResNetBlock(pretrained=True, version=None)(aug)
denseblock = ak.DenseBlock()(resnetblock)
output = ak.ClassificationHead()(denseblock)

# aggregate into model
search = ak.AutoModel(inputs=inp,
                      outputs=output,
                      max_trials=1) # increase max trials for better performance

# fit
search.fit(x=x_train, y=y_train, epochs=1) # increase epochs for better performance

# export the best model found by the search
best_model = search.export_model()

### Nonlinear Topology

In [None]:
# define architecture: the input feeds two parallel branches (ResNet and Xception)
# whose outputs are merged before the dense block and classification head
inp = ak.ImageInput()
resnetblock = ak.ResNetBlock(pretrained=True)(inp)
xceptionblock = ak.XceptionBlock(pretrained=True)(inp)
merge = ak.Merge()([resnetblock, xceptionblock])
denseblock = ak.DenseBlock()(merge)
output = ak.ClassificationHead()(denseblock)

# aggregate into model
search = ak.AutoModel(inputs=inp,
                      outputs=output,
                      max_trials=1) # increase max trials for better performance

# fit
search.fit(x=x_train, y=y_train, epochs=1) # increase epochs for better performance

# export the best model found by the search
best_model = search.export_model()

---

## NASNet

In [None]:
# import nasnet
# (taken from `tensorflow.keras` so it matches the `keras` / `tf.keras` objects used below)
from tensorflow.keras.applications.nasnet import NASNetLarge

# define architecture
inp = keras.layers.Input((32,32,3))
# upsample the 32x32 inputs to 331x331 before passing them to NASNetLarge
resize = tf.keras.layers.experimental.preprocessing.Resizing(331,331)(inp)
nasnet = NASNetLarge(include_top=False)(resize)
# collapse the spatial dimensions of the NASNet feature maps into one vector per image
pool = keras.layers.GlobalMaxPooling2D()(nasnet)
# the dense head consumes the pooled vector, not the unpooled feature maps,
# so the model emits one prediction vector per image
process = keras.layers.Dense(64, activation='relu')(pool)
process2 = keras.layers.Dense(64, activation='relu')(process)
output = keras.layers.Dense(10, activation='softmax')(process2)
model = keras.models.Model(inputs=inp, outputs=output)

# compile
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# fit model
model.fit(x_train, y_train, epochs=1) # increase epochs for better performance

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/nasnet/NASNet-large-no-top.h5


---

## Versions

See versions for all libraries used if you run into any errors.

In [None]:
!pip list

Package                        Version              Location
------------------------------ -------------------- --------------
absl-py                        0.12.0
affine                         2.3.0
aiobotocore                    1.3.1
aiohttp                        3.7.4.post0
aiohttp-cors                   0.7.0
aioitertools                   0.7.1
aioredis                       1.3.1
albumentations                 1.0.1
alembic                        1.6.5
allennlp                       2.5.0
altair                         4.1.0
annoy                          1.17.0
ansiwrap                       0.8.4
anyio                          3.2.0
appdirs                        1.4.4
argon2-cffi                    20.1.0
arrow                          1.1.0
arviz                          0.11.2
asn1crypto                     1.4.0
astunparse                     1.6.3
async-generator                1.10
async-timeout                  3.0.1
attrs                    

---