# Hands-on Tutorial: Introduction to 
# Incremental Learning + Auto-tuning during Neural Architecture Search 

### Requirements:

GPU(s) with CUDA support

Python >= 3.6

Tensorflow = 2.3

CUDA = 10.2

CuDNN = 7.6.2

### Imports

In [1]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras import Model


#### Set GPU memory usage limit
##### Tune this limit according to the complexity of the model and the batch size

In [2]:
# Cap how much memory TensorFlow may allocate on the first GPU.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # One virtual device on the first GPU, limited to 1GB * 4 of memory
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4 * 1024)],
        )
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        # Virtual devices must be set before GPUs have been initialized
        print(err)

# Log the device on which each operation is placed
tf.debugging.set_log_device_placement(True)

1 Physical GPUs, 1 Logical GPUs


### Data

In [3]:
# Load CIFAR-10 through Keras. The helper below reads the training images
# from dataset[0][0] and the matching labels from dataset[0][1].
dataset = datasets.cifar10.load_data()

##### Helper function to slice/split the dataset with target labels 

In [4]:
def custom_dataset_loader(dataset = dataset, allowed_labels =None, seed = None):
    """Filter the training split by label, shuffle it and split it 80/20.

    Only ``dataset[0]`` (the ``(images, labels)`` pair) is read. Note that the
    default for ``dataset`` is bound when this function is defined, i.e. to
    the data loaded in the cell above.

    Args:
        dataset: Nested pair whose first element is ``(images, labels)``.
        allowed_labels: Iterable of class ids to keep; ``None`` keeps every
            sample.
        seed: Optional seed for the shuffle, to make the split reproducible.

    Returns:
        ``(train_images, train_labels, test_images, test_labels)`` as
        tensors. Images are float32 scaled by 1/255; labels keep the dtype
        and per-sample shape of the input labels.

    Raises:
        ValueError: If no sample carries one of ``allowed_labels``.
    """
    images = np.asarray(dataset[0][0])
    labels = np.asarray(dataset[0][1])

    # Pick only the data with required labels
    if allowed_labels is None:
        keep = np.ones(len(labels), dtype=bool)
    else:
        keep = np.isin(labels, list(allowed_labels))
        if keep.ndim > 1:
            # Labels stored as (n, 1): collapse to one flag per sample
            keep = keep.all(axis=tuple(range(1, keep.ndim)))

    selected = np.flatnonzero(keep)
    if selected.size == 0:
        raise ValueError('No sample matches allowed_labels=%r' % (allowed_labels,))

    # TODO: new_dataset = train_dataset+old_dataset
    # Shuffle once, up front. (Dataset.shuffle returns a new dataset, so
    # calling it without using the result leaves the order unchanged.)
    selected = np.random.RandomState(seed).permutation(selected)
    images = images[selected].astype(np.float32) / np.float32(255)
    labels = labels[selected]

    # Split into train and test (80%, 20%)
    n_train = int(len(images) * 0.8)

    # Split the images and labels in train and test datasets
    train_images = tf.convert_to_tensor(images[:n_train])
    train_labels = tf.convert_to_tensor(labels[:n_train])
    test_images = tf.convert_to_tensor(images[n_train:])
    test_labels = tf.convert_to_tensor(labels[n_train:])
    print('Shape of training images after filtering labels',np.shape(train_images))

    return train_images,train_labels,test_images,test_labels

### Custom Feature extractor and Classifier Models

#### A CNN has two basic parts:
A feature extractor with one or more convolutional layers,

A classifier, which is usually one or more fully connected layers.

In [5]:
class FeatureExtractor(Model):
    """Convolutional backbone: conv(32) -> pool -> conv(64) -> pool -> conv(64).

    Args:
        trainable: Forwarded to the ``trainable`` flag of every convolution.
    """

    def __init__(self,trainable):
        super().__init__()
        conv_kwargs = dict(activation='relu', trainable=trainable)
        self.conv1 = Conv2D(32, 3, **conv_kwargs)
        # A single pooling layer instance, applied after conv1 and after conv2
        self.maxpool = MaxPooling2D((2,2))
        self.conv2 = Conv2D(64, 3, **conv_kwargs)
        self.conv3 = Conv2D(64, 3, **conv_kwargs)

    def call(self, x):
        """Apply the three 3x3 ReLU convolutions, pooling after the first two."""
        for conv in (self.conv1, self.conv2):
            x = self.maxpool(conv(x))
        return self.conv3(x)

class Classifier(Model):
    """Dense head: Flatten -> Dense(64, relu) -> Dense(n_classes).

    The output layer has no activation, so the model returns raw scores.

    Args:
        n_classes: Width of the output layer; also kept on ``self.n_classes``.
    """

    def __init__(self, n_classes):
        super().__init__()
        self.n_classes = n_classes
        self.flatten = Flatten()
        self.d1 = Dense(64, activation='relu')
        self.d2 = Dense(n_classes)

    def call(self, x):
        """Flatten ``x`` and pass it through the two dense layers."""
        hidden = self.d1(self.flatten(x))
        return self.d2(hidden)


### Instantiate and test the model wrappers

In [6]:
# Smoke-test the classifier wrapper: build it for 64 input features and list
# the names of the variables that were created.
x = Classifier(10)
# The build shape includes the batch axis. A bare (64,) is treated as a batch
# of 64 one-feature samples, so the first Dense kernel would be built for a
# single input feature instead of 64.
x.build((None, 64)) # Build using the input size
var= [variable.name for variable in x.trainable_variables]
var

['dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0']

### Sample Model: Wrapper for both Feature extractor and Classifier 

In [7]:
class SampleModel(Model):
    """Chain a feature extractor and a classifier into a single Keras model.

    Args:
        feature_extractor: Model applied to the raw input.
        classifier: Model applied to the extracted features. It must expose
            ``n_classes`` when ``is_online`` is true.
        is_online: When true, both given models are frozen (their
            ``trainable`` flag is set on the objects passed in, no copy is
            made) and a new trainable ``Dense(classifier.n_classes)`` layer is
            stacked on the classifier's output.
    """

    def __init__(self, feature_extractor,classifier, is_online = False):
        super().__init__()
        self.feature_extractor=feature_extractor
        if not is_online:
            self.classifier = classifier
            return

        # Freeze what was passed in ...
        self.feature_extractor.trainable = False
        classifier.trainable = False
        # ... and put a new, trainable prediction layer on top of it
        self.new_prediction_layer = tf.keras.layers.Dense(classifier.n_classes,trainable=True)
        self.classifier= tf.keras.Sequential([classifier, self.new_prediction_layer])

    def call(self, x):
        """Extract features from ``x`` and classify them."""
        return self.classifier(self.feature_extractor(x))

In [8]:
### Instantiate classifier and the feature extractor

In [9]:
# Offline variant: trainable convolutions and a 10-class classifier, combined
# without the extra online prediction layer.
feature_extractor = FeatureExtractor(trainable=True)
classifier = Classifier(10)
model = SampleModel(feature_extractor,classifier,is_online=False)

In [10]:
# Build for 32x32 inputs with 3 channels; the batch dimension is left open (None)
model.build((None,32,32,3))

In [11]:
# Print the layers with their parameter counts
model.summary()

Model: "sample_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
feature_extractor (FeatureEx multiple                  56320     
_________________________________________________________________
classifier_1 (Classifier)    multiple                  66250     
Total params: 122,570
Trainable params: 122,570
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Online variant: frozen backbone and classifier plus a new trainable head.
# Pass the fresh instances created here. SampleModel(is_online=True) sets
# `trainable = False` on the objects it receives, so handing it the
# `feature_extractor`/`classifier` that belong to `model` would freeze that
# model as a side effect.
feature_extractor1 = FeatureExtractor(trainable=False)
classifier1 = Classifier(10)
model2 = SampleModel(feature_extractor1,classifier1,is_online=True)


In [13]:
# Build the online model for 32x32x3 inputs and print its layers; the summary
# also reports the trainable / non-trainable parameter split.
model2.build((None,32,32,3))
model2.summary()

Model: "sample_model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
feature_extractor (FeatureEx multiple                  56320     
_________________________________________________________________
dense_6 (Dense)              (None, 10)                110       
_________________________________________________________________
sequential (Sequential)      (None, 10)                66360     
Total params: 122,680
Trainable params: 110
Non-trainable params: 122,570
_________________________________________________________________


### Training Loop


#### Dataloader

In [14]:
# Keep only the samples whose label is in old_labels; new_labels is empty
# here, so labels 8 and 9 are left out.
old_labels = [0,1,2,3,4,5,6,7]
new_labels = []
allowed_labels = old_labels + new_labels
train_images, train_labels, test_images,test_labels = custom_dataset_loader(dataset = dataset, 
                                                                          allowed_labels = allowed_labels)

Shape of training images after filtering labels (32000, 32, 32, 3)


#### Checkpoints and Callbacks

In [15]:
import os
# File prefix that the ModelCheckpoint callback of the next training run writes to
checkpoint_path = "training_1/cp.ckpt"
# Directory part of the prefix ("training_1")
checkpoint_dir = os.path.dirname(checkpoint_path)

#### Train using `tf.distribute.MirroredStrategy`

In [16]:
# Create, compile and fit the model inside the MirroredStrategy scope.
strategy = tf.distribute.MirroredStrategy()

with strategy.scope():

    # NOTE(review): trainable=False together with is_online=True freezes both
    # the feature extractor and the classifier at their initial weights, so
    # only the Dense(8) layer added by SampleModel is trained in this run.
    # Confirm that this is intended for the first training run.
    feature_extractor = FeatureExtractor(trainable=False)
    classifier = Classifier(n_classes=8)
    model = SampleModel(feature_extractor,classifier,is_online = True)
    # Compile model
    # from_logits=True: the model's last layer is a Dense without activation
    model.compile(optimizer='adam',
                loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                metrics=['acc'])

    # Create a callback that saves the model's weights
    cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                  save_weights_only=True,
                                                  verbose=1)
    # Train the model
    # The 20% split returned by custom_dataset_loader is used for validation
    history = model.fit(train_images, train_labels, epochs=10, 
                      validation_data=(test_images, test_labels),callbacks=[cp_callback])

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
Epoch 1/10
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replic

### Pretrained Feature extractor model (ResNet) and Custom classifier model

#### Dataloader

In [17]:
# Reload the filtered data for the ResNet experiments (same label subset as before)
old_labels = [0,1,2,3,4,5,6,7]
new_labels = [] # remaining labels are 8 and 9
allowed_labels = old_labels + new_labels
train_images,train_labels,test_images,test_labels = custom_dataset_loader(dataset = dataset, 
                                                                          allowed_labels = allowed_labels)

Shape of training images after filtering labels (32000, 32, 32, 3)


#### Checkpoints and Callbacks


In [18]:
# Separate checkpoint prefix for the ResNet-based runs
checkpoint_path = "training_2/cp.ckpt"
# Directory part of the prefix ("training_2")
checkpoint_dir = os.path.dirname(checkpoint_path)

In [19]:
class Classifier(Model):
    """Fully connected head that maps features to ``n_classes`` raw scores.

    Layers: Flatten, Dense(64, relu), Dense(n_classes) without activation.

    Args:
        n_classes: Number of output units; stored on ``self.n_classes``.
    """

    def __init__(self, n_classes):
        super().__init__()
        self.n_classes = n_classes
        self.flatten = Flatten()
        self.d1 = Dense(64, activation='relu')
        self.d2 = Dense(n_classes)

    def call(self, x):
        """Return the output-layer scores for the flattened input."""
        flat = self.flatten(x)
        return self.d2(self.d1(flat))


In [20]:
# The custom classifier is assumed to be pretrained. SampleModel(is_online=True)
# freezes it and appends a new, trainable prediction layer.
# NOTE(review): ResNet50() with default arguments keeps its classification
# head (the summary below reports an output shape of (None, 1000)), so the
# classifier receives 1000 class scores rather than convolutional features.
# Confirm whether include_top=False was intended.
feature_extractor = tf.keras.applications.ResNet50()
classifier = Classifier(n_classes = 8)
model = SampleModel(feature_extractor,classifier,is_online=True)

In [21]:
# Build for 32x32x3 inputs and print the layers with their parameter counts
model.build((None,32,32,3))
model.summary()

Model: "sample_model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Functional)        (None, 1000)              25636712  
_________________________________________________________________
dense_12 (Dense)             (None, 8)                 72        
_________________________________________________________________
sequential_2 (Sequential)    (None, 8)                 64656     
Total params: 25,701,368
Trainable params: 72
Non-trainable params: 25,701,296
_________________________________________________________________


#### Train only the classifier
Note: part of the classifier is assumed to have been trained in a previous run.
Creating the `SampleModel` with `is_online=True` therefore freezes the existing classifier (sets `trainable = False`) and appends a new prediction layer; only this newly appended layer is trained.

In [22]:
# Same training recipe as before, with ResNet50 as the frozen feature extractor.
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():

    feature_extractor = tf.keras.applications.ResNet50()
    classifier = Classifier(n_classes=8)
    # is_online=True: only the newly appended prediction layer is trainable
    model = SampleModel(feature_extractor,classifier,is_online = True)
    # Compile model
    model.compile(optimizer='adam',
                loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                metrics=['acc'])

    # Create a callback that saves the model's weights
    # (written to checkpoint_path after every epoch, see the log below)
    cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                  save_weights_only=True,
                                                  verbose=1)
    # Train the model
    history = model.fit(train_images, train_labels, epochs=10, 
                      validation_data=(test_images, test_labels),callbacks=[cp_callback])

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
Epoch 1/10

Epoch 00001: saving model to training_2\cp.ckpt
Epoch 2/10
Epoch 00002: saving model to training_2\cp.ckpt
Epoch 3/10
Epoch 00003: saving model to training_2\cp.ckpt
Epoch 4/10
Epoch 00004: saving model to training_2\cp.ckpt
Epoch 5/10
Epoch 00005: saving model to training_2\cp.ckpt
Epoch 6/10
Epoch 00006: saving model to training_2\cp.ckpt
Epoch 7/10
Epoch 00007: saving model to training_2\cp.ckpt
Epoch 8/10
Epoch 00008: saving model to training_2\cp.ckpt
Epoch 9/10
Epoch 00009: saving model to training_2\cp.ckpt
Epoch 10/10
Epoch 00010: saving model to training_2\cp.ckpt


### Auto tune wrapper with TF callbacks

#### Sample model with Pretrained Resnet as feature extractor and new custom classifier 

In [23]:
class Classifier(Model):
    """Tunable dense head: ``n_layers`` x (Dense + Dropout), then a softmax layer.

    Every Dense layer uses the 'glorot_normal' initializer and the 'l2' kernel
    regularizer. The output layer applies softmax, so the model returns class
    probabilities rather than logits.

    Args:
        n_units: Units of each hidden Dense layer.
        n_layers: Number of hidden Dense/Dropout pairs.
        dropout_rate: Rate of each Dropout layer.
        n_classes: Units of the softmax output layer.
    """

    def __init__(self, n_units, n_layers, dropout_rate, n_classes):
        super().__init__()
        self.flatten = Flatten()
        self.n_units = n_units
        self.n_layers = n_layers
        self.dropout_rate = dropout_rate
        self.n_classes = n_classes

        dense_kwargs = dict(kernel_initializer='glorot_normal', kernel_regularizer='l2')

        self.model = tf.keras.models.Sequential()
        for _ in range(n_layers):
            self.model.add(tf.keras.layers.Dense(self.n_units, activation='relu', **dense_kwargs))
            self.model.add(tf.keras.layers.Dropout(self.dropout_rate))
        self.model.add(tf.keras.layers.Dense(self.n_classes, activation=tf.nn.softmax, **dense_kwargs))

    def call(self, x):
        """Flatten ``x`` and run it through the dense stack."""
        return self.model(self.flatten(x))

class SampleModel(Model):
    """Frozen feature extractor followed by a classifier.

    Args:
        feature_extractor: Model applied to the input. Its ``trainable`` flag
            is set to ``False`` on the object that is passed in.
        classifier: Model applied to the extracted features.
    """

    def __init__(self, feature_extractor,classifier):
        super().__init__()
        feature_extractor.trainable = False
        self.feature_extractor = feature_extractor
        self.classifier = classifier

    def call(self, x):
        """Extract features from ``x`` and classify them."""
        return self.classifier(self.feature_extractor(x))

### Tensorboard to visualize the performance of the network with combination of hyperparameters

In [24]:
# Remove the logs of earlier runs so that TensorBoard only shows this session.
# shutil is used instead of `!rm -rf` because the shell command is not
# available on every platform (e.g. the Windows command prompt).
import shutil
shutil.rmtree("./logs/", ignore_errors=True)

In [25]:
import tensorflow as tf
from tensorboard.plugins.hparams import api as hp
# GPU Distributed training with Mirror Strategy
# `strategy` is read as a global by train_test_model (strategy.scope()).
strategy = tf.distribute.MirroredStrategy()

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


### Discrete combination of all hyperparameters; Random search is another option.

In [37]:
# Search space of the tuning run.
# Units per hidden Dense layer of the classifier
HP_NUM_UNITS = hp.HParam('HP_NUM_UNITS', hp.Discrete([8,64]))
# Number of hidden Dense/Dropout pairs of the classifier
HP_NUM_LAYERS = hp.HParam('HP_NUM_LAYERS',hp.Discrete([1,5]))
# Optimizer name passed to model.compile
HP_OPTIMIZER = hp.HParam('HP_OPTIMIZER', hp.Discrete(['adam', 'nadam']))
# HP_OPTIMIZER = hp.HParam('HP_OPTIMIZER', hp.Discrete(['adam']))
# Dropout is declared as an interval; the grid loop below only tries its two end points
HP_DROPOUT = hp.HParam('HP_DROPOUT', hp.RealInterval(0.1,0.2))


In [38]:
# Tag under which each trial logs its final accuracy
METRIC_ACCURACY = 'accuracy'

# Register the hyperparameters and the tracked metric at the root log directory
root_writer = tf.summary.create_file_writer('logs/hparam_tuning')
with root_writer.as_default():
    hp.hparams_config(
        hparams=[HP_NUM_UNITS, HP_NUM_LAYERS, HP_DROPOUT, HP_OPTIMIZER],
        metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
    )

### Adapt tensorflow runs to log hyperparameters and metrics 

In [39]:
def train_test_model(run_dir, hparams):
    """Train one hyperparameter configuration and return its held-out accuracy.

    Reads the notebook globals ``strategy``, ``train_images``,
    ``train_labels``, ``test_images`` and ``test_labels``.

    Args:
        run_dir: Log directory of this trial; the Keras TensorBoard logs are
            written to ``run_dir + "/keras"``.
        hparams: Mapping from the ``HP_*`` objects to the values of this trial.

    Returns:
        The accuracy reported by ``model.evaluate`` on the test split.
    """
    with strategy.scope():
        feature_extractor = tf.keras.applications.ResNet50()
        classifier = Classifier(hparams[HP_NUM_UNITS],
                                hparams[HP_NUM_LAYERS],
                                hparams[HP_DROPOUT],
                                n_classes=8)  # Change the number of classes
        model = SampleModel(feature_extractor,classifier)

        # The tunable classifier ends in a softmax layer, so the loss receives
        # probabilities: from_logits must be False here.
        model.compile(optimizer=hparams[HP_OPTIMIZER],
                      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                      metrics=['accuracy'])

        # The checkpoint callback that used to be created here was never
        # passed to fit() and pointed at the global checkpoint_path shared
        # with the earlier training run, so it has been dropped.
        hparams_callback = tf.keras.callbacks.TensorBoard(run_dir + "/keras")

        # Just 3 epochs per trial; the callback must be handed to fit() to log anything
        model.fit(train_images, train_labels, epochs=3, callbacks=[hparams_callback])
        _, accuracy = model.evaluate(test_images,test_labels)
        return accuracy

For each run, log an hparams summary with the hyperparameters and final accuracy:

In [40]:
def run(run_dir, hparams):
    """Run one trial and log its hyperparameters and accuracy under ``run_dir``."""
    trial_writer = tf.summary.create_file_writer(run_dir)
    with trial_writer.as_default():
        # record the values used in this trial
        hp.hparams(hparams)
        trial_accuracy = train_test_model(run_dir,hparams)
        tf.summary.scalar(METRIC_ACCURACY, trial_accuracy, step=1)

In [None]:
# Exhaustive grid over layers x units x dropout end points x optimizers;
# each combination is one trial logged to logs/hparam_tuning/run-<n>.
session_num = 0
for num_layers in HP_NUM_LAYERS.domain.values:
    for num_units in HP_NUM_UNITS.domain.values:
        for dropout_rate in (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value):
            for optimizer in HP_OPTIMIZER.domain.values:
                hparams = dict(zip(
                    (HP_NUM_LAYERS, HP_NUM_UNITS, HP_DROPOUT, HP_OPTIMIZER),
                    (num_layers, num_units, dropout_rate, optimizer),
                ))
                run_name = "run-%d" % session_num
                print('--- Starting trial: %s' % run_name)
                print({h.name: value for h, value in hparams.items()})
                run('logs/hparam_tuning/' + run_name, hparams)
                session_num += 1


--- Starting trial: run-0
{'HP_NUM_LAYERS': 1, 'HP_NUM_UNITS': 8, 'HP_DROPOUT': 0.1, 'HP_OPTIMIZER': 'adam'}
Epoch 1/3
Epoch 2/3
Epoch 3/3
--- Starting trial: run-1
{'HP_NUM_LAYERS': 1, 'HP_NUM_UNITS': 8, 'HP_DROPOUT': 0.1, 'HP_OPTIMIZER': 'nadam'}
Epoch 1/3
Epoch 2/3
Epoch 3/3
--- Starting trial: run-2
{'HP_NUM_LAYERS': 1, 'HP_NUM_UNITS': 8, 'HP_DROPOUT': 0.2, 'HP_OPTIMIZER': 'adam'}
Epoch 1/3
Epoch 2/3
Epoch 3/3
--- Starting trial: run-3
{'HP_NUM_LAYERS': 1, 'HP_NUM_UNITS': 8, 'HP_DROPOUT': 0.2, 'HP_OPTIMIZER': 'nadam'}
Epoch 1/3
Epoch 2/3
Epoch 3/3
--- Starting trial: run-4
{'HP_NUM_LAYERS': 1, 'HP_NUM_UNITS': 64, 'HP_DROPOUT': 0.1, 'HP_OPTIMIZER': 'adam'}
Epoch 1/3
Epoch 2/3
Epoch 3/3
--- Starting trial: run-5
{'HP_NUM_LAYERS': 1, 'HP_NUM_UNITS': 64, 'HP_DROPOUT': 0.1, 'HP_OPTIMIZER': 'nadam'}
Epoch 1/3
Epoch 2/3
Epoch 3/3
--- Starting trial: run-6
{'HP_NUM_LAYERS': 1, 'HP_NUM_UNITS': 64, 'HP_DROPOUT': 0.2, 'HP_OPTIMIZER': 'adam'}
Epoch 1/3
Epoch 2/3
Epoch 3/3
--- Starting trial: r

Epoch 1/3
Epoch 2/3
Epoch 3/3
--- Starting trial: run-8
{'HP_NUM_LAYERS': 5, 'HP_NUM_UNITS': 8, 'HP_DROPOUT': 0.1, 'HP_OPTIMIZER': 'adam'}
Epoch 1/3
Epoch 2/3
Epoch 3/3
--- Starting trial: run-9
{'HP_NUM_LAYERS': 5, 'HP_NUM_UNITS': 8, 'HP_DROPOUT': 0.1, 'HP_OPTIMIZER': 'nadam'}
Epoch 1/3
Epoch 2/3
Epoch 3/3
--- Starting trial: run-10
{'HP_NUM_LAYERS': 5, 'HP_NUM_UNITS': 8, 'HP_DROPOUT': 0.2, 'HP_OPTIMIZER': 'adam'}
Epoch 1/3
Epoch 2/3
Epoch 3/3

In [36]:
# Load the TensorBoard notebook extension and open the dashboard on the
# directory that the trials above log to.
%load_ext tensorboard
%tensorboard --logdir logs/hparam_tuning

Reusing TensorBoard on port 6006 (pid 1260), started 2:35:12 ago. (Use '!kill 1260' to kill it.)