In [1]:
import os

# Restrict this process to GPU 0; done ahead of the TensorFlow import below.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import numpy as np
from sklearn.utils import shuffle
from tensorflow.keras.datasets.cifar10 import load_data
import tensorflow as tf

# TensorFlow can flood the output with warnings; uncomment the next line to log errors only.
# tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

import tensorflow.keras.layers as L

from typing import List, Optional

from .micro_child import MicroChild
from .micro_lstm import MicroLSTM

In [6]:
# Number of samples per batch for both the train and the test pipeline.
BATCH_SIZE = 160
# Placeholder for a global random seed; not referenced in the visible cells.
SEED = None

(Xtrain, Ytrain), (Xtest, Ytest) = load_data()

# Scale pixel values to [0, 1]. Casting to float32 first keeps the arrays in
# the dtype declared by the dataset's TensorSpec; a plain `Xtrain / 255` on
# the uint8 data would silently produce float64 arrays twice the size.
Xtrain = Xtrain.astype(np.float32) / 255
Xtest = Xtest.astype(np.float32) / 255

# Labels are declared as tf.int32 in the dataset signature.
Ytrain = Ytrain.astype(np.int32)
Ytest = Ytest.astype(np.int32)


def pre_process(x, cutout_size=None, seed=None):
    """Randomly augment one image: pad, crop, horizontal flip, optional cutout.

    Args:
        x: Image with three axes. It is zero-padded by 4 on each side of the
            first two axes and then randomly cropped to ``[32, 32, 3]``.
        cutout_size: Side length of the square region that is zeroed out.
            ``None`` (the default) disables cutout.
        seed: Optional op-level seed forwarded to every random op used here.

    Returns:
        The augmented image tensor of shape ``[32, 32, 3]``.
    """
    x = tf.pad(x, [[4, 4], [4, 4], [0, 0]])
    x = tf.image.random_crop(x, [32, 32, 3], seed=seed)
    x = tf.image.random_flip_left_right(x, seed=seed)
    if cutout_size is not None:
        mask = tf.ones([cutout_size, cutout_size], dtype=tf.int32)
        # TF2 spelling: the TF1 alias `tf.random_uniform` is not available in
        # the TF2 top-level namespace and raised AttributeError here.
        start = tf.random.uniform(
            [2], minval=0, maxval=32, dtype=tf.int32, seed=seed
        )
        # Embed the block of ones in a (32 + 2 * cutout_size)-wide canvas so
        # that the 32x32 window taken below clips the block at the borders.
        mask = tf.pad(mask, [[cutout_size + start[0], 32 - start[0]],
                             [cutout_size + start[1], 32 - start[1]]])
        mask = mask[cutout_size: cutout_size + 32,
                    cutout_size: cutout_size + 32]
        mask = tf.reshape(mask, [32, 32, 1])
        mask = tf.tile(mask, [1, 1, 3])
        # Keep pixels where the mask is 0; zero out the cutout region.
        x = tf.where(tf.equal(mask, 0), x=x, y=tf.zeros_like(x))
    return x


def data_generator(x_data, y_data, use_preprocess=True):
    """Yield ``(image, label)`` pairs once over the data, in shuffled order.

    Args:
        x_data: Indexable collection of images.
        y_data: Indexable collection of labels, same length as ``x_data``.
        use_preprocess: When true, each image goes through ``pre_process``
            before being yielded; otherwise it is yielded unchanged.

    Yields:
        Tuples ``(image, label)``, one per element of the input.

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    assert len(x_data) == len(y_data)
    # The permutation is drawn anew on every call of the generator function.
    x_data, y_data = shuffle(x_data, y_data)
    # Iterating in lock-step replaces the manual counter, which indexed
    # element 0 before checking the length and so failed on empty input.
    for sample, label in zip(x_data, y_data):
        yield (pre_process(sample) if use_preprocess else sample), label

# Per-sample signature shared by both pipelines: one image and one label.
_SAMPLE_SIGNATURE = (
    tf.TensorSpec(shape=(32, 32, 3), dtype=tf.float32),
    tf.TensorSpec(shape=(1,), dtype=tf.int32),
)

# Training pipeline: augmented samples, repeated indefinitely, then batched.
train_dataset = (
    tf.data.Dataset
    .from_generator(
        lambda: data_generator(Xtrain, Ytrain),
        output_signature=_SAMPLE_SIGNATURE,
    )
    .repeat()
    .batch(BATCH_SIZE)
)

# Test pipeline: raw samples (no augmentation), single pass, batched.
test_dataset = (
    tf.data.Dataset
    .from_generator(
        lambda: data_generator(Xtest, Ytest, use_preprocess=False),
        output_signature=_SAMPLE_SIGNATURE,
    )
    .batch(BATCH_SIZE)
)

# Number of full batches that fit in one pass over the training set.
iterations_per_epoch = len(Xtrain) // BATCH_SIZE

In [7]:
# Pull a single batch from the training pipeline.
iter_train_dataset = iter(train_dataset)
x, y = next(iter_train_dataset)

In [9]:
# Controller handed to MicroChild below as `nas_controller`.
micro_lstm = MicroLSTM(
    num_branches=5,
    num_cells=5,
    lstm_size=64,
    # temperature=5.0,
    tanh_constant=1.1,
    entropy_weight=0.0001,
    op_tanh_reduce=2.5,
    decay=0.99,
)

In [10]:
# Optimizer handed to MicroChild as `opt_nas`: SGD with Nesterov momentum and
# a cosine learning-rate schedule with restarts; the first period spans
# 10 * iterations_per_epoch steps.
opt_nas = tf.keras.optimizers.SGD(
    learning_rate=tf.keras.optimizers.schedules.CosineDecayRestarts(
        0.05,
        first_decay_steps=iterations_per_epoch * 10,
        t_mul=2,
        m_mul=1,
        alpha=5e-4,
    ),
    momentum=0.9,
    nesterov=True,
)

# Disabled alternative, kept for reference. It is written as comments rather
# than a bare string literal so the cell no longer echoes it as its output.
# opt_nas = tf.keras.optimizers.Adam(
#     learning_rate=tf.keras.optimizers.schedules.CosineDecayRestarts(
#         0.05, first_decay_steps=iterations_per_epoch * 10,
#         t_mul=2, m_mul=1, alpha=5e-4
#     ),
#     beta_1=0.0, epsilon=1e-3
# )

In [11]:
# Optimizer handed to MicroChild as `opt_controller`.
opt_controller = tf.keras.optimizers.Adam(
    learning_rate=0.0035,
    beta_1=0.0,  # beta_1=0.0, 0.1
    epsilon=1e-3,
)

In [12]:
# Child model wired to the controller and the two optimizers defined above.
# NOTE(review): `ClipGradsMode` is not imported in any visible cell; confirm
# where it comes from so the notebook survives Restart & Run All.
micro_child = MicroChild(
    input_shape=(32, 32, 3),
    nas_controller=micro_lstm,
    opt_nas=opt_nas,
    opt_controller=opt_controller,
    num_cells=5,
    num_layers=6,
    out_filters=20,  # 48
    keep_prob=0.9,
    drop_path_keep_prob=0.6,
    # l2_reg=1e-4, # TODO: Look at the comment inside this module how to fix it
    use_aux_heads=True,
    clip_mode=ClipGradsMode.NORM,
)

MicroNasModel configured to be dynamic-builded. 
Use additional aux-head for training in layer_id=6


In [13]:
# Start the search loop on the train/test pipelines built earlier.
# NOTE(review): the recorded run of this cell stopped with an
# InvalidArgumentError from the Unique op during the first epoch.
micro_child.fit_controller(
    train_dataset,
    test_dataset,
    steps_per_epoch_for_model=iterations_per_epoch,
    num_epochs_for_model=1,
    num_epoch_for_controller=30,
    epochs=50,
)

  0%|          | 0/312 [00:00<?, ?it/s]

Epoch=1/50
Train nas-model...


  0%|          | 0/312 [00:21<?, ?it/s]


InvalidArgumentError: {{function_node __wrapped__Unique_device_/job:localhost/replica:0/task:0/device:GPU:0}} unique expects a 1D vector. [Op:Unique]