In [None]:

# NAS parameters.
# NOTE(review): the search loop that consumes these five values is not part of
# the cells shown here; their exact roles should be confirmed against it.
CONTROLLER_SAMPLING_EPOCHS = 10
SAMPLES_PER_CONTROLLER_EPOCH = 20
CONTROLLER_TRAINING_EPOCHS = 5
ARCHITECTURE_TRAINING_EPOCHS = 5
CONTROLLER_LOSS_ALPHA = 0.9

# Controller (LSTM) parameters.
# The LSTM width, optimizer name and learning rate are copied onto the
# DQNAgent(CNNSearchSpace) instance in its constructor.
CONTROLLER_LSTM_DIM = 100
CONTROLLER_OPTIMIZER = 'Adam'
CONTROLLER_LEARNING_RATE = 0.01
CONTROLLER_DECAY = 0.1
CONTROLLER_MOMENTUM = 0.0
CONTROLLER_USE_PREDICTOR = False

# CNN architecture parameters (read by CNNGenerator.__init__ unless noted).
MAX_ARCHITECTURE_LENGTH = 8  # stored as DQNAgent.max_len
MLP_DECAY = 0.0  # stored as mlp_decay; compile_model does not pass it to any optimizer
MLP_MOMENTUM = 0.0  # momentum handed to the SGD optimizer in compile_model
MLP_LOSS_FUNCTION = 'categorical_crossentropy'  # loss used when compiling candidate models
MLP_ONE_SHOT = True  # enables the shared-weights (bigram) store in CNNGenerator

# MNIST is the dataset used to check CNN training and testing, hence 10 classes.
TARGET_CLASSES = 10
TOP_N = 5

In [None]:

import itertools
import os
import warnings

import pandas as pd
import tensorflow
import tensorflow.keras
from tensorflow.keras.optimizers import *
from keras.models import Sequential
from keras.layers import Flatten, Dense, Dropout, Conv2D, SeparableConv2D, DepthwiseConv2D, Conv2DTranspose
from keras.layers import MaxPooling2D, AveragePooling2D, GlobalMaxPooling2D, GlobalAveragePooling2D, BatchNormalization
#import CNNCONSTANTS
#from CNNCONSTANTS import *
class CNNSearchSpace(object):
    """Vocabulary of CNN layer configurations that architecture sequences are built from.

    ``vocab`` maps consecutive integer ids (starting at 1) to layer
    configurations, laid out in this order: convolution layers, pooling layers,
    dense layers, dropout layers, ``'Flatten'``, ``'BatchNormalization'`` and
    finally the output classifier. ``conv_id``, ``pool_id``, ``fully_id`` and
    ``reg_layer_id`` hold the last id of the first four groups.
    """

    def __init__(self, target_classes):
        """Store the number of target classes and build the vocabulary."""
        self.target_classes = target_classes
        self.vocab = self.vocab_dict()

    def vocab_dict(self):
        """Build and return the ``{id: layer configuration}`` vocabulary.

        Entry formats:
            * conv layers: ``(layer, filters, kernel, stride, padding,
              weight_init, bias_init, regularizer)``; ``'depthwiseconv2d'``
              entries omit ``filters`` (7 items instead of 8).
            * pooling: ``(layer, pool_size, stride, padding)``; the two global
              pooling layers are stored as a bare string.
            * dense: ``(units, activation)``.
            * dropout: ``('dropout', rate)``.
            * ``'Flatten'`` and ``'BatchNormalization'``: bare strings.
            * classifier: ``(1, 'sigmoid')`` for two classes, otherwise
              ``(target_classes, 'softmax')``.

        Side effects: sets ``lr``, ``batch_size``, ``learning_optimimzer`` (the
        training hyperparameters that are tried outside the search space) and
        the four group-boundary ids.
        """
        # ----------------------- Hyperparameter pools ----------------------- #

        # Fully connected layers
        nodes = [8, 16, 32, 64, 128, 256, 512]
        act_funcs = ['sigmoid', 'tanh', 'relu', 'elu', 'selu', 'swish']

        # Convolutional layers
        conv_layers = ['conv2d', 'separableconv2d', 'depthwiseconv2d', 'conv2dtranspose']
        conv_filter_size = [3, 5, 7, 9]
        conv_filters = [16, 32, 64, 96, 128, 160, 192, 224, 256]
        conv_padding = ['same', 'valid']
        conv_stride = [2, 3]
        conv_weight_initializers = ['HeNormal', 'HeUniform', 'RandomNormal', 'RandomUniform']
        conv_bias_initializers = ['HeNormal', 'HeUniform', 'RandomNormal', 'RandomUniform']
        conv_regularizers = ['l1', 'l2', 'l1_l2']

        # Pooling layers
        pool_layers = ['maxpool2d', 'avgpool2d', 'globalmaxpool2d', 'globalavgpool2d']
        pool_size = [2, 3, 4, 5]
        pool_stride = [1, 2, 3, 4, 5]
        pool_padding = ['same', 'valid']

        # Regularization layers (only plain dropout is enabled)
        reg_layers = ['dropout']
        dropout_rate = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

        # Training hyperparameters: not part of the vocabulary, they are
        # iterated over by the training code instead.
        self.lr = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
        self.batch_size = [2, 4, 8, 16, 32]
        self.learning_optimimzer = ['adam', 'rms', 'sgd']

        # ------------------------ Vocabulary creation ----------------------- #
        # Ids are assigned 1, 2, 3, ... in append order, so the id of the last
        # entry of a group equals the list length at that point.
        layer_params = []

        conv_grid = itertools.product(
            conv_layers, conv_filters, conv_filter_size, conv_stride, conv_padding,
            conv_weight_initializers, conv_bias_initializers, conv_regularizers)
        for layer, filters, *rest in conv_grid:
            # Compare by value: identity (`is`) on string literals only works
            # through interpreter-specific interning.
            if layer == 'depthwiseconv2d':
                # No filter count for depthwise convolutions. The entry is still
                # emitted once per filter value so that ids stay unchanged.
                layer_params.append((layer, *rest))
            else:
                layer_params.append((layer, filters, *rest))
        self.conv_id = len(layer_params)

        for layer, size, stride, padding in itertools.product(
                pool_layers, pool_size, pool_stride, pool_padding):
            if layer in ('globalavgpool2d', 'globalmaxpool2d'):
                # Stored as a bare string (not a 1-tuple) and repeated for every
                # size/stride/padding combination; kept as-is so ids are stable.
                layer_params.append(layer)
            else:
                layer_params.append((layer, size, stride, padding))
        self.pool_id = len(layer_params)

        layer_params.extend(itertools.product(nodes, act_funcs))
        self.fully_id = len(layer_params)

        layer_params.extend(itertools.product(reg_layers, dropout_rate))
        self.reg_layer_id = len(layer_params)

        vocab = dict(enumerate(layer_params, start=1))

        # Flatten and BatchNormalization carry no parameters.
        vocab[len(vocab) + 1] = 'Flatten'
        vocab[len(vocab) + 1] = 'BatchNormalization'

        # Final classifier: a single sigmoid unit for binary problems,
        # otherwise one softmax unit per class.
        if self.target_classes == 2:
            vocab[len(vocab) + 1] = (self.target_classes - 1, 'sigmoid')
        else:
            vocab[len(vocab) + 1] = (self.target_classes, 'softmax')
        return vocab

    def encode_sequence(self, sequence):
        """Translate layer configurations into vocabulary ids.

        A configuration that occurs several times in the vocabulary maps to its
        lowest id.

        Raises:
            ValueError: if a configuration is not in the vocabulary.
        """
        # One pass over the vocabulary instead of a linear search per element.
        first_id = {}
        for key, value in self.vocab.items():
            first_id.setdefault(value, key)
        try:
            return [first_id[value] for value in sequence]
        except (KeyError, TypeError) as exc:
            raise ValueError("configuration not in vocabulary: {}".format(exc)) from exc

    def decode_sequence(self, sequence):
        """Translate vocabulary ids back into layer configurations.

        Raises:
            ValueError: if an id is not in the vocabulary.
        """
        try:
            return [self.vocab[key] for key in sequence]
        except (KeyError, TypeError) as exc:
            raise ValueError("id not in vocabulary: {}".format(exc)) from exc

class CNNGenerator(CNNSearchSpace):

    def __init__(self):
        """Read the CNN settings from the notebook constants and build the vocabulary.

        When ``MLP_ONE_SHOT`` is true, an empty shared-weights table (columns
        ``bigram_id`` and ``weights``) is created in memory and, if no file
        exists yet at ``LOGS2/shared_weights.pkl``, written there.
        """
        self.target_classes = TARGET_CLASSES
        self.mlp_decay = MLP_DECAY
        self.mlp_momentum = MLP_MOMENTUM
        self.mlp_loss_func = MLP_LOSS_FUNCTION
        self.mlp_one_shot = MLP_ONE_SHOT
        self.metrics = ['accuracy']

        super().__init__(TARGET_CLASSES)

        if self.mlp_one_shot:
            # path to shared weights file
            self.weights_file = 'LOGS2/shared_weights.pkl'

            # empty dataframe with columns for bigram ids and weights
            # NOTE(review): an already existing pickle is not loaded back, so
            # every new generator starts from an empty table — confirm intended.
            self.shared_weights = pd.DataFrame({'bigram_id': [], 'weights': []})

            if not os.path.exists(self.weights_file):
                print("Initializing shared weights dictionary...")
                # to_pickle does not create missing directories, so make sure
                # the target folder exists first.
                weights_dir = os.path.dirname(self.weights_file)
                if weights_dir:
                    os.makedirs(weights_dir, exist_ok=True)
                self.shared_weights.to_pickle(self.weights_file)

    # function to create a keras model given a sequence and input data shape
    def create_model(self, sequence, cnn_input_shape):
        """Build a Keras ``Sequential`` model from an encoded architecture.

        Args:
            sequence: iterable of vocabulary ids; decoded with ``decode_sequence``.
            cnn_input_shape: shape passed to the first layer as ``input_shape``.

        Returns:
            The un-compiled model, or ``None`` when Keras raises ``ValueError``
            while stacking the layers (e.g. the feature map shrinks to a
            non-positive size).

        Raises:
            ValueError: from ``decode_sequence`` if an id is not in the vocabulary.
        """
        # decode sequence to get the configuration of each layer
        layer_configs = self.decode_sequence(sequence)
        try:
            model = Sequential()

            for i, layer_conf in enumerate(layer_configs):
                # Parameterless layers (global pooling, Flatten,
                # BatchNormalization) are stored in the vocabulary as bare
                # strings, everything else as a tuple whose first item
                # identifies the layer (dense entries start with the unit count).
                kind = layer_conf if isinstance(layer_conf, str) else layer_conf[0]
                # Only the first layer is told the input shape.
                first = {'input_shape': cnn_input_shape} if i == 0 else {}

                # Strings are compared with ==, never `is`.
                if kind == 'conv2d':
                    model.add(Conv2D(filters=layer_conf[1], kernel_size=layer_conf[2], strides=layer_conf[3],
                                     padding=layer_conf[4], kernel_initializer=layer_conf[5],
                                     bias_initializer=layer_conf[6], kernel_regularizer=layer_conf[7], **first))
                elif kind == 'separableconv2d':
                    model.add(SeparableConv2D(filters=layer_conf[1], kernel_size=layer_conf[2], strides=layer_conf[3],
                                              padding=layer_conf[4], depthwise_initializer=layer_conf[5],
                                              bias_initializer=layer_conf[6], depthwise_regularizer=layer_conf[7],
                                              **first))
                elif kind == 'depthwiseconv2d':
                    # depthwise entries carry no filter count, so indices shift by one
                    model.add(DepthwiseConv2D(kernel_size=layer_conf[1], strides=layer_conf[2],
                                              padding=layer_conf[3], depthwise_initializer=layer_conf[4],
                                              bias_initializer=layer_conf[5], depthwise_regularizer=layer_conf[6],
                                              **first))
                elif kind == 'conv2dtranspose':
                    model.add(Conv2DTranspose(filters=layer_conf[1], kernel_size=layer_conf[2], strides=layer_conf[3],
                                              padding=layer_conf[4], kernel_initializer=layer_conf[5],
                                              bias_initializer=layer_conf[6], kernel_regularizer=layer_conf[7],
                                              **first))
                elif kind == 'maxpool2d':
                    model.add(MaxPooling2D(pool_size=(layer_conf[1], layer_conf[1]),
                                           strides=(layer_conf[2], layer_conf[2]),
                                           padding=layer_conf[3], **first))
                elif kind == 'avgpool2d':
                    model.add(AveragePooling2D(pool_size=(layer_conf[1], layer_conf[1]),
                                               strides=(layer_conf[2], layer_conf[2]),
                                               padding=layer_conf[3], **first))
                elif kind == 'globalmaxpool2d':
                    model.add(GlobalMaxPooling2D(**first))
                elif kind == 'globalavgpool2d':
                    model.add(GlobalAveragePooling2D(**first))
                elif kind == 'dropout':
                    # No fixed layer name: a sequence may contain several
                    # dropout layers and Sequential needs unique names. The
                    # automatic names still contain 'dropout', which is what
                    # the weight-sharing code looks for.
                    model.add(Dropout(layer_conf[1], **first))
                elif kind == 'Flatten':
                    model.add(Flatten(**first))
                elif kind == 'BatchNormalization':
                    model.add(BatchNormalization(**first))
                else:
                    # dense entries: (units, activation), including the final classifier
                    model.add(Dense(units=layer_conf[0], activation=layer_conf[1], **first))

            return model
        except ValueError:
            # The stack of layers is not buildable for this input shape.
            return None

    # function to compile the model with the appropriate optimizer and loss function
    def compile_model(self, model):
        """Compile one copy of ``model`` per (learning rate, optimizer) combination.

        Every combination of ``self.lr`` and ``self.learning_optimimzer`` gets
        its own model instance. Previously the same object was re-compiled and
        appended repeatedly, so every list entry ended up with the last
        optimizer. The copies start from the weights ``model`` has when this
        method is called.

        For backward compatibility the passed-in ``model`` is itself used for
        the last combination, so after the call it is compiled exactly as before.

        Args:
            model: an un-compiled Keras model (as returned by ``create_model``).

        Returns:
            list of compiled models, ordered by learning rate, then optimizer.
        """
        # NOTE: always check that the loss function and metrics match the
        # target classes and the dataset.
        configs = [(rate, name) for rate in self.lr for name in self.learning_optimimzer]
        initial_weights = model.get_weights()

        models = []
        for idx, (rate, name) in enumerate(configs):
            # Strings are compared with ==, never `is`.
            if name == 'sgd':
                optim = tensorflow.keras.optimizers.SGD(learning_rate=rate, momentum=self.mlp_momentum)
            elif name == 'adam':
                optim = tensorflow.keras.optimizers.Adam(learning_rate=rate)
            else:
                # any other name (currently only 'rms') selects RMSprop
                optim = tensorflow.keras.optimizers.RMSprop(learning_rate=rate)

            if idx == len(configs) - 1:
                candidate = model
            else:
                # clone_model re-creates the layers with fresh weights, so the
                # original weights are copied over explicitly.
                candidate = tensorflow.keras.models.clone_model(model)
                candidate.set_weights(initial_weights)

            candidate.compile(loss=self.mlp_loss_func, optimizer=optim, metrics=self.metrics)
            models.append(candidate)
        return models


    def set_model_weights(self, model):
        #print(model)
        # get nodes and activations for each layer
        layer_configs=[]
        for layer in model.layers:
            #print(layer.name)
            #print(layer.get_config())
            # add flatten since it affects the size of the weights
            #index=layer.get_config()['name'].rfind("_")
            if 'flatten' in layer.name:
                layer_configs.append((layer.input_shape,'Flatten'))
            # don't add dropout since it doesn't affect weight sizes or activations
            elif not (('dropout' in layer.name) or ('max_pooling2d' in layer.name) or ('average_pooling2d' in layer.name) or ('global_max_pooling2d' in layer.name) or ('global_average_pooling2d' in layer.name)):
                #For Conv Layers
                #if layer.name is 'conv2d'
                #print(layer.name)
                if 'separable_conv2d' in layer.name:
                    layer_configs.append((layer.input_shape,'separable_conv2d',layer.get_config()['filters'],layer.get_config()['kernel_size']))
                #elif layer.name is 'separableconv2d':
                    #index=layer.get_config()['name'].rfind("_")
                    #if index == 9:
                        #layer_configs.append(layer.get_config()['name'],layer.get_config()['filters'],layer.get_config()['kernel_size'])
                    #else :
                     #   layer_configs.append(layer.get_config()['name'][:layer.get_config()['name'].rfind("_")],layer.get_config()['filters'],layer.get_config()['kernel_size'])
                elif 'depthwise_conv2d' in layer.name:
                    layer_configs.append((layer.input_shape,'depthwise_conv2d',layer.get_config()['kernel_size']))

                #elif layer.name is 'depthwiseconv2d':
                    #index=layer.get_config()['name'].rfind("_")
                    #if index == 9:
                     #   layer_configs.append(layer.get_configs()['name'],layer.get_configs()['kernel_size'])
                    #else:
                       # layer_configs.append(layer.get_configs()['name'][:layer.get_configs()['name'].rfind('_')],layer.get_configs()['kernel_size'])
                elif 'conv2d_transpose' in layer.name:
                    layer_configs.append((layer.input_shape,'conv2d_transpose',layer.get_config()['filters'],layer.get_config()['kernel_size']))

                #elif layer.name is 'conv2dtranspose':
                    #index=layer.get_config()['name'].rfind("_")
                    #if index == 6:
                     #   layer_configs.append(layer.get_config()['name'],layer.get_config()['filters'],layer.get_config()['kernel_size'])
                    #else :
                     #   layer_configs.append(layer.get_config()['name'][:layer.get_config()['name'].rfind("_")],layer.get_config()['filters'],layer.get_config()['kernel_size'])

                elif 'conv2d' in layer.name:
                    layer_configs.append((layer.input_shape,'conv2d',layer.get_config()['filters'],layer.get_config()['kernel_size']))
                    #index=layer.get_config()['name'].rfind("_")
                    #if index == -1:
                        #layer_configs.append(layer.get_config()['name'],layer.get_config()['filters'],layer.get_config()['kernel_size'])
                    #else :
                        #layer_configs.append(layer.get_config()['name'][:layer.get_config()['name'].rfind("_")],layer.get_config()['filters'],layer.get_config()['kernel_size'])

                #For BatchNormalization Layer
                elif layer.name is 'batch_normalization':
                    layer_configs.append((layer.input_shape,'batch_normalization',layer.get_config()['filters'],layer.get_config()['kernel_size']))
                    #layer_configs.append(layer.get_config()['name'][:layer.get_config()['name'].index("_")],layer.get_config()['filters'],layer.get_config()['kernel_size'])

                #For Dense Layers
                else :
                    layer_configs.append((layer.input_shape,layer.get_config()['units'], layer.get_config()['activation']))

        # get bigrams of relevant layers for weights transfer
        config_ids = []
        #Starting from 1 as we are using i-1 in the saving part
        for i in range(1, len(layer_configs)):
            config_ids.append((layer_configs[i - 1], layer_configs[i]))

        # for all layers
        j = 0
        #print('---------------------------Setting Weights-------------------')
        for i, layer in enumerate(model.layers):
            if j >= len(config_ids):
                break
            if not (('dropout' in layer.name) or ('max_pooling2d' in layer.name) or ('average_pooling2d' in layer.name) or ('global_max_pooling2d' in layer.name) or ('global_average_pooling2d' in layer.name)):
                warnings.simplefilter(action='ignore', category=FutureWarning)


                # get all bigram values we already have weights for
                bigram_ids = self.shared_weights['bigram_id'].values
                #print("Layer : {0}------ Bigram :{1}".format(i,bigram_ids))
                # check if a bigram already exists in the dataframe
                search_index = []
                for x in range(len(bigram_ids)):
                    #print("C0: ",config_ids[j][0][1:])
                    #print("B0: ",bigram_ids[x][0][1:])
                    #print("C1: ",config_ids[j][1])
                    #print("B1: ",bigram_ids[x][1])
                    #print("Config value of first:",config_ids[j][0][0:])
                    #print("Bigram value of first:",bigram_ids[x][0][0:])
                    if ((config_ids[j][0][1:] == bigram_ids[x][0][1:]) and (config_ids[j][1]==bigram_ids[x][1])):
                        search_index.append(x)

                # set layer weights if there is a bigram match in the dataframe
                if len(search_index) > 0:
                    #print("Transferring weights for layer:", config_ids[j])
                    layer.set_weights(self.shared_weights['weights'].values[search_index[0]])
                j += 1

    def update_weights(self, model):

        # get nodes and activations for each layer
        layer_configs = []
        for layer in model.layers:
            if 'flatten' in layer.name:
                layer_configs.append((layer.input_shape,'Flatten'))
            elif not (('dropout' in layer.name) or ('max_pooling2d' in layer.name) or ('average_pooling2d' in layer.name) or ('global_max_pooling2d' in layer.name) or ('global_average_pooling2d' in layer.name)):
                if 'separable_conv2d' in layer.name:
                    layer_configs.append((layer.input_shape,'separable_conv2d',layer.get_config()['filters'],layer.get_config()['kernel_size']))
                elif 'depthwise_conv2d' in layer.name:
                    layer_configs.append((layer.input_shape,'depthwise_conv2d',layer.get_config()['kernel_size']))
                elif 'conv2d_transpose' in layer.name:
                    layer_configs.append((layer.input_shape,'conv2d_transpose',layer.get_config()['filters'],layer.get_config()['kernel_size']))
                elif 'conv2d' in layer.name:
                    layer_configs.append((layer.input_shape,'conv2d',layer.get_config()['filters'],layer.get_config()['kernel_size']))
                elif layer.name is 'batch_normalization':
                    layer_configs.append((layer.input_shape,'batch_normalization',layer.get_config()['filters'],layer.get_config()['kernel_size']))
                else :
                    layer_configs.append((layer.input_shape,layer.get_config()['units'], layer.get_config()['activation']))

            '''
            # add flatten since it affects the size of the weights
            if 'Flatten' in layer.name:
                layer_configs.append(('Flatten'))
            # don't add dropout since it doesn't affect weight sizes or activations
            elif ('dropout' or 'maxpool2d' or 'avgpool2d' or 'globalmaxpool2d' or 'globalavgpool2d') not in layer.name:

                #For Conv Layers
                if layer.name is 'conv2d':
                    index=layer.get_config()['name'].rfind("_")
                    if index == -1:
                        layer_configs.append(layer.get_config()['name'],layer.get_config()['filters'],layer.get_config()['kernel_size'])
                    else :
                        layer_configs.append(layer.get_config()['name'][:layer.get_config()['name'].rfind("_")],layer.get_config()['filters'],layer.get_config()['kernel_size'])
                elif layer.name is 'separableconv2d':
                    index=layer.get_config()['name'].rfind("_")
                    if index == 9:
                        layer_configs.append(layer.get_config()['name'],layer.get_config()['filters'],layer.get_config()['kernel_size'])
                    else :
                        layer_configs.append(layer.get_config()['name'][:layer.get_config()['name'].rfind("_")],layer.get_config()['filters'],layer.get_config()['kernel_size'])
                elif layer.name is 'depthwiseconv2d':
                    index=layer.get_config()['name'].rfind("_")
                    if index == 9:
                        layer_configs.append(layer.get_configs()['name'],layer.get_configs()['kernel_size'])
                    else:
                        layer_configs.append(layer.get_configs()['name'][:layer.get_configs()['name'].rfind('_')],layer.get_configs()['kernel_size'])
                elif layer.name is 'conv2dtranspose':
                    index=layer.get_config()['name'].rfind("_")
                    if index == 6:
                        layer_configs.append(layer.get_config()['name'],layer.get_config()['filters'],layer.get_config()['kernel_size'])
                    else :
                        layer_configs.append(layer.get_config()['name'][:layer.get_config()['name'].rfind("_")],layer.get_config()['filters'],layer.get_config()['kernel_size'])

                #For BatchNormalization Layer
                elif layer.name is 'BatchNormalization':
                    layer_configs.append(layer.get_config()['name'][:layer.get_config()['name'].index("_")],layer.get_config()['filters'],layer.get_config()['kernel_size'])

                #For Dense Layers
                else :
                    layer_configs.append((layer.get_config()['units'], layer.get_config()['activation']))
        '''

        # get bigrams of relevant layers for weights transfer
        config_ids = []
        for i in range(1, len(layer_configs)):
            config_ids.append((layer_configs[i - 1], layer_configs[i]))

        # for all layers
        j = 0
        #print('-------------------Updating weights--------------------------')
        for i, layer in enumerate(model.layers):
            if j >= len(config_ids):
                break
            if not (('dropout' in layer.name) or ('max_pooling2d' in layer.name) or ('average_pooling2d' in layer.name) or ('global_max_pooling2d' in layer.name) or ('global_average_pooling2d' in layer.name)):
                warnings.simplefilter(action='ignore', category=FutureWarning)

                #get all bigram values we already have weights for
                bigram_ids = self.shared_weights['bigram_id'].values
                #print("Layer : {0}------ Bigram :{1}".format(i,bigram_ids))
                # check if a bigram already exists in the dataframe
                search_index = []
                for x in range(len(bigram_ids)):
                    if ((config_ids[j][0][1:] == bigram_ids[x][0][1:]) and (config_ids[j][1]==bigram_ids[x][1])):
                        search_index.append(x)

                # add weights to df in a new row if weights aren't already available
                if len(search_index) == 0:
                    self.shared_weights = self.shared_weights.append({'bigram_id': config_ids[j],
                                                                      'weights': layer.get_weights()},
                                                                     ignore_index=True)
                # else update weights
                else:
                    self.shared_weights.at[search_index[0], 'weights'] = layer.get_weights()
                j += 1
        self.shared_weights.to_pickle(self.weights_file)



    def train_model(self, models, x_data, y_data, nb_epochs, validation_split=0.1, callbacks=None):
        history_of_models=None
        val_acc=0
        for model in models:
            if self.mlp_one_shot:
                self.set_model_weights(model)
                for batch_size_value in self.batch_size:
                    history = model.fit(x_data,
                                y_data,
                                epochs=nb_epochs,
                                validation_split=validation_split,
                                callbacks=callbacks,
                                verbose=0)
                    if history.history['val_accuracy'][0] > val_acc or val_acc==0:
                        val_acc=history.history['val_accuracy'][0]
                        history_of_models=history
                        self.update_weights(model)
            else:
                for batch_size_value in self.batch_size:
                    history = model.fit(x_data,
                                y_data,
                                epochs=nb_epochs,
                                batch_sizze=batch_size_value,
                                validation_split=validation_split,
                                callbacks=callbacks,
                                verbose=0)
                    if history.history['val_accuracy'][0] > val_acc:
                        val_acc=history.history['val_accuracy'][0]
                        history_of_models=history
        return history_of_models

In [None]:
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 21 10:42:31 2022

@author: AnshumaanChauhan
"""
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Activation, Flatten, Input
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.activations import relu
from tensorflow.keras.optimizers import Adam
from collections import deque
import time
import random
import numpy as np

# Settings of the DQNAgent defined in this cell.
REPLAY_MEMORY_SIZE = 50000 # maximum length of the agent's replay-memory deque (can also be written 50_000)
MODEL_NAME= "First_Try"  # becomes part of the TensorBoard log directory name
MIN_REPLAY_MEMORY_SIZE=1000  # train() returns without fitting until this many transitions are stored
MINIBATCH_SIZE= 64  # transitions sampled per train() call; also the batch size of that fit
DISCOUNT= 0.99  # factor applied to the best future Q-value when computing the new Q-value
UPDATE_TARGET_EVERY=5  # the target model is synced once the terminal-state counter exceeds this


class ModifiedTensorBoard(TensorBoard):
    """TensorBoard callback that writes every ``fit`` call into one log file.

    By default Keras starts a new TensorBoard log for every ``fit`` call. This
    subclass keeps a single writer and a manually managed ``step`` counter
    (the class never increments it itself), and logs the metrics it is given
    through ``update_stats``.
    """

    def __init__(self, **kwargs):
        """Set the initial step and create the single writer for all ``fit`` calls."""
        super().__init__(**kwargs)
        self.step = 1
        # TF2 replaced tf.summary.FileWriter with create_file_writer; use
        # whichever this TensorFlow version provides.
        if hasattr(tf.summary, 'create_file_writer'):
            self.writer = tf.summary.create_file_writer(self.log_dir)
        else:
            self.writer = tf.summary.FileWriter(self.log_dir)

    def set_model(self, model):
        """Do nothing, so that the default per-fit log writer is not created."""
        pass

    # NOTE(review): the stock TensorBoard hooks below are disabled because they
    # appear to rely on state that the parent's set_model (skipped above)
    # prepares — confirm against the installed TensorFlow/Keras version.
    def on_train_begin(self, logs=None):
        pass

    def on_epoch_begin(self, epoch, logs=None):
        pass

    def on_train_batch_begin(self, batch, logs=None):
        pass

    def on_train_batch_end(self, batch, logs=None):
        pass

    def on_test_begin(self, logs=None):
        pass

    def on_test_end(self, logs=None):
        pass

    def on_epoch_end(self, epoch, logs=None):
        """Log the epoch metrics under the manually managed step number."""
        # Without this every fit() would start writing from step 0 again.
        self.update_stats(**(logs or {}))

    def on_batch_end(self, batch, logs=None):
        """Do nothing: nothing is logged per batch."""
        pass

    def on_train_end(self, _=None):
        """Do nothing, so the shared writer stays open between fits."""
        pass

    def update_stats(self, **stats):
        """Write the given ``name=value`` metrics at the current ``self.step``."""
        if hasattr(tf.summary, 'create_file_writer'):
            with self.writer.as_default():
                for name, value in stats.items():
                    tf.summary.scalar(name, value, step=self.step)
            self.writer.flush()
        else:
            # TF1 code path, as in the original implementation
            self._write_logs(stats, self.step)

class DQNAgent:
    """Deep Q-learning agent with a main network, a target network and replay memory.

    The main model is fitted in ``train``; the target model supplies the
    future Q-values and only receives the main model's weights every few
    terminal states, which keeps the training targets stable.
    """

    def __init__(self):
        """Create both networks, the replay memory and the TensorBoard callback."""
        # Main model: fitted in train()
        self.model = self.create_model()
        # Target model: starts as a copy of the main model
        self.target_model = self.create_model()
        self.target_model.set_weights(self.model.get_weights())

        # Fixed-length memory holding the most recent transitions; random
        # minibatches are drawn from it for training.
        self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)

        self.tensorBoard = ModifiedTensorBoard(log_dir="logs/{}-{}".format(MODEL_NAME, int(time.time())))

        # Counts terminal states seen since the target model was last synced
        self.target_update_counter = 0

    def create_model(self):
        """Build and compile the Q-network (16 inputs, 128 linear outputs)."""
        model = Sequential()
        # TODO: confirm the size of the observation state (currently 16).
        model.add(Input(shape=(16,)))
        model.add(Dense(32, activation='relu'))
        model.add(Dense(64, activation='relu'))
        # One output per possible action (size of the action space).
        model.add(Dense(128, activation='linear'))

        # learning_rate is the keyword used for optimizers elsewhere in this notebook
        model.compile(loss='mse', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])
        return model

    def update_replay_memory(self, transition):
        """Append a transition to the replay memory.

        ``train`` unpacks each transition as
        ``(current_state, action, reward, new_state, done)``.
        """
        self.replay_memory.append(transition)

    def get_qs(self, state, step):
        """Return the main model's Q-values for ``state`` (first row of the prediction).

        ``step`` is accepted but not used.
        """
        return self.model.predict(state)[0]

    def train(self, terminal_state, step):
        """Fit the main model on one random minibatch from the replay memory.

        Nothing happens until the replay memory holds at least
        ``MIN_REPLAY_MEMORY_SIZE`` transitions. ``step`` is accepted but not used.

        Args:
            terminal_state: truthy when the current episode has ended; the
                TensorBoard callback is only attached, and the target-update
                counter only advanced, in that case.
        """
        if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE:
            return

        minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE)

        # Q-values of the sampled states from the main model
        current_states = np.array([transition[0] for transition in minibatch])
        current_qs_list = self.model.predict(current_states)

        # Q-values of the follow-up states from the target model; needed for
        # the Q-value update formula
        new_current_states = np.array([transition[3] for transition in minibatch])
        future_qs_list = self.target_model.predict(new_current_states)

        X = []  # network inputs: the sampled states
        y = []  # training targets: Q-values with the taken action's value replaced

        for index, (current_state, action, reward, new_current_state, done) in enumerate(minibatch):
            if done:
                # no future reward after a terminal transition
                new_q = reward
            else:
                new_q = reward + DISCOUNT * np.max(future_qs_list[index])

            # The network outputs Q-values, so the prediction is reused as the
            # target with only the taken action's entry updated.
            current_qs = current_qs_list[index]
            current_qs[action] = new_q
            X.append(current_state)
            y.append(current_qs)

        self.model.fit(np.array(X), np.array(y), batch_size=MINIBATCH_SIZE, verbose=0, shuffle=False,
                       callbacks=[self.tensorBoard] if terminal_state else None)

        # Advance the counter on terminal states and sync the target model
        # once it exceeds the threshold.
        if terminal_state:
            self.target_update_counter += 1

        if self.target_update_counter > UPDATE_TARGET_EVERY:
            self.target_model.set_weights(self.model.get_weights())
            self.target_update_counter = 0

In [None]:
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 22 09:58:50 2022

@author: AnshumaanChauhan
"""

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Activation, Flatten, Input, LSTM
from keras.models import Model
from keras.callbacks import TensorBoard
from keras.activations import relu
from keras.optimizers import *
from collections import deque
import time
import random
import os
import tensorflow.keras.optimizers
import numpy as np
from keras.preprocessing.sequence import pad_sequences
# import CNNCONSTANTS
# from CNNCONSTANTS import *

# from CNNGenerator import CNNSearchSpace


from operator import itemgetter

# Seed the random generators so that runs are comparable across models.
# Seeding Python's `random` alone is not enough: architecture sampling
# (np.random.choice), data shuffling (np.random.permutation) and event-id
# generation (np.random.randint) all draw from NumPy's global generator.
# NOTE: TensorFlow's own generator (weight initialisation, dropout) is not seeded here.
random.seed(1)
np.random.seed(1)


class DQNAgent (CNNSearchSpace):

    def __init__(self):
        """Initialise replay/exploration state and the controller hyper-parameters.

        Reads the notebook-level constants MAX_ARCHITECTURE_LENGTH, CONTROLLER_*
        and TARGET_CLASSES, then initialises the CNNSearchSpace base class, which
        builds ``self.vocab``.
        """
        # Controller training is skipped until the replay memory holds at least
        # this many entries.
        self.MIN_REPLAY_MEMORY_SIZE = 10
        # The target network receives the main network's weights once the update
        # counter exceeds this value.
        self.UPDATE_TARGET_EVERY = 3

        # Constants for an epsilon-greedy policy. Within this class they are only
        # referenced by the disabled (string-quoted) sampling code.
        self.EPSILON = 1
        self.EPSILON_DECAY = 0.01

        # Every architecture sequence sampled so far (used to reject duplicates).
        self.seq_data = []
        # Replay memory of controller training rows. It is a plain list, so it is
        # not capped; a deque(maxlen=...) would be needed to bound it.
        self.replay_memory = []

        # Counts controller training rounds since the last target-network sync.
        self.target_update_counter = 0

        self.max_len = MAX_ARCHITECTURE_LENGTH
        self.controller_lstm_dim = CONTROLLER_LSTM_DIM
        self.controller_optimizer = CONTROLLER_OPTIMIZER
        self.controller_lr = CONTROLLER_LEARNING_RATE
        self.controller_decay = CONTROLLER_DECAY
        self.controller_momentum = CONTROLLER_MOMENTUM
        self.use_predictor = CONTROLLER_USE_PREDICTOR

        # inheriting from the search space
        super().__init__(TARGET_CLASSES)

        # number of classes for the controller (+ 1 for padding)
        self.controller_classes = len(self.vocab) + 1

        # file path of controller weights to be stored at
        self.controller_weights = 'LOGS2/controller_weights.h5'

        #Main model
        #self.model=self.create_control_model()
        #Target model
        #self.target_model=self.create_control_model()
        #self.target_model.set_weights(self.model.get_weights())

        #Initially it will do just random exploration and eventually learn about the optimal value
        #Therefore, not advisable to update weights after each predict

        #Will fit main model that will fitted after every step (Trained every step)
        #Target model will be the one we will do predict every step

        #After some n number of epochs we set weights of Train model same as that of Main model
        #Stablises the model, and a lot of randomness is noticed in initial steps

    def sample_architecture_sequences(self, model, number_of_samples):
        """Sample new architecture sequences from the controller, one token at a time.

        Each sequence is grown by feeding the zero-padded prefix to
        ``model.predict`` and drawing the next vocabulary id from the returned
        distribution with ``np.random.choice``. A sequence is closed with
        ``Flatten_id`` followed by ``final_layer_id``. A finished sequence that is
        already present in ``self.seq_data`` is discarded and sampling continues.

        Args:
            model: controller whose ``predict`` returns the next-token distribution
                (unpacked as a ``(distribution, _)`` pair when ``self.use_predictor``
                is set).
            number_of_samples: number of not-yet-seen sequences to return.

        Returns:
            List of sequences; every returned sequence is also appended to
            ``self.seq_data``.
        """
        # define values needed for sampling
        # The last three vocabulary ids are treated as Flatten, BatchNorm and the
        # final layer (presumably the vocab keys run 1..len(vocab) -- confirm
        # against CNNSearchSpace.vocab_dict).
        final_layer_id = len(self.vocab)
        BatchNorm_id = final_layer_id - 1
        Flatten_id=final_layer_id-2
        # id 0 is the padding token, so it is prepended to the real vocabulary ids
        vocab_idx = [0] + list(self.vocab.keys())

        # initialize list for architecture samples
        samples = []
        print("GENERATING ARCHITECTURE SAMPLES...")
        print('------------------------------------------------------')

        # while number of architectures sampled is less than required
        # NOTE(review): duplicates are rejected below, so this loop only ends once
        # enough unseen sequences have been produced.
        while len(samples) < number_of_samples:

            # initialise the empty list for architecture sequence
            seed = []

            # while len of generated sequence is less than maximum architecture length
            while len(seed) < self.max_len:

                # pad sequence for correctly shaped input for controller
                sequence = pad_sequences([seed], maxlen=self.max_len - 1, padding='post')
                sequence = sequence.reshape(1, 1, self.max_len - 1)

                # given the previous elements, get softmax distribution for the next element
                if self.use_predictor:
                    (probab, _) = model.predict(sequence)
                else:
                    probab = model.predict(sequence)
                #print(probab[0])
                #print(len(probab[0]))
                #probab = probab[0][0]
                #print(probab)
                # sample the next element randomly given the probability of next elements (the softmax distribution)

                '''
                random_val=random.random()
                if self.EPSILON > 0:
                    if random_val<self.EPSILON:
                        next = np.random.choice(vocab_idx, size=1, p=probab[0])
                        next=next[0]
                        self.EPSILON=self.EPSILON-self.EPSILON_DECAY
                    else:
                        best_action= max(probab[0])
                        list_rep=probab[0].tolist()
                        next=vocab_idx[list_rep.index(best_action)]
                else:
                    best_action= max(probab[0])
                    list_rep=probab[0].tolist()
                    next=vocab_idx[list_rep.index(best_action)]
                '''

                # NOTE(review): `next` shadows the builtin, and because size=1 it is a
                # one-element array rather than a plain int (the disabled code above
                # unwrapped it with next[0]).
                next = np.random.choice(vocab_idx, size=1, p=probab[0])
                # The first element must have an id below self.conv_id; anything else
                # (e.g. dropout) is re-sampled. conv_id/pool_id/fully_id/reg_layer_id are
                # presumably id boundaries set by CNNSearchSpace -- defined outside this view.
                if (next >= self.conv_id) and len(seed) == 0:
                    continue
                #Have to make a rule such that first layer cannot be anything except the Convolutional Layer

                # first layer is not final layer
                if next == final_layer_id and len(seed) == 0:
                    continue

                # if final layer, break out of inner loop
                # The most recently added element is dropped and replaced by Flatten,
                # then the final layer is appended.
                if next == final_layer_id:
                    seed.pop()
                    seed.append(Flatten_id)
                    seed.append(next)
                    break

                # if sequence length is 2 less than maximum, close it with Flatten and
                # the final layer and break out of inner loop (the sampled token is dropped)
                if len(seed) == self.max_len - 2:
                    seed.append(Flatten_id)
                    seed.append(final_layer_id)
                    break

                # ignore padding
                if not next == 0:
                    # becomes True as soon as one of the rules below has appended `next`
                    check_insert=False
                    # ids in (pool_id, fully_id]: appended by this rule only if a Flatten is already present
                    if next > self.pool_id and next <= self.fully_id :
                        for i in seed:
                            if i == Flatten_id :
                                seed.append(next)
                                check_insert=True
                                break

                    # Flatten: appended by this rule only if the sequence has no Flatten yet
                    if next == Flatten_id :
                        check_dupli_flatten=False
                        for i in seed :
                            if i==next:
                                check_dupli_flatten=True
                        if not check_dupli_flatten:
                            seed.append(next)
                            check_insert=True

                    # ids up to pool_id: appended by this rule only while no Flatten is present
                    if next <= self.pool_id :
                        check_no_pool_conv_after_flatten=False
                        for i in seed:
                            if i == Flatten_id:
                                check_no_pool_conv_after_flatten=True
                        if not check_no_pool_conv_after_flatten :
                            seed.append(next)
                            check_insert=True

                    # ids in (fully_id, reg_layer_id]: appended by this rule only when the
                    # previous element lies in (conv_id, fully_id]
                    if next > self.fully_id and next<=self.reg_layer_id:
                        i=seed[-1]
                        if ((i>self.conv_id and i<=self.pool_id) or (i>self.pool_id and i<=self.fully_id)):
                            seed.append(next)
                            check_insert=True

                    # BatchNorm: appended by this rule only when the previous element lies
                    # in (fully_id, reg_layer_id]
                    if next== BatchNorm_id:
                        i=seed[-1]
                        if i>self.fully_id and i<=self.reg_layer_id:
                            seed.append(next)
                            check_insert=True

                    # NOTE(review): a token that none of the rules appended is appended here
                    # anyway, so -- assuming the id ranges above are disjoint -- every
                    # non-padding token ends up in the sequence exactly once whatever the
                    # rule outcome. Confirm whether rejected tokens were meant to be skipped.
                    if not check_insert:
                        seed.append(next)
                else:
                    continue
            # check if the generated sequence has been generated before.
            # if not, add it to the sequence data.
            if seed not in self.seq_data:
                samples.append(seed)
                self.seq_data.append(seed)
        return samples

    def create_control_model(self, controller_input_shape, controller_batch_size):
        """Build the plain controller network.

        The network is two stacked LSTM layers of ``self.controller_lstm_dim``
        units, each followed by ``Dropout(0.2)``, and a softmax layer over
        ``self.controller_classes`` named ``main_output``.

        Args:
            controller_input_shape: shape passed to the ``main_input`` layer.
            controller_batch_size: accepted but not used by this method.

        Returns:
            The uncompiled Keras ``Model``.
        """
        main_input = Input(shape=controller_input_shape, name='main_input')

        # Only the first LSTM returns the full sequence; the second one reduces
        # it to a single vector for the softmax head.
        hidden = main_input
        for keep_sequence in (True, False):
            hidden = LSTM(self.controller_lstm_dim, return_sequences=keep_sequence)(hidden)
            hidden = Dropout(0.2)(hidden)

        main_output = Dense(self.controller_classes, activation='softmax', name='main_output')(hidden)
        return Model(inputs=[main_input], outputs=[main_output])


    def create_hybrid_model(self, controller_input_shape, controller_batch_size):
        """Build the controller with an additional accuracy-predictor output.

        The ``main_output`` branch is the same stack as the plain controller
        (LSTM -> Dropout -> LSTM -> Dropout -> softmax). ``predictor_output`` is a
        single sigmoid unit applied to the dropout output of the first LSTM.

        An extra LSTM layer used to be instantiated here but was never connected
        to either output, so it was not part of the returned model; it has been
        removed.

        Args:
            controller_input_shape: shape passed to the ``main_input`` layer.
            controller_batch_size: accepted but not used by this method.

        Returns:
            The uncompiled Keras ``Model`` with outputs
            ``[main_output, predictor_output]``.
        """
        main_input = Input(shape=controller_input_shape, name='main_input')

        # Shared trunk: the per-step output of the first LSTM, after dropout.
        sequence_features = LSTM(self.controller_lstm_dim, return_sequences=True)(main_input)
        sequence_features = Dropout(0.2)(sequence_features)

        # Next-token branch.
        summary = LSTM(self.controller_lstm_dim)(sequence_features)
        summary = Dropout(0.2)(summary)
        main_output = Dense(self.controller_classes, activation='softmax', name='main_output')(summary)

        # single neuron sigmoid layer for accuracy prediction
        predictor_output = Dense(1, activation='sigmoid', name='predictor_output')(sequence_features)

        # finally the Keras Model class is used to create a multi-output model
        return Model(inputs=[main_input], outputs=[main_output, predictor_output])


    #Only train once a minimum number of samples has been stored in the replay memory
    def train_control_model(self, model, target_model, x_data, y_data, val_accuracy, loss_func, controller_batch_size, nb_epochs):
        """Store the new samples and train the controller on the best stored one.

        Every ``(x_data[i][0], y_data[i], val_accuracy[i])`` triple is appended to
        ``self.replay_memory``. Nothing else happens until the memory holds at
        least ``self.MIN_REPLAY_MEMORY_SIZE`` rows. After that the memory is sorted
        by validation accuracy, best first, and the controller is fitted on the
        top row. The previous ascending sort selected the row with the LOWEST
        accuracy, which contradicted the stated intent of learning from the best
        architectures.

        Args:
            model: controller model to compile, fit and save.
            target_model: model that receives ``model``'s weights once the update
                counter exceeds ``self.UPDATE_TARGET_EVERY``.
            x_data: controller inputs; ``x_data[i][0]`` is stored per sample.
            y_data: controller targets, one per sample.
            val_accuracy: validation accuracy per sample (the sort key).
            loss_func: loss used for the ``main_output`` head.
            controller_batch_size: batch size passed to ``model.fit``.
            nb_epochs: number of epochs passed to ``model.fit``.

        Returns:
            None.
        """
        for i in range(len(x_data)):
            self.replay_memory.append([x_data[i][0], y_data[i], val_accuracy[i]])

        # Not enough experience collected yet.
        if len(self.replay_memory) < self.MIN_REPLAY_MEMORY_SIZE:
            return

        # Highest validation accuracy first; only the single best row is trained on.
        self.replay_memory = sorted(self.replay_memory, key=itemgetter(2), reverse=True)
        to_train = self.replay_memory[:1]

        # NOTE(review): the `lr`/`decay` keyword names are kept as they were; newer
        # Keras releases may expect `learning_rate` -- confirm against the installed version.
        optim = getattr(tensorflow.keras.optimizers, self.controller_optimizer)(lr=self.controller_lr,
                                                                                decay=self.controller_decay)

        # compile model depending on loss function and optimizer provided
        model.compile(optimizer=optim, loss={'main_output': loss_func})

        # load controller weights
        if os.path.exists(self.controller_weights):
            model.load_weights(self.controller_weights)

        # Each stored input is a flat vector; give it the leading axis of length 1
        # that the controller input expects. Inferring the second dimension avoids
        # hard-coding MAX_ARCHITECTURE_LENGTH - 1.
        train_x = [row[0].reshape(1, -1) for row in to_train]
        train_y = [row[1] for row in to_train]

        # train the controller
        print("TRAINING CONTROLLER...")
        model.fit({'main_input': np.array(train_x)},
                  {'main_output': np.array(train_y)},
                  epochs=nb_epochs,
                  batch_size=controller_batch_size,
                  verbose=0)

        # save controller weights
        model.save_weights(self.controller_weights)

        # Count this training round and sync the target model when due.
        self.target_update_counter += 1
        if self.target_update_counter > self.UPDATE_TARGET_EVERY:
            print("TRANSFERRING WEIGHTS...")
            target_model.set_weights(model.get_weights())
            # Reinitialize target update counter value to 0
            self.target_update_counter = 0



    def train_hybrid_model(self, model, target_model, x_data, y_data, val_accuracy, pred_accuracy, loss_func, controller_batch_size, nb_epochs):
        """Store the new samples and train the hybrid controller on the best stored one.

        Every ``(x_data[i][0], y_data[i], val_accuracy[i], pred_accuracy[i])`` row
        is appended to ``self.replay_memory``. Nothing else happens until the
        memory holds at least ``self.MIN_REPLAY_MEMORY_SIZE`` rows. After that the
        memory is sorted by validation accuracy, best first (the previous
        ascending sort selected the worst row), and the model is fitted on the
        top row.

        Args:
            model: hybrid controller model to compile, fit and save.
            target_model: model that receives ``model``'s weights once the update
                counter exceeds ``self.UPDATE_TARGET_EVERY``. The weights used to
                be written to ``self.target_model``, ignoring this argument.
            x_data: controller inputs; ``x_data[i][0]`` is stored per sample.
            y_data: controller targets, one per sample.
            val_accuracy: validation accuracy per sample (the sort key).
            pred_accuracy: predicted accuracy per sample.
            loss_func: loss used for the ``main_output`` head (``predictor_output``
                uses ``'mse'``).
            controller_batch_size: batch size passed to ``model.fit``.
            nb_epochs: number of epochs passed to ``model.fit``.

        Returns:
            None.
        """
        for i in range(len(x_data)):
            self.replay_memory.append([x_data[i][0], y_data[i], val_accuracy[i], pred_accuracy[i]])

        # Not enough experience collected yet.
        if len(self.replay_memory) < self.MIN_REPLAY_MEMORY_SIZE:
            return

        # Highest validation accuracy first; only the single best row is trained on.
        self.replay_memory = sorted(self.replay_memory, key=itemgetter(2), reverse=True)
        to_train = self.replay_memory[:1]

        # NOTE(review): the `lr`/`decay` keyword names are kept as they were; newer
        # Keras releases may expect `learning_rate` -- confirm against the installed version.
        optim = getattr(tensorflow.keras.optimizers, self.controller_optimizer)(lr=self.controller_lr, decay=self.controller_decay, clipnorm=1.0)

        model.compile(optimizer=optim,
                      loss={'main_output': loss_func, 'predictor_output': 'mse'},
                      loss_weights={'main_output': 1, 'predictor_output': 1})

        if os.path.exists(self.controller_weights):
            model.load_weights(self.controller_weights)

        # Each stored input is a flat vector; give it the leading axis of length 1
        # that the controller input expects. Inferring the second dimension avoids
        # hard-coding MAX_ARCHITECTURE_LENGTH - 1.
        train_x = [row[0].reshape(1, -1) for row in to_train]
        train_y = [row[1] for row in to_train]
        # NOTE(review): the predictor head is regressed onto the stored PREDICTED
        # accuracy (row[3]), not the measured validation accuracy (row[2]) --
        # confirm this is intended.
        pred_target = [row[3] for row in to_train]

        print("TRAINING CONTROLLER...")
        model.fit({'main_input': np.array(train_x)},
                  {'main_output': np.array(train_y),
                   'predictor_output': np.array(pred_target)},
                  epochs=nb_epochs,
                  batch_size=controller_batch_size,
                  verbose=0)

        model.save_weights(self.controller_weights)

        # Count this training round and sync the target model when due.
        self.target_update_counter += 1
        if self.target_update_counter > self.UPDATE_TARGET_EVERY:
            print("TRANSFERRING WEIGHTS...")
            target_model.set_weights(model.get_weights())
            # Reinitialize target update counter value to 0
            self.target_update_counter = 0



    def get_predicted_accuracies_hybrid_model(self, model, seqs):
        """Return the hybrid controller's predicted accuracy for each sequence.

        Args:
            model: hybrid controller whose ``predict`` yields two outputs.
            seqs: iterable of architecture sequences.

        Returns:
            List with one predicted accuracy per sequence, in input order.
        """
        def predict_single(seq):
            # Pad to max_len, then drop the last position to form the controller input.
            padded = pad_sequences([seq], maxlen=self.max_len, padding='post')
            controller_input = padded[:, :-1].reshape(len(padded), 1, self.max_len - 1)
            # Both outputs are reduced to their [0][0] entry; only the second one
            # (the predictor output) is used.
            _, accuracy_entry = [output[0][0] for output in model.predict(controller_input)]
            return accuracy_entry[0]

        return [predict_single(seq) for seq in seqs]

In [None]:
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 23 18:09:06 2022

@author: AnshumaanChauhan
"""

# -*- coding: utf-8 -*-
"""
Created on Tue Feb 22 09:58:43 2022

@author: AnshumaanChauhan
"""

import os
import shutil
import pickle
import numpy as np
from itertools import groupby
from matplotlib import pyplot as plt

# from CNNCONSTANTS import *
# from CNNGenerator import CNNSearchSpace


########################################################
#                   DATA PROCESSING                    #
########################################################


def unison_shuffled_copies(a, b):
    """Reorder ``a`` and ``b`` with one shared random permutation.

    Both inputs must have the same length (checked with ``assert``) and must
    support indexing by an integer array. The permutation is drawn from
    NumPy's global generator.

    Returns:
        Tuple ``(a_shuffled, b_shuffled)`` whose rows stay aligned.
    """
    item_count = len(a)
    assert item_count == len(b)
    order = np.random.permutation(item_count)
    return a[order], b[order]


########################################################
#                       LOGGING                        #
########################################################


def clean_log():
    """Delete every regular file directly inside ``LOGS2``; sub-directories are kept.

    The directory is created first when it is missing, so the function also
    works on a fresh working directory instead of raising ``FileNotFoundError``.
    """
    log_directory = 'LOGS2'
    os.makedirs(log_directory, exist_ok=True)

    for entry in os.listdir(log_directory):
        entry_path = os.path.join(log_directory, entry)
        if os.path.isfile(entry_path):
            os.remove(entry_path)


def log_event():
    """Archive the current run into a fresh ``LOGS2/event<N>`` directory.

    A random id below 10000 is drawn until ``LOGS2/event<id>`` does not exist
    yet. That directory is created and every regular file directly inside
    ``LOGS2`` is moved into it.

    The search used to start from the path ``'LOGS'``. When no ``LOGS``
    directory existed, the files were moved into a newly created ``LOGS``
    instead of an event folder under ``LOGS2``, where ``get_latest_event_id``
    looks for them.
    """
    log_directory = 'LOGS2'
    os.makedirs(log_directory, exist_ok=True)

    # Keep drawing ids until an unused event directory name is found.
    dest = 'LOGS2/event{}'.format(np.random.randint(10000))
    while os.path.exists(dest):
        dest = 'LOGS2/event{}'.format(np.random.randint(10000))
    os.mkdir(dest)

    for file in os.listdir(log_directory):
        source = 'LOGS2/{}'.format(file)
        # Only plain files are archived; existing event folders stay in place.
        if os.path.isfile(source):
            shutil.move(source, dest)


def get_latest_event_id():
    """Return the numeric id of the most recently modified sub-directory of ``LOGS2``.

    The id is obtained by stripping the ``LOGS2/event`` prefix from the
    directory path and converting the rest to ``int``.
    """
    candidate_dirs = []
    for name in os.listdir('LOGS2'):
        path = 'LOGS2/' + name
        if os.path.isdir(path):
            candidate_dirs.append(path)

    # Newest by modification time.
    newest = max(candidate_dirs, key=os.path.getmtime)
    event_suffix = newest.replace('LOGS2/event', '')
    return int(event_suffix)


########################################################
#                 RESULTS PROCESSING                   #
########################################################


def load_nas_data():
    """Unpickle and return ``nas_data.pkl`` from the latest event directory.

    The event is chosen by ``get_latest_event_id``. The file is read with
    ``pickle``, so it should only ever be a log written by this notebook.
    """
    data_file = 'LOGS2/event{}/nas_data.pkl'.format(get_latest_event_id())
    with open(data_file, 'rb') as handle:
        return pickle.load(handle)


def sort_search_data(nas_data):
    """Return the search records ordered by their second field, highest first.

    Args:
        nas_data: list of records whose element ``[1]`` is the value sorted on.

    Returns:
        A new list; ``nas_data`` itself is left unmodified.
    """
    descending_order = np.argsort([record[1] for record in nas_data])[::-1]
    return [nas_data[position] for position in descending_order]

########################################################
#                EVALUATION AND PLOTS                  #
########################################################

def get_top_n_architectures(n):
    """Print the ``n`` best architectures of the latest logged search.

    The records are loaded with ``load_nas_data``, ranked with
    ``sort_search_data`` and decoded through a ``CNNSearchSpace`` instance.
    """
    ranked_records = sort_search_data(load_nas_data())
    search_space = CNNSearchSpace(TARGET_CLASSES)

    print('Top {} Architectures:'.format(n))
    for record in ranked_records[:n]:
        sequence, val_accuracy = record[0], record[1]
        print('Architecture', search_space.decode_sequence(sequence))
        print('Validation Accuracy:', val_accuracy)


def get_nas_accuracy_plot():
    """Plot the logged accuracy of every record against its position in the log."""
    records = load_nas_data()
    positions = np.arange(len(records))
    plt.plot(positions, [record[1] for record in records])
    plt.show()


def get_accuracy_distribution():
    """Show a bar chart of how many logged records fall on each integer accuracy percent.

    Each record's accuracy (element ``[1]``) is multiplied by 100 and truncated
    with ``int``. The distinct values and their counts are then plotted.

    A previous extra call to ``get_latest_event_id`` was dropped: its result was
    never used and ``load_nas_data`` resolves the latest event itself.
    """
    data = load_nas_data()
    percent_values = [int(record[1] * 100.) for record in data]

    # np.unique returns the distinct values in ascending order with their counts.
    distinct_values, counts = np.unique(percent_values, return_counts=True)
    plt.bar(list(distinct_values), list(counts))
    plt.show()

In [None]:
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 22 14:41:33 2022

@author: AnshumaanChauhan
"""

# import CNNCONSTANTS
import pickle
import keras.backend as K
from tensorflow.keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences
import tensorflow as tf
# from NASutils import *
# from CNNCONSTANTS import *
# from DQNController import DQNAgent
# from CNNGenerator import CNNGenerator
class CNNNAS(DQNAgent):


    def __init__(self, x, y):
        """Set up the search: data, NAS settings, log file, generator and controllers.

        Args:
            x: training inputs handed to every sampled architecture.
            y: training targets matching ``x``.
        """
        self.x, self.y = x, y

        # Search settings taken from the notebook-level constants.
        self.target_classes = TARGET_CLASSES
        self.controller_sampling_epochs = CONTROLLER_SAMPLING_EPOCHS
        self.samples_per_controller_epoch = SAMPLES_PER_CONTROLLER_EPOCH
        self.controller_train_epochs = CONTROLLER_TRAINING_EPOCHS
        self.architecture_train_epochs = ARCHITECTURE_TRAINING_EPOCHS
        self.controller_loss_alpha = CONTROLLER_LOSS_ALPHA

        # One record per evaluated architecture; pickled to nas_data_log by search().
        self.data = []
        self.nas_data_log = 'LOGS2/nas_data.pkl'
        clean_log()

        super().__init__()

        self.CNNGenerator = CNNGenerator()

        # self.data is still empty at this point, so this evaluates to 0.
        self.controller_batch_size = len(self.data)
        self.controller_input_shape = (1, MAX_ARCHITECTURE_LENGTH - 1)

        # The main and target controllers are built by the same factory.
        if self.use_predictor:
            build_controller = self.create_hybrid_model
        else:
            build_controller = self.create_control_model
        self.controller_model = build_controller(self.controller_input_shape, self.controller_batch_size)
        self.target_model = build_controller(self.controller_input_shape, self.controller_batch_size)

    def create_architecture(self, sequence):
        if self.target_classes == 2:
            self.CNNGenerator.loss_func = 'binary_crossentropy'
        model = self.CNNGenerator.create_model(sequence, np.shape(self.x[0]))
        #models = self.CNNGenerator.compile_model(model)
        if model==None:
            return model
        models = self.CNNGenerator.compile_model(model)
        return models

    def train_architecture(self, model):
        """Train ``model`` on a freshly shuffled copy of the stored data.

        Returns:
            Whatever ``CNNGenerator.train_model`` returns for
            ``self.architecture_train_epochs`` epochs.
        """
        shuffled_x, shuffled_y = unison_shuffled_copies(self.x, self.y)
        return self.CNNGenerator.train_model(model, shuffled_x, shuffled_y, self.architecture_train_epochs)

    def append_model_metrics(self, sequence, history, pred_accuracy=None):
        if len(history.history['val_accuracy']) == 1:
            if pred_accuracy:
                self.data.append([sequence,
                                  history.history['val_accuracy'][0],
                                  pred_accuracy])
                print('predicted accuracy: ',pred_accuracy)
            else:
                self.data.append([sequence,
                                  history.history['val_accuracy'][0]])
            print('validation accuracy: ', history.history['val_accuracy'][0])
        else:
            val_acc = np.ma.average(history.history['val_accuracy'],
                                    weights=np.arange(1, len(history.history['val_accuracy']) + 1),
                                    axis=-1)
            if pred_accuracy:
                self.data.append([sequence,
                                  val_acc,
                                  pred_accuracy])
            else:
                self.data.append([sequence,
                                  val_acc])
            print('validation accuracy: ', val_acc)

    def prepare_controller_data(self, sequences):
        """Turn sampled sequences into controller inputs, targets and accuracies.

        Returns:
            Tuple ``(xc, yc, val_acc_target)``. ``xc`` holds all but the last
            position of each zero-padded sequence, reshaped to
            ``(n, 1, max_len - 1)``. ``yc`` is the one-hot encoding of the last
            position. ``val_acc_target`` is the second field of every record in
            ``self.data``.
        """
        # Sequences shorter than max_len are padded with zeros at the end.
        padded = pad_sequences(sequences, maxlen=self.max_len, padding='post')
        sequence_count = len(padded)

        prefix, last_token = padded[:, :-1], padded[:, -1]
        xc = prefix.reshape(sequence_count, 1, self.max_len - 1)
        yc = to_categorical(last_token, self.controller_classes)

        val_acc_target = [record[1] for record in self.data]
        return xc, yc, val_acc_target

    def get_discounted_reward(self, rewards):
        discounted_r = np.zeros_like(rewards, dtype=np.float32)
        for t in range(len(rewards)):
            running_add = 0.
            exp = 0.
            for r in rewards[t:]:
                running_add += self.controller_loss_alpha**exp * r
                exp += 1
            discounted_r[t] = running_add
        discounted_r = (discounted_r - discounted_r.mean()) / discounted_r.std()
        return discounted_r

    def custom_loss(self, target, output):
        """Controller loss: negative log of the output scaled by discounted rewards.

        The rewards are the accuracies of the last
        ``self.samples_per_controller_epoch`` records in ``self.data`` minus a
        fixed baseline of 0.5, passed through ``get_discounted_reward``.
        ``target`` is not used.
        """
        baseline = 0.5
        sample_count = self.samples_per_controller_epoch
        recent_records = self.data[-sample_count:]
        reward = np.array([record[1] - baseline for record in recent_records]).reshape(sample_count, 1)
        discounted_reward = self.get_discounted_reward(reward)
        return - K.log(output) * discounted_reward[:, None]

    def train_controller(self, model, x, y, val_accuracy, pred_accuracy=None):
        if self.use_predictor:
            self.train_hybrid_model(model,
                                    self.target_model,
                                    x,
                                    y,
                                    val_accuracy,
                                    pred_accuracy,
                                    self.custom_loss,
                                    len(self.data),
                                    self.controller_train_epochs)
        else:
            self.train_control_model(model,
                                     self.target_model,
                                     x,
                                     y,
                                     val_accuracy,
                                     self.custom_loss,
                                     len(self.data),
                                     self.controller_train_epochs)

    def search(self):
        """Run the full search loop and return the collected records.

        For each controller epoch: sample sequences, build and train every
        architecture, record its validation accuracy (``-10.0`` when no model
        could be built), then train the controller on the epoch's samples.
        After the last epoch ``self.data`` is pickled to ``self.nas_data_log``
        and the log files are archived with ``log_event``.

        Returns:
            ``self.data``.
        """
        failed_accuracy = -10.0
        for controller_epoch in range(self.controller_sampling_epochs):
            print('------------------------------------------------------------------')
            print('                       CONTROLLER EPOCH: {}'.format(controller_epoch))
            print('------------------------------------------------------------------')

            sequences = self.sample_architecture_sequences(self.controller_model, self.samples_per_controller_epoch)
            if self.use_predictor:
                pred_accuracies = self.get_predicted_accuracies_hybrid_model(self.controller_model, sequences)

            for i, sequence in enumerate(sequences):
                print('Architecture: ', self.decode_sequence(sequence))
                model = self.create_architecture(sequence)

                if model == None:
                    # No model could be built: record the fixed penalty instead of training.
                    failed_record = [sequence, failed_accuracy]
                    if self.use_predictor:
                        failed_record.append(pred_accuracies[i])
                    self.data.append(failed_record)
                    print('validation accuracy: ', failed_accuracy)
                    continue

                history = self.train_architecture(model)
                if self.use_predictor:
                    self.append_model_metrics(sequence, history, pred_accuracies[i])
                else:
                    self.append_model_metrics(sequence, history)
                print('------------------------------------------------------')

            xc, yc, val_acc_target = self.prepare_controller_data(sequences)
            # Only the accuracies of this epoch's samples are handed to the controller.
            epoch_val_acc = val_acc_target[-self.samples_per_controller_epoch:]
            if self.use_predictor:
                self.train_controller(self.controller_model, xc, yc, epoch_val_acc, pred_accuracies)
            else:
                self.train_controller(self.controller_model, xc, yc, epoch_val_acc)

        with open(self.nas_data_log, 'wb') as log_file:
            pickle.dump(self.data, log_file)
        log_event()
        return self.data

In [None]:
import os

def clean_log():
    """Remove every regular file directly inside ``LOGS2``, creating the directory when absent.

    Sub-directories of ``LOGS2`` are left untouched.
    """
    log_directory = 'LOGS2'

    # Create the directory if it doesn't exist
    if not os.path.exists(log_directory):
        os.makedirs(log_directory)

    entry_paths = [os.path.join(log_directory, name) for name in os.listdir(log_directory)]
    for entry_path in entry_paths:
        if os.path.isfile(entry_path):
            os.remove(entry_path)

In [None]:
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 22 14:40:18 2022

@author: AnshumaanChauhan
"""

# import CNNCONSTANTS
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
# from NASutils import *
# from cnnnas import CNNNAS
# from CNNCONSTANTS import TOP_N
from keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
#Use MNIST for the time being
#data = pd.read_csv('DATASETS/wine-quality.csv')
#x = data.drop('quality_label', axis=1, inplace=False).values
#y = pd.get_dummies(data['quality_label']).values

# Load MNIST and add a trailing channel axis so each image is 28x28x1.
(train_X, train_y), (test_X, test_y) = mnist.load_data()
train_X = train_X.reshape((-1, 28, 28, 1))
train_y = to_categorical(train_y, num_classes=10)

# Only the first two samples are handed to the search.
train_new_X, train_new_y = train_X[:2], train_y[:2]

nas_object = CNNNAS(train_new_X, train_new_y)
data = nas_object.search()
get_top_n_architectures(TOP_N)

Initializing shared weights dictionary...
------------------------------------------------------------------
                       CONTROLLER EPOCH: 0
------------------------------------------------------------------
GENERATING ARCHITECTURE SAMPLES...
------------------------------------------------------
Architecture:  [('conv2d', 128, 7, 3, 'same', 'RandomUniform', 'RandomUniform', 'l2'), ('conv2d', 160, 5, 2, 'same', 'HeUniform', 'RandomNormal', 'l1'), ('separableconv2d', 64, 7, 2, 'valid', 'HeNormal', 'RandomNormal', 'l1_l2'), ('separableconv2d', 224, 5, 3, 'same', 'HeUniform', 'RandomNormal', 'l2'), ('conv2d', 224, 5, 2, 'same', 'RandomNormal', 'HeNormal', 'l2'), ('conv2d', 256, 5, 3, 'same', 'RandomUniform', 'RandomNormal', 'l1'), 'Flatten', (10, 'softmax')]
validation accuracy:  -10.0
Architecture:  [('depthwiseconv2d', 3, 3, 'same', 'RandomUniform', 'RandomNormal', 'l1_l2'), ('separableconv2d', 128, 7, 2, 'valid', 'RandomNormal', 'RandomNormal', 'l1_l2'), ('conv2dtranspose', 

KeyboardInterrupt: ignored