In [1]:
import os

import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from keras_tuner import HyperParameters
from pyspark.sql import SparkSession

import autokeras as ak

from cerebro.backend import SparkBackend
from cerebro.nas.hphpmodel import HyperHyperModel
from cerebro.storage import LocalStore

In [2]:
# Search graph: four structured-data inputs are fed together into one
# StructuredDataBlock, and its output goes into a ClassificationHead.
input_node = [ak.StructuredDataInput() for _ in range(4)]
output_node = ak.StructuredDataBlock(categorical_encoding=True)(input_node)
output_node = ak.ClassificationHead()(output_node)

# Wrap the graph in a HyperHyperModel limited to three trials.
am = HyperHyperModel(
    inputs=input_node,
    outputs=output_node,
    overwrite=True,
    max_trials=3,
)

In [3]:
# Spark session used by the Cerebro backend. getOrCreate() hands back an
# already-running session when there is one, so re-running this cell is safe.
# NOTE(review): the app name says "Linear Regression" although the graph above
# ends in a ClassificationHead; it is only a label, so it is left unchanged.
spark = (
    SparkSession.builder
    .appName("Linear Regression Model")
    .getOrCreate()
)
sc = spark.sparkContext

# Directory handed to LocalStore as its prefix path. Set the
# CEREBRO_STORE_PATH environment variable to run on another machine; the
# fallback is the original author's local directory.
STORE_PATH = os.environ.get(
    "CEREBRO_STORE_PATH",
    "/Users/zijian/Desktop/ucsd/cse234/project/cerebro-system/experiments",
)

backend = SparkBackend(spark_context=sc, num_workers=1)
store = LocalStore(prefix_path=STORE_PATH)

# Bind the backend, the store and the dataset column names to the model.
# The four feature columns line up with the four inputs of the search graph.
am.resource_bind(
    backend=backend,
    store=store,
    feature_columns=['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm'],
    label_columns=['Species'],
)

21/11/20 21:58:37 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
21/11/20 21:58:37 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.


CEREBRO => Time: 2021-11-20 21:58:38, Running 1 Workers


In [4]:
# Location of the cleaned Iris CSV. Set the IRIS_CSV_PATH environment variable
# to run on another machine; the fallback is the original author's local file.
IRIS_CSV_PATH = os.environ.get(
    "IRIS_CSV_PATH",
    "/Users/zijian/Desktop/ucsd/cse234/project/cerebro-system/Iris_clean.csv",
)

# Read the file through Spark (header row, inferred column types) and convert
# it straight to pandas, so `df` is only ever bound to a pandas DataFrame.
df = spark.read.csv(IRIS_CSV_PATH, header=True, inferSchema=True).toPandas()

# 80/20 split: sample 80% of the rows with a fixed seed for reproducibility;
# the rows that were not sampled form the test set.
train = df.sample(frac=0.8, random_state=200)
test = df.drop(train.index)
assert len(train) + len(test) == len(df), "train/test split lost or duplicated rows"

df.head(10)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
5,5.4,3.9,1.7,0.4,0
6,4.6,3.4,1.4,0.3,0
7,5.0,3.4,1.5,0.2,0
8,4.4,2.9,1.4,0.2,0
9,4.9,3.1,1.5,0.1,0


In [5]:
# Column dtypes of the pandas DataFrame loaded above.
df.dtypes

SepalLengthCm    float64
SepalWidthCm     float64
PetalLengthCm    float64
PetalWidthCm     float64
Species            int32
dtype: object

In [6]:
# Turn the training frame into a NumPy array and split it column-wise:
# everything but the last column is a feature, the last column is the label
# (kept 2-D by re-adding a trailing axis).
train_np = np.array(train)
x_train, y_train = train_np[:, :-1], np.expand_dims(train_np[:, -1], axis=1)

# Show both shapes, one per line.
print(x_train.shape, y_train.shape, sep="\n")

(120, 4)
(120, 1)


In [7]:
# Custom hyperparameters (learning rate and batch size) handed to the tuner
# in addition to whatever the model graph itself contributes.
cuz_hps = HyperParameters()
cuz_hps.Choice('learning_rate', values=[0.1,0.01])
cuz_hps.Choice('batch_size', values=[32,64,128])

# Bind a random-search tuner that carries the custom hyperparameters, then run
# test_tuner_space on the training arrays.
# NOTE(review): test_tuner_space is defined outside this notebook; presumably
# it populates the search space from the data — confirm.
am.tuner_bind(tuner="randomsearch", hyperparameters=cuz_hps)
am.test_tuner_space(x=x_train, y=y_train)

# Print the resulting search space.
am.tuner.search_space_summary()

INFO:tensorflow:Reloading Oracle from existing project ./test/oracle.json


2021-11-20 21:58:41.781456: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-11-20 21:58:41.781710: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Search space summary
Default search space size: 10
learning_rate (Choice)
{'default': 0.1, 'conditions': [], 'values': [0.1, 0.01], 'ordered': True}
batch_size (Choice)
{'default': 32, 'conditions': [], 'values': [32, 64, 128], 'ordered': True}
structured_data_block_1/normalize (Boolean)
{'default': False, 'conditions': []}
structured_data_block_1/dense_block_1/use_batchnorm (Boolean)
{'default': False, 'conditions': []}
structured_data_block_1/dense_block_1/num_layers (Choice)
{'default': 2, 'conditions': [], 'values': [1, 2, 3], 'ordered': True}
structured_data_block_1/dense_block_1/units_0 (Choice)
{'default': 32, 'conditions': [], 'values': [16, 32, 64, 128, 256, 512, 1024], 'ordered': True}
structured_data_block_1/dense_block_1/dropout (Choice)
{'default': 0.0, 'conditions': [], 'values': [0.0, 0.25, 0.5], 'ordered': True}
structured_data_block_1/dense_block_1/units_1 (Choice)
{'default': 32, 'conditions': [], 'values': [16, 32, 64, 128, 256, 512, 1024], 'ordered': True}
classific

In [8]:
# Short handle to the tuner bound to `am`; later cells use it directly.
tuner = am.tuner

# Request trials from the oracle (argument 2) and print the hyperparameter
# values of each returned trial.
trials = tuner.oracle.create_trials(2)
for trial in trials:
    print(trial.hyperparameters.values)

{'learning_rate': 0.1, 'batch_size': 128, 'structured_data_block_1/normalize': True, 'structured_data_block_1/dense_block_1/use_batchnorm': True, 'structured_data_block_1/dense_block_1/num_layers': 3, 'structured_data_block_1/dense_block_1/units_0': 16, 'structured_data_block_1/dense_block_1/dropout': 0.5, 'structured_data_block_1/dense_block_1/units_1': 1024, 'classification_head_1/dropout': 0.25, 'optimizer': 'adam'}
{'learning_rate': 0.01, 'batch_size': 32, 'structured_data_block_1/normalize': True, 'structured_data_block_1/dense_block_1/use_batchnorm': True, 'structured_data_block_1/dense_block_1/num_layers': 3, 'structured_data_block_1/dense_block_1/units_0': 32, 'structured_data_block_1/dense_block_1/dropout': 0.5, 'structured_data_block_1/dense_block_1/units_1': 128, 'classification_head_1/dropout': 0.5, 'optimizer': 'adam'}


In [9]:
# Create one more trial, keyed by this tuner's id, and display the
# hyperparameter values it was given.
kt_trial = tuner.oracle.create_trial(tuner.tuner_id)
kt_trial.hyperparameters.values

{'learning_rate': 0.01,
 'batch_size': 128,
 'structured_data_block_1/normalize': False,
 'structured_data_block_1/dense_block_1/use_batchnorm': True,
 'structured_data_block_1/dense_block_1/num_layers': 2,
 'structured_data_block_1/dense_block_1/units_0': 512,
 'structured_data_block_1/dense_block_1/dropout': 0.5,
 'structured_data_block_1/dense_block_1/units_1': 128,
 'classification_head_1/dropout': 0.0,
 'optimizer': 'adam_weight_decay'}

In [10]:
# Build a model from the hyperparameters of the trial created above.
model = tuner.hypermodel.build(kt_trial.hyperparameters)

In [11]:
# Inspect the optimizer attached to the built model.
model.optimizer

<autokeras.keras_layers.AdamWeightDecay at 0x17f9b9810>

In [12]:
# Inspect the loss configuration through the wrapped hypermodel's private
# _get_loss() helper.
tuner.hypermodel.hypermodel._get_loss()

{'classification_head_1': <tensorflow.python.keras.losses.CategoricalCrossentropy at 0x17f754b10>}

In [13]:
# Inspect the metrics through the private _get_metrics() helper (values only).
tuner.hypermodel.hypermodel._get_metrics().values()

dict_values([['accuracy']])

In [14]:
# Batch size stored on the wrapped hypermodel.
# NOTE(review): this is read from the hypermodel, not from the 'batch_size'
# value sampled for kt_trial — confirm which one training actually uses.
tuner.hypermodel.hypermodel.batch_size

32

In [15]:
# Dump the built model's configuration (layers and their settings).
model.get_config()

{'name': 'model',
 'layers': [{'class_name': 'InputLayer',
   'config': {'batch_input_shape': (None, 1),
    'dtype': 'float64',
    'sparse': False,
    'ragged': False,
    'name': 'input_1'},
   'name': 'input_1',
   'inbound_nodes': []},
  {'class_name': 'Custom>MultiCategoryEncoding',
   'config': {'name': 'multi_category_encoding',
    'trainable': True,
    'dtype': 'float32',
    'encoding': ListWrapper(['none'])},
   'name': 'multi_category_encoding',
   'inbound_nodes': [[['input_1', 0, 0, {}]]]},
  {'class_name': 'Dense',
   'config': {'name': 'dense',
    'trainable': True,
    'dtype': 'float32',
    'units': 512,
    'activation': 'linear',
    'use_bias': True,
    'kernel_initializer': {'class_name': 'GlorotUniform',
     'config': {'seed': None}},
    'bias_initializer': {'class_name': 'Zeros', 'config': {}},
    'kernel_regularizer': None,
    'bias_regularizer': None,
    'activity_regularizer': None,
    'kernel_constraint': None,
    'bias_constraint': None},
   'n

In [16]:
# Show the custom objects currently registered globally with Keras.
tf.keras.utils.get_custom_objects()

{'Custom>CastToFloat32': autokeras.keras_layers.CastToFloat32,
 'Custom>ExpandLastDim': autokeras.keras_layers.ExpandLastDim,
 'Custom>MultiCategoryEncoding': autokeras.keras_layers.MultiCategoryEncoding,
 'Custom>BertTokenizer': autokeras.keras_layers.BertTokenizer,
 'Custom>BertEncoder': autokeras.keras_layers.BertEncoder,
 'Custom>AdamWeightDecay': autokeras.keras_layers.AdamWeightDecay,
 'Custom>WarmUp': autokeras.keras_layers.WarmUp,
 'Custom>gelu': <function autokeras.keras_layers.gelu(x)>,
 'Custom>OnDeviceEmbedding': autokeras.keras_layers.OnDeviceEmbedding,
 'Custom>PositionEmbedding': autokeras.keras_layers.PositionEmbedding,
 'Custom>SelfAttentionMask': autokeras.keras_layers.SelfAttentionMask,
 'Custom>Transformer': autokeras.keras_layers.Transformer,
 'Custom>MultiHeadAttention': autokeras.keras_layers.MultiHeadAttention,
 'Custom>DenseEinsum': autokeras.keras_layers.DenseEinsum,
 'Custom>MaskedSoftmax': autokeras.keras_layers.MaskedSoftmax}

In [19]:
# Split the feature matrix into one single-column array per feature, matching
# the model's separate inputs.
xtmp = [x_train[:, col:col + 1] for col in range(x_train.shape[1])]

# Convert the arrays with the model's private _convert_to_dataset helper
# (no validation data, batch size 32).
dataset, validation_data = am._convert_to_dataset(
    x=xtmp,
    y=y_train,
    validation_data=None,
    batch_size=32,
)

# Turn the trials created earlier into estimators for this dataset.
ests = tuner.trials2estimators(trials, dataset)

2021-11-20 22:00:19.559444: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)


In [20]:
# Custom objects carried by the first estimator.
ests[0].getCustomObjects()

{'Custom>CastToFloat32': autokeras.keras_layers.CastToFloat32,
 'Custom>ExpandLastDim': autokeras.keras_layers.ExpandLastDim,
 'Custom>MultiCategoryEncoding': autokeras.keras_layers.MultiCategoryEncoding,
 'Custom>BertTokenizer': autokeras.keras_layers.BertTokenizer,
 'Custom>BertEncoder': autokeras.keras_layers.BertEncoder,
 'Custom>AdamWeightDecay': autokeras.keras_layers.AdamWeightDecay,
 'Custom>WarmUp': autokeras.keras_layers.WarmUp,
 'Custom>gelu': <function autokeras.keras_layers.gelu(x)>,
 'Custom>OnDeviceEmbedding': autokeras.keras_layers.OnDeviceEmbedding,
 'Custom>PositionEmbedding': autokeras.keras_layers.PositionEmbedding,
 'Custom>SelfAttentionMask': autokeras.keras_layers.SelfAttentionMask,
 'Custom>Transformer': autokeras.keras_layers.Transformer,
 'Custom>MultiHeadAttention': autokeras.keras_layers.MultiHeadAttention,
 'Custom>DenseEinsum': autokeras.keras_layers.DenseEinsum,
 'Custom>MaskedSoftmax': autokeras.keras_layers.MaskedSoftmax}

In [21]:
# Model shapes reported by the first estimator.
ests[0].get_model_shapes()

([[-1, 1], [-1, 1], [-1, 1], [-1, 1]], [[-1, 1]])

In [22]:
# Fetch the model held by the first estimator and display it.
# NOTE(review): this rebinds `model`, replacing the model built earlier from
# kt_trial; the cells below inspect the estimator's model.
model = ests[0].getModel()
model

<tensorflow.python.keras.engine.functional.Functional at 0x17f926090>

In [23]:
# Input tensors of the estimator's model.
model.inputs

[<KerasTensor: shape=(None, 1) dtype=float64 (created by layer 'input_1')>,
 <KerasTensor: shape=(None, 1) dtype=float64 (created by layer 'input_2')>,
 <KerasTensor: shape=(None, 1) dtype=float64 (created by layer 'input_3')>,
 <KerasTensor: shape=(None, 1) dtype=float64 (created by layer 'input_4')>]

In [24]:
# Output tensors of the estimator's model.
model.outputs

[<KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'classification_head_1')>]