In [1]:
import os

import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from keras_tuner import HyperParameters

import autokeras as ak

from cerebro.nas.hphpmodel import HyperHyperModel

In [2]:
# Feature column names; one AutoKeras input node is created per entry below.
feature_columns = [
    'SepalLengthCm',
    'SepalWidthCm',
    'PetalLengthCm',
    'PetalWidthCm',
]

# Build the AutoKeras graph: per-column structured inputs -> structured data
# block -> classification head. Construction order matches the data flow.
input_node = [ak.StructuredDataInput() for _ in feature_columns]
block_output = ak.StructuredDataBlock()(input_node)
output_node = ak.ClassificationHead()(block_output)

# Wrap the graph in cerebro's HyperHyperModel.
# NOTE(review): overwrite/max_trials presumably follow the usual
# AutoKeras/Keras Tuner meaning (discard prior results / cap the number of
# trials) -- confirm against HyperHyperModel.
am = HyperHyperModel(
    inputs=input_node,
    outputs=output_node,
    overwrite=True,
    max_trials=3,
)

In [3]:
from pyspark.sql import SparkSession

from cerebro.backend import SparkBackend
from cerebro.storage import LocalStore

# Directory passed to LocalStore as its prefix path. Set the
# CEREBRO_EXPERIMENTS_DIR environment variable to run this notebook on another
# machine; the default is the path the notebook was originally executed with,
# so the recorded outputs below remain accurate.
EXPERIMENTS_DIR = os.environ.get(
    "CEREBRO_EXPERIMENTS_DIR",
    "/Users/zijian/Desktop/ucsd/cse234/project/cerebro-system/experiments",
)

# Build (or reuse) the SparkSession and take its SparkContext for the backend.
spark = (
    SparkSession.builder
    .appName("Iris test")
    .getOrCreate()
)
sc = spark.sparkContext

# Single-worker Spark backend plus a local-filesystem store.
backend = SparkBackend(spark_context=sc, num_workers=1)
store = LocalStore(prefix_path=EXPERIMENTS_DIR)

# Attach the backend, the store and the feature/label column names to the
# model defined earlier in the notebook.
am.resource_bind(
    backend=backend,
    store=store,
    feature_columns=feature_columns,
    label_columns=['Species'],
)

21/11/20 22:01:57 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


CEREBRO => Time: 2021-11-20 22:01:59, Running 1 Workers


In [4]:
# Location of the Iris CSV. Set the IRIS_CSV_PATH environment variable to
# point at a copy on another machine; the default is the path the notebook was
# originally executed with.
IRIS_CSV_PATH = os.environ.get(
    "IRIS_CSV_PATH",
    "/Users/zijian/Desktop/ucsd/cse234/project/cerebro-system/Iris_clean.csv",
)

# Fixed seed so the 80/20 split is the same on every Restart & Run All.
SPLIT_SEED = 42

# Read the CSV using its header row for column names and let Spark infer types.
df = spark.read.csv(IRIS_CSV_PATH, header=True, inferSchema=True)

train_df, test_df = df.randomSplit([0.8, 0.2], seed=SPLIT_SEED)

# Preview the first rows of the full (unsplit) DataFrame.
df.head(10)

[Row(SepalLengthCm=5.1, SepalWidthCm=3.5, PetalLengthCm=1.4, PetalWidthCm=0.2, Species=0),
 Row(SepalLengthCm=4.9, SepalWidthCm=3.0, PetalLengthCm=1.4, PetalWidthCm=0.2, Species=0),
 Row(SepalLengthCm=4.7, SepalWidthCm=3.2, PetalLengthCm=1.3, PetalWidthCm=0.2, Species=0),
 Row(SepalLengthCm=4.6, SepalWidthCm=3.1, PetalLengthCm=1.5, PetalWidthCm=0.2, Species=0),
 Row(SepalLengthCm=5.0, SepalWidthCm=3.6, PetalLengthCm=1.4, PetalWidthCm=0.2, Species=0),
 Row(SepalLengthCm=5.4, SepalWidthCm=3.9, PetalLengthCm=1.7, PetalWidthCm=0.4, Species=0),
 Row(SepalLengthCm=4.6, SepalWidthCm=3.4, PetalLengthCm=1.4, PetalWidthCm=0.3, Species=0),
 Row(SepalLengthCm=5.0, SepalWidthCm=3.4, PetalLengthCm=1.5, PetalWidthCm=0.2, Species=0),
 Row(SepalLengthCm=4.4, SepalWidthCm=2.9, PetalLengthCm=1.4, PetalWidthCm=0.2, Species=0),
 Row(SepalLengthCm=4.9, SepalWidthCm=3.1, PetalLengthCm=1.5, PetalWidthCm=0.1, Species=0)]

In [5]:
# Select the "randomsearch" tuner without passing any explicit hyperparameters,
# then start the search on the training split.
# NOTE(review): in the recorded run below this cell ends with
#   TypeError: can only concatenate str (not "list") to str
# raised inside cerebro/backend/spark/backend.py (line 511) while it builds the
# 'val_' + name metric keys. The failure is in the library code, not in this
# cell -- metrics_names apparently holds lists rather than strings; confirm
# and fix in cerebro before relying on this notebook.
NUM_EPOCHS = 10

am.tuner_bind(hyperparameters=None, tuner="randomsearch")
am.fit(train_df, epochs=NUM_EPOCHS)

INFO:tensorflow:Reloading Oracle from existing project ./test/oracle.json
CEREBRO => Time: 2021-11-20 22:02:02, Preparing Data
CEREBRO => Time: 2021-11-20 22:02:02, Num Partitions: 1
CEREBRO => Time: 2021-11-20 22:02:02, Writing DataFrames
CEREBRO => Time: 2021-11-20 22:02:02, Train Data Path: file:///Users/zijian/Desktop/ucsd/cse234/project/cerebro-system/experiments/intermediate_train_data
CEREBRO => Time: 2021-11-20 22:02:02, Val Data Path: file:///Users/zijian/Desktop/ucsd/cse234/project/cerebro-system/experiments/intermediate_val_data


                                                                                

CEREBRO => Time: 2021-11-20 22:02:03, Train Partitions: 1


                                                                                

CEREBRO => Time: 2021-11-20 22:02:06, Val Partitions: 1
CEREBRO => Time: 2021-11-20 22:02:07, Train Rows: 89
CEREBRO => Time: 2021-11-20 22:02:07, Val Rows: 31
CEREBRO => Time: 2021-11-20 22:02:07, Initializing Workers
CEREBRO => Time: 2021-11-20 22:02:07, Initializing Data Loaders


2021-11-20 22:02:07.558203: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-11-20 22:02:07.558467: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.



Search: Running Trial #1

Hyperparameter    |Value             |Best Value So Far 
learning_rate     |0.1               |?                 
batch_size        |64                |?                 
structured_data...|True              |?                 
structured_data...|True              |?                 
structured_data...|2                 |?                 
structured_data...|64                |?                 
structured_data...|0                 |?                 
structured_data...|512               |?                 
classification_...|0.5               |?                 
optimizer         |adam              |?                 



2021-11-20 22:02:08.180220: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
[Stage 9:>                                                          (0 + 1) / 1]

-------------------------

['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
[[-1, 1], [-1, 1], [-1, 1], [-1, 1]]


2021-11-20 22:02:09.028642: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-11-20 22:02:09.509174: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-11-20 22:02:09.520539: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:196] None of the MLIR optimization passes are enabled (registered 0 passes)
Instructions for updating:
Use output_signature instead
Instructions for updating:
Use output_signature instead
2021-11-20 22:02:10.382623: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
Train on 3 steps
CEREBRO => Time: 2021-11-20 22:02:11, Model: model_0_1637474528, Mode: TRA

-------------------------

['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
[[-1, 1], [-1, 1], [-1, 1], [-1, 1]]


                                                                                

Exception: can only concatenate str (not "list") to str
Traceback (most recent call last):
  File "/Users/zijian/Desktop/ucsd/cse234/project/cerebro-system/cerebro/backend/spark/service_task.py", line 196, in bg_execute
    local_task_index=self.local_task_index)
  File "/Users/zijian/Desktop/ucsd/cse234/project/cerebro-system/cerebro/backend/spark/backend.py", line 511, in train
    result = {k: v for k, v in zip(['val_loss'] + ['val_' + name for name in metrics_names], result)}
  File "/Users/zijian/Desktop/ucsd/cse234/project/cerebro-system/cerebro/backend/spark/backend.py", line 511, in <listcomp>
    result = {k: v for k, v in zip(['val_loss'] + ['val_' + name for name in metrics_names], result)}
TypeError: can only concatenate str (not "list") to str
