In [1]:
from cerebro.backend import SparkBackend
from cerebro.keras import SparkEstimator

# Data storage for intermediate data and model artifacts.
from cerebro.storage import LocalStore, HDFSStore

# Model selection/AutoML methods.
from cerebro.tune import GridSearch, RandomSearch, TPESearch

# Utility functions for specifying the search space.
from cerebro.tune import hp_choice, hp_uniform, hp_quniform, hp_loguniform, hp_qloguniform

import tensorflow as tf
from pyspark.sql import SparkSession


# Obtain the Spark session used by the rest of this script (an existing
# session is reused if one is already running).
spark = (
    SparkSession.builder
    .appName("Cerebro Example")
    .getOrCreate()
)

...

# Cerebro backend running on the existing Spark context with a single worker.
backend = SparkBackend(spark_context=spark.sparkContext, num_workers=1)
# Local-filesystem store for intermediate data and model artifacts.
store = LocalStore(prefix_path='/Users/zijian/Desktop/ucsd/cse234/project/cerebro-system/experiments')


# Initialize input DataFrames.
# The input is a cleaned Iris CSV with a header row; column types are inferred.
# NOTE(review): the paths above and below are machine-specific — adjust them
# before running elsewhere.
df = spark.read.csv("/Users/zijian/Desktop/ucsd/cse234/project/cerebro-system/Iris_clean.csv", header=True, inferSchema=True)
# Fix the seed so the 80/20 train/test split is the same on every run.
train_df, test_df = df.randomSplit([0.8, 0.2], seed=42)

# Define estimator generating function.
# Input: Dictionary containing parameter values
# Output: SparkEstimator
def estimator_gen_fn(params):
    """Build a SparkEstimator for one hyperparameter configuration.

    Args:
        params: Dictionary of hyperparameter values. Must contain the keys
            'lr' (learning rate for the Adam optimizer) and 'batch_size'.

    Returns:
        A SparkEstimator wrapping a newly built Keras model, an Adam
        optimizer, and a sparse categorical cross-entropy loss.

    Raises:
        KeyError: If 'lr' or 'batch_size' is missing from ``params``.
    """
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(10, activation=tf.nn.relu, input_shape=(4,)),  # input shape required
        tf.keras.layers.Dense(10, activation=tf.nn.relu),
        # No activation: the 3 outputs are logits, matching from_logits=True below.
        tf.keras.layers.Dense(3),
    ])

    # `learning_rate` is the supported argument name; `lr` is a deprecated
    # alias that newer Keras releases reject.
    optimizer = tf.keras.optimizers.Adam(learning_rate=params['lr'])
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

    estimator = SparkEstimator(
        model=model,
        optimizer=optimizer,
        loss=loss,
        metrics=['accuracy'],
        # Quantized-uniform samples may arrive as floats (e.g. 32.0);
        # int() is a no-op for values that are already integers.
        batch_size=int(params['batch_size']))

    return estimator

# Define dictionary containing the parameter search space.
search_space = dict(
    # Learning rate: one of three discrete candidates.
    lr=hp_choice([0.01, 0.001, 0.0001]),
    # Batch size: spec built with hp_quniform(16, 256, 16).
    batch_size=hp_quniform(16, 256, 16),
)

# Instantiate the TPE (Tree of Parzen Estimators, a.k.a. HyperOpt) model selection object.
model_selection = TPESearch(
    # Where the work runs and where artifacts are written.
    backend=backend,
    store=store,
    # How candidate models are built and which hyperparameters are explored.
    estimator_gen_fn=estimator_gen_fn,
    search_space=search_space,
    # Search budget: number of configurations and epochs per configuration.
    num_models=30,
    num_epochs=10,
    # Presumably the fraction of the input held out for validation — TODO confirm.
    validation=0.25,
    evaluation_metric='loss',
    # Input/target columns of the DataFrame passed to fit().
    label_columns=['Species'],
    feature_columns=['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm'],
)

# Perform model selection. Returns best model.
# numpy is needed for the sample inference input below; it is imported here
# because the file's import section does not provide it.
import numpy as np

model = model_selection.fit(train_df)

# Inspect best model training history.
model_history = model.get_history()

# Perform inference using the best model and Spark DataFrame.
output_df = model.set_output_columns(['label_predicted']).transform(test_df)
# The ground-truth column is 'Species' (the label column given to the search);
# the DataFrame has no column named 'label'.
output_df.select('Species', 'label_predicted').show(n=10)

# Access all models.
all_models = model.get_all_models()
all_model_training_history = model.get_all_model_history()

# Convert the best model to Keras and perform inference using numpy data.
keras_model = model.keras()
# One sample with 4 features, matching the four feature columns and the
# model's input_shape=(4,).
pred = keras_model.predict([np.ones([1, 4], dtype=np.float32)])
# Save the keras checkpoint file.
# NOTE(review): the original never defined ckpt_path; this relative default is
# a placeholder — point it at the desired output location.
ckpt_path = 'best_model.h5'
keras_model.save(ckpt_path)

# Convert all the model to Keras.
all_models_keras = [m.keras() for m in all_models]


21/11/20 20:10:18 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
21/11/20 20:10:19 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
21/11/20 20:10:19 WARN Utils: Service 'SparkUI' could not bind on port 4041. Attempting port 4042.


CEREBRO => Time: 2021-11-20 20:10:20, Running 1 Workers
CEREBRO => Time: 2021-11-20 20:10:23, Preparing Data
CEREBRO => Time: 2021-11-20 20:10:23, Num Partitions: 1
CEREBRO => Time: 2021-11-20 20:10:23, Writing DataFrames
CEREBRO => Time: 2021-11-20 20:10:23, Train Data Path: file:///Users/zijian/Desktop/ucsd/cse234/project/cerebro-system/experiments/intermediate_train_data
CEREBRO => Time: 2021-11-20 20:10:23, Val Data Path: file:///Users/zijian/Desktop/ucsd/cse234/project/cerebro-system/experiments/intermediate_val_data


                                                                                

CEREBRO => Time: 2021-11-20 20:10:24, Train Partitions: 1


                                                                                

CEREBRO => Time: 2021-11-20 20:10:27, Val Partitions: 1
CEREBRO => Time: 2021-11-20 20:10:28, Train Rows: 91
CEREBRO => Time: 2021-11-20 20:10:28, Val Rows: 35
CEREBRO => Time: 2021-11-20 20:10:28, Initializing Workers
CEREBRO => Time: 2021-11-20 20:10:28, Initializing Data Loaders
CEREBRO => Time: 2021-11-20 20:10:28, Launching Model Selection Workload
-------------------------

['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
[[-1, 4]]


2021-11-20 20:10:28.417440: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-11-20 20:10:28.417661: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-11-20 20:10:28.578284: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-11-20 20:10:28.658589: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-11-20 20:10:28.664313: I tensorflow/com

CEREBRO => Time: 2021-11-20 20:10:30, Terminating Workers


                                                                                

ConnectionRefusedError: [Errno 61] Connection refused