In [None]:
%%bash
# Install the packages imported by model.py and train_on_vertexai.py below.
# NOTE(review): versions are unpinned, so a rerun may resolve different releases.
pip3 install google-cloud-aiplatform cloudml-hypertune kfp numpy tensorflow

## Run a standalone model

In [None]:
%%writefile model.py
import argparse
import logging
import os, time
import hypertune
import numpy as np
import tensorflow as tf

# Module-level defaults. When model.py runs as a script, the __main__ block
# overwrites most of these from command-line arguments before training starts.
BUCKET = None
# Container tags use '<major>-<minor>' (e.g. '2-6'). Split on '.' so that
# two-digit minor versions such as 2.10 are not truncated to '2-1'.
TF_VERSION = '-'.join(tf.__version__.split('.')[:2])  # needed to choose container

DEVELOP_MODE = True
NUM_EXAMPLES = 5000 * 1000  # doesn't need to be precise but get order of magnitude right.
# Read by train_and_evaluate() when DEVELOP_MODE is False; defined here so the
# module can be imported and used without going through the __main__ block.
NUM_EPOCHS = 10
SKIP_FULL_EVAL = False

NUM_BUCKETS = 5
NUM_EMBEDS = 3
TRAIN_BATCH_SIZE = 64
DNN_HIDDEN_UNITS = '64,32'

# Column order of the input CSV files. The trailing 'data_split' column is
# popped by the __main__ block when no AIP_TRAINING_DATA_URI is set.
CSV_COLUMNS = (
    'ontime,dep_delay,taxi_out,distance,origin,dest,dep_hour,is_weekday,carrier,' +
    'dep_airport_lat,dep_airport_lon,arr_airport_lat,arr_airport_lon,data_split'
).split(',')

# One sample value per entry of CSV_COLUMNS; passed to make_csv_dataset as
# column_defaults in read_dataset().
CSV_COLUMN_TYPES = [
    1.0, -3.0, 5.0, 1037.493622678299, 'OTH', 'DEN', 21, 1.0, 'OO',
    43.41694444, -124.24694444, 39.86166667, -104.67305556, 'TRAIN'
]


def features_and_labels(features):
    """Split a parsed record into ``(inputs, target)``.

    The 'ontime' entry is removed from ``features`` in place; the same mapping
    (now without that key) is returned together with the removed value.
    """
    target = features.pop('ontime')  # the value the model is trained to predict
    return features, target


def read_dataset(pattern, batch_size, mode=tf.estimator.ModeKeys.TRAIN, truncate=None):
    """Build a tf.data pipeline of (features, label) batches from CSV files.

    Args:
        pattern: file pattern handed to ``make_csv_dataset``.
        batch_size: number of rows per batch.
        mode: when equal to ``tf.estimator.ModeKeys.TRAIN`` the batches are
            shuffled and repeated indefinitely; otherwise a single pass is made.
        truncate: if not None, forwarded to ``Dataset.take`` as the last step.

    Returns:
        The resulting ``tf.data.Dataset``.
    """
    csv_options = dict(
        column_names=CSV_COLUMNS,
        column_defaults=CSV_COLUMN_TYPES,
        sloppy=True,
        num_parallel_reads=2,
        ignore_errors=True,
        num_epochs=1,
    )
    batches = tf.data.experimental.make_csv_dataset(pattern, batch_size, **csv_options)
    batches = batches.map(features_and_labels)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    if is_training:
        # shuffle buffer holds ten batch-sizes' worth of elements, then loop forever
        batches = batches.shuffle(batch_size * 10).repeat()

    batches = batches.prefetch(1)
    return batches if truncate is None else batches.take(truncate)


def create_model():
    """Assemble the feature columns and build the wide-and-deep Keras model.

    Reads the module globals NUM_BUCKETS, NUM_EMBEDS and DNN_HIDDEN_UNITS.
    Statement order matters: ``real`` and ``sparse`` are mutated / rebound as
    the function proceeds, and the Keras inputs are created from their keys
    before any derived (crossed / embedded) columns are added.

    Returns:
        The compiled model produced by ``wide_and_deep_classifier``.
    """
    # numeric columns, keyed by CSV column name
    real = {
        colname: tf.feature_column.numeric_column(colname)
        for colname in
        (
                'dep_delay,taxi_out,distance,dep_hour,is_weekday,' +
                'dep_airport_lat,dep_airport_lon,' +
                'arr_airport_lat,arr_airport_lon'
        ).split(',')
    }
    # categorical columns: fixed vocabulary for carrier, hashing for airports
    sparse = {
        'carrier': tf.feature_column.categorical_column_with_vocabulary_list('carrier',
                                                                             vocabulary_list='AS,VX,F9,UA,US,WN,HA,EV,MQ,DL,OO,B6,NK,AA'.split(
                                                                                 ',')),
        'origin': tf.feature_column.categorical_column_with_hash_bucket('origin', hash_bucket_size=1000),
        'dest': tf.feature_column.categorical_column_with_hash_bucket('dest', hash_bucket_size=1000),
    }

    # One scalar Keras input per raw column. Built now, while real/sparse still
    # contain only the raw CSV-backed columns.
    inputs = {
        colname: tf.keras.layers.Input(name=colname, shape=(), dtype='float32')
        for colname in real.keys()
    }
    inputs.update({
        colname: tf.keras.layers.Input(name=colname, shape=(), dtype='string')
        for colname in sparse.keys()
    })

    # NUM_BUCKETS evenly spaced boundaries over each coordinate range
    latbuckets = np.linspace(20.0, 50.0, NUM_BUCKETS).tolist()  # USA
    lonbuckets = np.linspace(-120.0, -70.0, NUM_BUCKETS).tolist()  # USA
    # bucketized lat/lon columns, keyed 'd_<column name>'
    disc = {}
    disc.update({
        'd_{}'.format(key): tf.feature_column.bucketized_column(real[key], latbuckets)
        for key in ['dep_airport_lat', 'arr_airport_lat']
    })
    disc.update({
        'd_{}'.format(key): tf.feature_column.bucketized_column(real[key], lonbuckets)
        for key in ['dep_airport_lon', 'arr_airport_lon']
    })

    # cross columns that make sense in combination
    # (lat x lon per airport, then departure location x arrival location)
    sparse['dep_loc'] = tf.feature_column.crossed_column(
        [disc['d_dep_airport_lat'], disc['d_dep_airport_lon']], NUM_BUCKETS * NUM_BUCKETS)
    sparse['arr_loc'] = tf.feature_column.crossed_column(
        [disc['d_arr_airport_lat'], disc['d_arr_airport_lon']], NUM_BUCKETS * NUM_BUCKETS)
    sparse['dep_arr'] = tf.feature_column.crossed_column([sparse['dep_loc'], sparse['arr_loc']], NUM_BUCKETS ** 4)

    # embed all the sparse columns
    # (done before `sparse` is rebound below, so these wrap the raw categorical columns)
    embed = {
        'embed_{}'.format(colname): tf.feature_column.embedding_column(col, NUM_EMBEDS)
        for colname, col in sparse.items()
    }
    # the embeddings join the numeric columns as input to the deep part
    real.update(embed)

    # one-hot encode the sparse columns
    # (`sparse` now maps each name to its indicator column, used by the wide part)
    sparse = {
        colname: tf.feature_column.indicator_column(col)
        for colname, col in sparse.items()
    }

    model = wide_and_deep_classifier(
        inputs,
        linear_feature_columns=sparse.values(),
        dnn_feature_columns=real.values(),
        dnn_hidden_units=DNN_HIDDEN_UNITS)

    return model


def wide_and_deep_classifier(inputs, linear_feature_columns, dnn_feature_columns, dnn_hidden_units):
    """Build and compile a wide-and-deep binary classifier.

    Args:
        inputs: dict of Keras ``Input`` layers keyed by feature name.
        linear_feature_columns: feature columns fed directly to the output layer.
        dnn_feature_columns: feature columns fed through the hidden layers.
        dnn_hidden_units: comma-separated layer widths, e.g. ``'64,32'``.

    Returns:
        A compiled ``tf.keras.Model`` with a single sigmoid output named 'pred'.
    """
    # deep tower: dense features followed by a stack of ReLU layers
    deep_tower = tf.keras.layers.DenseFeatures(dnn_feature_columns, name='deep_inputs')(inputs)
    hidden_sizes = list(map(int, dnn_hidden_units.split(',')))
    for depth, width in enumerate(hidden_sizes, start=1):
        hidden = tf.keras.layers.Dense(width, activation='relu', name='dnn_{}'.format(depth))
        deep_tower = hidden(deep_tower)

    # wide tower: the linear feature columns, unprocessed
    wide_tower = tf.keras.layers.DenseFeatures(linear_feature_columns, name='wide_inputs')(inputs)

    merged = tf.keras.layers.concatenate([deep_tower, wide_tower], name='both')
    prediction = tf.keras.layers.Dense(1, activation='sigmoid', name='pred')(merged)

    classifier = tf.keras.Model(inputs, prediction)
    classifier.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy', rmse, tf.keras.metrics.AUC()],
    )
    return classifier


def rmse(y_true, y_pred):
    """Root-mean-square error between predictions and targets, as a tensor."""
    squared_error = tf.square(y_pred - y_true)
    mean_squared_error = tf.reduce_mean(squared_error)
    return tf.sqrt(mean_squared_error)


def train_and_evaluate(train_data_pattern, eval_data_pattern, test_data_pattern, export_dir, output_dir):
    """Train the model, report the tuning metric, export it, and optionally test it.

    Reads the module globals TRAIN_BATCH_SIZE, DEVELOP_MODE, NUM_EXAMPLES,
    NUM_EPOCHS and SKIP_FULL_EVAL.

    Args:
        train_data_pattern: file pattern of the training CSVs.
        eval_data_pattern: file pattern of the validation CSVs.
        test_data_pattern: file pattern of the test CSVs, or None to skip.
        export_dir: where the SavedModel is written.
        output_dir: parent directory for checkpoints.
    """
    train_batch_size = TRAIN_BATCH_SIZE
    eval_batch_size = 100
    if DEVELOP_MODE:
        steps_per_epoch = 3
        epochs = 2
        num_eval_examples = eval_batch_size * 10
    else:
        steps_per_epoch = NUM_EXAMPLES // train_batch_size
        epochs = NUM_EPOCHS
        num_eval_examples = eval_batch_size * 100
    # read_dataset() truncates with Dataset.take(), which counts *batches*.
    # Convert the example budget to a batch count so evaluation really covers
    # num_eval_examples rows rather than eval_batch_size times as many.
    num_eval_batches = num_eval_examples // eval_batch_size

    train_dataset = read_dataset(train_data_pattern, train_batch_size)
    eval_dataset = read_dataset(eval_data_pattern, eval_batch_size, tf.estimator.ModeKeys.EVAL, num_eval_batches)

    # checkpoint
    checkpoint_path = '{}/checkpoints/flights.cpt'.format(output_dir)
    logging.info("Checkpointing to {}".format(checkpoint_path))
    cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                     save_weights_only=True,
                                                     verbose=1)

    # call back to write out hyperparameter tuning metric
    METRIC = 'val_rmse'
    hpt = hypertune.HyperTune()

    class HpCallback(tf.keras.callbacks.Callback):
        """Reports METRIC to the hyperparameter tuning service after each epoch."""

        def on_epoch_end(self, epoch, logs=None):
            if logs and METRIC in logs:
                logging.info("Epoch {}: {} = {}".format(epoch, METRIC, logs[METRIC]))
                hpt.report_hyperparameter_tuning_metric(hyperparameter_metric_tag=METRIC,
                                                        metric_value=logs[METRIC],
                                                        global_step=epoch)

    # train the model
    model = create_model()
    logging.info(f"Training on {train_data_pattern}; eval on {eval_data_pattern}; {epochs} epochs; {steps_per_epoch}")
    history = model.fit(train_dataset,
                        validation_data=eval_dataset,
                        epochs=epochs,
                        steps_per_epoch=steps_per_epoch,
                        callbacks=[cp_callback, HpCallback()])

    # export
    logging.info('Exporting to {}'.format(export_dir))
    tf.saved_model.save(model, export_dir)

    # write out final metric
    final_rmse = history.history[METRIC][-1]
    logging.info("Validation metric {} on {} samples = {}".format(METRIC, num_eval_examples, final_rmse))

    # the full test pass is only made outside develop mode and when not explicitly skipped
    if (not DEVELOP_MODE) and (test_data_pattern is not None) and (not SKIP_FULL_EVAL):
        logging.info("Evaluating over full test dataset")
        test_dataset = read_dataset(test_data_pattern, eval_batch_size, tf.estimator.ModeKeys.EVAL, None)
        final_metrics = model.evaluate(test_dataset)
        logging.info("Final metrics on full test dataset = {}".format(final_metrics))
    else:
        logging.info("Skipping evaluation on full test dataset")


if __name__ == '__main__':
    # Raise the log level first: the root logger defaults to WARNING, which
    # would silently drop the INFO-level version message below.
    logging.getLogger().setLevel(logging.INFO)
    logging.info("Tensorflow version " + tf.__version__)
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--bucket',
        help='Data will be read from gs://BUCKET/ch9/data and output will be in gs://BUCKET/ch9/trained_model',
        required=True
    )

    parser.add_argument(
        '--num_examples',
        help='Number of examples per epoch. Get order of magnitude correct.',
        type=int,
        default=5000000
    )

    # for hyper-parameter tuning
    parser.add_argument(
        '--train_batch_size',
        help='Number of examples to compute gradient on',
        type=int,
        default=256  # originally 64
    )
    parser.add_argument(
        '--nbuckets',
        help='Number of bins into which to discretize lats and lons',
        type=int,
        default=10  # originally 5
    )
    parser.add_argument(
        '--nembeds',
        help='Embedding dimension for categorical variables',
        type=int,
        default=3
    )
    parser.add_argument(
        '--num_epochs',
        help='Number of epochs (used only if --develop is not set)',
        type=int,
        default=10
    )
    parser.add_argument(
        '--dnn_hidden_units',
        help='Architecture of DNN part of wide-and-deep network',
        default='64,64,64,8'  # originally '64,32'
    )
    parser.add_argument(
        '--develop',
        help='Train on a small subset in development',
        dest='develop',
        action='store_true')
    parser.set_defaults(develop=False)
    parser.add_argument(
        '--skip_full_eval',
        help='Just train. Do not evaluate on test dataset.',
        dest='skip_full_eval',
        action='store_true')
    parser.set_defaults(skip_full_eval=False)

    # parse args
    args = parser.parse_args().__dict__

    # The Vertex AI contract. If not running in Vertex AI Training, these will be None
    OUTPUT_MODEL_DIR = os.getenv("AIP_MODEL_DIR")  # or None
    TRAIN_DATA_PATTERN = os.getenv("AIP_TRAINING_DATA_URI")
    EVAL_DATA_PATTERN = os.getenv("AIP_VALIDATION_DATA_URI")
    TEST_DATA_PATTERN = os.getenv("AIP_TEST_DATA_URI")

    # set top-level output directory for checkpoints, etc.
    BUCKET = args['bucket']
    OUTPUT_DIR = 'gs://{}/ch9/train_output'.format(BUCKET)
    # During hyperparameter tuning, we need to make sure different trials don't clobber each other
    # https://cloud.google.com/ai-platform/training/docs/distributed-training-details#tf-config-format
    # This doesn't exist in Vertex AI
    # OUTPUT_DIR = os.path.join(
    #     OUTPUT_DIR,
    #     json.loads(
    #         os.environ.get('TF_CONFIG', '{}')
    #     ).get('task', {}).get('trial', '')
    # )
    if OUTPUT_MODEL_DIR:
        # convert gs://ai-analytics-solutions-dsongcp2/aiplatform-custom-job-2021-11-13-22:22:46.175/1/model/
        # to gs://ai-analytics-solutions-dsongcp2/aiplatform-custom-job-2021-11-13-22:22:46.175/1
        OUTPUT_DIR = os.path.join(
            os.path.dirname(OUTPUT_MODEL_DIR if OUTPUT_MODEL_DIR[-1] != '/' else OUTPUT_MODEL_DIR[:-1]),
            'train_output')
    logging.info('Writing checkpoints and other outputs to {}'.format(OUTPUT_DIR))

    # Set default values for the contract variables in case we are not running in Vertex AI Training
    if not OUTPUT_MODEL_DIR:
        OUTPUT_MODEL_DIR = os.path.join(OUTPUT_DIR,
                                        'export/flights_{}'.format(time.strftime("%Y%m%d-%H%M%S")))
    if not TRAIN_DATA_PATTERN:
        TRAIN_DATA_PATTERN = 'gs://{}/ch9/data/train*'.format(BUCKET)
        CSV_COLUMNS.pop()  # the data_split column won't exist
        CSV_COLUMN_TYPES.pop()  # the data_split column won't exist
    if not EVAL_DATA_PATTERN:
        EVAL_DATA_PATTERN = 'gs://{}/ch9/data/eval*'.format(BUCKET)
    logging.info('Exporting trained model to {}'.format(OUTPUT_MODEL_DIR))
    logging.info("Reading training data from {}".format(TRAIN_DATA_PATTERN))
    logging.info("Reading evaluation data from {}".format(EVAL_DATA_PATTERN))

    # other global parameters
    # (module globals read by create_model() and train_and_evaluate())
    NUM_BUCKETS = args['nbuckets']
    NUM_EMBEDS = args['nembeds']
    NUM_EXAMPLES = args['num_examples']
    NUM_EPOCHS = args['num_epochs']
    TRAIN_BATCH_SIZE = args['train_batch_size']
    DNN_HIDDEN_UNITS = args['dnn_hidden_units']
    DEVELOP_MODE = args['develop']
    SKIP_FULL_EVAL = args['skip_full_eval']

    # run
    train_and_evaluate(TRAIN_DATA_PATTERN, EVAL_DATA_PATTERN, TEST_DATA_PATTERN, OUTPUT_MODEL_DIR, OUTPUT_DIR)

    logging.info("Done")

In [None]:
%%bash
# Derive the project from the active gcloud configuration; the bucket name is
# the project id with a '-dsongcp' suffix.
export PROJECT_ID=$(gcloud info --format='value(config.project)')
export BUCKET_NAME=$PROJECT_ID-dsongcp
# --develop makes model.py train for only a few steps.
python3 model.py --bucket $BUCKET_NAME --develop

## Develop and deploy model using Vertex AI

- Load up a managed dataset in Vertex AI
- Set up training infrastructure to run model.py
- Train the model by invoking functions in model.py on the managed dataset
- Find the endpoint to which to deploy the model
- Deploy the model to the endpoint

In [None]:
%%writefile train_on_vertexai.py
import argparse
import logging
from datetime import datetime
import tensorflow as tf

from google.cloud import aiplatform
from google.cloud.aiplatform import gapic as aip
from google.cloud.aiplatform import hyperparameter_tuning as hpt
from kfp.v2 import compiler, dsl

# Display name of the Vertex AI endpoint; also used as the prefix for the
# dataset, model, training-job and pipeline display names in this script.
ENDPOINT_NAME = 'flights'


def train_custom_model(data_set, timestamp, develop_mode, cpu_only_mode, tf_version, extra_args=None):
    """Run model.py as a Vertex AI CustomTrainingJob and return the resulting model.

    Args:
        data_set: managed tabular dataset passed to the job.
        timestamp: suffix appended to the display names.
        develop_mode: adds ``--develop`` to the script args and makes the run synchronous.
        cpu_only_mode: use the CPU training image and request no accelerator.
        tf_version: container version tag, e.g. ``'2-6'``.
        extra_args: optional additional command-line arguments for model.py.

    Returns:
        Whatever ``CustomTrainingJob.run`` returns for the trained model.
    """
    # training image depends on the hardware; prediction always uses the CPU image
    if cpu_only_mode:
        train_image = 'us-docker.pkg.dev/vertex-ai/training/tf-cpu.{}:latest'.format(tf_version)
    else:
        train_image = "us-docker.pkg.dev/vertex-ai/training/tf-gpu.{}:latest".format(tf_version)
    deploy_image = 'us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.{}:latest'.format(tf_version)

    model_display_name = '{}-{}'.format(ENDPOINT_NAME, timestamp)
    training_job = aiplatform.CustomTrainingJob(
        display_name='train-{}'.format(model_display_name),
        script_path="model.py",
        container_uri=train_image,
        requirements=['cloudml-hypertune'],  # any extra Python packages
        model_serving_container_image_uri=deploy_image
    )

    # command line handed to model.py
    script_args = ['--bucket', BUCKET]
    if develop_mode:
        script_args.append('--develop')
    if extra_args:
        script_args.extend(extra_args)

    # See https://googleapis.dev/python/aiplatform/latest/aiplatform.html#
    run_options = dict(
        dataset=data_set,
        predefined_split_column_name='data_split',
        model_display_name=model_display_name,
        args=script_args,
        replica_count=1,
        machine_type='n1-standard-4',
        sync=develop_mode,
    )
    if not cpu_only_mode:
        # See https://cloud.google.com/vertex-ai/docs/general/locations#accelerators
        run_options['accelerator_type'] = aip.AcceleratorType.NVIDIA_TESLA_T4.name
        run_options['accelerator_count'] = 1

    return training_job.run(**run_options)


def train_automl_model(data_set, timestamp, develop_mode):
    """Train an AutoML tabular classifier on ``data_set`` and return the model.

    Args:
        data_set: managed tabular dataset to train on.
        timestamp: suffix appended to the display names.
        develop_mode: selects the smaller training budget and makes the run synchronous.
    """
    model_display_name = '{}-{}'.format(ENDPOINT_NAME, timestamp)
    budget = 300 if develop_mode else 2000
    evaluated_items_table = '{}:dsongcp.ch9_automl_evaluated'.format(PROJECT)

    automl_job = aiplatform.AutoMLTabularTrainingJob(
        display_name='train-{}'.format(model_display_name),
        optimization_prediction_type='classification'
    )
    # See https://googleapis.dev/python/aiplatform/latest/aiplatform.html#
    run_options = dict(
        dataset=data_set,
        predefined_split_column_name='data_split',
        target_column='ontime',
        model_display_name=model_display_name,
        budget_milli_node_hours=budget,
        disable_early_stopping=False,
        export_evaluated_data_items=True,
        export_evaluated_data_items_bigquery_destination_uri=evaluated_items_table,
        export_evaluated_data_items_override_destination=True,
        sync=develop_mode,
    )
    return automl_job.run(**run_options)


def do_hyperparameter_tuning(data_set, timestamp, develop_mode, cpu_only_mode, tf_version):
    """Tune model.py hyperparameters, then train a full model with the best ones.

    The trial jobs receive only ``--bucket`` (they do not use ``data_set``);
    ``data_set`` is used for the final ``train_custom_model`` run.

    Args:
        data_set: managed tabular dataset for the final training run.
        timestamp: suffix appended to the display names.
        develop_mode: limits the study to 2 trials and is forwarded to the final run.
        cpu_only_mode: use the CPU training image and request no accelerator.
        tf_version: container version tag, e.g. ``'2-6'``.

    Returns:
        The model returned by ``train_custom_model`` for the best parameters.

    Raises:
        RuntimeError: if no trial reported a final metric.
    """
    if cpu_only_mode:
        train_image = 'us-docker.pkg.dev/vertex-ai/training/tf-cpu.{}:latest'.format(tf_version)
    else:
        train_image = "us-docker.pkg.dev/vertex-ai/training/tf-gpu.{}:latest".format(tf_version)

    # a single trial job; CPU and GPU variants differ only in the accelerator settings
    model_display_name = '{}-{}'.format(ENDPOINT_NAME, timestamp)
    trial_job_options = dict(
        display_name='train-{}'.format(model_display_name),
        script_path="model.py",
        container_uri=train_image,
        args=[
            '--bucket', BUCKET,
            '--skip_full_eval',  # no need to evaluate on test data set
            '--num_epochs', '10',
            '--num_examples', '500000'  # 1/10 actual size to finish faster
        ],
        requirements=['cloudml-hypertune'],  # any extra Python packages
        replica_count=1,
        machine_type='n1-standard-4',
    )
    if not cpu_only_mode:
        # See https://cloud.google.com/vertex-ai/docs/general/locations#accelerators
        trial_job_options['accelerator_type'] = aip.AcceleratorType.NVIDIA_TESLA_T4.name
        trial_job_options['accelerator_count'] = 1
    trial_job = aiplatform.CustomJob.from_local_script(**trial_job_options)

    # the tuning job
    hparam_job = aiplatform.HyperparameterTuningJob(
        # See https://googleapis.dev/python/aiplatform/latest/aiplatform.html#
        display_name='hparam-{}'.format(model_display_name),
        custom_job=trial_job,
        metric_spec={'val_rmse': 'minimize'},
        parameter_spec={
            "train_batch_size": hpt.IntegerParameterSpec(min=16, max=256, scale='log'),
            "nbuckets": hpt.IntegerParameterSpec(min=5, max=10, scale='linear'),
            "dnn_hidden_units": hpt.CategoricalParameterSpec(values=["64,16", "64,16,4", "64,64,64,8", "256,64,16"])
        },
        max_trial_count=2 if develop_mode else NUM_HPARAM_TRIALS,
        parallel_trial_count=2,
        search_algorithm=None,  # Bayesian
    )

    hparam_job.run(sync=True)  # has to finish before we can get trials.

    # Get the parameters corresponding to the best trial. Trials that never
    # reported a metric are skipped; indexing metrics[0] on them would raise
    # IndexError and hide the results of the trials that did succeed.
    scored_trials = [t for t in hparam_job.trials if t.final_measurement.metrics]
    if not scored_trials:
        raise RuntimeError('No hyperparameter tuning trial reported a final metric')
    best = min(scored_trials, key=lambda t: t.final_measurement.metrics[0].value)
    logging.info('Best trial: {}'.format(best))
    best_params = []
    for param in best.parameters:
        best_params.append('--{}'.format(param.parameter_id))

        if param.parameter_id in ["train_batch_size", "nbuckets"]:
            # hparam returns 10.0 even though it's an integer param. so round it.
            # but CustomTrainingJob makes integer args into floats. so make it a string
            best_params.append(str(int(round(param.value))))
        else:
            # string or float parameters
            best_params.append(param.value)

    # run the best trial to completion
    logging.info('Launching full training job with {}'.format(best_params))
    return train_custom_model(data_set, timestamp, develop_mode, cpu_only_mode, tf_version, extra_args=best_params)


@dsl.pipeline(name="flights-ch9-pipeline",
              description="ds-on-gcp ch9 flights pipeline"
)
def main():
    """Create the dataset, train a model, and deploy it to the 'flights' endpoint.

    Reads the module globals set by the __main__ block (PROJECT, REGION, BUCKET,
    TF_VERSION, AUTOML, NUM_HPARAM_TRIALS, DEVELOP_MODE, CPU_ONLY_MODE, TIMESTAMP).
    NOTE(review): the body calls the SDK directly rather than composing pipeline
    components; confirm it behaves as intended when compiled via run_pipeline().
    """
    aiplatform.init(project=PROJECT, location=REGION, staging_bucket='gs://{}'.format(BUCKET))

    # create data set
    all_files = tf.io.gfile.glob('gs://{}/ch9/data/all*.csv'.format(BUCKET))
    logging.info("Training on {}".format(all_files))
    data_set = aiplatform.TabularDataset.create(
        display_name='data-{}'.format(ENDPOINT_NAME),
        gcs_source=all_files
    )
    # container tags are '<major>-<minor>', e.g. '2-6'
    if TF_VERSION is not None:
        tf_version = TF_VERSION.replace(".", "-")
    else:
        # Split on '.' instead of slicing a single character so that two-digit
        # minor versions (2.10 and later) are not truncated to '2-1'.
        tf_version = '-'.join(tf.__version__.split('.')[:2])

    # train
    if AUTOML:
        model = train_automl_model(data_set, TIMESTAMP, DEVELOP_MODE)
    elif NUM_HPARAM_TRIALS > 1:
        model = do_hyperparameter_tuning(data_set, TIMESTAMP, DEVELOP_MODE, CPU_ONLY_MODE, tf_version)
    else:
        model = train_custom_model(data_set, TIMESTAMP, DEVELOP_MODE, CPU_ONLY_MODE, tf_version)

    # create endpoint if it doesn't already exist
    endpoints = aiplatform.Endpoint.list(
        filter='display_name="{}"'.format(ENDPOINT_NAME),
        order_by='create_time desc',
        project=PROJECT, location=REGION,
    )
    if len(endpoints) > 0:
        endpoint = endpoints[0]  # most recently created
    else:
        endpoint = aiplatform.Endpoint.create(
            display_name=ENDPOINT_NAME, project=PROJECT, location=REGION,
            sync=DEVELOP_MODE
        )

    # deploy, sending all of the endpoint's traffic to the new model
    model.deploy(
        endpoint=endpoint,
        traffic_split={"0": 100},
        machine_type='n1-standard-2',
        min_replica_count=1,
        max_replica_count=1,
        sync=DEVELOP_MODE
    )

    if DEVELOP_MODE:
        model.wait()


def run_pipeline():
    """Compile main() into a pipeline spec and run it as a Vertex AI pipeline job.

    Reads the module globals BUCKET, PROJECT and REGION set by the __main__ block.
    """
    # one variable for the spec path, so compile and submit cannot drift apart
    template_path = '{}_pipeline.json'.format(ENDPOINT_NAME)
    compiler.Compiler().compile(pipeline_func=main, package_path=template_path)

    # Use the high-level SDK class: `aip` is the gapic module, whose PipelineJob
    # is the API message type and has neither `template_path` nor `run()`.
    job = aiplatform.PipelineJob(
        display_name="{}-pipeline".format(ENDPOINT_NAME),
        template_path=template_path,
        # BUCKET is a bare bucket name, so the gs:// scheme must be added here
        pipeline_root="gs://{}/pipeline_root/intro".format(BUCKET),
        enable_caching=False,
        # aiplatform.init() is only called inside main(), which is not executed
        # on this path, so pass project and location explicitly
        project=PROJECT,
        location=REGION,
    )

    job.run()


if __name__ == '__main__':
    # Command-line interface. Boolean switches are store_true flags, which
    # default to False when absent.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--bucket', required=True,
        help='Data will be read from gs://BUCKET/ch9/data and checkpoints will be in gs://BUCKET/ch9/trained_model')
    parser.add_argument('--region', default='us-central1', help='Where to run the trainer')
    parser.add_argument('--project', required=True, help='Project to be billed')
    parser.add_argument('--develop', action='store_true', help='Train on a small subset in development')
    parser.add_argument('--automl', action='store_true',
                        help='Train an AutoML Table, instead of using model.py')
    parser.add_argument(
        '--num_hparam_trials', type=int, default=0,
        help='Number of hyperparameter trials. 0/1 means no hyperparam. Ignored if --automl is set.')
    parser.add_argument('--pipeline', action='store_true', help='Run as pipeline')
    parser.add_argument('--cpuonly', action='store_true', help='Run without GPU')
    parser.add_argument('--tfversion', help='TensorFlow version to use')

    logging.getLogger().setLevel(logging.INFO)
    options = vars(parser.parse_args())

    # Publish the parsed options as module globals; the functions above read them.
    BUCKET = options['bucket']
    PROJECT = options['project']
    REGION = options['region']
    DEVELOP_MODE = options['develop']
    CPU_ONLY_MODE = options['cpuonly']
    TF_VERSION = options['tfversion']
    AUTOML = options['automl']
    NUM_HPARAM_TRIALS = options['num_hparam_trials']
    TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")

    # either submit the compiled pipeline or run the steps directly
    entry_point = run_pipeline if options['pipeline'] else main
    entry_point()

In [None]:
%%bash
# Each %%bash cell runs in its own shell, so variables exported by an earlier
# cell are not visible here. Reuse them if the environment already provides
# them; otherwise derive them again.
PROJECT_ID=${PROJECT_ID:-$(gcloud info --format='value(config.project)')}
BUCKET_NAME=${BUCKET_NAME:-${PROJECT_ID}-dsongcp}
python3 train_on_vertexai.py \
--project "$PROJECT_ID" \
--bucket "$BUCKET_NAME" \
--develop --cpuonly --tfversion 2.6

## Make predictions from the deployed model

Sending the normal prediction request to the model endpoint will return a response that contains feature attributions.

### Call the model using bash

In [None]:
%%writefile call_predict.sh
#!/bin/bash
# Look up an endpoint by display name and send it a prediction request.
# Expects example_input.json in the working directory.

REGION=us-central1
ENDPOINT_NAME=flights

# take the last entry of the endpoint list as sorted by --sort-by
ENDPOINT_ID=$(gcloud ai endpoints list --region="$REGION" \
              --format='value(ENDPOINT_ID)' --filter="display_name=${ENDPOINT_NAME}" \
              --sort-by=creationTimeStamp | tail -1)
# fail with a clear message instead of calling predict with an empty id
if [ -z "$ENDPOINT_ID" ]; then
    echo "No endpoint named ${ENDPOINT_NAME} found in ${REGION}" >&2
    exit 1
fi
echo "$ENDPOINT_ID"
gcloud ai endpoints predict "$ENDPOINT_ID" --region="$REGION" --json-request=example_input.json

In [None]:
%%bash
# Run the script written by the previous cell.
bash ./call_predict.sh

### Call the model using Python

In [None]:
%%writefile call_predict.py
import sys, json
from google.cloud import aiplatform
from google.cloud.aiplatform import gapic as aip

# display name of the endpoint to query
ENDPOINT_NAME = 'flights'

if __name__ == '__main__':
    # endpoints with the matching display name, newest first
    matching_endpoints = aiplatform.Endpoint.list(
        order_by='create_time desc',
        filter='display_name="{}"'.format(ENDPOINT_NAME),
    )
    if not len(matching_endpoints):
        print("No endpoint named {}".format(ENDPOINT_NAME))
        sys.exit(-1)

    endpoint = matching_endpoints[0]

    # two sample flights, one dict of model inputs per instance
    first_flight = {
        "dep_hour": 2, "is_weekday": 1, "dep_delay": 40, "taxi_out": 17, "distance": 41, "carrier": "AS",
        "dep_airport_lat": 58.42527778, "dep_airport_lon": -135.7075, "arr_airport_lat": 58.35472222,
        "arr_airport_lon": -134.57472222, "origin": "GST", "dest": "JNU",
    }
    second_flight = {
        "dep_hour": 22, "is_weekday": 0, "dep_delay": -7, "taxi_out": 7, "distance": 201, "carrier": "HA",
        "dep_airport_lat": 21.97611111, "dep_airport_lon": -159.33888889, "arr_airport_lat": 20.89861111,
        "arr_airport_lon": -156.43055556, "origin": "LIH", "dest": "OGG",
    }
    input_data = {"instances": [first_flight, second_flight]}

    preds = endpoint.predict(input_data['instances'])
    print(preds)

In [None]:
%%bash
# Run the script written by the previous cell.
python3 call_predict.py

### Run a Vertex AI pipeline on full dataset

In [None]:
%%bash
# Each %%bash cell runs in its own shell, so variables exported by an earlier
# cell are not visible here. Reuse them if the environment already provides
# them; otherwise derive them again.
PROJECT_ID=${PROJECT_ID:-$(gcloud info --format='value(config.project)')}
BUCKET_NAME=${BUCKET_NAME:-${PROJECT_ID}-dsongcp}
python3 train_on_vertexai.py \
--project "$PROJECT_ID" \
--bucket "$BUCKET_NAME"

In [None]:
%%bash
# Each %%bash cell runs in its own shell, so variables exported by an earlier
# cell are not visible here. Reuse them if the environment already provides
# them; otherwise derive them again.
PROJECT_ID=${PROJECT_ID:-$(gcloud info --format='value(config.project)')}
BUCKET_NAME=${BUCKET_NAME:-${PROJECT_ID}-dsongcp}
python3 train_on_vertexai.py \
--project "$PROJECT_ID" \
--bucket "$BUCKET_NAME" \
--tfversion 2.6