In [None]:
from google.cloud import automl_v1beta1
import os
import time

## Initialize environment variables

In [None]:
# --- Google Cloud project and credentials ---------------------------------
project_id = 'tensorflow-clv-331422'
location = 'us-central1'
# Service-account key file; both API clients are built from it.
keyfile_name = 'mykey.json'

# --- AutoML resource names ------------------------------------------------
# The dataset display name is also used as the BigQuery dataset segment when
# the import URI is built.
dataset_display_name = 'ltv'
dataset_table_name = 'features_n_target'
model_display_name = 'clv_model_test1'

# --- Cloud Storage locations used by batch prediction ---------------------
gcs_output_uri_prefix = "gs://tensorflow-clv-331422_data_final/predictions"
gcs_input_uri = "gs://tensorflow-clv-331422_data_final/predictions/to_predict.csv"

## Create and authenticate clients 

In [None]:
# Build both API clients from the same service-account key file.
# `client` is used for the dataset/model calls in this notebook;
# `prediction_client` is used for the batch prediction request.
client = automl_v1beta1.AutoMlClient.from_service_account_file(keyfile_name)
prediction_client = automl_v1beta1.PredictionServiceClient.from_service_account_file(keyfile_name)

In [None]:
# Resource path for the project/location pair. It is passed as the first
# argument to the create_dataset, list_datasets, create_model and list_models
# calls in this notebook.
location_path = client.location_path(project_id, location)
location_path

## Create AutoML Dataset

In [None]:
# Create a new dataset under `location_path`, sending an empty
# 'tables_dataset_metadata' entry alongside the display name.
# NOTE: the "use an existing Dataset" cell that follows reassigns
# `dataset_name`; run only one of the two cells.
new_dataset_spec = {
    'display_name': dataset_display_name,
    'tables_dataset_metadata': {},
}
create_dataset_response = client.create_dataset(location_path, new_dataset_spec)
dataset_name = create_dataset_response.name
dataset_name

## ... or alternatively, use an existing Dataset

In [None]:
# Look up an already-existing dataset by its display name.
dataset_list_response = client.list_datasets(location_path)
dataset_list = list(dataset_list_response)

# Take the first dataset whose display name matches. If none matches, fail
# with a message naming what was searched for, rather than a bare IndexError
# from indexing an empty list.
dataset = next(
    (d for d in dataset_list if d.display_name == dataset_display_name),
    None,
)
if dataset is None:
    raise LookupError(
        'No AutoML dataset with display name {!r} found under {}'.format(
            dataset_display_name, location_path))

dataset_name = dataset.name
dataset_name

## Import data from BigQuery

In [None]:
# BigQuery source URI: project, then dataset, then table. The dataset segment
# reuses `dataset_display_name`.
bq_table_path = (project_id, dataset_display_name, dataset_table_name)
dataset_bq_input_uri = 'bq://{}.{}.{}'.format(*bq_table_path)
dataset_bq_input_uri

In [None]:
# Start the import from the BigQuery table. The call returns an operation
# handle, which the next cell polls until it completes.
input_config = dict(bigquery_source=dict(input_uri=dataset_bq_input_uri))
import_data_response = client.import_data(dataset_name, input_config)

In [None]:
# Wait for the import operation to finish, polling once per second.
while not import_data_response.done():
    time.sleep(1)

# The operation is complete here, so result() does not wait. It is called so
# that a failed import raises in this cell instead of surfacing later as an
# unrelated error (e.g. an empty table-spec list).
import_data_result = import_data_response.result()

## Get column specs for Dataset

In [None]:
# Fetch the dataset's table specs and keep the first one; its column specs
# are then indexed by display name for the lookups in later cells.
list_table_specs_response = client.list_table_specs(dataset_name)
table_specs = list(list_table_specs_response)
first_table_spec = table_specs[0]
table_spec_name = first_table_spec.name

list_column_specs_response = client.list_column_specs(table_spec_name)
column_specs = dict(
    (spec.display_name, spec) for spec in list_column_specs_response)

### Example of updating column spec...

In [None]:
# Change the data type of the `has_returned` column to CATEGORY.
has_returned_spec = column_specs['has_returned']
update_column_spec_dict = dict(
    name=has_returned_spec.name,
    data_type=dict(type_code="CATEGORY"),
)
update_column_response = client.update_column_spec(update_column_spec_dict)

## Assign a training label

In [None]:
# Set the `target_monetary` column as the dataset's target column.
label_column_name = 'target_monetary'
label_column_spec = column_specs[label_column_name]

# The column spec ID is the last '/'-separated segment of its resource name.
label_column_id = label_column_spec.name.rpartition('/')[2]
print('Label column ID: {}'.format(label_column_id))

update_dataset_dict = dict(
    name=dataset_name,
    tables_dataset_metadata=dict(target_column_spec_id=label_column_id),
)
update_dataset_response = client.update_dataset(update_dataset_dict)

## Select features for training

In [None]:
# Columns that must not be used as model inputs.
excluded_columns = (
    'target_monetary',
    'customer_id',
    'monetary_btyd',
    'frequency_btyd',
    'frequency_btyd_clipped',
    'monetary_btyd_clipped',
    'target_monetary_clipped',
)

# Start from every column display name and drop the excluded ones.
# list.remove raises ValueError if a column is missing from the dataset, so a
# misspelled or absent name is caught here.
feat_list = list(column_specs)
for excluded_column in excluded_columns:
    feat_list.remove(excluded_column)

In [None]:
# Display the remaining feature column names for a visual check before training.
feat_list

## Train the model

In [None]:
# The dataset ID is the last '/'-separated segment of the dataset resource name.
dataset_id = dataset_name.rpartition('/')[2]
target_column_spec = column_specs['target_monetary']
input_feature_column_specs = [column_specs[feature] for feature in feat_list]

# Model definition passed to create_model.
model_dict = {
    'display_name': model_display_name,
    'dataset_id': dataset_id,
    'tables_model_metadata': {
        'target_column_spec': target_column_spec,
        'input_feature_column_specs': input_feature_column_specs,
        # Budget is expressed in milli node hours: 10000 == 10 node hours.
        'train_budget_milli_node_hours': 10000,
        'optimization_objective': 'MINIMIZE_MAE',
    },
}

In [None]:
# Start training, then poll every 10 seconds until the operation reports that
# it is done. The following cell reads the outcome via result().
create_model_response = client.create_model(location_path, model_dict)
while not create_model_response.done():
    time.sleep(10)

In [None]:
# Read the finished operation's result and keep the model's resource name for
# the evaluation, deployment and prediction cells.
create_model_result = create_model_response.result()
model_name = create_model_result.name
model_name

## ... or alternatively get an existing trained Model 

In [None]:
# Look up an already-trained model by its display name.
model_list_response = client.list_models(location_path)
model_list = list(model_list_response)

# Take the first model whose display name matches. If none matches, fail with
# a message naming what was searched for, rather than a bare IndexError from
# indexing an empty list.
model = next(
    (m for m in model_list if m.display_name == model_display_name),
    None,
)
if model is None:
    raise LookupError(
        'No AutoML model with display name {!r} found under {}'.format(
            model_display_name, location_path))

model_name = model.name

## Get evaluations for model

In [None]:
# Collect every evaluation listed for the model and display the first one.
model_evaluations = list(client.list_model_evaluations(model_name))
model_evaluations[0]

## Deploy the model

In [None]:
# Deploy the model and wait until the deployment operation has finished.
deploy_model_response = client.deploy_model(model_name)
api = client.transport._operations_client

if callable(getattr(deploy_model_response, 'done', None)):
    # Operation future: `done` is a method. Comparing the attribute itself
    # with `is False` is never True for a method, so such a check would skip
    # the wait entirely; call it instead.
    while not deploy_model_response.done():
        time.sleep(1)
    # Already complete, so this does not wait; it raises if deployment failed.
    deploy_model_response.result()
else:
    # Raw long-running Operation message: `done` is a boolean field, and the
    # message must be re-fetched by name to observe progress.
    while not deploy_model_response.done:
        time.sleep(1)
        deploy_model_response = api.get_operation(deploy_model_response.name)
    if deploy_model_response.HasField('error'):
        raise RuntimeError(
            'Model deployment failed: {}'.format(
                deploy_model_response.error.message))

## Run batch predictions

In [None]:
# Define input source: the CSV file in Cloud Storage holding rows to score.
batch_prediction_input_source = {
    'gcs_source': {
        'input_uris': [gcs_input_uri]
    }
}

# Define output target: the Cloud Storage prefix given for the prediction output.
batch_prediction_output_target = {
    'gcs_destination': {
        'output_uri_prefix': gcs_output_uri_prefix
    }
}

# Start the batch prediction; the call returns an operation handle.
batch_predict_response = prediction_client.batch_predict(
    model_name, batch_prediction_input_source, batch_prediction_output_target)

# Wait for the operation to finish, polling once per second.
while not batch_predict_response.done():
    time.sleep(1)

# The operation is complete here, so result() does not wait. It is called so
# that a failed batch prediction raises instead of passing silently.
batch_predict_result = batch_predict_response.result()