In [None]:
# User-supplied settings: fill in every value before running the notebook.

# Google Cloud project and region; both are handed to the Vertex AI SDK
# initialisation and to the BigQuery client further down.
PROJECT_ID = ""
PROJECT_REGION = ""

# Bucket name suffix; the staging bucket URI is derived as
# gs://<PROJECT_ID>_<GCS_BUCKET_NAME>.
GCS_BUCKET_NAME = ""

# Display names used to look up / create the Vertex AI dataset, the model
# (and its training job), and the batch prediction job.
VERTEX_DATASET_NAME = ""
VERTEX_MODEL_NAME = ""
VERTEX_PREDICTION_NAME = ""

# BigQuery dataset plus the tables holding training rows and rows to predict.
BQ_DATASET_NAME = ""
BQ_TRAIN_TABLE = ""
BQ_PREDICT_TABLE = ""

In [None]:
import google.cloud.aiplatform as aiplatform
import datetime

# Workflow identifier; in the visible notebook it is referenced only by the
# commented-out workflow-execution cell at the end.
MARKETING_CORRECTION_WORKFLOW_ID = "b2b-wf-prediction-workflow-marketing"

# Staging bucket URI: the bucket name is prefixed with the project id.
BUCKET_URI = f"gs://{PROJECT_ID}_{GCS_BUCKET_NAME}"

# BigQuery URIs: the dataset-level prefix receives batch prediction output,
# the table-level paths feed training and batch prediction respectively.
PREDICTION_OUTPUT_PREFIX = f"bq://{PROJECT_ID}.{BQ_DATASET_NAME}"
TRAINING_DATASET_BQ_PATH = f"bq://{PROJECT_ID}.{BQ_DATASET_NAME}.{BQ_TRAIN_TABLE}"
PREDICTION_DATASET_BQ_PATH = f"bq://{PROJECT_ID}.{BQ_DATASET_NAME}.{BQ_PREDICT_TABLE}"

In [None]:
# Configure the Vertex AI SDK once with the project, region and staging
# bucket so the SDK calls below do not need to repeat them.
aiplatform.init(
    project=PROJECT_ID,
    location=PROJECT_REGION,
    staging_bucket=BUCKET_URI,
)

In [None]:
# Reuse the time-series dataset that already carries this display name, or
# create one from the BigQuery training table when none is found.
#
# The display name is wrapped in double quotes so the list filter treats it
# as a single string literal; an unquoted value containing spaces or other
# special characters may be mis-parsed or rejected by the filter grammar.
dataset_list = aiplatform.TimeSeriesDataset.list(
    filter=f'display_name="{VERTEX_DATASET_NAME}"'
)

if dataset_list:
    print("... using existent dataset ... ")
    # If several datasets share the display name, the first one returned wins.
    dataset = dataset_list[0]
else:
    print("... creating new dataset ... ")
    dataset = aiplatform.TimeSeriesDataset.create(
        display_name=VERTEX_DATASET_NAME,
        bq_source=[TRAINING_DATASET_BQ_PATH],
    )

In [None]:
# Roles of the key columns in the training table.
time_column = "Appointment_Month"
time_series_identifier_column = "Series_Identifier"
target_column = "SWT"

# Columns passed to training as time-series attribute columns.
ATTRIBUTE_COLUMNS = [
    "District",
    "Product",
    "Technology",
    "Work_Force",
    "Work_Order_Action",
]

# Column name -> transformation type. The time and target columns are listed
# first; every attribute column is then registered as categorical, in the
# same order as ATTRIBUTE_COLUMNS. The series identifier column is not
# given a spec here.
COLUMN_SPECS = {time_column: "timestamp", target_column: "numeric"}
COLUMN_SPECS.update(dict.fromkeys(ATTRIBUTE_COLUMNS, "categorical"))

In [None]:
# Decide whether training produces a brand-new model or a new version of an
# existing one: when a model with this display name exists, its resource
# name is used as the parent model; otherwise parent_model stays None.
#
# The display name is wrapped in double quotes so the list filter treats it
# as a single string literal; an unquoted value containing spaces or other
# special characters may be mis-parsed or rejected by the filter grammar.
model_list = aiplatform.Model.list(
    filter=f'display_name="{VERTEX_MODEL_NAME}"'
)

if model_list:
    print("... using existent model ... ")
    # If several models share the display name, the first one returned wins.
    model = model_list[0]
    print(model)
    parent_model = model.resource_name
else:
    print("... training a new model ... ")
    parent_model = None

In [None]:
# Define the AutoML forecasting training job (nothing is trained until
# .run() is called). It is named after the model, applies the per-column
# transformation types from COLUMN_SPECS and optimises for RMSE.
training_job = aiplatform.AutoMLForecastingTrainingJob(
    column_specs=COLUMN_SPECS,
    optimization_objective="minimize-rmse",
    display_name=VERTEX_MODEL_NAME,
)

In [None]:
# Run the forecasting training job and keep the resulting model.
model = training_job.run(
    # Training data and column roles.
    dataset=dataset,
    time_column=time_column,
    time_series_identifier_column=time_series_identifier_column,
    target_column=target_column,
    time_series_attribute_columns=ATTRIBUTE_COLUMNS,
    available_at_forecast_columns=[time_column],
    unavailable_at_forecast_columns=[target_column],
    weight_column=None,
    predefined_split_column_name=None,
    # Monthly data: 18 periods of context and an 18-period horizon.
    data_granularity_unit="month",
    data_granularity_count=1,
    context_window=18,
    forecast_horizon=18,
    # Training budget, expressed in milli node hours.
    budget_milli_node_hours=1000,
    # Model registration: parent_model is None for a first model, otherwise
    # the resource name of the existing model found earlier. The result is
    # marked as the default version and stamped with today's date.
    model_display_name=VERTEX_MODEL_NAME,
    parent_model=parent_model,
    is_default_version=True,
    model_version_description=f"Model generated on {datetime.date.today().isoformat()}",
)

In [None]:
# Batch-predict with the trained model: read instances from the BigQuery
# prediction table and write predictions (with explanations) back under the
# BigQuery output prefix. sync=True is passed explicitly so the call is
# requested to run synchronously.
batch_prediction_job = model.batch_predict(
    job_display_name=VERTEX_PREDICTION_NAME,
    # Input
    instances_format="bigquery",
    bigquery_source=PREDICTION_DATASET_BQ_PATH,
    # Output
    predictions_format="bigquery",
    bigquery_destination_prefix=PREDICTION_OUTPUT_PREFIX,
    generate_explanation=True,
    sync=True,
)

In [None]:
from google.cloud import bigquery

# Output table reported by the finished batch prediction job.
# NOTE(review): whether this value is a bare table name or a fully qualified
# path is not visible here -- confirm it matches what the procedure expects.
batch_table = batch_prediction_job.output_info.bigquery_output_table

client = bigquery.Client(project=PROJECT_ID, location=PROJECT_REGION)

# Call the sp_persist_wf_forecasts stored procedure in the project's dataset,
# passing the batch output table as its single string argument.
query_job = client.query(
    f"""
    CALL `{PROJECT_ID}.{BQ_DATASET_NAME}.sp_persist_wf_forecasts` ( '{batch_table}' );
    """,
)

# Wait for the procedure call to complete, with a timeout of 3600.
query_job.result(timeout=3600)

In [None]:
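# Disabled step: create an execution of the workflow named by
# MARKETING_CORRECTION_WORKFLOW_ID. NOTE: if re-enabled as written, `client`
# below would rebind the BigQuery client created in the previous cell.
# from google.cloud.workflows.executions_v1 import ExecutionsClient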

# client = ExecutionsClient()
# parent = f'projects/{PROJECT_ID}/locations/{PROJECT_REGION}/workflows/{MARKETING_CORRECTION_WORKFLOW_ID}'

# execution = client.create_execution(
#     request={
#         "parent": parent,
#         "execution": {}
#     }
# )