In [1]:
# Configuration template: every setting starts out as an empty string.

# Google Cloud project and region.
PROJECT_ID = ""
PROJECT_REGION = ""

# Cloud Storage bucket name.
GCS_BUCKET_NAME = ""

# Vertex AI display names (dataset, model, batch prediction job).
VERTEX_DATASET_NAME = ""
VERTEX_MODEL_NAME = ""
VERTEX_PREDICTION_NAME = ""

# BigQuery dataset and table names.
BQ_DATASET_NAME = ""
BQ_TRAIN_TABLE = ""
BQ_PREDICT_TABLE = ""

In [None]:
# Experiment label; the configuration cell appends it (after an underscore)
# to the Vertex AI dataset display name.
EXPERIMENT_NAME = ""

In [16]:
# Concrete settings for this run.

# Google Cloud project, region and Cloud Storage bucket name.
PROJECT_ID = "wb-ai-acltr-tbs-3-pr-a62583"
PROJECT_REGION = "northamerica-northeast1"
GCS_BUCKET_NAME = "bkt_b2b_wf_prediction"

# Vertex AI display names. The dataset name carries the experiment label as a suffix.
# NOTE(review): when EXPERIMENT_NAME is empty the dataset name ends with a
# trailing "_" — confirm that is intended.
VERTEX_DATASET_NAME = f"b2b_wf_prediction_panorama_{EXPERIMENT_NAME}"
VERTEX_MODEL_NAME = "b2b_wf_prediction_panorama"
VERTEX_PREDICTION_NAME = "b2b_wf_prediction_batch"

# BigQuery dataset plus the tables used for training and for batch prediction input.
BQ_DATASET_NAME = "b2b_wf_prediction"
BQ_TRAIN_TABLE = "vw_wf_historical"
BQ_PREDICT_TABLE = "bq_wf_temp_predictions"

In [17]:
import google.cloud.aiplatform as aiplatform
from google.cloud import bigquery
import datetime

# All BigQuery URIs share the same "bq://<project>.<dataset>" prefix.
_BQ_DATASET_URI = f"bq://{PROJECT_ID}.{BQ_DATASET_NAME}"

# Fully qualified tables used as training source and batch prediction input.
TRAINING_DATASET_BQ_PATH = f"{_BQ_DATASET_URI}.{BQ_TRAIN_TABLE}"
PREDICTION_DATASET_BQ_PATH = f"{_BQ_DATASET_URI}.{BQ_PREDICT_TABLE}"

# Batch prediction output is addressed at dataset level (no table component).
PREDICTION_OUTPUT_PREFIX = _BQ_DATASET_URI

# The bucket name is the project id and the configured bucket name joined by "_".
BUCKET_URI = f"gs://{PROJECT_ID}_{GCS_BUCKET_NAME}"

In [18]:
# Initialise the Vertex AI SDK with the project, region and staging bucket
# configured above.
aiplatform.init(project=PROJECT_ID, location=PROJECT_REGION, staging_bucket=BUCKET_URI)

In [None]:
# BigQuery client configured with the same project and region as the Vertex AI SDK.
client = bigquery.Client(project=PROJECT_ID, location=PROJECT_REGION)

In [None]:
# TODO: create the historical table (and the dataset) for the experiment (data from 2022 until June 2024)

In [None]:
# Reuse the time-series dataset registered under VERTEX_DATASET_NAME, or create
# it from the BigQuery training table when none exists yet.
# The display name is wrapped in double quotes inside the filter: that is the
# form documented for Vertex AI list filters, and it guarantees the name is
# always read as a single string literal.
dataset_list = aiplatform.TimeSeriesDataset.list(
    filter=f'display_name="{VERTEX_DATASET_NAME}"'
)

if dataset_list:
    print("... using existent dataset ... ")
    # If several datasets share the display name, the first one returned is used.
    dataset = dataset_list[0]
else:
    print("... creating new dataset ... ")
    dataset = aiplatform.TimeSeriesDataset.create(
        display_name=VERTEX_DATASET_NAME,
        bq_source=[TRAINING_DATASET_BQ_PATH],
    )

In [None]:
# TODO: get the columns from the table (instead of hardcoding them below)

In [20]:
# Columns that play a fixed role in the forecasting problem.
time_column = "Appointment_Day"
time_series_identifier_column = "Series_Identifier"
target_column = "SWT"

# Columns passed to training as time-series attributes.
ATTRIBUTE_COLUMNS = [
    "District",
    "Product",
    "Technology",
    "Work_Force",
    "Work_Order_Action",
]

# Column name -> data type: the time column is a timestamp, the target is
# numeric and every attribute column is categorical.
COLUMN_SPECS = {
    time_column: "timestamp",
    target_column: "numeric",
    **{attribute: "categorical" for attribute in ATTRIBUTE_COLUMNS},
}

In [None]:
# Look for an already registered model with the configured display name.
# When one exists, its resource name is kept in `parent_model`, which the
# training cell passes on as the parent of the new model; otherwise
# `parent_model` is None.
# The display name is wrapped in double quotes inside the filter: that is the
# form documented for Vertex AI list filters, and it guarantees the name is
# always read as a single string literal.
model_list = aiplatform.Model.list(
    filter=f'display_name="{VERTEX_MODEL_NAME}"'
)

if model_list:
    print("... using existent model ... ")
    # If several models share the display name, the first one returned is used.
    model = model_list[0]
    print(model)
    parent_model = model.resource_name
else:
    print("... training a new model ... ")
    parent_model = None

In [12]:
# AutoML forecasting job with the "minimize-rmse" objective, using the column
# types declared in COLUMN_SPECS.
training_job = aiplatform.AutoMLForecastingTrainingJob(
    column_specs=COLUMN_SPECS,
    optimization_objective="minimize-rmse",
    display_name=VERTEX_MODEL_NAME,
)

In [None]:
# Description stored with the model version: the date this cell was run.
version_description = f"Model generated on {datetime.date.today().isoformat()}"

# Train on `dataset` and register the result under VERTEX_MODEL_NAME.
model = training_job.run(
    dataset=dataset,
    # Roles of the individual columns.
    time_column=time_column,
    target_column=target_column,
    time_series_identifier_column=time_series_identifier_column,
    time_series_attribute_columns=ATTRIBUTE_COLUMNS,
    available_at_forecast_columns=[time_column],
    unavailable_at_forecast_columns=[target_column],
    weight_column=None,
    predefined_split_column_name=None,
    # Monthly granularity: 6 steps of horizon, 18 steps of context window.
    data_granularity_unit="month",
    data_granularity_count=1,
    forecast_horizon=6,
    context_window=18,
    # Training budget in milli node hours.
    budget_milli_node_hours=1000,
    # Model registration: parent model (if any), display name and version info.
    parent_model=parent_model,
    model_display_name=VERTEX_MODEL_NAME,
    is_default_version=True,
    model_version_description=version_description,
)

In [None]:
# TODO: Create the temp table for the predictions

In [None]:
# Run a batch prediction with the trained model, reading instances from and
# writing predictions to BigQuery.
batch_prediction_job = model.batch_predict(
    job_display_name=VERTEX_PREDICTION_NAME,
    # Input: the BigQuery prediction table.
    instances_format="bigquery",
    bigquery_source=PREDICTION_DATASET_BQ_PATH,
    # Output: BigQuery, under the configured dataset prefix.
    predictions_format="bigquery",
    bigquery_destination_prefix=PREDICTION_OUTPUT_PREFIX,
    generate_explanation=True,
    sync=True,
)

In [None]:
# TODO: Clean up the temp predictions table

In [None]:


# BigQuery output table reported by the batch prediction job.
batch_table = batch_prediction_job.output_info.bigquery_output_table

# TODO: manually fill the fields based on the fields on the dataset table
# NOTE: "QUERY" below is placeholder text, not a real SQL statement.
query_text = f"""
        QUERY
    """
query_job = client.query(query_text)

# TODO: delete the batch table

# Wait for the query to complete, with a 3600-second timeout.
query_job.result(timeout=3600)

In [None]:
# TODO: calculate the metrics

In [None]:
# TODO: save the metrics to bq_wf_evaluation