In [46]:
from kfp import dsl
from kfp.dsl import component
from kfp import compiler
from google.cloud import aiplatform
from typing import List


In [47]:
# Container image passed as `base_image` to the pipeline component(s) below.
# NOTE(review): the ":latest" tag is mutable, so two runs of this notebook may
# execute different images — consider pinning a version tag or digest.
BASE_IMAGE = "europe-west3-docker.pkg.dev/bda-gameon-demo/vertex/base_cricket_container:latest"

In [48]:
@component(base_image=BASE_IMAGE)
def load_and_preprocess(
    match_ids: List[int],
    write_disp: str = "APPEND",
):
    """Load the requested matches, transform them, and save them to BigQuery.

    Declared as a pipeline component that runs on ``BASE_IMAGE``. All of the
    actual work is delegated to helpers from ``cricket_utils``, which are
    imported inside the function body.

    Args:
        match_ids: Match identifiers handed to ``load_data``.
        write_disp: Forwarded unchanged to ``save_historic_to_big_query``
            (presumably a BigQuery write disposition; confirm in cricket_utils).
    """
    from cricket_utils import load_data
    from cricket_utils import transform_cricket_data
    from cricket_utils import save_historic_to_big_query

    print("Loading data...")
    matches = load_data(match_ids)

    print("Transforming data...")
    matches = transform_cricket_data(matches)

    print("Saving data to BigQuery...")
    save_historic_to_big_query(matches, write_disp)

In [49]:
@dsl.pipeline(
    name="batch_processing",
    description="Cricket batch processing pipeline",
)
def cricket_batch_processing_pipeline(
    match_ids: List[int] = list(range(1, 30)),
    write_disp: str = "APPEND",
):
    """Pipeline with a single step: ``load_and_preprocess``.

    Args:
        match_ids: Match identifiers forwarded to the step; defaults to 1..29.
        write_disp: Forwarded to the step unchanged; defaults to ``"APPEND"``.
    """
    preprocess_task = load_and_preprocess(
        match_ids=match_ids,
        write_disp=write_disp,
    )
    preprocess_task.set_display_name("Load and Preprocess Data")

    # TODO: chain a training step after preprocessing once it exists, e.g.
    # train_step = train_model().after(preprocess_task).set_display_name('Model training')

In [51]:
# Compile the pipeline function into a pipeline definition file on disk.
# The PipelineJob in the submission cell reads this same path via
# `template_path`, so the two strings must be kept in sync.
# NOTE(review): newer KFP releases reportedly deprecate ".json" output in
# favour of ".yaml" — confirm against the installed kfp version before changing.
compiler.Compiler().compile(
    pipeline_func=cricket_batch_processing_pipeline,
    package_path="cricket_batch_processing_pipeline.json",
)

In [None]:
# Point the Vertex AI SDK at the target project and region.
aiplatform.init(project="bda-gameon-demo", location="europe-west3")

# Submit the compiled template, overriding the pipeline's default parameters
# so that match ids 1..2359 are processed in this run.
pipeline_job = aiplatform.PipelineJob(
    display_name="cricket_batch_processing_pipeline",
    template_path="cricket_batch_processing_pipeline.json",
    parameter_values=dict(
        match_ids=list(range(1, 2360)),
        write_disp="APPEND",
    ),
)

# sync=True keeps this cell blocked until the job finishes.
pipeline_job.run(sync=True)

Creating PipelineJob
PipelineJob created. Resource name: projects/248863766350/locations/europe-west3/pipelineJobs/batch-processing-20241123195259
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/248863766350/locations/europe-west3/pipelineJobs/batch-processing-20241123195259')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/europe-west3/pipelines/runs/batch-processing-20241123195259?project=248863766350
PipelineJob projects/248863766350/locations/europe-west3/pipelineJobs/batch-processing-20241123195259 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/248863766350/locations/europe-west3/pipelineJobs/batch-processing-20241123195259 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/248863766350/locations/europe-west3/pipelineJobs/batch-processing-20241123195259 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/248863766350/locations/europe-west3/pi