# Running a Kubeflow pipeline on Google Vertex AI

In [1]:
%load_ext autoreload
%autoreload 2
import kfp.dsl as dsl
import os
from dotenv import load_dotenv
from kfp.v2.dsl import Output, component, InputPath, HTML, Input, Dataset
from kfp.v2 import compiler
from google.cloud import aiplatform as aip

# Load environment variables (via python-dotenv), then read the settings this
# notebook needs. Each value is None when the variable is not set.
load_dotenv()
bucket = os.environ.get("bucket")
gcp_project = os.environ.get("gcp_project")
gcp_service_account = os.environ.get("gcp_service_account")


  from kfp.v2.dsl import Output, component, InputPath, HTML, Input, Dataset


### Check that we have access and can list all pipelines

In [2]:
# Initialise the Vertex AI SDK for our project/region, then list the existing
# pipeline jobs as a quick check that we have access.
aip.init(project=gcp_project, location="europe-west1")
all_piplines = aip.PipelineJob.list()
all_piplines



[<google.cloud.aiplatform.pipeline_jobs.PipelineJob object at 0xffff7bdb86d0> 
 resource name: projects/559395553199/locations/europe-west1/pipelineJobs/ltv-train-20230322073644,
 <google.cloud.aiplatform.pipeline_jobs.PipelineJob object at 0xffff7bdb8eb0> 
 resource name: projects/559395553199/locations/europe-west1/pipelineJobs/ltv-train-20230322072939,
 <google.cloud.aiplatform.pipeline_jobs.PipelineJob object at 0xffff7bdb8a90> 
 resource name: projects/559395553199/locations/europe-west1/pipelineJobs/ltv-train-20230322072919,
 <google.cloud.aiplatform.pipeline_jobs.PipelineJob object at 0xffff7bdb9cc0> 
 resource name: projects/559395553199/locations/europe-west1/pipelineJobs/ltv-train-20230322072628,
 <google.cloud.aiplatform.pipeline_jobs.PipelineJob object at 0xffff7bdba1d0> 
 resource name: projects/559395553199/locations/europe-west1/pipelineJobs/ltv-train-20230322072223]

# Create a Kubeflow pipeline

In [6]:
def train_model(input: float) -> float:
    """Toy training step of the demo pipeline.

    Args:
        input: Value produced by the upstream ingestion step.

    Returns:
        The input increased by 2.0.
    """
    return input + 2.0


def ingetst_data() -> float:
    """Toy ingestion step of the demo pipeline.

    Returns:
        The constant 2.0.
    """
    ingested_value = 2.0
    return ingested_value

# Wrap the plain Python ingestion and training functions with kfp's
# `component` so they can be instantiated as steps in the pipeline definition
ingest_data_component = component(ingetst_data)
train_component = component(train_model)


# Pipeline definition, registered with the Kubeflow Pipelines SDK as "ltv-train"
@dsl.pipeline(name="ltv-train")
def add_pipeline():
    """Two-step demo pipeline: ingest a value, then feed it to the training step."""
    # Step 1: run the ingestion component and keep a handle to its task.
    ingest_task = ingest_data_component()

    # Step 2: run the training component on the ingestion task's output.
    train_task = train_component(input=ingest_task.output)

    # Turn caching off for the training step so it is executed on every run.
    train_task.set_caching_options(False)

# Compile the pipeline to a JSON file for execution. The file name must match
# the `template_path` passed to `aip.PipelineJob` when the job is submitted
# ("local_run.json"); compiling to a different name would leave that cell
# pointing at a missing or stale pipeline definition.
compiler.Compiler().compile(pipeline_func=add_pipeline, package_path="local_run.json")

This code defines a simple pipeline using the Kubeflow Pipelines SDK. The pipeline consists of two components: a data ingestion component (`ingetst_data`) and a model training component (`train_model`). The `ingetst_data` component returns a constant value of 2.0, and the `train_model` component adds 2.0 to its input value. Finally, the pipeline is compiled and saved as a JSON file (`local_run.json`), which is the file submitted to Vertex AI in the next cell.

In [7]:
# Build the Vertex AI pipeline job from the compiled template, using `bucket`
# as the pipeline root.
# TODO: consider setting an explicit job_id (e.g. job_id='test') in the future.
job = aip.PipelineJob(
    display_name="First kubeflow pipeline",
    template_path="local_run.json",
    pipeline_root=bucket,
    project=gcp_project,
    location="europe-west1",
)

# Submit the job so that it runs under the configured service account.
job.submit(service_account=gcp_service_account)

Creating PipelineJob
PipelineJob created. Resource name: projects/559395553199/locations/europe-west1/pipelineJobs/ltv-train-20230322212013
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/559395553199/locations/europe-west1/pipelineJobs/ltv-train-20230322212013')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/europe-west1/pipelines/runs/ltv-train-20230322212013?project=559395553199
