In [1]:
# This script sets up the connection to a Kubeflow Pipelines Artifact Registry
# using the RegistryClient from the kfp.registry module. It loads the necessary
# environment variables and establishes a connection to the registry.

import os
from kfp.registry import RegistryClient
from dotenv import load_dotenv

# Load environment variables from the .env file
load_dotenv()

# Read the settings supplied through the environment / .env file.
# os.getenv() returns None for any key that is not set.
# NOTE(review): the key "kubeflow_pipelines_artifact_registyr" is misspelled;
# it is kept unchanged because it presumably matches the key in the .env file
# and renaming it only here would break the lookup — confirm before fixing.
bucket = os.getenv("bucket")
gcp_project = os.getenv("gcp_project")
gcp_service_account = os.getenv("gcp_service_account")
kubeflow_pipelines_artifact_registyr = os.getenv('kubeflow_pipelines_artifact_registyr')

# Fail fast when a value needed for the registry URL is missing or empty.
# Without this check the f-string below would silently build a host URL that
# contains the literal text "None", and the error would only surface later on
# the first request.
_missing_settings = [
    key
    for key, value in (
        ("gcp_project", gcp_project),
        ("kubeflow_pipelines_artifact_registyr", kubeflow_pipelines_artifact_registyr),
    )
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
        + ". Set them in the environment or in the .env file."
    )

# Create a RegistryClient instance pointing at the Kubeflow Pipelines Artifact Registry
client = RegistryClient(host=f"https://europe-west1-kfp.pkg.dev/{gcp_project}/{kubeflow_pipelines_artifact_registyr}")




In [2]:
# Call list_packages() on the registry client created earlier. As the last
# expression in the cell, its return value is shown as the cell output.
client.list_packages()

[{'name': 'projects/johan-kubeflow/locations/europe-west1/repositories/test-test/packages/ltv-train',
  'createTime': '2023-03-22T21:35:14.063631Z',
  'updateTime': '2023-03-22T21:40:29.948495Z'}]

# Creating a super simple pipeline

However, it is a little different from the one in the previous blog post: the arguments are defined differently, and we compile the pipeline to a YAML file instead.

In [3]:
import kfp.dsl as dsl
import os
from dotenv import load_dotenv
from kfp.v2.dsl import component
from kfp.v2 import compiler
from google.cloud import aiplatform as aip

# Stand-in for the model training step
def train_model(input: float) -> float:
    """Return ``input`` increased by a fixed offset of 2.0.

    Args:
        input: The numeric value to add the offset to.

    Returns:
        The sum ``2.0 + input``.
    """
    offset = 2.0
    return offset + input

# Stand-in for the data ingestion step
def ingetst_data(input: float) -> float:
    """Return the constant 2.0.

    Args:
        input: Accepted but not used by the body.

    Returns:
        Always ``2.0``, regardless of ``input``.
    """
    ingested_value = 2.0
    return ingested_value

# Create components for the ingestion and training functions.
# Each plain Python function is passed to `component` (imported from
# kfp.v2.dsl); the returned objects are the callables used inside the
# pipeline definition.
ingest_data_component = component(ingetst_data)
train_component = component(train_model)

# Pipeline definition: an ingestion step followed by a training step
@dsl.pipeline(name="ltv-train")
def add_pipeline():
    # Step 1: run the ingestion component with a fixed input of 3.0.
    ingest_task = ingest_data_component(input=3.0)

    # Step 2: feed the ingestion step's output into the training component.
    train_task = train_component(input=ingest_task.output)

    # Turn caching off for the training step so it is not served from cache.
    train_task.set_caching_options(False)

# Compile `add_pipeline` and write the result to "local_run.yaml"
compiler.Compiler().compile(
    pipeline_func=add_pipeline,
    package_path="local_run.yaml",
)


  from kfp.v2.dsl import component


In [5]:
# Upload the compiled pipeline through the registry client.
# The call's result is unpacked into `templateName` and `versionName`.
templateName, versionName = client.upload_pipeline(
    file_name="local_run.yaml",  # the compiled pipeline YAML file
    tags=["v1", "latest"],  # tags for identifying / versioning the upload
    extra_headers={"description": "This is an example pipeline template."},  # description passed via extra_headers
)
