In [1]:
import kfp
import kfp.components as comp
import requests
import kfp.dsl as dsl


In [2]:
from mlProject import logger
import os
from src.mlProject.pipeline.stage_01_data_ingestion import DataIngestionTrainingPipeline
from src.mlProject.pipeline.stage_02_data_cleaning import DataCleaningPipeline
from src.mlProject.pipeline.stage_03_data_validation import DataValidationTrainingPipeline
from src.mlProject.pipeline.stage_04_data_transformation import DataTransformationTrainingPipeline
from src.mlProject.pipeline.stage_05_model_trainer import ModelTrainerTrainingPipeline
from src.mlProject.pipeline.stage_06_model_evaluation import ModelEvaluationTrainingPipeline

ModuleNotFoundError: No module named 'mlProject'

In [None]:
# Component factory for the data-ingestion stage: runs in a stock Python 3.8
# image with pandas and numpy pip-installed when the container starts.
# NOTE(review): create_component_from_func is documented for self-contained
# Python functions; DataIngestionTrainingPipeline appears to be a class and the
# image does not contain the project package -- confirm this works as intended.
create_step_data_ingestion = comp.create_component_from_func(
    DataIngestionTrainingPipeline,
    packages_to_install=['pandas', 'numpy'],
    base_image='python:3.8',
)

In [None]:
# Component factory for the data-cleaning stage: stock Python 3.8 image with
# pandas, numpy and scikit-learn pip-installed when the container starts.
# NOTE(review): create_component_from_func is documented for self-contained
# Python functions; DataCleaningPipeline appears to be a class and the image
# does not contain the project package -- confirm this works as intended.
create_step_data_cleaning = comp.create_component_from_func(
    DataCleaningPipeline,
    packages_to_install=['pandas', 'numpy', 'scikit-learn'],
    base_image='python:3.8',
)

In [None]:
# Component factory for the data-validation stage: stock Python 3.8 image with
# pandas, numpy and scikit-learn pip-installed when the container starts.
# NOTE(review): create_component_from_func is documented for self-contained
# Python functions; DataValidationTrainingPipeline appears to be a class and
# the image does not contain the project package -- confirm this works.
create_step_data_validation = comp.create_component_from_func(
    DataValidationTrainingPipeline,
    packages_to_install=['pandas', 'numpy', 'scikit-learn'],
    base_image='python:3.8',
)

In [None]:
# Component factory for the data-transformation stage: stock Python 3.8 image
# with pandas, numpy and scikit-learn pip-installed when the container starts.
# NOTE(review): create_component_from_func is documented for self-contained
# Python functions; DataTransformationTrainingPipeline appears to be a class
# and the image does not contain the project package -- confirm this works.
create_step_data_transformation = comp.create_component_from_func(
    DataTransformationTrainingPipeline,
    packages_to_install=['pandas', 'numpy', 'scikit-learn'],
    base_image='python:3.8',
)

In [None]:
# Component factory for the model-training stage: stock Python 3.8 image with
# pandas, numpy and scikit-learn pip-installed when the container starts.
# NOTE(review): create_component_from_func is documented for self-contained
# Python functions; ModelTrainerTrainingPipeline appears to be a class and the
# image does not contain the project package -- confirm this works as intended.
create_step_model_trainer = comp.create_component_from_func(
    ModelTrainerTrainingPipeline,
    packages_to_install=['pandas', 'numpy', 'scikit-learn'],
    base_image='python:3.8',
)

In [None]:
# Component factory for the model-evaluation stage: stock Python 3.8 image with
# pandas, numpy and scikit-learn pip-installed when the container starts.
# NOTE(review): create_component_from_func is documented for self-contained
# Python functions; ModelEvaluationTrainingPipeline appears to be a class and
# the image does not contain the project package -- confirm this works.
create_step_model_evaluation = comp.create_component_from_func(
    ModelEvaluationTrainingPipeline,
    packages_to_install=['pandas', 'numpy', 'scikit-learn'],
    base_image='python:3.8',
)

In [None]:

# Pipeline definition: six stages executed strictly one after another, every
# stage mounting the same volume.
@dsl.pipeline(
    name='Muru Logistic Engine Pipeline',
    description='A  pipeline that performs Logistic Engine pipeline',
)
# `data_path` is the single pipeline parameter; it is used as the mount path
# of the shared volume inside every step's container.
def muru_logistic_pipeline(data_path: str):
    # Volume request shared by all steps: 2Gi, named "t-vol", RWO access mode.
    shared_volume_op = dsl.VolumeOp(
        name="t-vol",
        resource_name="t-vol",
        size="2Gi",
        modes=dsl.VOLUME_MODE_RWO,
    )

    # Step factories listed in execution order.
    ordered_step_factories = (
        create_step_data_ingestion,
        create_step_data_cleaning,
        create_step_data_validation,
        create_step_data_transformation,
        create_step_model_trainer,
        create_step_model_evaluation,
    )

    # Instantiate each step with the volume mounted at `data_path`, and make it
    # wait for the step created just before it (the first step has no
    # predecessor).
    upstream_task = None
    for make_step in ordered_step_factories:
        current_task = make_step().add_pvolumes({data_path: shared_volume_op.volume})
        if upstream_task is not None:
            current_task.after(upstream_task)
        upstream_task = current_task

    # Caching is left at its default for every step. A per-task cache
    # staleness can be set on an individual task object, e.g.:
    #     some_task.execution_options.caching_strategy.max_cache_staleness = "P0D"
    
    
    

In [None]:

# Compile the pipeline function into a YAML package written to the current
# working directory.
kfp.compiler.Compiler().compile(muru_logistic_pipeline, 'MURU_Logistic_pipeline.yaml')


In [None]:
# Kubeflow Pipelines API client built with all-default arguments (no host or
# credentials are passed here); `client` is used further down to submit the run.
client = kfp.Client()

In [None]:
import datetime

# Value passed to the pipeline's `data_path` parameter (the volume mount path).
DATA_PATH = '/data'

# Read today's date exactly once so the printed value and the experiment name
# can never disagree (the original evaluated the clock twice, which could
# straddle midnight).
run_date = datetime.date.today()
print(run_date)

pipeline_func = muru_logistic_pipeline
experiment_name = 'muru_logistic_exp' + "_" + str(run_date)
run_name = pipeline_func.__name__ + ' run'
# Not used by the active submission call; kept for the commented-out variant
# that passes `namespace=` explicitly.
namespace = "kubeflow"

arguments = {"data_path": DATA_PATH}

# Write a compiled, zipped copy of the pipeline named after the experiment.
kfp.compiler.Compiler().compile(pipeline_func, '{}.zip'.format(experiment_name))

# Submit the run straight from the pipeline function, using the `client`
# created in an earlier cell.
run_result = client.create_run_from_pipeline_func(
    pipeline_func,
    experiment_name=experiment_name,
    run_name=run_name,
    arguments=arguments,
)

# from kubernetes import client as k8s_client
# pipeline_conf = kfp.dsl.PipelineConf()
# pipeline_conf.set_image_pull_secrets([k8s_client.V1ObjectReference(namespace='kubeflow', 
#                                                                                  name="secret")])
# pipeline_conf.set_image_pull_policy("IfNotPresent")
    
# Compile pipeline to generate compressed YAML definition of the pipeline.
# kfp.compiler.Compiler().compile(pipeline_func,  
#   '{}.zip'.format(experiment_name))

# Submit pipeline directly from pipeline function
# run_result = client.create_run_from_pipeline_func(pipeline_func, 
#                                                   experiment_name=experiment_name, 
#                                                   run_name=run_name, 
#                                                   arguments=arguments,
#                                                   namespace = namespace,
#                                                   pipeline_conf=pipeline_conf)







In [None]:
# Reference: https://www.youtube.com/watch?v=TBXJownrn7A