In [116]:
!python -m pip install --user --upgrade pip



In [117]:
!pip install kubeflow-katib==0.10.1



In [118]:
!pip3 install kfp --upgrade --user



In [1]:
import kfp
from kfp import dsl
import kfp.components as comp

In [2]:
from kubeflow.katib import ApiClient
from kubeflow.katib import V1beta1ExperimentSpec
from kubeflow.katib import V1beta1AlgorithmSpec
from kubeflow.katib import V1beta1ObjectiveSpec
from kubeflow.katib import V1beta1ParameterSpec
from kubeflow.katib import V1beta1FeasibleSpace
from kubeflow.katib import V1beta1TrialTemplate
from kubeflow.katib import V1beta1TrialParameterSpec
from kubeflow.katib import V1beta1MetricsCollectorSpec
from kubeflow.katib import V1beta1CollectorSpec

In [3]:
# keras katib

In [5]:
# Experiment name and namespace.
experiment_namek = "keras-heart"
experiment_namespace = "sooter"

# Trial count specification.
max_trial_count = 3
max_failed_trial_count = 1
parallel_trial_count = 1

# Objective specification.
objective=V1beta1ObjectiveSpec(
    type="maximize",
    goal= 0.99,
    objective_metric_name="accuracy"

)

# Algorithm specification.
algorithm=V1beta1AlgorithmSpec(
    algorithm_name="random",
)

collect = V1beta1MetricsCollectorSpec(
    collector = V1beta1CollectorSpec(
        kind = 'StdOut'
        )
)



# Experiment search space.
# In this example we tune learning rate, number of layer and optimizer.
# Learning rate has bad feasible space to show more early stopped Trials.
parameters=[
    V1beta1ParameterSpec(
        name="learning_rate",
        parameter_type="double",
        feasible_space=V1beta1FeasibleSpace(
            min="0.01",
            max="0.1"
        ),
    ),
    V1beta1ParameterSpec(
        name="batch_size",
        parameter_type="int",
        feasible_space=V1beta1FeasibleSpace(
            min="50",
            max="100"
        ),
    ),
    V1beta1ParameterSpec(
        name="optimizer",
        parameter_type="categorical",
        feasible_space=V1beta1FeasibleSpace(
            list=[
                "rmsprop", 
                "adam"                
            ]
        ),
    ),
]


# JSON template specification for the Trial's Worker Kubernetes Job.
trial_spec={
    "apiVersion": "batch/v1",
    "kind": "Job",
    "spec": {
        "template": {
            "metadata": {
                "annotations": {
                     "sidecar.istio.io/inject": "false"
                }
            },
            "spec": {
                "containers": [
                    {
                        "name": "tensorflow",
                        "image": "docker.io/mavencodevv/tfjob_heart:v.0.1",
                        "command": [
                            "python",
                            "/tfjobheart.py",
                            "--batch_size=${trialParameters.batchSize}",
                            "--learning_rate=${trialParameters.learningRate}",
                            "--optimizer=${trialParameters.optimizer}"
                        ]
                    }
                ],
                "restartPolicy": "Never"
            }
        }
    }
}

# Configure parameters for the Trial template.
# We set the retain parameter to "True" to not clean-up the Trial Job's Kubernetes Pods.
trial_template=V1beta1TrialTemplate(
    retain=True,
    primary_container_name="tensorflow",
    trial_parameters=[
        V1beta1TrialParameterSpec(
            name="learningRate",
            description="Learning rate for the training model",
            reference="learning_rate"
        ),
        V1beta1TrialParameterSpec(
            name="batchSize",
            description="Batch Size",
            reference="batch_size"
        ),
        V1beta1TrialParameterSpec(
            name="optimizer",
            description="Training model optimizer (sdg, adam)",
            reference="optimizer"
        ),
    ],
    trial_spec=trial_spec
)

In [6]:
experiment_spec=V1beta1ExperimentSpec(
    max_trial_count=max_trial_count,
    max_failed_trial_count=max_failed_trial_count,
    parallel_trial_count=parallel_trial_count,
    objective=objective,
    algorithm=algorithm,
    metrics_collector_spec=collect,
    parameters=parameters,
    trial_template=trial_template
)

In [7]:
# logistic regression katib

In [10]:
# Experiment name and namespace.
experiment_name1 = "lr-heart"
experiment_namespace = "sooter"

# Trial count specification.
max_trial_count = 3
max_failed_trial_count = 1
parallel_trial_count = 1

# Objective specification.
objective=V1beta1ObjectiveSpec(
    type="maximize",
    goal= 0.99,
    objective_metric_name="accuracy"

)

# Algorithm specification.
algorithm=V1beta1AlgorithmSpec(
    algorithm_name="bayesianoptimization",
)

collect = V1beta1MetricsCollectorSpec(
    collector = V1beta1CollectorSpec(
        kind = 'StdOut'
        )
)


# Experiment search space.
# In this example we tune learning rate, number of layer and optimizer.
# Learning rate has bad feasible space to show more early stopped Trials.
parameters=[
    V1beta1ParameterSpec(
        name="max_iter",
        parameter_type="int",
        feasible_space=V1beta1FeasibleSpace(
            min="100",
            max="500"
        ),
    ),
    V1beta1ParameterSpec(
        name="penalty",
        parameter_type="categorical",
        feasible_space=V1beta1FeasibleSpace(
            list=[
                "l1", 
                "l2",
                "elasticnet",
                "none"
            ]
        ),
    ),
    V1beta1ParameterSpec(
        name="solver",
        parameter_type="categorical",
        feasible_space=V1beta1FeasibleSpace(
            list=[
                "liblinear", 
                "newton-cg",
                "lbfgs",
                "sag",
                "saga"
            ]
        ),
    ),
]


# JSON template specification for the Trial's Worker Kubernetes Job.
trial_spec={
    "apiVersion": "batch/v1",
    "kind": "Job",
    "spec": {
        "template": {
            "metadata": {
                "annotations": {
                     "sidecar.istio.io/inject": "false"
                }
            },
            "spec": {
                "containers": [
                    {
                        "name": "logistic",
                        "image": "docker.io/mavencodevv/lrjobheart:v.0.1",
                        "command": [
                            "python",
                            "/lrjobheart.py",
                            "--max_iter=${trialParameters.maxiter}",
                            "--penalty=${trialParameters.penalty}",
                            "--solver=${trialParameters.solver}"
                        ]
                    }
                ],
                "restartPolicy": "Never"
            }
        }
    }
}

# Configure parameters for the Trial template.
# We set the retain parameter to "True" to not clean-up the Trial Job's Kubernetes Pods.
trial_template=V1beta1TrialTemplate(
    retain=True,
    primary_container_name="logistic",
    trial_parameters=[
        V1beta1TrialParameterSpec(
            name="maxiter",
            description="The number of iterations for solvers to converge",
            reference="max_iter"
        ),
        V1beta1TrialParameterSpec(
            name="penalty",
            description="The norm used in penalization",
            reference="penalty"
        ),
        V1beta1TrialParameterSpec(
            name="solver",
            description="Algorithm for optimization",
            reference="solver"
        ),
    ],
    trial_spec=trial_spec
)

In [11]:
experiment_spec1=V1beta1ExperimentSpec(
    max_trial_count=max_trial_count,
    max_failed_trial_count=max_failed_trial_count,
    parallel_trial_count=parallel_trial_count,
    objective=objective,
    algorithm=algorithm,
    metrics_collector_spec=collect,
    parameters=parameters,
    trial_template=trial_template
)

In [12]:
# randomforest katib

In [13]:
# Experiment name and namespace.
experiment_name2 = "rf-heart"
experiment_namespace = "sooter"

# Trial count specification.
max_trial_count = 3
max_failed_trial_count = 1
parallel_trial_count = 1

# Objective specification.
objective=V1beta1ObjectiveSpec(
    type="maximize",
    goal= 0.99,
    objective_metric_name="accuracy"

)

# Algorithm specification.
algorithm=V1beta1AlgorithmSpec(
    algorithm_name="bayesianoptimization",
)

collect = V1beta1MetricsCollectorSpec(
    collector = V1beta1CollectorSpec(
        kind = 'StdOut'
        )
)


# Experiment search space.
# In this example we tune learning rate, number of layer and optimizer.
# Learning rate has bad feasible space to show more early stopped Trials.
parameters=[
    V1beta1ParameterSpec(
        name="n_estimators",
        parameter_type="int",
        feasible_space=V1beta1FeasibleSpace(
            min="100",
            max="200"
        ),
    ),
    V1beta1ParameterSpec(
        name="min_samples_leaf",
        parameter_type="int",
        feasible_space=V1beta1FeasibleSpace(
            min="2",
            max="5"
        ),
    ),
    V1beta1ParameterSpec(
        name="min_samples_split",
        parameter_type="int",
        feasible_space=V1beta1FeasibleSpace(
            min="1",
            max="2"        ),
    ),
]


# JSON template specification for the Trial's Worker Kubernetes Job.
trial_spec={
    "apiVersion": "batch/v1",
    "kind": "Job",
    "spec": {
        "template": {
            "metadata": {
                "annotations": {
                     "sidecar.istio.io/inject": "false"
                }
            },
            "spec": {
                "containers": [
                    {
                        "name": "randomforest",
                        "image": "docker.io/mavencodevv/rfjobheart:v.0.1",
                        "command": [
                            "python",
                            "/rfjobheart.py",
                            "--n_estimators=${trialParameters.nEstimators}",
                            "--min_samples_leaf=${trialParameters.minsamplesleaf}",
                            "--min_samples_split=${trialParameters.minsamplessplit}"
                        ]
                    }
                ],
                "restartPolicy": "Never"
            }
        }
    }
}

# Configure parameters for the Trial template.
# We set the retain parameter to "True" to not clean-up the Trial Job's Kubernetes Pods.
trial_template=V1beta1TrialTemplate(
    retain=True,
    primary_container_name="randomforest",
    trial_parameters=[
        V1beta1TrialParameterSpec(
            name="nEstimators",
            description="The number of iterations for solvers to converge",
            reference="n_estimators"
        ),
        V1beta1TrialParameterSpec(
            name="minsamplesleaf",
            description="The minimum number of samples required to be at a leaf node",
            reference="min_samples_leaf"
        ),
        V1beta1TrialParameterSpec(
            name="minsamplessplit",
            description="The minimum number of samples required to split an internal node",
            reference="min_samples_split"
        ),
    ],
    trial_spec=trial_spec
)

In [14]:
experiment_spec2=V1beta1ExperimentSpec(
    max_trial_count=max_trial_count,
    max_failed_trial_count=max_failed_trial_count,
    parallel_trial_count=parallel_trial_count,
    objective=objective,
    algorithm=algorithm,
    metrics_collector_spec=collect,
    parameters=parameters,
    trial_template=trial_template
)

In [15]:
# knn katib

In [16]:
# Experiment name and namespace.
experiment_name3 = "knn-heart"
experiment_namespace = "sooter"

# Trial count specification.
max_trial_count = 3
max_failed_trial_count = 1
parallel_trial_count = 1

# Objective specification.
objective=V1beta1ObjectiveSpec(
    type="maximize",
    goal= 0.99,
    objective_metric_name="accuracy"

)

# Algorithm specification.
algorithm=V1beta1AlgorithmSpec(
    algorithm_name="bayesianoptimization",
)

collect = V1beta1MetricsCollectorSpec(
    collector = V1beta1CollectorSpec(
        kind = 'StdOut'
        )
)


# Experiment search space.
# In this example we tune learning rate, number of layer and optimizer.
# Learning rate has bad feasible space to show more early stopped Trials.
parameters=[
    V1beta1ParameterSpec(
        name="n_neighbors",
        parameter_type="int",
        feasible_space=V1beta1FeasibleSpace(
            min="5",
            max="10"
        ),
    ),
    V1beta1ParameterSpec(
        name="weights",
        parameter_type="categorical",
        feasible_space=V1beta1FeasibleSpace(
            list=[
                "uniform", 
                "distance"
            ]
        ),
    ),
    V1beta1ParameterSpec(
        name="algorithm",
        parameter_type="categorical",
        feasible_space=V1beta1FeasibleSpace(
            list=[
                "auto", 
                "ball_tree",
                "kd_tree",
                "brute"
            ]
        ),
    ),
]

# JSON template specification for the Trial's Worker Kubernetes Job.
trial_spec={
    "apiVersion": "batch/v1",
    "kind": "Job",
    "spec": {
        "template": {
            "metadata": {
                "annotations": {
                     "sidecar.istio.io/inject": "false"
                }
            },
            "spec": {
                "containers": [
                    {
                        "name": "knn",
                        "image": "docker.io/mavencodevv/knnjobheart:v.0.1",
                        "command": [
                            "python",
                            "/knnjobheart.py",
                            "--n_neighbors=${trialParameters.nneighbors}",
                            "--weights=${trialParameters.weights}",
                            "--algorithm=${trialParameters.algorithm}"
                        ]
                    }
                ],
                "restartPolicy": "Never"
            }
        }
    }
}

# Configure parameters for the Trial template.
# We set the retain parameter to "True" to not clean-up the Trial Job's Kubernetes Pods.
trial_template=V1beta1TrialTemplate(
    retain=True,
    primary_container_name="knn",
    trial_parameters=[
        V1beta1TrialParameterSpec(
            name="nneighbors",
            description="Number of neighbors to use by default for kneighbors queries",
            reference="n_neighbors"
        ),
        V1beta1TrialParameterSpec(
            name="weights",
            description="weight function used in prediction",
            reference="weights"
        ),
        V1beta1TrialParameterSpec(
            name="algorithm",
            description="Algorithm used to compute the nearest neighbors",
            reference="algorithm"
        ),
    ],
    trial_spec=trial_spec
)

In [17]:
experiment_spec3=V1beta1ExperimentSpec(
    max_trial_count=max_trial_count,
    max_failed_trial_count=max_failed_trial_count,
    parallel_trial_count=parallel_trial_count,
    objective=objective,
    algorithm=algorithm,
    metrics_collector_spec=collect,
    parameters=parameters,
    trial_template=trial_template
)

In [18]:
#catboost katib

In [19]:
# Experiment name and namespace.
experiment_name4 = "cat-heart"
experiment_namespace = "sooter"

# Trial count specification.
max_trial_count = 3
max_failed_trial_count = 1
parallel_trial_count = 1

# Objective specification.
objective=V1beta1ObjectiveSpec(
    type="maximize",
    goal= 0.99,
    objective_metric_name="accuracy"

)

# Algorithm specification.
algorithm=V1beta1AlgorithmSpec(
    algorithm_name="bayesianoptimization",
)

collect = V1beta1MetricsCollectorSpec(
    collector = V1beta1CollectorSpec(
        kind = 'StdOut'
        )
)


# Experiment search space.
# In this example we tune learning rate, number of layer and optimizer.
# Learning rate has bad feasible space to show more early stopped Trials.
parameters=[
    V1beta1ParameterSpec(
        name="iterations",
        parameter_type="int",
        feasible_space=V1beta1FeasibleSpace(
            min="100",
            max="500"
        ),
    ),
    V1beta1ParameterSpec(
        name="depth",
        parameter_type="int",
        feasible_space=V1beta1FeasibleSpace(
            min="6",
            max="10"

        ),
    ),
    V1beta1ParameterSpec(
        name="l2_leaf_reg",
        parameter_type="double",
        feasible_space=V1beta1FeasibleSpace(
            min="3.0",
            max="6.0"
        ),
    ),
]

# JSON template specification for the Trial's Worker Kubernetes Job.
trial_spec={
    "apiVersion": "batch/v1",
    "kind": "Job",
    "spec": {
        "template": {
            "metadata": {
                "annotations": {
                     "sidecar.istio.io/inject": "false"
                }
            },
            "spec": {
                "containers": [
                    {
                        "name": "catboost",
                        "image": "docker.io/mavencodevv/catjobheart:v.0.1",
                        "command": [
                            "python",
                            "/catjobheart.py",
                            "--iterations=${trialParameters.iterations}",
                            "--depth=${trialParameters.depth}",
                            "--l2_leaf_reg=${trialParameters.l2leafreg}"
                        ]
                    }
                ],
                "restartPolicy": "Never"
            }
        }
    }
}

# Configure parameters for the Trial template.
# We set the retain parameter to "True" to not clean-up the Trial Job's Kubernetes Pods.
trial_template=V1beta1TrialTemplate(
    retain=True,
    primary_container_name="catboost",
    trial_parameters=[
        V1beta1TrialParameterSpec(
            name="iterations",
            description="Max count of trees",
            reference="iterations"
        ),
        V1beta1TrialParameterSpec(
            name="depth",
            description="Depth of a tree",
            reference="depth"
        ),
        V1beta1TrialParameterSpec(
            name="l2leafreg",
            description="Coefficient at the L2 regularization term of the cost function",
            reference="l2_leaf_reg"
        ),
    ],
    trial_spec=trial_spec
)

In [20]:
experiment_spec4=V1beta1ExperimentSpec(
    max_trial_count=max_trial_count,
    max_failed_trial_count=max_failed_trial_count,
    parallel_trial_count=parallel_trial_count,
    objective=objective,
    algorithm=algorithm,
    metrics_collector_spec=collect,
    parameters=parameters,
    trial_template=trial_template
)

In [21]:
# svc katib

In [22]:
# Experiment name and namespace.
experiment_name5 = "svc-heart"
experiment_namespace = "sooter"

# Trial count specification.
max_trial_count = 3
max_failed_trial_count = 1
parallel_trial_count = 1

# Objective specification.
objective=V1beta1ObjectiveSpec(
    type="maximize",
    goal= 0.99,
    objective_metric_name="accuracy"

)

# Algorithm specification.
algorithm=V1beta1AlgorithmSpec(
    algorithm_name="bayesianoptimization",
)

collect = V1beta1MetricsCollectorSpec(
    collector = V1beta1CollectorSpec(
        kind = 'StdOut'
        )
)


# Experiment search space.
# In this example we tune learning rate, number of layer and optimizer.
# Learning rate has bad feasible space to show more early stopped Trials.
parameters=[
    V1beta1ParameterSpec(
        name="C",
        parameter_type="double",
        feasible_space=V1beta1FeasibleSpace(
            min="1.0",
            max="5.0"
        ),
    ),
    V1beta1ParameterSpec(
        name="kernel",
        parameter_type="categorical",
        feasible_space=V1beta1FeasibleSpace(
            list=[
                "rbf", 
                "linear",
                "poly",
                "sigmoid",
                "precomputed"
            ]
        ),
    ),
    V1beta1ParameterSpec(
        name="max_iter",
        parameter_type="int",
        feasible_space=V1beta1FeasibleSpace(
            min="1",
            max="10"
        ),
    ),
]

# JSON template specification for the Trial's Worker Kubernetes Job.
trial_spec={
    "apiVersion": "batch/v1",
    "kind": "Job",
    "spec": {
        "template": {
            "metadata": {
                "annotations": {
                     "sidecar.istio.io/inject": "false"
                }
            },
            "spec": {
                "containers": [
                    {
                        "name": "svc",
                        "image": "docker.io/mavencodevv/svcjobheart:v.0.1",
                        "command": [
                            "python",
                            "/svcjobheart.py",
                            "--C=${trialParameters.C}",
                            "--kernel=${trialParameters.kernel}",
                            "--max_iter=${trialParameters.maxiter}"
                        ]
                    }
                ],
                "restartPolicy": "Never"
            }
        }
    }
}

# Configure parameters for the Trial template.
# We set the retain parameter to "True" to not clean-up the Trial Job's Kubernetes Pods.
trial_template=V1beta1TrialTemplate(
    retain=True,
    primary_container_name="svc",
    trial_parameters=[
        V1beta1TrialParameterSpec(
            name="C",
            description="Regularization Parameter",
            reference="C"
        ),
        V1beta1TrialParameterSpec(
            name="kernel",
            description="Specifies the kernel type to be used in the algorithm",
            reference="kernel"
        ),
        V1beta1TrialParameterSpec(
            name="maxiter",
            description="Hard limit on iterations within solver, or -1 for no limit",
            reference="max_iter"
        ),
    ],
    trial_spec=trial_spec
)

In [23]:
experiment_spec5=V1beta1ExperimentSpec(
    max_trial_count=max_trial_count,
    max_failed_trial_count=max_failed_trial_count,
    parallel_trial_count=parallel_trial_count,
    objective=objective,
    algorithm=algorithm,
    metrics_collector_spec=collect,
    parameters=parameters,
    trial_template=trial_template
)

In [24]:
katib_experiment_launcher_op = comp.load_component_from_url(
    "https://raw.githubusercontent.com/kubeflow/pipelines/master/components/kubeflow/katib-launcher/component.yaml")

In [25]:
def load_data_op():
    return dsl.ContainerOp(
        name = 'Load Data',
        image = 'mavencodevv/load_heart:v.0.1',
        arguments = [],
        file_outputs={
            'data': '/load_data/data'
        }      
    )

In [26]:
def stat_op(data):
  return dsl.ContainerOp(
        name = 'Dataset Statistics',
        image = 'mavencodevv/stat_heart:v.0.18',
        arguments = ['--data', data],
        file_outputs={
            'stats': '/statgen/stats',
            'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'
        }
    )

In [27]:
def schema_op(stats):
  return dsl.ContainerOp(
        name = 'Dataset Schema',
        image = 'mavencodevv/schema_heart:v.0.2',
        arguments = ['--stats', stats],
        file_outputs={
            'schema': '/schema/schema',
            'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'
        }      
    )

In [28]:
def val_op(stats, schema):
  return dsl.ContainerOp(
        name = 'Dataset Validation',
        image = 'mavencodevv/valid_heart:v.0.3',
        arguments = ['--stats', stats, '--schema', schema],
        file_outputs={
            'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'}
    )

In [29]:
def preprocess_op(data):
    return dsl.ContainerOp(
        name = 'Preprocess Data',
        image = 'mavencodevv/preprocess_heart:v.0.1',
        arguments = ['--data', data],
        file_outputs={
            'clean_data': '/preprocess/clean_data'   
        }
    )

In [30]:
def rf_op(clean_data):
    return dsl.ContainerOp(
        name = 'Randomforest',
        image = 'mavencodevv/rf_heart:v.0.1',
        arguments = ['--clean_data', clean_data
            ],
        file_outputs={
            'rf_metrics': '/random/rf_metrics' 
        }
    )

In [31]:
def lr_op(clean_data):
    return dsl.ContainerOp(
        name = 'Logistic Regression',
        image = 'mavencodevv/logistic_heart:v.0.1',
        arguments = ['--clean_data', clean_data
            ],
        file_outputs={
            'lr_metrics': '/logistic/lr_metrics' 
        }
    )

In [32]:
def keras_op(clean_data):
    return dsl.ContainerOp(
        name = 'Keras Model',
        image = 'mavencodevv/km_heart:v.0.1',
        arguments = ['--clean_data', clean_data
            ],
        file_outputs={
            'keras_metrics': '/keras_model/keras_metrics' 
        }
    )

In [33]:
def cb_op(clean_data):
    return dsl.ContainerOp(
        name = 'CatBoost',
        image = 'mavencodevv/cb_heart:v.0.1',
        arguments = ['--clean_data', clean_data
            ],
        file_outputs={
            'cb_metrics': '/cb/cb_metrics' 
        }
    )

In [34]:
def knn_op(clean_data):
    return dsl.ContainerOp(
        name = 'KNN model',
        image = 'mavencodevv/knn_heart:v.0.1',
        arguments = ['--clean_data', clean_data
            ],
        file_outputs={
            'knn_metrics': '/knn/knn_metrics' 
        }
    )

In [35]:
def sv_op(clean_data):
    return dsl.ContainerOp(
        name = 'SVC model',
        image = 'mavencodevv/sv_heart:v.0.1',
        arguments = ['--clean_data', clean_data
            ],
        file_outputs={
            'sv_metrics': '/sv/sv_metrics' 
        }
    )

In [36]:
def eval_op(rf_metrics,keras_metrics,lr_metrics,sv_metrics, knn_metrics,cb_metrics):
    return dsl.ContainerOp(
        name = 'Model Evaluation',
        image = 'mavencodevv/eval_heart:v.0.3',
        arguments = ['--rf_metrics', rf_metrics,
                     '--keras_metrics', keras_metrics,
                     '--lr_metrics', lr_metrics,
                     '--sv_metrics', sv_metrics,
                     '--knn_metrics', knn_metrics,
                     '--cb_metrics', cb_metrics
                     ],
                   
        file_outputs={
            'best_model': '/eval/best_model' 
        }    
    )

In [37]:
def push_op(bucket_name,credentials,best_model):
  return dsl.ContainerOp(
      name = 'Export Model to Cloud Storage',
      image = 'mavencodevv/push_heart',
      arguments = ['--bucket_name', bucket_name,
                   '--credentials', credentials,
                   '--best_model',best_model
                   ]
  )

In [39]:
@dsl.pipeline(
    name='Heart Attack Prediction',
   description='An ML reusable pipeline that predicts the chances of a patient having heart attack'
)

# Define parameters to be fed into pipeline
def heart():#bucket_name, credentials): 

        # Katib launcher component.
    # Experiment Spec should be serialized to a valid Kubernetes object.
    op = katib_experiment_launcher_op(
        experiment_name=experiment_namek,
        experiment_namespace=experiment_namespace,
        experiment_spec=ApiClient().sanitize_for_serialization(experiment_spec),
        experiment_timeout_minutes=60,
        delete_finished_experiment=False)

    # Output container to print the results.
    op_out = dsl.ContainerOp(
        name="best-hp",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo Best HyperParameters: %s" % op.output],
    )

    op1 = katib_experiment_launcher_op(
        experiment_name=experiment_name1,
        experiment_namespace=experiment_namespace,
        experiment_spec=ApiClient().sanitize_for_serialization(experiment_spec1),
        experiment_timeout_minutes=60,
        delete_finished_experiment=False)

    # Output container to print the results.
    op_out1 = dsl.ContainerOp(
        name="best-hp",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo Best HyperParameters: %s" % op1.output],
    )

    op2 = katib_experiment_launcher_op(
        experiment_name=experiment_name2,
        experiment_namespace=experiment_namespace,
        experiment_spec=ApiClient().sanitize_for_serialization(experiment_spec2),
        experiment_timeout_minutes=60,
        delete_finished_experiment=False)

    # Output container to print the results.
    op_out2 = dsl.ContainerOp(
        name="best-hp",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo Best HyperParameters: %s" % op2.output],
    )

    op3 = katib_experiment_launcher_op(
        experiment_name=experiment_name3,
        experiment_namespace=experiment_namespace,
        experiment_spec=ApiClient().sanitize_for_serialization(experiment_spec3),
        experiment_timeout_minutes=60,
        delete_finished_experiment=False)

    # Output container to print the results.
    op_out3 = dsl.ContainerOp(
        name="best-hp",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo Best HyperParameters: %s" % op3.output],
    )

    op4 = katib_experiment_launcher_op(
        experiment_name=experiment_name4,
        experiment_namespace=experiment_namespace,
        experiment_spec=ApiClient().sanitize_for_serialization(experiment_spec4),
        experiment_timeout_minutes=60,
        delete_finished_experiment=False)

    # Output container to print the results.
    op_out4 = dsl.ContainerOp(
        name="best-hp",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo Best HyperParameters: %s" % op4.output],
    )

    op5 = katib_experiment_launcher_op(
        experiment_name=experiment_name5,
        experiment_namespace=experiment_namespace,
        experiment_spec=ApiClient().sanitize_for_serialization(experiment_spec5),
        experiment_timeout_minutes=60,
        delete_finished_experiment=False)

    # Output container to print the results.
    op_out5 = dsl.ContainerOp(
        name="best-hp",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo Best HyperParameters: %s" % op5.output],
    )

    
'''    _load_data_op = load_data_op()

    _stat_op  = stat_op(
        dsl.InputArgumentPath(_load_data_op.outputs['data'])).after(_load_data_op)

    _schema_op = schema_op(
      dsl.InputArgumentPath(_stat_op.outputs['stats'])).after(_stat_op)


    _val_op = val_op(
      dsl.InputArgumentPath(_stat_op.outputs['stats']),
      dsl.InputArgumentPath(_schema_op.outputs['schema'])).after(_stat_op,_schema_op)

    _preprocess_op = preprocess_op(
        dsl.InputArgumentPath(_load_data_op.outputs['data'])).after(_load_data_op,_val_op)

    _rf_op = rf_op(
        dsl.InputArgumentPath(_preprocess_op.outputs['clean_data'])).after(_preprocess_op)

    _keras_op = keras_op(
        dsl.InputArgumentPath(_preprocess_op.outputs['clean_data'])).after(_preprocess_op)
    _lr_op = lr_op(
        dsl.InputArgumentPath(_preprocess_op.outputs['clean_data'])).after(_preprocess_op)
    _cb_op = cb_op(
        dsl.InputArgumentPath(_preprocess_op.outputs['clean_data'])).after(_preprocess_op)
    _knn_op = knn_op(
        dsl.InputArgumentPath(_preprocess_op.outputs['clean_data'])).after(_preprocess_op)

    _sv_op = sv_op(
        dsl.InputArgumentPath(_preprocess_op.outputs['clean_data'])).after(_preprocess_op)

    _eval_op = eval_op(
        dsl.InputArgumentPath(_rf_op.outputs['rf_metrics']),
        dsl.InputArgumentPath(_keras_op.outputs['keras_metrics']),
        dsl.InputArgumentPath(_lr_op.outputs['lr_metrics']),
        dsl.InputArgumentPath(_cb_op.outputs['cb_metrics']),
        dsl.InputArgumentPath(_knn_op.outputs['knn_metrics']),
        dsl.InputArgumentPath(_sv_op.outputs['sv_metrics'])).after(_rf_op,_keras_op,_lr_op,_cb_op,_knn_op,_sv_op)

    _push_op = push_op(bucket_name, credentials,
        dsl.InputArgumentPath(_eval_op.outputs['best_model'])).after(_eval_op)   
'''

"    _load_data_op = load_data_op()\n\n    _stat_op  = stat_op(\n        dsl.InputArgumentPath(_load_data_op.outputs['data'])).after(_load_data_op)\n\n    _schema_op = schema_op(\n      dsl.InputArgumentPath(_stat_op.outputs['stats'])).after(_stat_op)\n\n\n    _val_op = val_op(\n      dsl.InputArgumentPath(_stat_op.outputs['stats']),\n      dsl.InputArgumentPath(_schema_op.outputs['schema'])).after(_stat_op,_schema_op)\n\n    _preprocess_op = preprocess_op(\n        dsl.InputArgumentPath(_load_data_op.outputs['data'])).after(_load_data_op,_val_op)\n\n    _rf_op = rf_op(\n        dsl.InputArgumentPath(_preprocess_op.outputs['clean_data'])).after(_preprocess_op)\n\n    _keras_op = keras_op(\n        dsl.InputArgumentPath(_preprocess_op.outputs['clean_data'])).after(_preprocess_op)\n    _lr_op = lr_op(\n        dsl.InputArgumentPath(_preprocess_op.outputs['clean_data'])).after(_preprocess_op)\n    _cb_op = cb_op(\n        dsl.InputArgumentPath(_preprocess_op.outputs['clean_data'])).after(

In [40]:
# Compile pipeline to generate compressed YAML definition of the pipeline.
experiment_name = 'heart'

kfp.compiler.Compiler().compile(heart, '{}.yaml'.format(experiment_name))



In [73]:
client = kfp.Client()
#client.create_run_from_pipeline_func(heart_pipeline, arguments={})

ERROR:root:Failed to get healthz info attempt 1 of 5.
Traceback (most recent call last):
  File "/home/jovyan/.local/lib/python3.6/site-packages/kfp/_client.py", line 312, in get_kfp_healthz
    response = self._healthz_api.get_healthz()
  File "/home/jovyan/.local/lib/python3.6/site-packages/kfp_server_api/api/healthz_service_api.py", line 63, in get_healthz
    return self.get_healthz_with_http_info(**kwargs)  # noqa: E501
  File "/home/jovyan/.local/lib/python3.6/site-packages/kfp_server_api/api/healthz_service_api.py", line 148, in get_healthz_with_http_info
    collection_formats=collection_formats)
  File "/home/jovyan/.local/lib/python3.6/site-packages/kfp_server_api/api_client.py", line 369, in call_api
    _preload_content, _request_timeout, _host)
  File "/home/jovyan/.local/lib/python3.6/site-packages/kfp_server_api/api_client.py", line 188, in __call_api
    raise e
  File "/home/jovyan/.local/lib/python3.6/site-packages/kfp_server_api/api_client.py", line 185, in __call_ap

TimeoutError: Failed getting healthz endpoint after 5 attempts.