In [1]:
#!python -m pip install --user --upgrade pip

In [2]:
#!pip install kubeflow-katib==0.10.1

In [3]:
#!pip3 install kfp --upgrade --user

In [4]:
import kfp
from kfp import dsl
import kfp.components as comp

In [5]:
from kubeflow.katib import ApiClient
from kubeflow.katib import V1beta1ExperimentSpec
from kubeflow.katib import V1beta1AlgorithmSpec
from kubeflow.katib import V1beta1ObjectiveSpec
from kubeflow.katib import V1beta1ParameterSpec
from kubeflow.katib import V1beta1FeasibleSpace
from kubeflow.katib import V1beta1TrialTemplate
from kubeflow.katib import V1beta1TrialParameterSpec
from kubeflow.katib import V1beta1MetricsCollectorSpec
from kubeflow.katib import V1beta1CollectorSpec

In [6]:
# Katib Experiment identity: the name it is submitted under and its namespace.
experiment_namespace = "sooter"
experiment_namek = "airline1"

# Trial budget used by the Experiment spec: total number of trials, number of
# trials run in parallel, and number of failed trials allowed.
max_trial_count = 30
parallel_trial_count = 3
max_failed_trial_count = 3

# Objective: maximize the "accuracy" metric, with 0.99 as the goal value.
objective = V1beta1ObjectiveSpec(
    objective_metric_name="accuracy",
    type="maximize",
    goal=0.99,
)

# Search algorithm: Bayesian optimization.
# ("random" was the previously used alternative.)
algorithm = V1beta1AlgorithmSpec(algorithm_name="bayesianoptimization")

# Metrics collector of kind "StdOut".
# NOTE(review): presumably Katib then reads the objective metric from the
# training container's standard output — confirm the training script prints it.
collect = V1beta1MetricsCollectorSpec(
    collector=V1beta1CollectorSpec(kind="StdOut"),
)



# Experiment search space.
# Three hyperparameters are tuned: learning rate, batch size and optimizer.
# Each row below is (name, parameter type, feasible-space keyword arguments);
# numeric bounds are passed as strings.
parameters = [
    V1beta1ParameterSpec(
        name=param_name,
        parameter_type=param_type,
        feasible_space=V1beta1FeasibleSpace(**feasible),
    )
    for param_name, param_type, feasible in (
        ("learning_rate", "double", {"min": "0.001", "max": "0.01"}),
        ("batch_size", "int", {"min": "80", "max": "200"}),
        ("optimizer", "categorical", {"list": ["adam", "sgd"]}),
    )
]


# Kubernetes batch/v1 Job manifest used as the template for each Trial's worker.
# The ${trialParameters.<name>} placeholders in the command refer to the
# trial parameter names declared in the trial template.
trial_spec = {
    "apiVersion": "batch/v1",
    "kind": "Job",
    "spec": {
        "template": {
            # Opt the trial pod out of Istio sidecar injection.
            "metadata": {"annotations": {"sidecar.istio.io/inject": "false"}},
            "spec": {
                "containers": [
                    {
                        "name": "tensorflow",
                        "image": "docker.io/mavencodevv/tfjob_airline:v.0.3",
                        # Training entry point followed by one flag per tuned hyperparameter.
                        "command": [
                            "python", "/tfjobairline.py",
                            "--batch_size=${trialParameters.batchSize}",
                            "--learning_rate=${trialParameters.learningRate}",
                            "--optimizer=${trialParameters.optimizer}",
                        ],
                    },
                ],
                "restartPolicy": "Never",
            },
        },
    },
}

# Trial template: ties the search-space parameters to the placeholders in `trial_spec`.
# Each V1beta1TrialParameterSpec gives a template-side `name` (used in the Job
# command as ${trialParameters.<name>}) and a `reference` to the search-space
# parameter it takes its value from.
# `retain=True` keeps the Trial Job's Kubernetes Pods instead of cleaning them up.
trial_template = V1beta1TrialTemplate(
    retain=True,
    # Name of the training container inside the Job defined by `trial_spec`.
    primary_container_name="tensorflow",
    trial_parameters=[
        V1beta1TrialParameterSpec(
            name="learningRate",
            description="Learning rate for the training model",
            reference="learning_rate",
        ),
        V1beta1TrialParameterSpec(
            name="batchSize",
            description="Batch Size",
            reference="batch_size",
        ),
        V1beta1TrialParameterSpec(
            name="optimizer",
            # Description lists the same values as the search space ("sgd", not "sdg").
            description="Training model optimizer (sgd, adam)",
            reference="optimizer",
        ),
    ],
    trial_spec=trial_spec,
)

In [7]:
# Assemble the full Experiment spec from the notebook-level pieces.
experiment_spec = V1beta1ExperimentSpec(
    # What to optimize and how to search.
    objective=objective,
    algorithm=algorithm,
    parameters=parameters,
    # How each trial runs and how its metrics are collected.
    trial_template=trial_template,
    metrics_collector_spec=collect,
    # Trial budget.
    max_trial_count=max_trial_count,
    parallel_trial_count=parallel_trial_count,
    max_failed_trial_count=max_failed_trial_count,
)

In [8]:
# Load the Katib launcher component definition over the network.
# NOTE(review): the URL tracks the `master` branch, so the component that gets
# loaded can change between runs — consider pinning to a tag or commit.
katib_experiment_launcher_op = comp.load_component_from_url(
    url="https://raw.githubusercontent.com/kubeflow/pipelines/master/components/kubeflow/katib-launcher/component.yaml"
)

In [9]:
@dsl.pipeline(
    name='Airline Customer Satisfaction Prediction',
    description='An ML reusable pipeline that predicts airline customer satisfaction'
)
def airlinee():
    """Launch the Katib Experiment and echo the launcher's output.

    The pipeline takes no parameters. It reads the notebook-level
    ``experiment_namek``, ``experiment_namespace`` and ``experiment_spec``
    and uses the ``katib_experiment_launcher_op`` component.
    """
    # Katib launcher component.
    # The Experiment spec is serialized into plain Python/JSON types before
    # being handed to the component.
    katib_op = katib_experiment_launcher_op(
        experiment_name=experiment_namek,
        experiment_namespace=experiment_namespace,
        experiment_spec=ApiClient().sanitize_for_serialization(experiment_spec),
        experiment_timeout_minutes=60,
        delete_finished_experiment=False)

    # Output container that prints the launcher's output.
    # The op object is never referenced afterwards, so it is not bound to a name.
    dsl.ContainerOp(
        name="best-hp",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo Best HyperParameters: %s" % katib_op.output],
    )

In [11]:
# Compile the pipeline function into a YAML package file named
# "<experiment_name>.yaml" in the current working directory.
experiment_name = 'airlinee'
kfp.compiler.Compiler().compile(airlinee, f'{experiment_name}.yaml')

