In [None]:
with open("requirements.txt", "w") as f:
    f.write("kfp==1.8.9\n")
    f.write("kubeflow-katib==0.14.0\n")
    
!pip install -r requirements.txt  --upgrade --user

In [1]:
## import required pkgs

import kfp
import kfp.dsl as dsl
from kfp import components

from kubeflow.katib import ApiClient
from kubeflow.katib import V1beta1ExperimentSpec
from kubeflow.katib import V1beta1AlgorithmSpec
from kubeflow.katib import V1beta1EarlyStoppingSpec
from kubeflow.katib import V1beta1EarlyStoppingSetting
from kubeflow.katib import V1beta1ObjectiveSpec
from kubeflow.katib import V1beta1ParameterSpec
from kubeflow.katib import V1beta1FeasibleSpace
from kubeflow.katib import V1beta1TrialTemplate
from kubeflow.katib import V1beta1TrialParameterSpec

In [3]:
## define katib objective, stopping criteria, and parameter spaces

# Trial count limits passed to the Experiment spec.
parallel_trial_count = 1
max_failed_trial_count = 2
max_trial_count = 4

# Name and namespace under which the Katib Experiment is launched.
experiment_namespace = "kubeflow-user-example-com"
experiment_name = "median-stop"

# Objective: maximize `Validation-accuracy` with a goal of 0.99;
# `Train-accuracy` is listed as an additional metric.
objective = V1beta1ObjectiveSpec(
    objective_metric_name="Validation-accuracy",
    additional_metric_names=["Train-accuracy"],
    type="maximize",
    goal=0.99,
)

# Search algorithm: "random".
algorithm = V1beta1AlgorithmSpec(algorithm_name="random")

# Early stopping: the "medianstop" algorithm with `min_trials_required` set to "2".
early_stopping = V1beta1EarlyStoppingSpec(
    algorithm_settings=[
        V1beta1EarlyStoppingSetting(name="min_trials_required", value="2"),
    ],
    algorithm_name="medianstop",
)


# Search space for the Experiment: learning rate, number of layers and optimizer.
# The learning-rate range is deliberately poor so that more Trials get early stopped.
parameters = [
    V1beta1ParameterSpec(
        name="lr",
        parameter_type="double",
        feasible_space=V1beta1FeasibleSpace(min="0.01", max="0.3"),
    ),
    V1beta1ParameterSpec(
        name="num-layers",
        parameter_type="int",
        feasible_space=V1beta1FeasibleSpace(min="2", max="5"),
    ),
    V1beta1ParameterSpec(
        name="optimizer",
        parameter_type="categorical",
        feasible_space=V1beta1FeasibleSpace(list=["sgd", "adam", "ftrl"]),
    ),
]

In [4]:
## define trial templates

# Kubernetes batch Job manifest used as the Trial spec.
# The pod template carries the annotation "sidecar.istio.io/inject": "false", and the
# training command takes its hyperparameters from `${trialParameters.<name>}` placeholders.
trial_spec = {
    "apiVersion": "batch/v1",
    "kind": "Job",
    "spec": {
        "template": {
            "metadata": {
                "annotations": {"sidecar.istio.io/inject": "false"},
            },
            "spec": {
                "containers": [
                    {
                        "name": "training-container",
                        "image": "docker.io/kubeflowkatib/mxnet-mnist:v1beta1-45c5727",
                        "command": [
                            "python3",
                            "/opt/mxnet-mnist/mnist.py",
                            "--batch-size=64",
                            "--lr=${trialParameters.learningRate}",
                            "--num-layers=${trialParameters.numberLayers}",
                            "--optimizer=${trialParameters.optimizer}",
                        ],
                    },
                ],
                "restartPolicy": "Never",
            },
        },
    },
}

# Trial template: wraps the Trial spec and maps each `${trialParameters.<name>}`
# placeholder to a search-space parameter through `reference`.
# `retain=True` keeps the Trial Job's Kubernetes Pods instead of cleaning them up.
trial_template = V1beta1TrialTemplate(
    retain=True,
    primary_container_name="training-container",
    trial_parameters=[
        V1beta1TrialParameterSpec(
            name="learningRate",
            description="Learning rate for the training model",
            reference="lr",
        ),
        V1beta1TrialParameterSpec(
            name="numberLayers",
            description="Number of training model layers",
            reference="num-layers",
        ),
        V1beta1TrialParameterSpec(
            name="optimizer",
            # Description corrected from "sdg" to "sgd" to match the optimizer name
            # offered in the search space.
            description="Training model optimizer (sgd, adam or ftrl)",
            reference="optimizer",
        ),
    ],
    trial_spec=trial_spec,
)

In [5]:
## create experiment resources

# Assemble the Experiment spec from the objective, algorithms, search space,
# Trial template and Trial count limits.
experiment_spec = V1beta1ExperimentSpec(
    objective=objective,
    algorithm=algorithm,
    early_stopping=early_stopping,
    parameters=parameters,
    trial_template=trial_template,
    parallel_trial_count=parallel_trial_count,
    max_trial_count=max_trial_count,
    max_failed_trial_count=max_failed_trial_count,
)

In [6]:
## Create pipeline components

# Load the Katib launcher component definition from the Kubeflow Pipelines repository.
# NOTE(review): the URL tracks the `master` branch, so the loaded definition can change
# between runs; consider pinning it to a specific commit or tag.
katib_experiment_launcher_op = components.load_component_from_url(
    url="https://raw.githubusercontent.com/kubeflow/pipelines/master/components/kubeflow/katib-launcher/component.yaml"
)

@dsl.pipeline(
    name="Launch Katib early stopping Experiment",
    description="An example to launch Katib Experiment with early stopping",
)
def median_stop():
    """Pipeline with two steps: the Katib launcher component, then a step that echoes its output."""
    # The Experiment spec model is converted to a plain serializable object
    # before it is handed to the launcher component.
    serialized_spec = ApiClient().sanitize_for_serialization(experiment_spec)

    # Step 1: Katib launcher component.
    launcher_task = katib_experiment_launcher_op(
        experiment_name=experiment_name,
        experiment_namespace=experiment_namespace,
        experiment_spec=serialized_spec,
        experiment_timeout_minutes=60,
        delete_finished_experiment=False,
    )
    launcher_task.set_cpu_request("1")
    launcher_task.set_cpu_limit("1")

    # Step 2: container that prints the launcher's output.
    report_task = dsl.ContainerOp(
        name="best-hp",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo Best HyperParameters: %s" % launcher_task.output],
    )
    report_task.set_cpu_request("1")
    report_task.set_cpu_limit("1")

In [7]:
# Compile the pipeline function into a package file in the working directory.
kfp.compiler.Compiler().compile(
    pipeline_func=median_stop,
    package_path="helloworld-5.zip",
)