In [1]:
!pip install kfp==1.8.18
!pip install kubeflow-katib==0.14.0

import kfp
import kfp.dsl as dsl
from kfp import components

from kubeflow.katib import KatibClient
from kubeflow.katib import ApiClient
from kubeflow.katib import V1beta1ExperimentSpec
from kubeflow.katib import V1beta1AlgorithmSpec
from kubeflow.katib import V1beta1ObjectiveSpec
from kubeflow.katib import V1beta1ParameterSpec
from kubeflow.katib import V1beta1FeasibleSpace
from kubeflow.katib import V1beta1TrialTemplate
from kubeflow.katib import V1beta1TrialParameterSpec
from kubeflow.katib import V1beta1Experiment
from kubernetes.client import V1ObjectMeta

Collecting kubeflow-katib==0.14.0
  Using cached kubeflow_katib-0.14.0-py3-none-any.whl (89 kB)
Installing collected packages: kubeflow-katib
  Attempting uninstall: kubeflow-katib
    Found existing installation: kubeflow-katib 0.13.0
    Uninstalling kubeflow-katib-0.13.0:
      Successfully uninstalled kubeflow-katib-0.13.0
Successfully installed kubeflow-katib-0.14.0


In [8]:
# Identity of the Experiment: its name and the namespace it is created in.
experiment_name = "lemicmnisttf7"
namespace = "lazarlemic2"

# Trial budget passed to the Experiment spec: concurrency, tolerated
# failures, and the overall number of trials.
parallel_trial_count = 2
max_failed_trial_count = 3
max_trial_count = 5

# Object metadata: the Experiment's name and namespace.
metadata = V1ObjectMeta(name=experiment_name, namespace=namespace)

# Objective specification: minimize the metric named "loss", with a goal
# value of 0.001.
objective = V1beta1ObjectiveSpec(
    objective_metric_name="loss",
    type="minimize",
    goal=0.001,
)

# Algorithm specification: random search.
algorithm = V1beta1AlgorithmSpec(algorithm_name="random")

# Experiment search space.
# Only the learning rate is tuned here: a "double" parameter whose bounds
# 0.01 and 0.05 are passed as strings.
parameters = [
    V1beta1ParameterSpec(
        name="learning_rate",
        parameter_type="double",
        feasible_space=V1beta1FeasibleSpace(min="0.01", max="0.05"),
    ),
]

# Experiment Trial template.
# TODO (andreyvelich): Use community image for the mnist example.
def _tf_replica_spec():
    """Build the replica spec shared by every TFJob role in the trial.

    Returns a brand-new dict on each call so that the roles never share
    nested objects. The spec asks for one replica restarted on failure,
    disables Istio sidecar injection through a pod annotation, and runs
    ``mnist.py`` for one epoch in a container named ``tensorflow``. The
    ``${trialParameters.learningRate}`` placeholder is left verbatim in the
    command line.
    """
    return {
        "replicas": 1,
        "restartPolicy": "OnFailure",
        "template": {
            "metadata": {
                "annotations": {"sidecar.istio.io/inject": "false"},
            },
            "spec": {
                "containers": [
                    {
                        "name": "tensorflow",
                        "image": "ghcr.io/flexigrobots-h2020/simple-katib-tensorflow:latest",
                        "command": [
                            "python",
                            "mnist.py",
                            "--epochs=1",
                            "--learning-rate=${trialParameters.learningRate}",
                        ],
                    }
                ]
            },
        },
    }


# TFJob manifest with two roles, Chief and Worker, using identical replica specs.
trial_spec = {
    "apiVersion": "kubeflow.org/v1",
    "kind": "TFJob",
    "spec": {
        "tfReplicaSpecs": {
            "Chief": _tf_replica_spec(),
            "Worker": _tf_replica_spec(),
        }
    },
}

# Trial template: wraps the TFJob manifest and declares the trial parameter
# "learningRate", which references the "learning_rate" search parameter and
# matches the ${trialParameters.learningRate} placeholder in the container
# command. The primary container name equals the container name used in
# the manifest.
trial_template = V1beta1TrialTemplate(
    trial_spec=trial_spec,
    primary_container_name="tensorflow",
    trial_parameters=[
        V1beta1TrialParameterSpec(
            reference="learning_rate",
            name="learningRate",
            description="Learning rate for the training model",
        ),
    ],
)

# Assemble the Experiment spec from the pieces defined earlier: what to
# optimize, how to search, what to run, and the trial budget.
experiment_spec = V1beta1ExperimentSpec(
    objective=objective,
    algorithm=algorithm,
    parameters=parameters,
    trial_template=trial_template,
    parallel_trial_count=parallel_trial_count,
    max_trial_count=max_trial_count,
    max_failed_trial_count=max_failed_trial_count,
)

# Wrap the spec and metadata into the Experiment resource itself.
experiment = V1beta1Experiment(
    kind="Experiment",
    api_version="kubeflow.org/v1beta1",
    spec=experiment_spec,
    metadata=metadata,
)

In [9]:
# print(experiment)

In [10]:
# Sanity check: show the Experiment name and the chosen algorithm, one per line.
print(
    experiment.metadata.name,
    experiment.spec.algorithm.algorithm_name,
    sep="\n",
)

lemicmnisttf7
random


In [11]:
# Create client.
kclient = KatibClient()

# Create your Experiment.
# Use the shared `namespace` variable rather than repeating the literal, so
# this call always targets the same namespace as the Experiment metadata and
# the status query further down.
kclient.create_experiment(experiment, namespace=namespace)

{'apiVersion': 'kubeflow.org/v1beta1',
 'kind': 'Experiment',
 'metadata': {'creationTimestamp': '2023-02-07T12:02:40Z',
  'generation': 1,
  'managedFields': [{'apiVersion': 'kubeflow.org/v1beta1',
    'fieldsType': 'FieldsV1',
    'fieldsV1': {'f:spec': {'.': {},
      'f:algorithm': {'.': {}, 'f:algorithmName': {}},
      'f:maxFailedTrialCount': {},
      'f:maxTrialCount': {},
      'f:objective': {'.': {},
       'f:goal': {},
       'f:objectiveMetricName': {},
       'f:type': {}},
      'f:parallelTrialCount': {},
      'f:parameters': {},
      'f:trialTemplate': {'.': {},
       'f:primaryContainerName': {},
       'f:trialParameters': {},
       'f:trialSpec': {'.': {},
        'f:apiVersion': {},
        'f:kind': {},
        'f:spec': {'.': {},
         'f:tfReplicaSpecs': {'.': {},
          'f:Chief': {'.': {},
           'f:replicas': {},
           'f:restartPolicy': {},
           'f:template': {'.': {},
            'f:metadata': {'.': {},
             'f:annotations

In [12]:
# Ask the Katib client for the current status of the Experiment created above.
kclient.get_experiment_status(namespace=namespace, name=experiment_name)


'Created'

In [7]:
# kclient.list_trials(name=experiment_name, namespace=namespace)