In [None]:
!pip install kfp==1.8.18
!pip install kubeflow-katib==0.14.0

In [73]:
import kfp
import kfp.dsl as dsl
from kfp import components
from kubeflow.katib import KatibClient
from kubeflow.katib import ApiClient
from kubeflow.katib import V1beta1ExperimentSpec
from kubeflow.katib import V1beta1AlgorithmSpec
from kubeflow.katib import V1beta1ObjectiveSpec
from kubeflow.katib import V1beta1ParameterSpec
from kubeflow.katib import V1beta1FeasibleSpace
from kubeflow.katib import V1beta1TrialTemplate
from kubeflow.katib import V1beta1TrialParameterSpec
from kubeflow.katib import V1beta1Experiment
from kubernetes.client import V1ObjectMeta
from kubeflow.katib import V1beta1MetricsCollectorSpec
from kubeflow.katib import V1beta1SourceSpec
from kubeflow.katib import V1beta1FileSystemPath
from kubeflow.katib import V1beta1CollectorSpec
from kubeflow.katib import V1beta1FilterSpec

In [74]:
# --- Experiment identity -----------------------------------------------------
experiment_name = "katib-simple-test"
namespace = "lazarlemic2"

# --- Trial budget ------------------------------------------------------------
# These three values are passed unchanged to the experiment spec further down.
parallel_trial_count = 2
max_trial_count = 5
max_failed_trial_count = 3

# --- Training image ----------------------------------------------------------
# Active image: very simple image with hard-coded output.
docker_img = "ghcr.io/flexigrobots-h2020/simple-katib-example:latest"
# Alternative (disabled): PyTorch MNIST image.
# docker_img = "ghcr.io/flexigrobots-h2020/torchtesting2:latest"

# Optimisation objective: maximise the metric named "accuracy", with a goal of 0.99.
objective = V1beta1ObjectiveSpec(objective_metric_name="accuracy", type="maximize", goal=0.99)

# Metrics collector configuration:
#   - collector kind "File"
#   - source file /katib/mnist.log (kind "File")
#   - a filter regex whose first outer group follows "metricName: " and whose
#     second outer group follows "metricValue: "
metrics = V1beta1MetricsCollectorSpec(
    collector=V1beta1CollectorSpec(kind="File"),
    source=V1beta1SourceSpec(
        file_system_path=V1beta1FileSystemPath(kind="File", path="/katib/mnist.log"),
        filter=V1beta1FilterSpec(
            metrics_format=["{metricName: ([\\w|-]+), metricValue: ((-?\\d+)(\\.\\d+)?)}"]
        ),
    ),
)

# Search algorithm used to propose trials: "random".
algorithm = V1beta1AlgorithmSpec(algorithm_name="random")

# Kubernetes object metadata: the experiment's name and the namespace it belongs to.
metadata = V1ObjectMeta(namespace=namespace, name=experiment_name)

# Experiment search space.
# Only one hyperparameter is tuned: "lr", a double in the range 0.01 .. 0.05
# (the bounds are supplied as strings).
parameters = [
    V1beta1ParameterSpec(
        name="lr",
        parameter_type="double",
        feasible_space=V1beta1FeasibleSpace(min="0.01", max="0.05"),
    ),
]

# Experiment Trial template: a batch/v1 Job with a single container.
#   - The pod annotation sets "sidecar.istio.io/inject" to "false".
#   - The container runs `python3 simple.py` from `docker_img`.
#   - "${trialParameters.learningRate}" is the placeholder for the trial
#     parameter named "learningRate".
#   - --log-path is the same file the metrics collector is configured with
#     (/katib/mnist.log).
# TODO (andreyvelich): Use community image for the mnist example.
trial_spec = {
    "apiVersion": "batch/v1",
    "kind": "Job",
    "spec": {
        "template": {
            "metadata": {
                "annotations": {"sidecar.istio.io/inject": "false"},
            },
            "spec": {
                "containers": [
                    {
                        "name": "training-container",
                        "image": docker_img,
                        "command": [
                            "python3",
                            "simple.py",
                            "--epochs=3",
                            "--lr=${trialParameters.learningRate}",
                            "--log-path=/katib/mnist.log",
                        ],
                    },
                ],
                "restartPolicy": "Never",
            },
        },
    },
}

# Trial template configuration.
# The trial parameter "learningRate" (the placeholder used in trial_spec's
# command) references the search-space parameter "lr".
trial_template = V1beta1TrialTemplate(
    trial_spec=trial_spec,
    primary_container_name="training-container",
    trial_parameters=[
        V1beta1TrialParameterSpec(
            reference="lr",
            name="learningRate",
            description="Learning rate for the training model",
        ),
    ],
)

# Assemble the experiment spec from the objects and trial-budget values
# defined earlier in this cell.
experiment_spec = V1beta1ExperimentSpec(
    objective=objective,
    algorithm=algorithm,
    parameters=parameters,
    trial_template=trial_template,
    metrics_collector_spec=metrics,
    max_trial_count=max_trial_count,
    parallel_trial_count=parallel_trial_count,
    max_failed_trial_count=max_failed_trial_count,
)

# Wrap the metadata and spec in a kubeflow.org/v1beta1 "Experiment" object.
experiment = V1beta1Experiment(
    kind="Experiment",
    api_version="kubeflow.org/v1beta1",
    spec=experiment_spec,
    metadata=metadata,
)

In [75]:
# Quick check of what was built: experiment name, then algorithm name, one per line.
print(experiment.metadata.name, experiment.spec.algorithm.algorithm_name, sep="\n")

simple-libraries-test3
random


In [76]:
# Create the Katib client (no explicit arguments are passed).
kclient = KatibClient()

# Submit the experiment. The target namespace comes from the `namespace`
# configuration variable (the same one used for the experiment's metadata)
# instead of a repeated string literal, so the two cannot drift apart.
kclient.create_experiment(experiment, namespace=namespace)

{'apiVersion': 'kubeflow.org/v1beta1',
 'kind': 'Experiment',
 'metadata': {'creationTimestamp': '2023-02-07T13:02:22Z',
  'generation': 1,
  'managedFields': [{'apiVersion': 'kubeflow.org/v1beta1',
    'fieldsType': 'FieldsV1',
    'fieldsV1': {'f:spec': {'.': {},
      'f:algorithm': {'.': {}, 'f:algorithmName': {}},
      'f:maxFailedTrialCount': {},
      'f:maxTrialCount': {},
      'f:metricsCollectorSpec': {'.': {},
       'f:collector': {'.': {}, 'f:kind': {}},
       'f:source': {'.': {},
        'f:fileSystemPath': {'.': {}, 'f:kind': {}, 'f:path': {}},
        'f:filter': {'.': {}, 'f:metricsFormat': {}}}},
      'f:objective': {'.': {},
       'f:goal': {},
       'f:objectiveMetricName': {},
       'f:type': {}},
      'f:parallelTrialCount': {},
      'f:parameters': {},
      'f:trialTemplate': {'.': {},
       'f:primaryContainerName': {},
       'f:trialParameters': {},
       'f:trialSpec': {'.': {},
        'f:apiVersion': {},
        'f:kind': {},
        'f:spec': 

In [69]:
# NOTE: this cell is fully commented out and does nothing when run.
# It sketches how to print the name/value pairs of the current optimal trial's
# parameter assignments for an experiment called "torch-notebook-moj-kod",
# which is not the experiment_name configured in this notebook.
# print(kclient.get_optimal_hyperparameters("torch-notebook-moj-kod")["currentOptimalTrial"]["parameterAssignments"])
# optimal_hyperparams = kclient.get_optimal_hyperparameters("torch-notebook-moj-kod")["currentOptimalTrial"]["parameterAssignments"]
# for param in optimal_hyperparams:
#     print(param["name"])
#     print(param["value"])

In [None]:
# NOTE: this cell is fully commented out and does nothing when run.
# It sketches how to fetch an experiment and print the hyperparameters of its
# successful trials, for an experiment called "torch-notebook-moj-kod" in
# namespace "lazarlemic2" (not the experiment_name configured in this notebook).
# optimal_hyperparams = kclient.get_experiment("torch-notebook-moj-kod")
# list_trials = kclient.get_success_trial_details("torch-notebook-moj-kod", "lazarlemic2")
# for trial in list_trials:
#     print(trial['hyperparameters'])
# # import pprint
# # pprint.pprint(optimal_hyperparams)
# # print(optimal_hyperparams["metadata"]["name"], optimal_hyperparams["metadata"]["namespace"])