## Change Directory to Main Katib folder

In [1]:
cd /root/katib/sdk/python/

/root/katib/sdk/python


## Install Katib SDK

In [2]:
! python setup.py install

## Import Required Libraries

In [3]:
from kubernetes.client import V1PodTemplateSpec
from kubernetes.client import V1ObjectMeta
from kubernetes.client import V1PodSpec
from kubernetes.client import V1Container

In [4]:
import kubeflow.katib as kc
from kubeflow.katib import constants
from kubeflow.katib import utils
from kubeflow.katib import V1alpha3AlgorithmSetting
from kubeflow.katib import V1alpha3AlgorithmSetting
from kubeflow.katib import V1alpha3AlgorithmSpec
from kubeflow.katib import V1alpha3CollectorSpec
from kubeflow.katib import V1alpha3EarlyStoppingSetting
from kubeflow.katib import V1alpha3EarlyStoppingSpec
from kubeflow.katib import V1alpha3Experiment
from kubeflow.katib import V1alpha3ExperimentCondition
from kubeflow.katib import V1alpha3ExperimentList
from kubeflow.katib import V1alpha3ExperimentSpec
from kubeflow.katib import V1alpha3ExperimentStatus
from kubeflow.katib import V1alpha3FeasibleSpace
from kubeflow.katib import V1alpha3FileSystemPath
from kubeflow.katib import V1alpha3FilterSpec
from kubeflow.katib import V1alpha3GoTemplate
from kubeflow.katib import V1alpha3GraphConfig
from kubeflow.katib import V1alpha3Metric
from kubeflow.katib import V1alpha3MetricsCollectorSpec
from kubeflow.katib import V1alpha3NasConfig
from kubeflow.katib import V1alpha3ObjectiveSpec
from kubeflow.katib import V1alpha3Observation
from kubeflow.katib import V1alpha3Operation
from kubeflow.katib import V1alpha3OptimalTrial
from kubeflow.katib import V1alpha3ParameterAssignment
from kubeflow.katib import V1alpha3ParameterSpec
from kubeflow.katib import V1alpha3SourceSpec
from kubeflow.katib import V1alpha3Suggestion
from kubeflow.katib import V1alpha3SuggestionCondition
from kubeflow.katib import V1alpha3SuggestionList
from kubeflow.katib import V1alpha3SuggestionSpec
from kubeflow.katib import V1alpha3SuggestionStatus
from kubeflow.katib import V1alpha3TemplateSpec
from kubeflow.katib import V1alpha3Trial
from kubeflow.katib import V1alpha3TrialAssignment
from kubeflow.katib import V1alpha3TrialCondition
from kubeflow.katib import V1alpha3TrialList
from kubeflow.katib import V1alpha3TrialSpec
from kubeflow.katib import V1alpha3TrialStatus
from kubeflow.katib import V1alpha3TrialTemplate

In [5]:
from kubeflow.tfjob import constants
from kubeflow.tfjob import utils
from kubeflow.tfjob import V1ReplicaSpec
from kubeflow.tfjob import V1TFJob
from kubeflow.tfjob import V1TFJobSpec
from kubeflow.tfjob import TFJobClient

## Define Experiment related Specs

In [11]:
algorithmsettings = V1alpha3AlgorithmSetting(
    name= None,
    value = None
    )
algorithm = V1alpha3AlgorithmSpec(
    algorithm_name = "random",
    algorithm_settings = [algorithmsettings]
  )

# Metric Collector
collector = V1alpha3CollectorSpec(kind = "TensorFlowEvent")
FileSystemPath = V1alpha3FileSystemPath(kind = "/train" , path = "Directory")
metrics_collector_spec = V1alpha3MetricsCollectorSpec(
    collector = collector,
    source = FileSystemPath)

# Objective
objective = V1alpha3ObjectiveSpec(
    goal = 0.9999,
    objective_metric_name = "accuracy_1",
    type = "maximize")

# Parameters
feasible_space = V1alpha3FeasibleSpace(min = "100", max = "200")
parameters = [V1alpha3ParameterSpec(
    feasible_space = feasible_space, 
    name = "--batch-size",
    parameter_type = "int"
    )]

#Defining the namespace where tfjob needs to be created
namespace = utils.get_default_target_namespace()


#Defining a Container
container = V1Container(
    name="tensorflow",
    image="gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0",
    image_pull_policy="Always",
    command=["python", "/var/tf_mnist/mnist_with_summaries.py", "--log_dir=/train/metrics","{{- with .HyperParameters}}","{{- range .}}","{{.Name}}={{.Value}}", "{{- end}}","{{- end}}" ]
    
    
)

#Defining Worker Spec
worker = V1ReplicaSpec(
   replicas=1,
   restart_policy="OnFailure",
   template=V1PodTemplateSpec(
        spec=V1PodSpec(
            containers=[container]
        )
    )
)


#Defining TFJob
tfjob = V1TFJob(
    api_version="kubeflow.org/v1",
    kind="TFJob",
    metadata=V1ObjectMeta(name="trialsample",namespace=namespace),
    spec=V1TFJobSpec(
        clean_pod_policy="None",
        tf_replica_specs={"Worker": worker}
    )
)

#Creating TFJob
tfjob_client = TFJobClient()
tfjob_client.create(tfjob, namespace=namespace)


# Experiment
experiment = V1alpha3Experiment(
    api_version="kubeflow.org/v1alpha3",
    kind="Experiment",
    metadata=V1ObjectMeta(name="tfjob-experiment",namespace="anonymous"),

    spec=V1alpha3ExperimentSpec(
         algorithm = algorithm,
         max_failed_trial_count=3,
         max_trial_count=12,
         metrics_collector_spec= metrics_collector_spec ,
         objective = objective,
         parallel_trial_count=4,
         parameters = parameters
    )
)

In [12]:
namespace = kc.utils.get_default_target_namespace()

In [13]:
kclient = kc.KatibClient()

## Create Experiment

In [14]:
kclient.create_experiment(experiment,namespace=namespace)

{'apiVersion': 'kubeflow.org/v1alpha3',
 'kind': 'Experiment',
 'metadata': {'creationTimestamp': '2020-03-31T08:44:20Z',
  'generation': 1,
  'name': 'tfjob-experiment',
  'namespace': 'anonymous',
  'resourceVersion': '19279496',
  'selfLink': '/apis/kubeflow.org/v1alpha3/namespaces/anonymous/experiments/tfjob-experiment',
  'uid': 'd0a61ede-732b-11ea-bedf-42010aa00fec'},
 'spec': {'algorithm': {'algorithmName': 'random', 'algorithmSettings': [{}]},
  'maxFailedTrialCount': 3,
  'maxTrialCount': 12,
  'metricsCollectorSpec': {'collector': {'kind': 'TensorFlowEvent'},
   'source': {'fileSystemPath': {'kind': 'Directory',
     'path': '/var/log/katib/tfevent/'},
    'kind': '/train',
    'path': 'Directory'}},
  'objective': {'goal': 0.9999,
   'objectiveMetricName': 'accuracy_1',
   'type': 'maximize'},
  'parallelTrialCount': 4,
  'parameters': [{'feasibleSpace': {'max': '200', 'min': '100'},
    'name': '--batch-size',
    'parameterType': 'int'}],
  'trialTemplate': {'goTemplate': 

## Get Single Experiment

In [15]:
kclient.get_experiment(name="tfjob-experiment", namespace=namespace)

{'apiVersion': 'kubeflow.org/v1alpha3',
 'kind': 'Experiment',
 'metadata': {'creationTimestamp': '2020-03-31T08:44:20Z',
  'finalizers': ['update-prometheus-metrics'],
  'generation': 2,
  'name': 'tfjob-experiment',
  'namespace': 'anonymous',
  'resourceVersion': '19279498',
  'selfLink': '/apis/kubeflow.org/v1alpha3/namespaces/anonymous/experiments/tfjob-experiment',
  'uid': 'd0a61ede-732b-11ea-bedf-42010aa00fec'},
 'spec': {'algorithm': {'algorithmName': 'random', 'algorithmSettings': [{}]},
  'maxFailedTrialCount': 3,
  'maxTrialCount': 12,
  'metricsCollectorSpec': {'collector': {'kind': 'TensorFlowEvent'},
   'source': {'fileSystemPath': {'kind': 'Directory',
     'path': '/var/log/katib/tfevent/'}}},
  'objective': {'goal': 0.9999,
   'objectiveMetricName': 'accuracy_1',
   'type': 'maximize'},
  'parallelTrialCount': 4,
  'parameters': [{'feasibleSpace': {'max': '200', 'min': '100'},
    'name': '--batch-size',
    'parameterType': 'int'}],
  'trialTemplate': {'goTemplate': 

## Get all Experiments

In [16]:
kclient.get_experiment(namespace=namespace)

{'apiVersion': 'kubeflow.org/v1alpha3',
 'items': [{'apiVersion': 'kubeflow.org/v1alpha3',
   'kind': 'Experiment',
   'metadata': {'creationTimestamp': '2020-03-30T09:15:16Z',
    'finalizers': ['update-prometheus-metrics'],
    'generation': 1,
    'name': 'bayesianoptimizationjob',
    'namespace': 'anonymous',
    'resourceVersion': '18661856',
    'selfLink': '/apis/kubeflow.org/v1alpha3/namespaces/anonymous/experiments/bayesianoptimizationjob',
    'uid': 'f82b3ae2-7266-11ea-bedf-42010aa00fec'},
   'spec': {'algorithm': {'algorithmName': 'bayesianoptimization',
     'algorithmSettings': [{'name': 'random_state', 'value': '10'}]},
    'maxFailedTrialCount': 3,
    'maxTrialCount': 5,
    'metricsCollectorSpec': {'collector': {'kind': 'StdOut'}},
    'objective': {'goal': 0.9999,
     'objectiveMetricName': 'Validation-accuracy',
     'type': 'maximize'},
    'parallelTrialCount': 5,
    'parameters': [{'feasibleSpace': {'list': ['sgd', 'adam', 'ftrl']},
      'name': '--optimizer'

## Get experiment status

In [17]:
kclient.get_experiment_status(name="tfjob-experiment", namespace=namespace)

'Running'

## Check whether experiment has succeeded

In [18]:
kclient.is_experiment_succeeded(name="tfjob-experiment", namespace=namespace)

False

## Delete experiment

In [None]:
# kclient.delete_experiment(name="tfjob-example", namespace=namespace)

## List Trials of an experiment

In [23]:
kclient.list_trials(name="tfjob-experiment", namespace=namespace)

[{'name': 'tfjob-experiment-l6gfrps2', 'status': 'Succeeded'},
 {'name': 'tfjob-experiment-nwrkfsgh', 'status': 'Succeeded'},
 {'name': 'tfjob-experiment-tpmmmrrp', 'status': 'Succeeded'},
 {'name': 'tfjob-experiment-zhk7p56g', 'status': 'Succeeded'}]

## List all Experiments

In [25]:
kclient.list_experiments(namespace=namespace)

[{'name': 'bayesianoptimizationjob', 'status': 'Succeeded'},
 {'name': 'bayesianoptimizationjob1', 'status': 'Succeeded'},
 {'name': 'blerssi-bayesopt', 'status': 'Succeeded'},
 {'name': 'tfjob-experiment', 'status': 'Running'}]

## Get Optimal Hyperparameters

In [26]:
kclient.get_optimal_hyperparmeters(name="tfjob-experiment",namespace=namespace)


{'status': 'Running',
 'currentOptimalTrial': {'bestTrialName': '',
  'observation': {'metrics': None},
  'parameterAssignments': None}}