# Deploy an inference server using KFServing

In [None]:
from kubernetes import client

In [None]:
from kfserving import KFServingClient, constants, utils, V1alpha2EndpointSpec, V1alpha2PredictorSpec, V1alpha2ExplainerSpec, V1alpha2SKLearnSpec, V1alpha2AlibiExplainerSpec, V1alpha2InferenceService, V1alpha2InferenceServiceSpec
from kubernetes.client import V1ResourceRequirements, V1ObjectMeta

# Define constants

In [None]:
# API group/version string and resource kind, taken from the KFServing SDK constants.
api_version = '/'.join((constants.KFSERVING_GROUP, constants.KFSERVING_VERSION))
kind = constants.KFSERVING_KIND

# Kubernetes namespace and service account name used throughout this notebook.
namespace = 'explainable-ml-engine-demo'
service_account_name = 'kfserving-service-credentials'

# GCS URIs passed as storage_uri to the predictor and explainer specs.
model_storage_uri = 'gs://deeploy-ml/explainable-ml-engine/model'
explainer_storage_uri = 'gs://deeploy-ml/explainable-ml-engine/explainer'

# Create a service account with GCS credentials (only needed if the bucket is not public)
> Do this once.

In [None]:
# One-time setup: register the GCS key file under the service account name
# in the demo namespace.
# How to obtain the key file:
# https://cloud.google.com/docs/authentication/getting-started
KFServing = KFServingClient()
KFServing.set_credentials(
    storage_type='GCS',
    namespace=namespace,
    credentials_file='tmp/gcp.json',
    service_account=service_account_name,
)

# Create an inference service with an explainer

In [None]:
# Create the client here as well, so this section does not depend on the
# one-time credentials cell having been run in the current session.
KFServing = KFServingClient()

In [None]:
# Predictor: scikit-learn server that loads the model from model_storage_uri.
sklearn_predictor = V1alpha2PredictorSpec(
    service_account_name=service_account_name,
    # min_replicas=0,
    sklearn=V1alpha2SKLearnSpec(
        storage_uri=model_storage_uri,
        runtime_version='v0.4.1',
        resources=V1ResourceRequirements(
            requests={'cpu': '100m', 'memory': '256Mi'},
            limits={'cpu': '250m', 'memory': '1Gi'},
        ),
    ),
)

# Explainer: Alibi AnchorTabular explainer loaded from explainer_storage_uri.
# Same requests as the predictor, but a higher memory limit (2Gi vs 1Gi).
alibi_explainer = V1alpha2ExplainerSpec(
    service_account_name=service_account_name,
    alibi=V1alpha2AlibiExplainerSpec(
        storage_uri=explainer_storage_uri,
        type='AnchorTabular',
        resources=V1ResourceRequirements(
            requests={'cpu': '100m', 'memory': '256Mi'},
            limits={'cpu': '250m', 'memory': '2Gi'},
        ),
    ),
)

# Endpoint combining the predictor with its explainer.
default_endpoint_spec = V1alpha2EndpointSpec(
    predictor=sklearn_predictor,
    explainer=alibi_explainer,
)

In [None]:
# Name and namespace under which the InferenceService is created.
service_metadata = V1ObjectMeta(name='income-model', namespace=namespace)

# Only the default endpoint is set; the canary settings stay disabled.
service_spec = V1alpha2InferenceServiceSpec(
    default=default_endpoint_spec,
    # canary=default_endpoint_spec,
    # canary_traffic_percent=10,
)

# Full InferenceService manifest handed to the KFServing client.
inference_svc = V1alpha2InferenceService(
    api_version=api_version,
    kind=kind,
    metadata=service_metadata,
    spec=service_spec,
)

In [None]:
# Submit the InferenceService manifest through the KFServing client.
KFServing.create(inference_svc)

# Clean up

In [18]:
# Delete the 'income-model' InferenceService from the demo namespace.
KFServing.delete('income-model', namespace=namespace)

{'kind': 'Status',
 'apiVersion': 'v1',
 'metadata': {},
 'status': 'Success',
 'details': {'name': 'income-model',
  'group': 'serving.kubeflow.org',
  'kind': 'inferenceservices',
  'uid': 'f433b086-7503-431e-bd0e-f602799f4260'}}