## Setup and download Data

In [1]:
from tempo.serve.metadata import ModelFramework
from tempo.serve.model import Model
from tempo.seldon.docker import SeldonDockerRuntime
from tempo.kfserving.protocol import KFServingV2Protocol
from tempo.serve.utils import pipeline, predictmethod
from tempo.seldon.k8s import SeldonKubernetesRuntime
from tempo.serve.metadata import ModelFramework, KubernetesOptions
from alibi.utils.wrappers import ArgmaxTransformer
from typing import Any

import numpy as np
import os 
import pprint
import dill
import json

In [2]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from alibi.explainers import AnchorTabular
from alibi.datasets import fetch_adult

adult = fetch_adult()
data = adult.data
target = adult.target
feature_names = adult.feature_names
category_map = adult.category_map

In [3]:
np.random.seed(0)
data_perm = np.random.permutation(np.c_[data, target])
data = data_perm[:,:-1]
target = data_perm[:,-1]
idx = 30000
X_train,Y_train = data[:idx,:], target[:idx]
X_test, Y_test = data[idx+1:,:], target[idx+1:]

## Build SKLearn Model and Alibi Anchors Tabular Explainer

In [None]:
ordinal_features = [x for x in range(len(feature_names)) if x not in list(category_map.keys())]
ordinal_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='median')),
                                      ('scaler', StandardScaler())])
categorical_features = list(category_map.keys())
categorical_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='median')),
                                          ('onehot', OneHotEncoder(handle_unknown='ignore'))])
preprocessor = ColumnTransformer(transformers=[('num', ordinal_transformer, ordinal_features),
                                               ('cat', categorical_transformer, categorical_features)])
clf = RandomForestClassifier(n_estimators=50)
model=Pipeline(steps=[("preprocess",preprocessor),("model",clf)])
model.fit(X_train,Y_train)


print('Train accuracy: ', accuracy_score(Y_train, model.predict(X_train)))
print('Test accuracy: ', accuracy_score(Y_test, model.predict(X_test)))

In [None]:
from alibi.explainers import AnchorTabular
predict_fn = lambda x: model.predict(x)
explainer = AnchorTabular(predict_fn, feature_names, categorical_names=category_map, seed=1)
explainer.fit(X_train, disc_perc=[25, 50, 75])

In [None]:
explanation = explainer.explain(X_test[0], threshold=0.95)
print('Anchor: %s' % (' AND '.join(explanation.anchor)))
print('Precision: %.2f' % explanation.precision)
print('Coverage: %.2f' % explanation.coverage)

In [None]:
from joblib import dump
dump(model, os.getcwd()+'/income_model/model.joblib') 
with open(os.getcwd()+"/income_explainer/explainer.dill", 'wb') as f:
    dill.dump(explainer,f)

## Create Tempo Artifacts

In [30]:
k8s_options = KubernetesOptions(namespace="production")
k8s_runtime = SeldonKubernetesRuntime(k8s_options=k8s_options)

sklearn_model = Model(
        name="income-sklearn",
        runtime=SeldonDockerRuntime(),
        platform=ModelFramework.SKLearn,
        local_folder=os.getcwd()+"/income_model",
        uri="gs://seldon-models/test/income/model"
)


In [43]:


@pipeline(name="income-explainer",
          runtime=SeldonDockerRuntime(protocol=KFServingV2Protocol()),
          uri="gs://seldon-models/test/income/explainer",
          local_folder=os.getcwd()+"/income_explainer",
          conda_env="tempo-test",
          models=[sklearn_model])
class ExplainerPipeline(object):

    def __init__(self):
        if "MLSERVER_MODELS_DIR" in os.environ:
            models_folder = ""
        else:
            models_folder = "/income_explainer"
        with open(os.getcwd()+models_folder+"/explainer.dill", "rb") as f:
            self.explainer = dill.load(f)
        self.ran_init = True
        
    def update_predict_fn(self, x):
        if np.argmax(sklearn_model(x).shape) == 0:
            self.explainer.predictor = sklearn_model
            self.explainer.samplers[0].predictor = sklearn_model
        else:
            self.explainer.predictor = ArgmaxTransformer(sklearn_model)
            self.explainer.samplers[0].predictor = ArgmaxTransformer(sklearn_model)

    @predictmethod
    def explain(self, payload: np.ndarray, parameters: dict) -> str:
        print("Explain called with ", parameters)
        if not self.ran_init:
            print("Loading explainer")
            self.__init__()
        self.update_predict_fn(payload)
        print("calling explain")
        explanation = self.explainer.explain(payload, **parameters)
        return explanation.to_json()

### Deploy model to Docker and test

In [32]:
sklearn_model.deploy()
sklearn_model.wait_ready()

True

In [33]:
sklearn_model(X_test[0:1])

array([[0.96, 0.04]])

### Create explainer and test against model

In [44]:
p = ExplainerPipeline() 

In [45]:
p.save(save_env=False)

In [50]:
r = json.loads(p.explain(X_test[0:1], {"threshold":0.99}))
print(r["data"]["anchor"])

Explain called with  {'threshold': 0.99}
calling explain
['Relationship = Unmarried', 'Sex = Female', 'Capital Gain <= 0.00', 'Capital Loss <= 0.00']


### Save environment and deploy explainer to docker

In [52]:
p.deploy()
p.wait_ready()

True

In [53]:
r = json.loads(p.remote(payload=X_test[0:1], parameters={"threshold":0.99}))
print(r["data"]["anchor"])

Create v2 from any
['Relationship = Unmarried', 'Sex = Female', 'Capital Gain <= 0.00', 'Marital Status = Separated']


In [54]:
p.undeploy()

### Deploy to production on Kubernetes

In [55]:
k8s_options = KubernetesOptions(namespace="production")
k8s_runtime = SeldonKubernetesRuntime(k8s_options=k8s_options)
k8s_runtime_v2 = SeldonKubernetesRuntime(k8s_options=k8s_options, protocol=KFServingV2Protocol())

sklearn_model.set_runtime(k8s_runtime)
p.set_runtime(k8s_runtime_v2)

In [56]:
p.save(save_env=False)

In [57]:
sklearn_model.upload()
p.upload()

In [58]:
p.deploy()
p.wait_ready()

True

In [59]:
r = json.loads(p.remote(payload=X_test[0:1], parameters={"threshold":0.99}))
print(r["data"]["anchor"])

Create v2 from any
['Relationship = Unmarried', 'Capital Gain <= 0.00', 'Sex = Female', 'Marital Status = Separated', 'Education = Associates', 'Capital Loss <= 0.00']


In [9]:
p.undeploy()

In [8]:
yaml = p.to_k8s_yaml()
print (eval(pprint.pformat(yaml)))

apiVersion: machinelearning.seldon.io/v1
kind: SeldonDeployment
metadata:
  name: income-explainer
  namespace: production
spec:
  predictors:
  - componentSpecs:
    - spec:
        containers:
        - args: []
          env:
          - name: MLSERVER_HTTP_PORT
            value: '9000'
          - name: MLSERVER_GRPC_PORT
            value: '9500'
          - name: MLSERVER_MODEL_IMPLEMENTATION
            value: mlserver_tempo.TempoModel
          - name: MLSERVER_MODEL_NAME
            value: income-explainer
          - name: MLSERVER_MODEL_URI
            value: /mnt/models
          image: seldonio/mlserver:0.3.1.dev5
          name: income-explainer
    graph:
      implementation: TRITON_SERVER
      modelUri: gs://seldon-models/test/income/explainer
      name: income-explainer
      serviceAccountName: tempo-pipeline
      type: MODEL
    name: default
    replicas: 1
  protocol: kfserving

---
apiVersion: machinelearning.seldon.io/v1
kind: SeldonDeployment
metadata:
  na