In [None]:
### Study ML Model Fairness with the What-If Tool

# In this study, we use a Kubeflow pipeline to:
# 1. create a persistent volume claim to hold the data from https://storage.googleapis.com/what-if-tool-resources/uci-census-demo/uci-census-demo.zip
# 2. run an ETL job that downloads and unzips that zip file
# 3. launch a KServe inference service that holds the pretrained model for later inference
# 4. launch TensorBoard and navigate to the What-If Tool
#    for the TensorBoard issue, please refer to https://github.com/tensorflow/tensorboard/issues/5472
#    for the RBAC issue, please apply the allow-all authorization policy under the hack folder.
#
#
# For more details on each step, please refer to the slides: https://github.com/FootprintAI/kubeflow-workshop/tree/main/slides
#
# reference: https://pair-code.github.io/what-if-tool/learn/tutorials/walkthrough/

In [None]:
with open("requirements.txt", "w") as f:
    f.write("kubernetes>=12.0.0\n")
    f.write("kfp==1.8.9\n")
    f.write("requests\n")
    
!pip install -r requirements.txt  --upgrade --user

In [None]:
import json
import kfp.dsl as dsl
import kfp
from kfp import components
from kfp.components import func_to_container_op
from typing import NamedTuple

# Component factory for the KFServing step, loaded from the component.yaml
# on the master branch of the kubeflow/pipelines repository.
kfserving_op = components.load_component_from_url(
    url='https://raw.githubusercontent.com/kubeflow/pipelines/master/components/kubeflow/kfserving/component.yaml'
)

@func_to_container_op
def tensorboard_func_op() -> NamedTuple('Outputs', [('mlpipeline_ui_metadata', 'UI_metadata')]):
    """Produce UI metadata that declares a single TensorBoard output.

    Returns:
        A one-element list holding the JSON-encoded metadata; the element
        corresponds to the ``mlpipeline_ui_metadata`` output field.
    """
    # Imported inside the body rather than relying on the notebook's import
    # (presumably because only this function's source runs in the container).
    import json

    tensorboard_viewer = {
        'type': 'tensorboard',
        'source': 'gs://what-if-tool-resources/uci-census-demo/uci-census-demo.zip',
    }
    ui_metadata_json = json.dumps({'outputs': [tensorboard_viewer]})
    return [ui_metadata_json]


@dsl.pipeline(
  name='KFServing pipeline',
  description='A pipeline for KFServing.'
)
def kfservingPipeline(
    action='apply',
    model_name='uci-census',
    namespace='kubeflow-user-example-com'):
    """Fetch the UCI census demo, serve its model, then emit TensorBoard metadata.

    The steps are chained in this order: create a PVC, download and unzip the
    demo archive onto it, apply an InferenceService whose storage URI points
    into that PVC, and finally run the TensorBoard metadata step.

    Args:
        action: Forwarded to the KFServing component as its ``action``.
        model_name: Used as ``metadata.name`` of the InferenceService.
        namespace: Used as ``metadata.namespace`` of the InferenceService.
    """
    # Step 1: volume claim mounted at /data by the ETL and TensorBoard steps.
    data_volume_op = dsl.VolumeOp(
        name="mypvc",
        resource_name="newpvc",
        size="1Gi",
        modes=dsl.VOLUME_MODE_RWO
    )

    # Step 2: download the demo archive onto the volume and unpack it there.
    etl_task = dsl.ContainerOp(
        name="etl",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["cd /data && wget https://storage.googleapis.com/what-if-tool-resources/uci-census-demo/uci-census-demo.zip && unzip uci-census-demo.zip"],
        pvolumes={"/data": data_volume_op.volume}
    )

    # Step 3: InferenceService reading the model from the claim created above.
    model_storage_uri = 'pvc://{}/uci_census/model/'.format(data_volume_op.outputs['name'])

    # Port 9000 (gRPC) is opened for the What-If Tool inference from TensorBoard.
    isvc_template = '''
apiVersion: "serving.kubeflow.org/v1beta1"
kind: "InferenceService"
metadata:
  name: {}
  namespace: {}
spec:
  predictor:
    tensorflow:
      storageUri: {}
      ports:
        - containerPort: 9000
          name: h2c
          protocol: TCP
'''
    isvc_manifest = isvc_template.format(model_name, namespace, model_storage_uri)

    # Serving must wait until the ETL step has written the files.
    serving_task = kfserving_op(
        action=action,
        inferenceservice_yaml=isvc_manifest
    ).after(etl_task)

    # Step 4: TensorBoard metadata step, with the same volume mounted at /data.
    tensorboard_task = tensorboard_func_op()
    tensorboard_task.add_pvolumes({'/data': data_volume_op.volume})
    tensorboard_task.after(serving_task)
    
# Compile the pipeline function into a package file named tf-uci.zip.
kfp.compiler.Compiler().compile(
    pipeline_func=kfservingPipeline,
    package_path='tf-uci.zip',
)