# Test Pipeline

In [None]:
import json
import re
# Name validators. The patterns end in \Z rather than $ because $ also
# matches just before a trailing newline, which would let "name\n" through.
nicename = re.compile(r'^[0-9a-zA-Z_-]+\Z')
bucketname = re.compile(r'^[0-9a-z_-]+\Z')

#################################
### Configure your variables ####
#################################
SAMPLES = 5

# The checks below raise ValueError instead of using `assert`, so they still
# run when Python is started with -O (which strips assert statements).
EXPERIMENT_NAME = "pipeline-test"
if not nicename.match(EXPERIMENT_NAME):
    raise ValueError(
        f"EXPERIMENT_NAME {EXPERIMENT_NAME!r} must match {nicename.pattern!r}"
    )

OUTPUT_BUCKET = 'ken-wifr-data'
if not bucketname.match(OUTPUT_BUCKET):
    raise ValueError(
        f"OUTPUT_BUCKET {OUTPUT_BUCKET!r} must match {bucketname.pattern!r}"
    )

LOADDATA_IMAGE_NAME     = "get-covid-data:v2"
LOADDATA_CONTAINER_NAME = "load-data"
if not nicename.match(LOADDATA_CONTAINER_NAME):
    raise ValueError(
        f"LOADDATA_CONTAINER_NAME {LOADDATA_CONTAINER_NAME!r} "
        f"must match {nicename.pattern!r}"
    )

AVERAGE_IMAGE_NAME     = "kenchu-kf-image-aggregate"
AVERAGE_CONTAINER_NAME = "aggregate"
if not nicename.match(AVERAGE_CONTAINER_NAME):
    raise ValueError(
        f"AVERAGE_CONTAINER_NAME {AVERAGE_CONTAINER_NAME!r} "
        f"must match {nicename.pattern!r}"
    )

########################################
### This gets fed into the map step ####
########################################
def seeds(how_many=SAMPLES):
    """Lazily yield ``how_many`` parameter dicts of the form ``{"seed": 3 * i}``.

    ``i`` runs over ``range(how_many)``, so the seed values are 0, 3, 6, ...
    """
    yield from ({"seed": 3 * index} for index in range(how_many))

In [None]:
import os
# Set the variable in this process's environment, then echo it back.
os.environ.update(ZZZZZZZZZ='ZZZZZZZZZ')
print(os.environ.get('ZZZZZZZZZ'))

In [None]:
###################################
### DON'T EDIT:                 ###
### Create the Experiment       ###
###################################
import kfp
# Client is built with no arguments, so connection settings come from
# kfp.Client()'s own defaults.
client = kfp.Client()
# `exp` stays at module level; its `id` is what the run submission passes
# to client.run_pipeline.
exp = client.create_experiment(name=EXPERIMENT_NAME)

In [None]:
###################################
### DON'T EDIT:                 ###
### Register our storage output ###
###################################
import defaults

# defaults.make_bucket(OUTPUT_BUCKET)

In [None]:
###################################
### You can change below this   ###
### Create the pipeline         ###
###################################
from kfp import dsl


def load_data_op(params, output, data='ken-wifr-data'):
    """Build the container step that runs the load-data image.

    Args:
        params: Value passed to the container's ``--params`` flag.
        output: Value passed to the container's ``--output`` flag.
        data: Value passed to the container's ``--data`` flag. Defaults to
            the bucket name that used to be hard-coded here, so existing
            two-argument callers behave exactly as before.

    Returns:
        A ``dsl.ContainerOp`` named ``LOADDATA_CONTAINER_NAME`` that declares
        three file outputs (``data1``, ``data2``, ``data3``) under
        ``/output``.
    """
    return dsl.ContainerOp(
        name=LOADDATA_CONTAINER_NAME,
        image=f'k8scc01covidacr.azurecr.io/{LOADDATA_IMAGE_NAME}',
        arguments=[
            '--params', params,
            '--output', output,
            '--data',   data
        ],
        # Files the container is expected to write; each becomes a named
        # output of the step.
        file_outputs={
            'data1': '/output/raw-covid19-ECDC.csv',
            'data2': '/output/raw-covid19-GoCInfobase-patched.csv',
            'data3': '/output/input-wIFR.csv'
        }
    )
    #.set_memory_request(
    #    "100M"
    #).set_memory_limit(
    #    "150M"
    #).set_cpu_request(
    #    "0.1"
    #).set_cpu_limit(
    #    "1"
    #)


def average_op(l, output):
    """Build the container step that runs the aggregate image.

    Args:
        l: Iterable whose items are appended after the ``--numbers`` flag.
        output: Value passed to the container's ``--output`` flag.

    Returns:
        A ``dsl.ContainerOp`` named ``AVERAGE_CONTAINER_NAME`` that declares
        ``/output/pi.json`` as its ``data`` file output.
    """
    cli_args = ['--output', output, '--numbers']
    cli_args.extend(l)

    declared_outputs = {'data': '/output/pi.json'}

    return dsl.ContainerOp(
        name=AVERAGE_CONTAINER_NAME,
        image=f'k8scc01covidacr.azurecr.io/{AVERAGE_IMAGE_NAME}',
        arguments=cli_args,
        file_outputs=declared_outputs,
    )


@dsl.pipeline(
    name="Fatality of Infected Ratio Analysis",
    description='Test sensitivity to the wIFR'
)
def pipeline_test(output):
    """Test pipeline: one load-data step per parameter set from ``seeds()``.

    Args:
        output: Base output location. Step ``i`` receives
            ``{output}/load_data/{i}`` as its ``--output`` argument.
    """
    # Each parameter dict is JSON-encoded and handed to its own load-data
    # step. The list is kept so the (currently disabled) aggregate step
    # below can consume the step outputs.
    results = [
        load_data_op(json.dumps(param), f'{output}/load_data/{i}')
        for (i, param) in enumerate(seeds())
    ]

    # average = average_op([r.output for r in results], f"{output}/pi/")

    # Do you need this?
    # NOTE(review): `defaults` is a project module not shown here; presumably
    # this injects environment variables into the steps -- confirm.
    defaults.inject_env_vars()

In [None]:
###################################
### DON'T EDIT:                 ###
### Ship the pipeline to run    ###
###################################
from kfp import compiler
# Compile pipeline_test into a package file named "<EXPERIMENT_NAME>.zip".
compiler.Compiler().compile(pipeline_test, f'{EXPERIMENT_NAME}.zip')

# Ready to roll! Let's run this pipeline!

In [None]:
import time
# Submit the compiled package under the experiment. The run name is the
# experiment name plus a local timestamp with one-second resolution.
run = client.run_pipeline(
    exp.id,
    f"{EXPERIMENT_NAME}-{time.strftime('%Y%m%d-%H%M%S')}",
    f"{EXPERIMENT_NAME}.zip",
    params={'output': OUTPUT_BUCKET},
)