In [16]:
import kfp
import kfp.dsl as dsl
import kfp.components as comp
from kfp.components import InputPath, OutputPath
from typing import NamedTuple

In [17]:
import os
# Point the Google Cloud client libraries at a service-account key for local runs.
# The key location is resolved relative to the current user's home directory
# instead of being tied to one machine, and credentials that are already
# exported in the environment are left untouched.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    os.path.expanduser("~/.dotbot/cloud/quakeflow_wayne.json"),
)

In [18]:
def download_dataset(root_path: str = "/tmp"):
    """Download the phase catalog and waveform files and split them into datasets.

    The catalog ``catalogs/combined_phases.csv`` is fetched from the ``ncedc``
    Google Cloud Storage bucket and partitioned by ``event_index`` relative to
    the largest index ``m`` found in the catalog:

    - test:  ``event_index >= m - 1``
    - valid: ``m - 2 <= event_index < m - 1``
    - train: ``m - 10 <= event_index < m - 2``

    For every split, ``<root_path>/<split>.csv`` is written (tab separated,
    with ``itp``/``its`` columns copied from ``p_idx``/``s_idx``) and each file
    listed in the ``fname`` column is downloaded from ``data/<fname>`` in the
    bucket to ``<root_path>/<split>_data/<fname>``.

    All imports and helpers are kept inside the function body because the
    function is wrapped with ``func_to_container_op`` and must be
    self-contained.

    Args:
        root_path: Directory under which the catalog, the CSV lists and the
            per-split data folders are created.
    """
    from google.cloud import storage
    import os
    import pandas as pd

    bucket_name = "ncedc"
    catalog_blob = "catalogs/combined_phases.csv"

    def join_path(relative):
        return os.path.join(root_path, relative)

    # One client/bucket handle is shared by all downloads instead of
    # constructing a new client for every single file.
    bucket = storage.Client().bucket(bucket_name)

    def download_blob(source_blob_name, destination_file_name):
        bucket.blob(source_blob_name).download_to_filename(destination_file_name)
        print(f"download {source_blob_name} to {destination_file_name}.")

    # makedirs also creates missing parents and tolerates an existing folder.
    os.makedirs(join_path("catalogs/"), exist_ok=True)
    download_blob(catalog_blob, join_path(catalog_blob))
    catalog = pd.read_csv(join_path(catalog_blob), sep="\t")

    index = catalog["event_index"]
    last = index.max()
    # Explicit name -> frame mapping replaces the eval()-based variable lookup.
    splits = {
        "train": catalog[(index < last - 2) & (index >= last - 10)],
        "valid": catalog[(index < last - 1) & (index >= last - 2)],
        "test": catalog[index >= last - 1],
    }

    for name, subset in splits.items():
        print(f"Downloading {name} dataset...")
        # assign() returns a new frame, so no column is written onto a slice
        # of the catalog (which triggers pandas' SettingWithCopyWarning).
        subset = subset.assign(itp=subset["p_idx"], its=subset["s_idx"])
        subset.to_csv(join_path(f"{name}.csv"), sep="\t")
        print(f"Save {join_path(f'{name}.csv')}")
        os.makedirs(join_path(f"{name}_data/"), exist_ok=True)
        num = len(subset)
        for i, fname in enumerate(subset["fname"]):
            print(f"{i+1}/{num}", end=' ')
            download_blob(f"data/{fname}", join_path(f"{name}_data/{fname}"))
    

In [19]:
# download_dataset(root_path = "./")

In [20]:
# Wrap download_dataset as a container-based pipeline component; the listed
# packages are installed into the base image before the function runs.
download_dataset_op = comp.func_to_container_op(
    download_dataset,
    base_image='python:3.7',
    packages_to_install=["pandas", "google-cloud-storage"],
)

In [21]:
def training_op(root_path: str,
                model_path: str,
                batch_size: int = 2,
                train_path: str = "train_data",
                train_csv: str = "train.csv",
                epochs: int = 10,
                ):
    """Build the pipeline step that trains PhaseNet.

    The step runs ``python train.py`` inside the ``zhuwq0/phasenet:latest``
    image. ``train_path``, ``train_csv`` and ``model_path`` are all resolved
    relative to ``root_path``.

    Args:
        root_path: Base directory that the other paths are joined onto.
        model_path: Sub-path (under ``root_path``) passed as ``--model_dir``.
        batch_size: Value passed as ``--batch_size``.
        train_path: Sub-path passed as ``--train_dir``.
        train_csv: Sub-path passed as ``--train_list``.
        epochs: Value passed as ``--epoch``; defaults to the previously
            hard-coded 10.

    Returns:
        The ``dsl.ContainerOp`` describing the training step.
    """
    import os

    def join_path(relative):
        # Values are coerced with str() before joining because, inside a
        # pipeline function, they may be pipeline parameters, not plain strings.
        return os.path.join(str(root_path), relative)

    return dsl.ContainerOp(name='PhaseNet training',
                           image="zhuwq0/phasenet:latest",
                           command=['python'],
                           arguments=[
                               'train.py',
                               '--epoch', epochs,
                               '--batch_size', batch_size,
                               '--train_dir', join_path(str(train_path)),
                               '--train_list', join_path(str(train_csv)),
                               '--model_dir', join_path(str(model_path))
                               ],
                           )

In [22]:
# !python PhaseNet/phasenet/train.py --epoch=10 --batch_size=2 --model_dir=models --train_list=train.csv --train_dir=train_data 

In [23]:
def test_op(root_path: str, 
            model_path: str,
            batch_size: int = 2,
            test_path: str = "test_data", 
            test_csv: str = "test.csv",
            result_path: str = "results"):
    
    import os
    join_path = lambda x: os.path.join(str(root_path), x)
    return dsl.ContainerOp(name='PhaseNet test',
                           image="zhuwq0/phasenet:latest",
#                            command=['sh', '-c'],
#                            arguments=["ls ./"],
                           command=['python'],
                           arguments=[
                               'train.py',
                               '--mode', "test",
                               '--model', join_path(str(model_path)),
                               '--batch_size', batch_size,
                               '--test_dir', join_path(str(test_path)),
                               '--test_list', join_path(str(test_csv)),
                               '--result_dir', join_path(str(result_path))
                               ],
#                            file_outputs = {"result": f"{result_path}/loss.log"}
                           )

In [24]:
# !python PhaseNet/phasenet/train.py --mode=test --model=models --batch_size=2 --result_dir=results --test_list=test.csv --test_dir=test_data

In [25]:
def upload_model(root_path: str,
                 local_path: str,
                 remote_path:str = "phasenet",
                 bucket_name:str = "models",
                 s3_url:str = "localhost:9000",
                 secure:bool = True):
    """Upload every regular file in a local folder to a MinIO bucket.

    Files directly inside ``<root_path>/<local_path>`` are uploaded as
    ``<remote_path>/<file name>``; sub-directories are skipped. The bucket is
    created first when it does not exist yet.

    Credentials are read from the ``MINIO_ACCESS_KEY`` and ``MINIO_SECRET_KEY``
    environment variables, falling back to ``minio`` / ``minio123`` when they
    are not set.

    All imports are kept inside the function body because the function is
    wrapped with ``func_to_container_op`` and must be self-contained.

    Args:
        root_path: Base directory that ``local_path`` is joined onto.
        local_path: Folder (relative to ``root_path``) whose files are uploaded.
        remote_path: Object-name prefix inside the bucket.
        bucket_name: Target bucket.
        s3_url: ``host:port`` of the MinIO endpoint.
        secure: Forwarded to the ``Minio`` client's ``secure`` argument.
    """
    import os
    import posixpath
    from minio import Minio

    minioClient = Minio(s3_url,
                        access_key=os.environ.get("MINIO_ACCESS_KEY", "minio"),
                        secret_key=os.environ.get("MINIO_SECRET_KEY", "minio123"),
                        secure=secure)
    if not minioClient.bucket_exists(bucket_name):
        minioClient.make_bucket(bucket_name)

    source_dir = os.path.join(root_path, local_path)
    # sorted() makes the upload (and log) order deterministic.
    for f in sorted(os.listdir(source_dir)):
        local_file = os.path.join(source_dir, f)
        if not os.path.isfile(local_file):
            continue
        # Object names always use "/" as separator, independent of the host OS.
        object_name = posixpath.join(remote_path, f)
        minioClient.fput_object(bucket_name, object_name, local_file)
        print(f"upload {local_file} to {object_name}.")

In [26]:
# upload_model("./", "PhaseNet/model/190703-214543", s3_url="localhost:9000", secure=False)

In [27]:
# Wrap upload_model as a container-based pipeline component. Only "minio" is
# installed: it is the sole third-party package upload_model imports, so the
# previously listed "pandas" just added install time to every run of the step.
upload_model_op = comp.func_to_container_op(
    upload_model,
    base_image='python:3.7',
    packages_to_install=["minio"],
)

In [31]:
def quakeflow_training(root_path:str, model_path:str, bucket_name:str, s3_url:str, secure:bool):
    # Pipeline: create volume -> download datasets -> train -> test -> upload model.
    # Every step mounts the same volume at `root_path`; handing each step the
    # previous step's `pvolume` is what chains them in that order.

    volume_step = dsl.VolumeOp(
        name="Create_volume",
        resource_name="data-volume",
        size="10Gi",
        modes=dsl.VOLUME_MODE_RWO,
    ).set_display_name('Persistent Volume')

    download_step = (
        download_dataset_op(root_path)
        .add_pvolumes({root_path: volume_step.volume})
        .set_display_name('Download Datasets')
    )

    train_step = (
        training_op(root_path, model_path)
        .add_pvolumes({root_path: download_step.pvolume})
        .set_display_name('Training')
        .set_gpu_limit(1)
    )
    # Zero-length staleness window for the training step
    # (presumably to keep it from being served from the cache - confirm in the KFP caching docs).
    train_step.execution_options.caching_strategy.max_cache_staleness = "P0D"

    test_step = (
        test_op(root_path, model_path=model_path)
        .add_pvolumes({root_path: train_step.pvolume})
        .set_display_name('Inference')
    )

    # The upload currently runs unconditionally; gating it on the test result
    # (e.g. via dsl.Condition) was sketched earlier but is not active.
    upload_step = (
        upload_model_op(root_path, model_path, bucket_name=bucket_name, s3_url=s3_url, secure=secure)
        .add_pvolumes({root_path: test_step.pvolume})
        .set_display_name('Upload Model')
    )


In [32]:
# Create the Kubeflow Pipelines client used below to submit the run.
# Exactly one endpoint is active; the commented-out lines are alternative hosts.
# client = kfp.Client(host='https://45ff9cb0ebef.ngrok.io')
client = kfp.Client(host="5bbb5e6223b3b3e9-dot-us-west1.pipelines.googleusercontent.com")
# client = kfp.Client(host='127.0.0.1:8080')

In [33]:
experiment_name = 'QuakeFlow Training'
pipeline_func = quakeflow_training
run_name = f"{pipeline_func.__name__}_run"

# Values for the pipeline parameters of this run. The MinIO endpoint is
# "minio-service:9000"; "localhost:9000" was the earlier alternative.
arguments = dict(
    root_path="/tmp/",
    model_path="models",
    bucket_name="models",
    s3_url="minio-service:9000",
    secure=False,
)

# Compile the pipeline into a compressed YAML package named after the experiment.
kfp.compiler.Compiler().compile(pipeline_func, f"{experiment_name}.zip")

# Submit a run straight from the pipeline function.
results = client.create_run_from_pipeline_func(
    pipeline_func,
    experiment_name=experiment_name,
    run_name=run_name,
    arguments=arguments,
)

- To check the uploaded models, forward the MinIO service port and open the URL below in a browser:

kubectl port-forward -n kubeflow svc/minio-service 9000:9000

Access Key: minio
Secret Key: minio123

open: http://localhost:9000/minio/models/phasenet/


- GPU training:

https://github.com/kubeflow/pipelines/blob/master/samples/tutorials/gpu/gpu.ipynb