In [1]:
import kfp
import kfp.dsl as dsl
import kfp.components as comp
from kfp.components import InputPath, OutputPath

In [2]:
def set_config(data_path: str, 
               config_file:OutputPath("pickle"), 
               datetime_file:OutputPath("pickle")) -> list:
    
    import obspy
    import os
    import pickle
    import datetime
    
    # Location
    pi = 3.1415926
    degree2km = pi*6371/180
    center = (-115.53, 32.98) #salton sea
    horizontal_degree = 0.5
    vertical_degree = 0.5
    zero_anchor = (center[0]-horizontal_degree, center[1]-vertical_degree)

    # Time
    starttime = obspy.UTCDateTime("2020-10-01")
    endtime = obspy.UTCDateTime("2020-10-03") ## not included

    # seismic stations
    network_list = "CI"
    # channel_list = "HNE,HNN,HNZ,HHE,HHN,HHZ,BHE,BHN,BHZ,EHE,EHN,EHZ"
    channel_list = "HHE,HHN,HHZ"
    
#     print(data_path)
#     if not os.path.exists(data_path):
#         os.makedirs(data_path)
#     if not os.path.exists(os.path.join(data_path, "outputs/Output/")):
#         os.makedirs(os.path.join(data_path, "outputs/Output/"))
#         open(os.path.join(data_path, "outputs/Output/data"), "a").close()
    
    ####### save config ########
    config = {}
    config["center"] = center
    config["horizontal_degree"] = horizontal_degree
    config["vertical_degree"] = vertical_degree
    config["zero_anchor"] = zero_anchor
    config["xlim"] = [0, horizontal_degree*2*degree2km]
    config["ylim"] = [0, vertical_degree*2*degree2km]
    config["anchor"] = zero_anchor
    config["degree2km"] = degree2km
    config["starttime"] = starttime
    config["endtime"] = endtime
    config["networks"] = network_list
    config["channels"] = channel_list
    config["network_list"] = network_list
    config["channel_list"] = channel_list
#     config["station_response"] = stations
#     with open(os.path.join(data_path, "config.pkl"), "wb") as fp:
#         pickle.dump(config, fp)
    with open(config_file, "wb") as fp:
        pickle.dump(config, fp)
        
    one_day = datetime.timedelta(days=1)
    one_hour = datetime.timedelta(hours=1)
    starttimes = []
    tmp_start = starttime
    while tmp_start < endtime:
        starttimes.append(tmp_start)
        tmp_start += one_hour
    
#     with open(os.path.join(data_path, "datetimes.pkl"), "wb") as fp:
#         pickle.dump({"starttimes": starttimes, "interval": one_hour}, fp)
    with open(datetime_file, "wb") as fp:
        pickle.dump({"starttimes": starttimes, "interval": one_hour}, fp)
        
    num_parallel = 2
    idx = [[] for i in range(num_parallel)]
    for i in range(len(starttimes)):
        idx[i - i//num_parallel*num_parallel].append(i)

    return tuple(idx)

In [3]:
idx = set_config("", "config.pkl", "datetimes.pkl")
print(idx)

([0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46], [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47])


In [4]:
import pickle
with open("./test/datetimes.pkl", "rb") as fp:
    data = pickle.load(fp)
print(data["starttimes"])

[UTCDateTime(2020, 10, 1, 0, 0), UTCDateTime(2020, 10, 1, 1, 0)]


In [5]:
config_op = comp.func_to_container_op(set_config, 
                                      base_image='python:3.8',
                                      packages_to_install= [
                                          "obspy"
                                      ])

In [6]:
def download_events(data_path, 
                    config_file: InputPath("pickle"),
                    event_file: OutputPath(str)):
    
    import pickle, os
    import obspy
    from obspy.clients.fdsn import Client
    import matplotlib
    matplotlib.use("agg")
    import matplotlib.pyplot as plt
    from collections import defaultdict
    import pandas as pd
    
#     with open(os.path.join(data_path, "config.pkl"), "rb") as fp:
#         config = pickle.load(fp)
    with open(config_file, "rb") as fp:
        config = pickle.load(fp)
    
    ####### IRIS catalog ########
    events = Client("IRIS").get_events(starttime=config["starttime"],
                                       endtime=config["endtime"],
                                       minlatitude=config["center"][1]-config["vertical_degree"],
                                       maxlatitude=config["center"][1]+config["vertical_degree"],
                                       minlongitude=config["center"][0]-config["horizontal_degree"],
                                       maxlongitude=config["center"][0]+config["horizontal_degree"],
                                       filename=os.path.join(data_path, 'events.xml'))

    events = obspy.read_events(os.path.join(data_path, 'events.xml'))
    print(f"Number of events: {len(events)}")
    # events.plot('local', outfile="events.png")

    ####### Save catalog ########
    catalog = defaultdict(list)
    for event in events:
        catalog["time"].append(event.origins[0].time.datetime)
        catalog["x(km)"].append((event.origins[0].longitude- config["zero_anchor"][0])*config["degree2km"])
        catalog["y(km)"].append((event.origins[0].latitude - config["zero_anchor"][1])*config["degree2km"])
        catalog["z(km)"].append(event.origins[0].depth/1e3)
        catalog["mag"].append(event.magnitudes[0].mag)
        catalog["lng"].append(event.origins[0].longitude)
        catalog["lat"].append(event.origins[0].latitude)
        catalog["depth(m)"].append(event.origins[0].depth)
    catalog = pd.DataFrame.from_dict(catalog).sort_values(["time"])
#     catalog.to_csv("events.csv",
    catalog.to_csv(event_file,
                    sep="\t", index=False, float_format="%.3f",
                    date_format='%Y-%m-%dT%H:%M:%S.%f',
                    columns=["time", "x(km)", "y(km)", "z(km)", "mag", "lng", "lat", "depth(m)"])

    ####### Plot catalog ########
#     t = []
#     mag = []
#     for event in events:
#         t.append(event.origins[0].time.datetime)
#         mag.append(event.magnitudes[0].mag)
#     plt.figure()
#     plt.plot_date(t, mag)
#     plt.gcf().autofmt_xdate()
#     plt.ylabel("Magnitude")
#     plt.title(f"Number of events: {len(events)}")
#     plt.savefig(os.path.join(data_path, "events_mag_time.png"))
    # plt.show()

In [7]:
download_events("", "config.pkl", "events.csv")

Number of events: 1062


In [8]:
download_events_op = comp.func_to_container_op(download_events, 
                                              base_image='python:3.8',
                                              packages_to_install= [
                                                  "obspy",
                                                  "pandas",
                                                  "matplotlib"
                                              ])

In [9]:
def download_stations(data_path: str, 
                      config_file: InputPath("pickle"),
                      station_list: OutputPath(str),
                      station_file: OutputPath("pickle")):
    import pickle, os
    import obspy
    from obspy.clients.fdsn import Client
    import matplotlib
    matplotlib.use("agg")
    import matplotlib.pyplot as plt
    from collections import defaultdict
    import pandas as pd
    
#     with open(os.path.join(data_path, "config.pkl"), "rb") as fp:
#         config = pickle.load(fp)
    with open(config_file, "rb") as fp:
        config = pickle.load(fp)

    ####### Download stations ########
    stations = Client("IRIS").get_stations(network = config["network_list"],
                                           station = "*",
                                           starttime=config["starttime"],
                                           endtime=config["endtime"],
                                           minlatitude=config["center"][1]-config["vertical_degree"],
                                           maxlatitude=config["center"][1]+config["vertical_degree"],
                                           minlongitude=config["center"][0]-config["horizontal_degree"],
                                           maxlongitude=config["center"][0]+config["horizontal_degree"],
                                           channel=config["channel_list"],
                                           level="response",
                                           filename=os.path.join(data_path, 'stations.xml'))

    stations = obspy.read_inventory(os.path.join(data_path, 'stations.xml'))
    print("Number of stations: {}".format(sum([len(x) for x in stations])))
    # stations.plot('local', outfile="stations.png")

    ####### Save stations ########
    station_locs = defaultdict(dict)
    for network in stations:
        for station in network:
            for chn in station:
                x = (chn.longitude - config["zero_anchor"][0])*config["degree2km"]
                y = (chn.latitude - config["zero_anchor"][1])*config["degree2km"]
                z = -chn.elevation / 1e3 #km
                sid = f"{network.code}.{station.code}.{chn.location_code}.{chn.code[:-1]}"
                if sid in station_locs:
                    station_locs[sid]["component"] += f",{chn.code[-1]}"
                    station_locs[sid]["response"] += f",{chn.response.instrument_sensitivity.value:.2f}"
                else:
                    component = f"{chn.code[-1]}"
                    response = f"{chn.response.instrument_sensitivity.value:.2f}"
                    dtype = chn.response.instrument_sensitivity.input_units.lower()
                    tmp_dict = {}
                    tmp_dict["x(km)"], tmp_dict["y(km)"], tmp_dict["z(km)"] = x, y, z
                    tmp_dict["lng"], tmp_dict["lat"], tmp_dict["elv(m)"] = chn.longitude, chn.latitude, chn.elevation
                    tmp_dict["component"], tmp_dict["response"], tmp_dict["type"] = component, response, dtype
                    station_locs[sid] = tmp_dict
    station_locs = pd.DataFrame.from_dict(station_locs, orient='index')
#     station_locs.to_csv("stations.csv",
#                     sep="\t", float_format="%.3f",
#                     index_label="station",
#                     columns=["x(km)", "y(km)", "z(km)", "lat", "lng", "elv(m)", "type", "component", "response"])
    station_locs.to_csv(station_list,
                        sep="\t", float_format="%.3f",
                        index_label="station",
                        columns=["x(km)", "y(km)", "z(km)", "lat", "lng", "elv(m)", "type", "component", "response"])

#     ####### Plot stations ########
#     plt.figure()
#     plt.plot(station_locs["x(km)"], station_locs["y(km)"], "^", label="Stations")
# #     plt.plot(catalog["x(km)"], catalog["y(km)"], "k.", label="Earthquakes")
#     plt.xlabel("X (km)")
#     plt.ylabel("Y (km)")
#     plt.axis("scaled")
#     plt.legend()
#     plt.title(f"Number of stations: {len(station_locs)}")
#     plt.savefig(os.path.join(data_path, "stations_events.png"))
#     # plt.show()
    
#     config["station_list"] = stations
#     with open(os.path.join(data_path, "config.pkl"), "wb") as fp:
#         pickle.dump(config, fp)
    with open(station_file, "wb") as fp:
        pickle.dump(stations, fp)

In [10]:
download_stations("", "config.pkl", "stations.csv", "stations.pkl")

Number of stations: 16


In [11]:
download_stations_op = comp.func_to_container_op(download_stations, 
                                                 base_image='python:3.8',
                                                 packages_to_install= [
                                                     "obspy",
                                                     "pandas",
                                                     "matplotlib"
                                                 ])

In [12]:
def download_waveform(data_path: str, 
                      idx: list, 
                      config_file: InputPath("pickle"),
                      datetime_file: InputPath("pickle"),
                      station_file: InputPath("pickle"),
                      fname_list: OutputPath(str),
                      s3_url:str="localhost:9000", 
                      secure:bool=False) -> str:
    
    import pickle, os
    import obspy
    from obspy.clients.fdsn import Client
#     from minio import Minio
#     from minio.error import (ResponseError, BucketAlreadyOwnedByYou,
#                              BucketAlreadyExists)
#     minioClient = Minio(f'{s3_url}',
#                   access_key='quakeflow',
#                   secret_key='quakeflow',
#                   secure=secure)
    
#     with open(os.path.join(data_path, "config.pkl"), "rb") as fp:
#         config = pickle.load(fp)
    with open(config_file, "rb") as fp:
        config = pickle.load(fp)
#     with open(os.path.join(data_path, "datetimes.pkl"), "rb") as fp:
    with open(datetime_file, "rb") as fp:
        tmp = pickle.load(fp)
        starttimes = tmp["starttimes"]
        interval = tmp["interval"]
    with open(station_file, "rb") as fp:
        stations = pickle.load(fp)
    
    waveform_dir = os.path.join(data_path, "waveforms")
    ####### Download data ########
    client = Client("SCEDC")
#     def download_all_stations(starttime, interval, overwrite=False):
    fp = open(fname_list, "w")
    fp.write("fname\n")
    for i in idx: 
        starttime = starttimes[i] 
        endtime = starttime + interval
#         fname = os.path.join(output_dir, "{}.mseed".format(starttime.datetime.strftime("%Y-%m-%dT%H")))
        fname = "{}.mseed".format(starttime.datetime.strftime("%Y-%m-%dT%H"))
#         if not overwrite:
#         if os.path.exists(fname):
#             print(f"{fname} exists")
#             return

        max_retry = 3
        stream = obspy.Stream()
        print(f"{fname} download starts")
#         for network in config["station_list"]:
        for network in stations:
            for station in network:
                # logger.info(f"********{network.code}.{station.code}********")
                retry = 0
                while retry < max_retry:
                    try:
                        tmp = client.get_waveforms(network.code, station.code, "*", config["channel_list"], starttime, endtime)
                        stream += tmp
                        break
                    except Exception as e:
                    #           logger.warning("Error {}.{}: {}".format(network.code, station.code,e))
                        err = e
                        retry += 1
                        time.sleep(1)
                        continue
                if retry == max_retry:
                    print(f"{fname}: MAX {max_retry} retries reached : {network.code}.{station.code} with error: {err}")

#         stream.write(os.path.join("/tmp/", fname))
        if not os.path.exists(waveform_dir):
            os.makedirs(waveform_dir)
        stream.write(os.path.join(waveform_dir, fname))
        print(f"{fname} download succeeds")
        fp.write(f"{fname}\n")

#         # Make a bucket with the make_bucket API call.
#         try:
#             minioClient.make_bucket("waveforms")
#         except BucketAlreadyOwnedByYou as err:
#             pass
#         except BucketAlreadyExists as err:
#             pass
#         except ResponseError as err:
#             raise

#         # Put an object 'pumaserver_debug.log' with contents from 'pumaserver_debug.log'.
#         try:
# #             minioClient.fput_object('waveforms', fname, os.path.join("/tmp/", fname))
#             minioClient.fput_object('waveforms', fname, os.path.join(data_path, "waveforms", fname))
#         except ResponseError as err:
#             print(err)
    print(os.listdir(waveform_dir))
    fp.close()

    return waveform_dir

In [13]:
# download_waveform("./test", idx=[1], s3_url="18663dd7908c.ngrok.io", secure=True)
download_waveform("./test", [1], "config.pkl", "datetimes.pkl", "stations.pkl", "fname.csv")

2020-10-01T01.mseed download starts
2020-10-01T01.mseed download succeeds
['2020-10-01T01.mseed']


'./test/waveforms'

In [14]:
download_waveform_op = comp.func_to_container_op(download_waveform, 
                                                 base_image='python:3.8',
                                                 packages_to_install= [
                                                     "obspy",
#                                                      "minio"
                                                 ])

In [15]:
def phasenet_op(data_dir: str, 
                data_list: str, 
                stations: str):

    return dsl.ContainerOp(name='PhaseNet picking',
                           image="zhuwq0/phasenet:0.2",
                           command=['python'],
                           arguments=[
                             'predict.py',
                             '--model', "model/190703-214543",
                             '--data_dir', data_dir,
                             '--data_list', dsl.InputArgumentPath(data_list),
                             '--stations', dsl.InputArgumentPath(stations),
#                              '--result_dir', "results",
                             '--input_mseed'
                             ],
                           file_outputs = {"picks": "/opt/results/picks.csv"}
                         )

In [16]:
def gmma(data_path: str,
         idx: list,
         pick_list: InputPath(str),
         station_list: InputPath(str),
         catalog_path: OutputPath(str)):
    
    import pandas as pd
    from datetime import datetime, timedelta
    from gmma import mixture
    from tqdm import tqdm
    import numpy as np
    import os
    
    def read_picks(fpick):
        with open(fpick, "r") as fp:
            picks = pd.read_csv(fp)
        picks["time"] = picks["fname"].map(lambda x: x.split(".")[0])
        picks["station"] = picks["fname"].map(lambda x: ".".join(x.split(".")[1:]))
        picks["itp"] = picks["itp"].map(lambda x: [round(float(i)*dt, 2) for i in x.strip("[]").split(" ") if i != ""])
        picks["its"] = picks["its"].map(lambda x: [round(float(i)*dt, 2) for i in x.strip("[]").split(" ") if i != ""])
        picks["tp_prob"] = picks["tp_prob"].map(lambda x: [round(float(i), 2) for i in x.strip("[]").split(" ") if i != ""])
        picks["ts_prob"] = picks["ts_prob"].map(lambda x: [round(float(i), 2) for i in x.strip("[]").split(" ") if i != ""])
        return picks
        
    def convert_data(meta, dims):
        data = []
        locs = []
        phase_type = []
        phase_weight = []
        for i in range(len(meta)):
            for tp, tp_prob in zip(meta.iloc[i]["itp"], meta.iloc[i]["tp_prob"]):
                data.append(tp)
                locs.append(meta.iloc[i][dims].values.astype("float"))
                phase_type.append("p")
                phase_weight.append(tp_prob)
            for ts, ts_prob in zip(meta.iloc[i]["its"], meta.iloc[i]["ts_prob"]):
                data.append(ts)
                locs.append(meta.iloc[i][dims].values.astype("float"))
                phase_type.append("s")
                phase_weight.append(ts_prob)
        locs = np.array(locs)
        data = np.array(data)[:, np.newaxis]
        phase_weight = np.array(phase_weight)[:, np.newaxis]
        return data, locs, phase_type, phase_weight
    
    dt = 0.01
    vp = 6.0
    vs = vp/1.75
    dims = ['x(km)', 'y(km)']
    
    picks = read_picks(pick_list)
    
    with open(station_list) as fp:
        stations = pd.read_csv(fp, delimiter="\t")
    num_sta = len(stations)
#     events = pd.read_csv("events.csv", delimiter="\t")
    meta = pd.merge(stations, picks, on="station")
    time_intervals = sorted(list(set(meta["time"])))
    
    ## 
    eq_t = []
    eq_loc = []
    eq_std = []
    eq_prob = []
    # data_prob = []
    # data_score = []

    num = 0
    for t in tqdm(time_intervals):

        data, locs, phase_type, phase_weight = convert_data(meta[meta["time"] == t], dims)

        num_event = max(int(len(data)/num_sta*3.0), 4)
        centers_init = np.vstack([np.ones(num_event)*np.mean(locs[:,0]),
                                  np.ones(num_event)*np.mean(locs[:,1]),
        #                           np.ones(num_event)*0.0,
                                  np.linspace(data.min(), data.max(), num_event)]).T # n_eve, n_dim(x, y, z) + 1(t)

        gmm = mixture.GaussianMixture(n_components=num_event, covariance_type='full', 
                                      centers_init=centers_init.copy(), station_locs=locs, 
                                      phase_type=phase_type, phase_weight=phase_weight).fit(data) 
        pred = gmm.predict(data) 
        prob = gmm.predict_proba(data)
        prob_eq = prob.mean(axis=0)
        std_eq = gmm.covariances_.squeeze()
        ii = np.array([True if len(data[pred==i, 0]) > max(num_sta//2, 4) else False for i in range(len(prob_eq))]) & (prob_eq > 1/num_event) & (std_eq < 40)
        prob_data = prob[range(len(data)), pred]
        score_data = gmm.score_samples(data)

        result = gmm.centers_[ii,:]
        time = pd.Timestamp(t) + result[:, -1].astype('timedelta64[s]')
        loc = result[:, :-1]

        eq_t.append(time)
        eq_loc.append(loc)
        eq_std.append(std_eq[ii])
        eq_prob.append(prob_eq[ii]*num_event)
#         break
    
    eq_t = np.hstack(eq_t)
    eq_loc = np.vstack(eq_loc)
    eq_std = np.hstack(eq_std)
    eq_prob = np.hstack(eq_prob)
    # data_prob = np.hstack(data_prob)
    # data_score = np.hstack(data_score)
    catalog = {}
    catalog["time"] = eq_t
    for i, k in enumerate(dims):
        catalog[k] = eq_loc[:,i]
    catalog["prob"] = eq_prob
    catalog["std"] = eq_std
    catalog = pd.DataFrame(catalog, columns=["time"]+dims+["prob", "std"])
    
    with open(catalog_path, 'w') as fp:
        catalog.to_csv(fp, sep="\t", index=False, 
                       float_format="%.3f",
                       date_format='%Y-%m-%dT%H:%M:%S.%f')
    
    with open(os.path.join(data_path, f"catalog_{idx[0]:04d}.csv"), 'w') as fp:
        catalog.to_csv(fp, sep="\t", index=False, 
                       float_format="%.3f",
                       date_format='%Y-%m-%dT%H:%M:%S.%f')

In [17]:
gmma("test", [1], "picks.csv", "stations.csv", "catalog.csv")

100%|██████████| 48/48 [08:04<00:00, 10.09s/it]


In [18]:
gmma_op = comp.func_to_container_op(gmma, 
                                     base_image='python:3.8',
                                     packages_to_install= [
                                         "tqdm",
                                         "pandas",
                                         "numpy",
                                         "gmma"
                                     ])

In [19]:
def combine_catalog(data_path, 
                    catalog_path: OutputPath(str)):
    import pandas as pd
    from glob import glob
    import os
    catalog_list = list(sorted(glob(os.path.join(data_path, "catalog_*.csv"))))
    tmp = []
    for c in catalog_list:
        with open(c, 'r') as fp:
            tmp.append(pd.read_csv(fp, sep="\t"))
    catalog = pd.concat(tmp)
    with open(catalog_path, "w") as fp:
        catalog.to_csv(fp, sep="\t", index=False, 
                       float_format="%.3f",
                       date_format='%Y-%m-%dT%H:%M:%S.%f')

In [20]:
combine_catalog("./test", "catalog_final.csv")

In [30]:
combine_op = comp.func_to_container_op(combine_catalog, 
                                       base_image='python:3.8',
                                       packages_to_install= [
                                           "pandas",
                                       ])


In [32]:
# Define the pipeline
@dsl.pipeline(name='QuakeFlow', description='')
def quakeflow_pipeline(data_path: str, s3_url:str="localhost:9000", secure:bool=False):
    
#     s3_url = "10.3.252.218"
#     s3_url = "127.0.0.1"
#     pvop = dsl.VolumeOp(name="Create_volume",
#                        resource_name="data-volume", 
#                        size="10Gi", 
#                        modes=dsl.VOLUME_MODE_RWO).volume
    pvop = dsl.PipelineVolume(pvc="quakeflow-smw46-data-volume")
    
    config = config_op(data_path)#.add_pvolumes({data_path: vop.volume})
        
    events = download_events_op(data_path, config.outputs["config"])#.add_pvolumes({data_path: config.pvolume})
    
    stations = download_stations_op(data_path, config.outputs["config"])#.add_pvolumes({data_path: config.pvolume})

    with kfp.dsl.ParallelFor(config.outputs["output"]) as idx:
        download_op_ = download_waveform_op(data_path, idx, 
                                            config.outputs["config"], 
                                            config.outputs["datetime"], 
                                            stations.outputs["station"]
                                           ).add_pvolumes({data_path: pvop}).after(stations)
        phasenet_op_ = phasenet_op(download_op_.outputs["output"], 
                                   download_op_.outputs["fname_list"], 
                                   stations.outputs["station_list"]
                                   ).add_pvolumes({data_path: pvop}).after(download_op_)
        gmma_op_ = gmma_op(data_path, idx,
                          phasenet_op_.outputs["picks"],
                          stations.outputs["station_list"]
                          ).add_pvolumes({data_path: pvop}).after(phasenet_op_)

    combine_op_ = combine_op(data_path).add_pvolumes({data_path: pvop}).after(gmma_op_)
    combine_op_.execution_options.caching_strategy.max_cache_staleness = "P0D"

In [33]:
client = kfp.Client(host='https://91eef1af6962.ngrok.io/')
# client = kfp.Client(host='553ab00ece5a86e5-dot-us-west1.pipelines.googleusercontent.com')

In [34]:
experiment_name = 'QuakeFlow'
pipeline_func = quakeflow_pipeline
run_name = pipeline_func.__name__ + '_run'

arguments = {"data_path": "/tmp/",
#              "s3_url": "18663dd7908c.ngrok.io",
#              "secure": True
#              "s3_url": "10.3.248.45:9000",
#              "secure": False
            }

# Compile pipeline to generate compressed YAML definition of the pipeline.
kfp.compiler.Compiler().compile(pipeline_func, '{}.zip'.format(experiment_name))

# Submit pipeline directly from pipeline function
results = client.create_run_from_pipeline_func(pipeline_func, 
                                               experiment_name=experiment_name, 
                                               run_name=run_name, 
                                               arguments=arguments)