<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [1]:
import kfp
import kfp.components as comp

In [8]:
%%writefile ./pipeline/pipeline.py

import kfp

merge_and_split_op = kfp.components.load_component_from_file("./kf_utils/merge_and_split_op.yaml")
preprocess_dataset_op = kfp.components.load_component_from_file("./kf_utils/preprocess_dataset_op.yaml")
prepare_data_op = kfp.components.load_component_from_file("./kf_utils/prepare_data_op.yaml")
train_svm_op = kfp.components.load_component_from_file("./kf_utils/train_svm_op.yaml")
train_randomforest_op = kfp.components.load_component_from_file("./kf_utils/train_randomforest_op.yaml")
train_xgb_op = kfp.components.load_component_from_file("./kf_utils/train_xgb_op.yaml")
evaluate_models_op = kfp.components.load_component_from_file("./kf_utils/evaluate_models_op.yaml")
train_best_model_op = kfp.components.load_component_from_file("./kf_utils/train_best_model_op.yaml")
model_predict_op = kfp.components.load_component_from_file("./kf_utils/model_predict_op.yaml")

@kfp.dsl.pipeline(
   name='Emission prediction pipeline',
   description='An example pipeline.'
)
def emission_pipeline(
    bucket,
    data_2015,
    data_2016,
    hyperopt_iterations,
    subfolder
):
    merge_and_split_task = merge_and_split_op(bucket, data_2015, data_2016)
    preprocess_task = preprocess_dataset_op(merge_and_split_task.outputs['output_edfcsv'])
    preparation_task = prepare_data_op(preprocess_task.outputs['output_cleandatacsv'])
    
    rf_train_task = train_randomforest_op(preparation_task.outputs['output_xtraincsv'],
                                         preparation_task.outputs['output_ytraincsv'],
                                         preparation_task.outputs['output_xtestcsv'],
                                         preparation_task.outputs['output_ytestcsv'],
                                         2)
    
    xgb_train_task = train_xgb_op(preparation_task.outputs['output_xtraincsv'],
                                 preparation_task.outputs['output_ytraincsv'],
                                 preparation_task.outputs['output_xtestcsv'],
                                 preparation_task.outputs['output_ytestcsv'],
                                 2)
    
    svm_train_task = train_svm_op(preparation_task.outputs['output_xtraincsv'],
                                 preparation_task.outputs['output_ytraincsv'],
                                 preparation_task.outputs['output_xtestcsv'],
                                 preparation_task.outputs['output_ytestcsv'],
                                 2)
    evaluate_models_task = evaluate_models_op(bucket,
                                              subfolder,
                                              svm_train_task.outputs['MSE'],
                                              svm_train_task.outputs['R2'],
                                              svm_train_task.outputs['hyperparams'],
                                              xgb_train_task.outputs['MSE'],
                                              xgb_train_task.outputs['R2'],
                                              xgb_train_task.outputs['hyperparams'],
                                              rf_train_task.outputs['MSE'],
                                              rf_train_task.outputs['R2'],
                                              rf_train_task.outputs['hyperparams']
                                             )
    
    train_best_model_task = train_best_model_op(evaluate_models_task.outputs['best_model'],
                                               evaluate_models_task.outputs['hyperparams'],
                                               preparation_task.outputs['output_xtraincsv'],
                                               preparation_task.outputs['output_ytraincsv'])
    
    model_predict_task = model_predict_op(train_best_model_task.outputs['output_pickle_model'],
                                          preparation_task.outputs['output_xtestcsv'])

Overwriting ./pipeline/pipeline.py


In [9]:
%%sh
dsl-compile --py pipeline/pipeline.py --output pipeline/pipeline.yaml

In [2]:
ENDPOINT = 'http://f3f6f1e6-istiosystem-istio-2af2-1570305981.eu-west-1.elb.amazonaws.com/pipeline'

In [6]:
authservice_session='authservice_session=MTY0MzYzNjI0NHxOd3dBTkZaTk16TktWVVJZV0VRMFNrUTBXRm8xTTFaRU5WaFZRa05ITlVOQlJFNUNXVkJETlZkRVNFeEROMHhIUmxSSVdqSlZRVkU9fMXwGJrw3YahHYcxrcQCmxVs2IY_ZkZJJmnKAa1VC0l1'
client = kfp.Client(host=ENDPOINT, cookies=authservice_session)

In [12]:
client = kfp.Client(host=ENDPOINT, other_client_id="admin@kubeflow.org", other_client_secret="12341234")

In [13]:
client.list_experiments(namespace="admin")

{'experiments': None, 'next_page_token': None, 'total_size': None}

In [86]:
upload_pipe = client.upload_pipeline(pipeline_package_path="./pipeline.yaml",
                      pipeline_name="test_from_jupyter_TESTDELETE",
                      description="frend")

In [92]:
import re

In [98]:
def get_id(text):
    match = re.search('{\'id\': \'(.+?)\',\\n', text)        
    if match:
        found = match.group(1)
        return(found)

In [101]:
get_id(str(upload_pipe))

'd6c10b81-2d78-41fa-8343-97c0c4065553'

In [56]:
experiment_id = 'fe0390c9-a311-4248-89e2-72522f17c26c'
job_name = 'run_from_ipynb'
pipeline_id = '3ac54c42-f463-49cb-9cf3-8b1eb14b7eae'
version_id = '1'
params = {'bucket' : 'sgf-wedr-src-data',
        'data_2015' : 'temptest/2015-building-energy-benchmarking.csv',
        'data_2016' : 'temptest/2016-building-energy-benchmarking.csv',
        'hyperopt_iterations' : '1',
        'subfolder' : 'temptest'}

In [58]:
client.run_pipeline(experiment_id=experiment_id,
                   job_name=job_name,
                   params=params,
                   pipeline_id=pipeline_id)

{'created_at': datetime.datetime(2022, 1, 28, 14, 46, 38, tzinfo=tzutc()),
 'description': None,
 'error': None,
 'finished_at': datetime.datetime(1970, 1, 1, 0, 0, tzinfo=tzutc()),
 'id': 'a3f90e7c-1376-46f4-8ccb-d95ab15d2d06',
 'metrics': None,
 'name': 'run_from_ipynb',
 'pipeline_spec': {'parameters': [{'name': 'bucket',
                                   'value': 'sgf-wedr-src-data'},
                                  {'name': 'data_2015',
                                   'value': 'temptest/2015-building-energy-benchmarking.csv'},
                                  {'name': 'data_2016',
                                   'value': 'temptest/2016-building-energy-benchmarking.csv'},
                                  {'name': 'hyperopt_iterations', 'value': '1'},
                                  {'name': 'subfolder', 'value': 'temptest'}],
                   'pipeline_id': '3ac54c42-f463-49cb-9cf3-8b1eb14b7eae',
                   'pipeline_manifest': None,
                   'pipel

In [102]:
from datetime import datetime
today = str(datetime.now())
version_name = "update-pipeline-" + today
desc = 'updated on {} from jupyter'.format(str(date.today()))

updade_pipe = client.upload_pipeline_version(pipeline_package_path="./pipeline.yaml",
                              pipeline_version_name=version_name,
                               pipeline_name = "test_from_jupyter_NEW")
                              #pipeline_id='bc6f946d-30ce-453c-b08e-84030f87163d')

In [103]:
get_id(str(updade_pipe))

'bc6f946d-30ce-453c-b08e-84030f87163d'

In [None]:
Signature:
kfp.Client.upload_pipeline_version(
    self,
    pipeline_package_path,
    pipeline_version_name: str,
    pipeline_id: Union[str, NoneType] = None,
    pipeline_name: Union[str, NoneType] = None,
    description: Union[str, NoneType] = None,
) -> kfp_server_api.models.api_pipeline_version.ApiPipelineVersion
Docstring:
Uploads a new version of the pipeline to the Kubeflow Pipelines cluster.

Args:
  pipeline_package_path: Local path to the pipeline package.
  pipeline_version_name:  Name of the pipeline version to be shown in the UI.
  pipeline_id: Optional. Id of the pipeline.
  pipeline_name: Optional. Name of the pipeline.
  description: Optional. Description of the pipeline version to be shown in the UI.

In [66]:
from datetime import date

today = date.today()
today = str(today.year) + "-" + str(today.month) + "-" + str(today.day)

In [70]:
str(date.today())

'2022-01-28'

In [82]:
from datetime import datetime

In [83]:
str(datetime.now())

'2022-01-28 16:57:52.701631'

In [21]:
import urllib, cookiejar

username = 'admin@kubeflow.org'
password = '12341234'

cj = cookiejar.CookieJar()
opener = urllib.build_opener(urllib2.HTTPCookieProcessor(cj))
login_data = urllib.urlencode({'username' : username, 'j_password' : password})
opener.open('http://f3f6f1e6-istiosystem-istio-2af2-1570305981.eu-west-1.elb.amazonaws.com/dex/auth/local?req=dvcqbyl6ybhhxb3ldce7evhub', login_data)
resp = opener.open('http://f3f6f1e6-istiosystem-istio-2af2-1570305981.eu-west-1.elb.amazonaws.com/?ns=admin')
print(resp.read())

AttributeError: module 'cookiejar' has no attribute 'CookieJar'

In [30]:
from requests import session
import requests
import sys

EMAIL = 'admin@kubeflow.org'
PASSWORD = '12341234'

URL = 'http://f3f6f1e6-istiosystem-istio-2af2-1570305981.eu-west-1.elb.amazonaws.com/'

def main():
    # Start a session so we can have persistant cookies
    session = requests.session()

    # This is the form data that the page sends when logging in
    login_data = {
        'login': EMAIL,
        'password': PASSWORD,
        'submit': 'login',
    }

    # Authenticate
    r = session.post(URL, data=login_data)

    # Try accessing a page that requires you to be logged in
    r = session.get('http://f3f6f1e6-istiosystem-istio-2af2-1570305981.eu-west-1.elb.amazonaws.com/_/jupyter/?ns=admin')

    print(r)
    
main()

<Response [200]>


In [39]:
def get_cookie(text):
    """
    Function that retrieves login cookie

    Parameters
    ----------
    text : str
        string version of the logs.

    Returns
    -------
    str : cookie value.

    """
    match = re.search('authservice_session=(.+?) ', text)        
    if match:
        found = match.group(1)
        return(found)

In [43]:
import mechanize
from bs4 import BeautifulSoup
import urllib
import http.cookiejar as cookielib ## http.cookiejar in python3
import re

cj = cookielib.CookieJar()
br = mechanize.Browser()
br.set_cookiejar(cj)
br.open(URL)

br.select_form(nr=0)
br.form['login'] = EMAIL
br.form['password'] = PASSWORD
br.submit()

get_cookie(str(cj))

'MTY0MzYzOTU3OHxOd3dBTkROV1JWTlBXbFpRVGxoVFVsUlBSa2hNU3pORFNUYzBVRFZXTTB0WFVWWTBXa1ZEVkZvM04wTkpSVkkwU2s1QlZGVkVVa0U9fEa0lCWonkFmWsW11VLQG3F4N56mhdtJTSO5UTEDQK_3'

In [44]:
str(cj)

'<CookieJar[<Cookie authservice_session=MTY0MzYzOTU3OHxOd3dBTkROV1JWTlBXbFpRVGxoVFVsUlBSa2hNU3pORFNUYzBVRFZXTTB0WFVWWTBXa1ZEVkZvM04wTkpSVkkwU2s1QlZGVkVVa0U9fEa0lCWonkFmWsW11VLQG3F4N56mhdtJTSO5UTEDQK_3 for f3f6f1e6-istiosystem-istio-2af2-1570305981.eu-west-1.elb.amazonaws.com/>]>'

In [45]:
authservice_session='authservice_session=MTY0MzYzOTU3OHxOd3dBTkROV1JWTlBXbFpRVGxoVFVsUlBSa2hNU3pORFNUYzBVRFZXTTB0WFVWWTBXa1ZEVkZvM04wTkpSVkkwU2s1QlZGVkVVa0U9fEa0lCWonkFmWsW11VLQG3F4N56mhdtJTSO5UTEDQK_3'
client = kfp.Client(host=ENDPOINT, cookies=authservice_session)
client.list_experiments(namespace="admin")

{'experiments': [{'created_at': datetime.datetime(2022, 1, 17, 10, 59, 39, tzinfo=tzutc()),
                  'description': None,
                  'id': '4786bc8c-05be-4594-a6c2-036690d963e7',
                  'name': 'test',
                  'resource_references': [{'key': {'id': 'admin',
                                                   'type': 'NAMESPACE'},
                                           'name': None,
                                           'relationship': 'OWNER'}],
                  'storage_state': 'STORAGESTATE_AVAILABLE'},
                 {'created_at': datetime.datetime(2022, 1, 21, 19, 4, 1, tzinfo=tzutc()),
                  'description': None,
                  'id': '743a78d1-3f92-4571-810f-1be5f2339188',
                  'name': 'exp1',
                  'resource_references': [{'key': {'id': 'admin',
                                                   'type': 'NAMESPACE'},
                                           'name': None,
                    