In [4]:
import mlrun

In [5]:
from mlrun.artifacts import DatasetArtifact,Artifact

In [3]:
from sklearn.datasets import load_breast_cancer
import pandas as pd
# Load the scikit-learn breast cancer data and assemble one DataFrame:
# one column per feature, followed by a "label" column holding the target.
breast_cancer = load_breast_cancer()
breast_cancer_labels = pd.DataFrame({"label": breast_cancer.target})
breast_cancer_dataset = pd.DataFrame(
    breast_cancer.data, columns=breast_cancer.feature_names
).join(breast_cancer_labels)  # both frames share the same default row index


In [6]:
# Load the MLRun project whose context is the current directory.
project = mlrun.load_project(context='./')

In [7]:
# Log the assembled DataFrame to the project as a dataset artifact
# under the key 'cancer-dataset'; the returned artifact is the cell output.
project.log_dataset(key='cancer-dataset', df=breast_cancer_dataset)

<mlrun.artifacts.dataset.DatasetArtifact at 0x7efd29c38150>

In [8]:
# Save the project; the call returns the MlrunProject object shown as the cell output.
project.save()

<mlrun.projects.project.MlrunProject at 0x7efd29c68a10>

#### Project YAML

In [9]:
# Print the project's YAML representation (functions, workflows, artifacts, source).
print(project.to_yaml())

kind: project
metadata:
  name: clone-test-shapira
spec:
  functions:
  - url: ./fetch_data.py
    name: fetch_data
    kind: job
    image: mlrun/mlrun
    handler: fetch_data
  - url: ./trainier.py
    name: trainer
    kind: job
    image: mlrun/mlrun
    handler: train
  - url: ./serving.yaml
    name: serving
  workflows:
  - path: workflow.py
    name: main
    schedule: '*/10 * * * *'
  artifacts:
  - kind: dataset
    metadata:
      project: clone-test-shapira
      key: cancer-dataset
    spec:
      format: csv
    status:
      state: created
  conda: ''
  source: git://github.com/GiladShapira94/Scheduled_project.git#refs/heads/master
  origin_url: git://github.com/GiladShapira94/Scheduled_project.git#refs/heads/master
  desired_state: online



In [10]:
# Fetch the dataset artifact registered under 'cancer-dataset' from the project.
dataset = project.get_artifact(key='cancer-dataset')

In [11]:
# Display the storage path of the dataset artifact.
dataset.target_path

'v3io:///projects/clone-test-shapira/artifacts/cancer-dataset.parquet'

In [12]:
# Run the project's 'fetch_data' function, passing the dataset artifact's
# storage path as the 'dataset' input.
job_function_run = project.run_function(
    function='fetch_data',
    inputs={'dataset': dataset.target_path},
)

> 2023-03-19 13:25:00,791 [info] starting run fetch-data-fetch-data uid=99655d8dc53b47abb4326ad07f235183 DB=http://mlrun-api:8080
> 2023-03-19 13:25:01,045 [info] Job is running in the background, pod: fetch-data-fetch-data-4rvsm
> 2023-03-19 13:25:06,113 [info] saving dataframe to s3
> 2023-03-19 13:25:06,519 [info] To track results use the CLI: {'info_cmd': 'mlrun get run 99655d8dc53b47abb4326ad07f235183 -p clone-test-shapira', 'logs_cmd': 'mlrun logs 99655d8dc53b47abb4326ad07f235183 -p clone-test-shapira'}
> 2023-03-19 13:25:06,519 [info] Or click for UI: {'ui_url': 'https://dashboard.default-tenant.app.cust-cs-il-3-5-2.iguazio-cd2.com/mlprojects/clone-test-shapira/jobs/monitor/99655d8dc53b47abb4326ad07f235183/overview'}
> 2023-03-19 13:25:06,519 [info] run executed, status=completed
final state: completed


project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
clone-test-shapira,...7f235183,0,Mar 19 13:25:04,completed,fetch-data-fetch-data,v3io_user=shapirakind=jobowner=shapiramlrun/client_version=1.3.0-rc35mlrun/client_python_version=3.7.6host=fetch-data-fetch-data-4rvsm,dataset,,,dataset





> 2023-03-19 13:25:07,289 [info] run executed, status=completed


In [13]:
# Display the outputs of the fetch_data run (a mapping of output name to artifact URI).
job_function_run.outputs

{'dataset': 'store://artifacts/clone-test-shapira/fetch-data-fetch-data_dataset:99655d8dc53b47abb4326ad07f235183'}

In [14]:
# Run the 'trainer' function on the dataset produced by the fetch_data run,
# with the training parameters passed via `params`.
trainer_run = project.run_function(
    function='trainer',
    inputs={"dataset": job_function_run.outputs['dataset']},
    params={
        "n_estimators": 100,
        "learning_rate": 1e-1,
        "max_depth": 3,
    },
)

> 2023-03-19 13:25:07,393 [info] starting run trainer-train uid=ff4e0fba69254eeb8951e8054ad58da7 DB=http://mlrun-api:8080
> 2023-03-19 13:25:07,625 [info] Job is running in the background, pod: trainer-train-krsvx
> 2023-03-19 13:25:13,516 [info] To track results use the CLI: {'info_cmd': 'mlrun get run ff4e0fba69254eeb8951e8054ad58da7 -p clone-test-shapira', 'logs_cmd': 'mlrun logs ff4e0fba69254eeb8951e8054ad58da7 -p clone-test-shapira'}
> 2023-03-19 13:25:13,516 [info] Or click for UI: {'ui_url': 'https://dashboard.default-tenant.app.cust-cs-il-3-5-2.iguazio-cd2.com/mlprojects/clone-test-shapira/jobs/monitor/ff4e0fba69254eeb8951e8054ad58da7/overview'}
> 2023-03-19 13:25:13,517 [info] run executed, status=completed
final state: completed


project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
clone-test-shapira,...4ad58da7,0,Mar 19 13:25:11,completed,trainer-train,v3io_user=shapirakind=jobowner=shapiramlrun/client_version=1.3.0-rc35mlrun/client_python_version=3.7.6host=trainer-train-krsvx,dataset,n_estimators=100learning_rate=0.1max_depth=3,,model





> 2023-03-19 13:25:13,870 [info] run executed, status=completed


In [15]:
# Deploy the 'serving' function, attaching the model produced by the trainer
# run under the key 'cancer-classifier'.
serving_func = project.deploy_function(
    function='serving',
    models=[
        {
            'key': 'cancer-classifier',
            'model_path': trainer_run.outputs["model"],
            'class_name': 'mlrun.frameworks.sklearn.SklearnModelServer',
        }
    ],
)

> 2023-03-19 13:25:13,922 [info] Starting remote function deploy
2023-03-19 13:25:14  (info) Deploying function
2023-03-19 13:25:14  (info) Building
2023-03-19 13:25:14  (info) Staging files and preparing base images
2023-03-19 13:25:14  (info) Building processor image
2023-03-19 13:26:04  (info) Build complete
2023-03-19 13:26:12  (info) Function deploy complete
> 2023-03-19 13:26:14,694 [info] successfully deployed function: {'internal_invocation_urls': ['nuclio-clone-test-shapira-serving.default-tenant.svc.cluster.local:8080'], 'external_invocation_urls': ['']}


In [21]:
# Request body: a single sample of 30 feature values under the "inputs" key.
my_data = {
    "inputs": [
        [
            1.371e+01, 2.083e+01, 9.020e+01, 5.779e+02, 1.189e-01, 1.645e-01,
            9.366e-02, 5.985e-02, 2.196e-01, 7.451e-02, 5.835e-01, 1.377e+00,
            3.856e+00, 5.096e+01, 8.805e-03, 3.029e-02, 2.488e-02, 1.448e-02,
            1.486e-02, 5.412e-03, 1.706e+01, 2.814e+01, 1.106e+02, 8.970e+02,
            1.654e-01, 3.682e-01, 2.678e-01, 1.556e-01, 3.196e-01, 1.151e-01,
        ]
    ]
}
# Invoke with a relative path rather than a hard-coded cluster URL, so the
# runtime resolves the deployed function's own address. The previous literal
# URL embedded the project name and namespace and broke for any other
# project or cluster.
serving_func.function.invoke("/", body=my_data)

> 2023-03-19 13:28:57,946 [info] invoking function: {'method': 'POST', 'path': 'http://nuclio-clone-test-shapira-serving.default-tenant.svc.cluster.local:8080'}


{'id': '04ffca41-97a9-4af1-b4bf-2b73a4a54785',
 'model_name': 'cancer-classifier',
 'outputs': [0]}

In [24]:
# Schedule the 'main' workflow with the cron expression '0 * * * *'
# (minute 0 of every hour).
# NOTE(review): the project YAML printed earlier declares '*/10 * * * *' for
# this workflow; this call passes a different schedule. Confirm which is intended.
project.run(name='main', schedule='0 * * * *')

> 2023-03-19 13:29:50,533 [info] executing workflow scheduling 'workflow-runner-main' remotely with kfp engine
> 2023-03-19 13:29:50,536 [info] starting run main uid=b955dca05a8b46529b337925ad055e04 DB=http://mlrun-api:8080
> 2023-03-19 13:29:50,835 [info] task scheduled, {'schedule': '0 * * * *', 'project': 'clone-test-shapira', 'name': 'main'}
