diff --git a/cli/jobs/automl-standalone-jobs/cli-automl-classification-task-bankmarketing/cli-automl-classification-task-bankmarketing-serverless.yml b/cli/jobs/automl-standalone-jobs/cli-automl-classification-task-bankmarketing/cli-automl-classification-task-bankmarketing-serverless.yml new file mode 100644 index 0000000000..c6596f0c47 --- /dev/null +++ b/cli/jobs/automl-standalone-jobs/cli-automl-classification-task-bankmarketing/cli-automl-classification-task-bankmarketing-serverless.yml @@ -0,0 +1,53 @@ +$schema: https://azuremlsdk2.blob.core.windows.net/preview/0.0.1/autoMLJob.schema.json +type: automl +experiment_name: dpv2-cli-automl-classifier-experiment +description: A Classification job using bank marketing +# Serverless compute is used to run this AutoML job. +# Through serverless compute, Azure Machine Learning takes care of creating, scaling, deleting, patching and managing compute, along with providing managed network isolation, reducing the burden on you. + +task: classification +log_verbosity: debug +primary_metric: accuracy + +target_column_name: "y" + +#validation_data_size: 0.20 +#n_cross_validations: 5 +#test_data_size: 0.1 + +training_data: + path: "./training-mltable-folder" + type: mltable +validation_data: + path: "./validation-mltable-folder" + type: mltable +test_data: + path: "./test-mltable-folder" + type: mltable + +limits: + timeout_minutes: 180 + max_trials: 40 + max_concurrent_trials: 5 + trial_timeout_minutes: 20 + enable_early_termination: true + exit_score: 0.92 + +featurization: + mode: custom + transformer_params: + imputer: + - fields: ["job"] + parameters: + strategy: most_frequent + blocked_transformers: + - WordEmbedding +training: + enable_model_explainability: true + allowed_training_algorithms: + - gradient_boosting + - logistic_regression +# Resources to run this serverless job +resources: + instance_type: Standard_E4s_v3 + instance_count: 5 diff --git a/cli/jobs/automl-standalone-jobs/cli-automl-forecasting-orange-juice-sales/cli-automl-forecasting-orange-juice-sales.yml b/cli/jobs/automl-standalone-jobs/cli-automl-forecasting-orange-juice-sales/cli-automl-forecasting-orange-juice-sales.yml index fdb80711fd..03a13fc4bb 100644 --- a/cli/jobs/automl-standalone-jobs/cli-automl-forecasting-orange-juice-sales/cli-automl-forecasting-orange-juice-sales.yml +++ b/cli/jobs/automl-standalone-jobs/cli-automl-forecasting-orange-juice-sales/cli-automl-forecasting-orange-juice-sales.yml @@ -4,7 +4,6 @@ type: automl experiment_name: dpv2-cli-automl-forecasting-orange-juice-sales # name: dpv2-sdk-forecasting-train-job-01 description: A Time-Series Forecasting job using orange juice sales dataset -compute: azureml:cpu-cluster task: forecasting primary_metric: normalized_root_mean_squared_error log_verbosity: info @@ -54,4 +53,4 @@ forecasting: training: enable_model_explainability: true enable_stack_ensemble: false - blocked_training_algorithms: [] \ No newline at end of file + blocked_training_algorithms: [] diff --git a/cli/jobs/automl-standalone-jobs/cli-automl-forecasting-task-github-dau/cli-automl-forecasting-task-github-dau.yml b/cli/jobs/automl-standalone-jobs/cli-automl-forecasting-task-github-dau/cli-automl-forecasting-task-github-dau.yml index 600916fda1..4d2bb84b05 100644 --- a/cli/jobs/automl-standalone-jobs/cli-automl-forecasting-task-github-dau/cli-automl-forecasting-task-github-dau.yml +++ b/cli/jobs/automl-standalone-jobs/cli-automl-forecasting-task-github-dau/cli-automl-forecasting-task-github-dau.yml @@ -4,7 +4,6 @@ type: automl
experiment_name: dpv2-cli-automl-forecasting-github-dau-experiment description: A Time-Series Forecasting job using Github DAU dataset that trains only the TCNForecaster model. -compute: azureml:automl-gpu-cluster task: forecasting primary_metric: normalized_root_mean_squared_error @@ -33,3 +32,6 @@ training: enable_stack_ensemble: false allowed_training_algorithms: - TCNForecaster +resources: + instance_type: Standard_E4s_v3 + instance_count: 4 diff --git a/cli/jobs/basics/hello-automl/hello-automl-job-basic.yml b/cli/jobs/basics/hello-automl/hello-automl-job-basic.yml index 98d9a094a4..26819e18bf 100644 --- a/cli/jobs/basics/hello-automl/hello-automl-job-basic.yml +++ b/cli/jobs/basics/hello-automl/hello-automl-job-basic.yml @@ -5,7 +5,6 @@ experiment_name: dpv2-cli-automl-classifier-experiment # name: dpv2-cli-classifier-train-job-basic-01 description: A Classification job using bank marketing -compute: azureml:cpu-cluster task: classification primary_metric: accuracy diff --git a/cli/jobs/basics/hello-code.yml b/cli/jobs/basics/hello-code.yml index cc06597dcc..4664863131 100644 --- a/cli/jobs/basics/hello-code.yml +++ b/cli/jobs/basics/hello-code.yml @@ -3,4 +3,3 @@ command: ls code: src environment: image: library/python:latest -compute: azureml:cpu-cluster diff --git a/cli/jobs/basics/hello-data-uri-folder.yml b/cli/jobs/basics/hello-data-uri-folder.yml index 7d64be86a5..10fdbbf1c4 100644 --- a/cli/jobs/basics/hello-data-uri-folder.yml +++ b/cli/jobs/basics/hello-data-uri-folder.yml @@ -8,4 +8,3 @@ inputs: path: azureml:local-folder-example@latest mode: ro_mount environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest -compute: azureml:cpu-cluster diff --git a/cli/jobs/basics/hello-dataset.yml b/cli/jobs/basics/hello-dataset.yml index b5aa3fa6d5..e0dbaf8f0a 100644 --- a/cli/jobs/basics/hello-dataset.yml +++ b/cli/jobs/basics/hello-dataset.yml @@ -8,4 +8,3 @@ inputs: path: azureml:sampledata@latest mode: ro_mount environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest -compute: azureml:cpu-cluster \ No newline at end of file diff --git a/cli/jobs/basics/hello-git.yml b/cli/jobs/basics/hello-git.yml index 8a9c823545..8fdffc9d97 100644 --- a/cli/jobs/basics/hello-git.yml +++ b/cli/jobs/basics/hello-git.yml @@ -4,4 +4,3 @@ command: >- code: src environment: image: library/python:latest -compute: azureml:cpu-cluster \ No newline at end of file diff --git a/cli/jobs/basics/hello-interactive.yml b/cli/jobs/basics/hello-interactive.yml index 443234e213..350c20906e 100644 --- a/cli/jobs/basics/hello-interactive.yml +++ b/cli/jobs/basics/hello-interactive.yml @@ -2,7 +2,6 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json command: python hello-interactive.py && sleep 600 code: src environment: azureml:AzureML-tensorflow-2.7-ubuntu20.04-py38-cuda11-gpu@latest -compute: azureml:cpu-cluster services: my_vscode: @@ -15,4 +14,4 @@ services: # my_ssh: # type: tensor_board # ssh_public_keys: -# nodes: all # Use the `nodes` property for a distributed job to run interactive services on all nodes. If `nodes` are not selected, by default, interactive applications are only enabled on the head node. \ No newline at end of file +# nodes: all # Use the `nodes` property for a distributed job to run interactive services on all nodes. If `nodes` are not selected, by default, interactive applications are only enabled on the head node. 
diff --git a/cli/jobs/basics/hello-iris-datastore-file.yml b/cli/jobs/basics/hello-iris-datastore-file.yml index a833dff512..eb17d4e9c7 100644 --- a/cli/jobs/basics/hello-iris-datastore-file.yml +++ b/cli/jobs/basics/hello-iris-datastore-file.yml @@ -8,4 +8,3 @@ inputs: type: uri_file path: azureml://datastores/workspaceblobstore/paths/example-data/iris.csv environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest -compute: azureml:cpu-cluster diff --git a/cli/jobs/basics/hello-iris-datastore-folder.yml b/cli/jobs/basics/hello-iris-datastore-folder.yml index 6ee528c910..73c83b9cc4 100644 --- a/cli/jobs/basics/hello-iris-datastore-folder.yml +++ b/cli/jobs/basics/hello-iris-datastore-folder.yml @@ -9,4 +9,3 @@ inputs: type: uri_folder path: azureml://datastores/workspaceblobstore/paths/example-data/ environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest -compute: azureml:cpu-cluster diff --git a/cli/jobs/basics/hello-iris-file.yml b/cli/jobs/basics/hello-iris-file.yml index d4350fb152..3ccbe5d630 100644 --- a/cli/jobs/basics/hello-iris-file.yml +++ b/cli/jobs/basics/hello-iris-file.yml @@ -8,4 +8,3 @@ inputs: type: uri_file path: https://azuremlexamples.blob.core.windows.net/datasets/iris.csv environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest -compute: azureml:cpu-cluster diff --git a/cli/jobs/basics/hello-iris-folder.yml b/cli/jobs/basics/hello-iris-folder.yml index 3a0c866e97..cb4a4d6ccf 100644 --- a/cli/jobs/basics/hello-iris-folder.yml +++ b/cli/jobs/basics/hello-iris-folder.yml @@ -9,4 +9,3 @@ inputs: type: uri_folder path: wasbs://datasets@azuremlexamples.blob.core.windows.net/ environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest -compute: azureml:cpu-cluster diff --git a/cli/jobs/basics/hello-iris-literal.yml b/cli/jobs/basics/hello-iris-literal.yml index 0f0025dc9b..d2dd08f846 100644 --- a/cli/jobs/basics/hello-iris-literal.yml +++ b/cli/jobs/basics/hello-iris-literal.yml @@ -7,4 +7,3 @@ inputs: type: uri_file iris_csv: https://azuremlexamples.blob.core.windows.net/datasets/iris.csv environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest -compute: azureml:cpu-cluster diff --git a/cli/jobs/basics/hello-mlflow.yml b/cli/jobs/basics/hello-mlflow.yml index c9788bd1f7..0944266c5b 100644 --- a/cli/jobs/basics/hello-mlflow.yml +++ b/cli/jobs/basics/hello-mlflow.yml @@ -2,4 +2,3 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json command: python hello-mlflow.py code: src environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest -compute: azureml:cpu-cluster diff --git a/cli/jobs/basics/hello-model-as-input.yml b/cli/jobs/basics/hello-model-as-input.yml index 728fd12156..f1b7fc00b7 100644 --- a/cli/jobs/basics/hello-model-as-input.yml +++ b/cli/jobs/basics/hello-model-as-input.yml @@ -13,4 +13,3 @@ inputs: type: mlflow_model # List of all model types here: https://learn.microsoft.com/azure/machine-learning/reference-yaml-model#yaml-syntax path: ../../assets/model/mlflow-model environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest -compute: azureml:cpu-cluster diff --git a/cli/jobs/basics/hello-model-as-output.yml b/cli/jobs/basics/hello-model-as-output.yml index f1c6425104..aaba2c14e8 100644 --- a/cli/jobs/basics/hello-model-as-output.yml +++ b/cli/jobs/basics/hello-model-as-output.yml @@ -20,4 +20,3 @@ outputs: output_folder: type: custom_model # mlflow_model,custom_model, triton_model environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest -compute: 
azureml:cpu-cluster \ No newline at end of file diff --git a/cli/jobs/basics/hello-notebook.yml b/cli/jobs/basics/hello-notebook.yml index 7d6a31b7b0..e6455d4599 100644 --- a/cli/jobs/basics/hello-notebook.yml +++ b/cli/jobs/basics/hello-notebook.yml @@ -5,4 +5,3 @@ command: | code: src environment: image: library/python:latest -compute: azureml:cpu-cluster diff --git a/cli/jobs/basics/hello-pipeline-abc-serverless.yml b/cli/jobs/basics/hello-pipeline-abc-serverless.yml new file mode 100644 index 0000000000..21626cbeca --- /dev/null +++ b/cli/jobs/basics/hello-pipeline-abc-serverless.yml @@ -0,0 +1,27 @@ +$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json +type: pipeline +display_name: hello_pipeline_abc +# Serverless compute is used to run this pipeline job. +# Through serverless compute, Azure Machine Learning takes care of creating, scaling, deleting, patching and managing compute, along with providing managed network isolation, reducing the burden on you. +settings: + default_compute: azureml:serverless + +inputs: + hello_string_top_level_input: "hello world" +jobs: + a: + command: echo hello ${{inputs.hello_string}} + environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest + inputs: + hello_string: ${{parent.inputs.hello_string_top_level_input}} + b: + command: echo "world" >> ${{outputs.world_output}}/world.txt + environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest + outputs: + world_output: + c: + command: echo ${{inputs.world_input}}/world.txt + environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest + inputs: + world_input: ${{parent.jobs.b.outputs.world_output}} + diff --git a/cli/jobs/basics/hello-pipeline-customize-output-file-serverless.yml b/cli/jobs/basics/hello-pipeline-customize-output-file-serverless.yml new file mode 100644 index 0000000000..43e4ec7670 --- /dev/null +++ b/cli/jobs/basics/hello-pipeline-customize-output-file-serverless.yml @@ -0,0 +1,19 @@ +$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json +type: pipeline +display_name: hello_pipeline_customize_output_file +# Serverless compute is used to run this pipeline job. +# Through serverless compute, Azure Machine Learning takes care of creating, scaling, deleting, patching and managing compute, along with providing managed network isolation, reducing the burden on you. +settings: + default_compute: azureml:serverless + +outputs: + output: + type: uri_file + path: azureml://datastores/workspaceblobstore/paths/${{name}}/hello_world.txt + mode: rw_mount +jobs: + hello_world: + command: echo "hello" && echo "world" > ${{outputs.output}} + environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest + outputs: + output: ${{parent.outputs.output}} diff --git a/cli/jobs/basics/hello-pipeline-customize-output-folder-serverless.yml b/cli/jobs/basics/hello-pipeline-customize-output-folder-serverless.yml new file mode 100644 index 0000000000..081dd38e2e --- /dev/null +++ b/cli/jobs/basics/hello-pipeline-customize-output-folder-serverless.yml @@ -0,0 +1,15 @@ +$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json +type: pipeline +display_name: hello_pipeline_customize_output_folder +# Serverless compute is used to run this pipeline job. +# Through serverless compute, Azure Machine Learning takes care of creating, scaling, deleting, patching and managing compute, along with providing managed network isolation, reducing the burden on you. 
+settings: + default_compute: azureml:serverless +jobs: + hello_world: + command: echo "hello" && echo "world" > ${{outputs.output}}/hello_world-folder.txt + environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest + outputs: + output: + type: uri_folder + path: azureml://datastores/workspaceblobstore/paths/${{name}}/ diff --git a/cli/jobs/basics/hello-pipeline-default-artifacts-serverless.yml b/cli/jobs/basics/hello-pipeline-default-artifacts-serverless.yml new file mode 100644 index 0000000000..94ba27ee35 --- /dev/null +++ b/cli/jobs/basics/hello-pipeline-default-artifacts-serverless.yml @@ -0,0 +1,19 @@ +$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json +type: pipeline +display_name: hello_pipeline_default_artifacts +# Serverless compute is used to run this pipeline job. +# Through serverless compute, Azure Machine Learning takes care of creating, scaling, deleting, patching and managing compute, along with providing managed network isolation, reducing the burden on you. +settings: + default_compute: azureml:serverless +jobs: + hello_job: + command: echo "hello" && echo "world" > ./outputs/world.txt + environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest + outputs: + artifacts: + world_job: + command: cat ${{inputs.world_input}}/outputs/world.txt + environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest + inputs: + world_input: ${{parent.jobs.hello_job.outputs.artifacts}} + diff --git a/cli/jobs/basics/hello-pipeline-io-serverless.yml b/cli/jobs/basics/hello-pipeline-io-serverless.yml new file mode 100644 index 0000000000..39c438634d --- /dev/null +++ b/cli/jobs/basics/hello-pipeline-io-serverless.yml @@ -0,0 +1,19 @@ +$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json +type: pipeline +display_name: hello_pipeline_io +# Serverless compute is used to run this pipeline job. +# Through serverless compute, Azure Machine Learning takes care of creating, scaling, deleting, patching and managing compute, along with providing managed network isolation, reducing the burden on you. +settings: + default_compute: azureml:serverless +jobs: + hello_job: + command: echo "hello" && echo "world" > ${{outputs.world_output}}/world.txt + environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest + outputs: + world_output: + world_job: + command: cat ${{inputs.world_input}}/world.txt + environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu:1 + inputs: + world_input: ${{parent.jobs.hello_job.outputs.world_output}} + diff --git a/cli/jobs/basics/hello-pipeline-serverless.yml b/cli/jobs/basics/hello-pipeline-serverless.yml new file mode 100644 index 0000000000..063bea99e5 --- /dev/null +++ b/cli/jobs/basics/hello-pipeline-serverless.yml @@ -0,0 +1,15 @@ +$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json +type: pipeline +display_name: hello_pipeline +# Serverless compute is used to run this pipeline job. +# Through serverless compute, Azure Machine Learning takes care of creating, scaling, deleting, patching and managing compute, along with providing managed network isolation, reducing the burden on you. 
+settings: + default_compute: azureml:serverless +jobs: + hello_job: + command: echo "hello" + environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest + world_job: + command: echo "world" + environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest + diff --git a/cli/jobs/basics/hello-pipeline-settings-serverless.yml b/cli/jobs/basics/hello-pipeline-settings-serverless.yml new file mode 100644 index 0000000000..3c09418e9c --- /dev/null +++ b/cli/jobs/basics/hello-pipeline-settings-serverless.yml @@ -0,0 +1,15 @@ +$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json +type: pipeline +display_name: hello_pipeline_settings +# Serverless compute is used to run this pipeline job. +# Through serverless compute, Azure Machine Learning takes care of creating, scaling, deleting, patching and managing compute, along with providing managed network isolation, reducing the burden on you. +settings: + default_datastore: azureml:workspaceblobstore + default_compute: azureml:serverless +jobs: + hello_job: + command: echo 202204190 & echo "hello" + environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu:1 + world_job: + command: echo 202204190 & echo "hello" + environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu:1 diff --git a/cli/jobs/basics/hello-sweep.yml b/cli/jobs/basics/hello-sweep.yml index 6c9bcbbdcd..7821d2fc86 100644 --- a/cli/jobs/basics/hello-sweep.yml +++ b/cli/jobs/basics/hello-sweep.yml @@ -10,7 +10,6 @@ trial: environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest inputs: A: 0.5 -compute: azureml:cpu-cluster sampling_algorithm: random search_space: B: diff --git a/cli/jobs/basics/hello-world-env-var.yml b/cli/jobs/basics/hello-world-env-var.yml index ed2313ce0f..e74de2d9e0 100644 --- a/cli/jobs/basics/hello-world-env-var.yml +++ b/cli/jobs/basics/hello-world-env-var.yml @@ -2,6 +2,5 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json command: echo $hello_env_var environment: image: library/python:latest -compute: azureml:cpu-cluster environment_variables: hello_env_var: "hello world" diff --git a/cli/jobs/basics/hello-world-input.yml b/cli/jobs/basics/hello-world-input.yml index 6e76889299..4b0e1308a5 100644 --- a/cli/jobs/basics/hello-world-input.yml +++ b/cli/jobs/basics/hello-world-input.yml @@ -7,4 +7,3 @@ environment: inputs: hello_string: "hello world" hello_number: 42 -compute: azureml:cpu-cluster diff --git a/cli/jobs/basics/hello-world-org.yml b/cli/jobs/basics/hello-world-org.yml index 29599f7bac..be5ef923c8 100644 --- a/cli/jobs/basics/hello-world-org.yml +++ b/cli/jobs/basics/hello-world-org.yml @@ -2,7 +2,6 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json command: echo "hello world" environment: image: library/python:latest -compute: azureml:cpu-cluster tags: hello: world display_name: hello-world-example diff --git a/cli/jobs/basics/hello-world-output-data.yml b/cli/jobs/basics/hello-world-output-data.yml index bd995eb495..84104a35e3 100644 --- a/cli/jobs/basics/hello-world-output-data.yml +++ b/cli/jobs/basics/hello-world-output-data.yml @@ -4,4 +4,3 @@ outputs: hello_output: environment: image: python -compute: azureml:cpu-cluster diff --git a/cli/jobs/basics/hello-world-output.yml b/cli/jobs/basics/hello-world-output.yml index 8db7888e4c..0c734455d6 100644 --- a/cli/jobs/basics/hello-world-output.yml +++ b/cli/jobs/basics/hello-world-output.yml @@ -2,4 +2,3 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json command: 
echo "hello world" > ./outputs/helloworld.txt environment: image: library/python:latest -compute: azureml:cpu-cluster diff --git a/cli/jobs/basics/hello-world.yml b/cli/jobs/basics/hello-world.yml index 8941f94f59..59eb0f4061 100644 --- a/cli/jobs/basics/hello-world.yml +++ b/cli/jobs/basics/hello-world.yml @@ -2,4 +2,3 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json command: echo "hello world" environment: image: library/python:latest -compute: azureml:cpu-cluster diff --git a/cli/jobs/pipelines-with-components/basics/1b_e2e_registered_components/pipeline-serverless.yml b/cli/jobs/pipelines-with-components/basics/1b_e2e_registered_components/pipeline-serverless.yml new file mode 100644 index 0000000000..437498ca7d --- /dev/null +++ b/cli/jobs/pipelines-with-components/basics/1b_e2e_registered_components/pipeline-serverless.yml @@ -0,0 +1,66 @@ +$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json +type: pipeline +display_name: 1b_e2e_registered_components +description: E2E dummy train-score-eval pipeline with registered components +# Serverless compute is used to run this pipeline job. +# Through serverless compute, Azure Machine Learning takes care of creating, scaling, deleting, patching and managing compute, along with providing managed network isolation, reducing the burden on you. +inputs: + pipeline_job_training_max_epocs: 20 + pipeline_job_training_learning_rate: 1.8 + pipeline_job_learning_rate_schedule: 'time-based' + +outputs: + pipeline_job_trained_model: + mode: upload + pipeline_job_scored_data: + mode: upload + pipeline_job_evaluation_report: + mode: upload + +settings: + default_compute: azureml:serverless + +jobs: + train_job: + type: command + component: azureml:my_train@latest + inputs: + training_data: + type: uri_folder + path: ./data + max_epocs: ${{parent.inputs.pipeline_job_training_max_epocs}} + learning_rate: ${{parent.inputs.pipeline_job_training_learning_rate}} + learning_rate_schedule: ${{parent.inputs.pipeline_job_learning_rate_schedule}} + outputs: + model_output: ${{parent.outputs.pipeline_job_trained_model}} + services: + my_vscode: + type: vs_code + my_jupyter_lab: + type: jupyter_lab + my_tensorboard: + type: tensor_board + log_dir: "outputs/tblogs" + # my_ssh: + # type: tensor_board + # ssh_public_keys: + # nodes: all # Use the `nodes` property to pick which node you want to enable interactive services on. If `nodes` are not selected, by default, interactive applications are only enabled on the head node. 
+ + score_job: + type: command + component: azureml:my_score@latest + inputs: + model_input: ${{parent.jobs.train_job.outputs.model_output}} + test_data: + type: uri_folder + path: ./data + outputs: + score_output: ${{parent.outputs.pipeline_job_scored_data}} + + evaluate_job: + type: command + component: azureml:my_eval@latest + inputs: + scoring_result: ${{parent.jobs.score_job.outputs.score_output}} + outputs: + eval_output: ${{parent.outputs.pipeline_job_evaluation_report}} diff --git a/sdk/python/jobs/automl-standalone-jobs/automl-classification-task-bankmarketing/automl-classification-task-bankmarketing-serverless.ipynb b/sdk/python/jobs/automl-standalone-jobs/automl-classification-task-bankmarketing/automl-classification-task-bankmarketing-serverless.ipynb new file mode 100644 index 0000000000..375134bee1 --- /dev/null +++ b/sdk/python/jobs/automl-standalone-jobs/automl-classification-task-bankmarketing/automl-classification-task-bankmarketing-serverless.ipynb @@ -0,0 +1,996 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AutoML: Train \"the best\" classifier model for the UCI Bank Marketing dataset. \n", + "\n", + "**Requirements** - In order to benefit from this tutorial, you will need:\n", + "- A basic understanding of Machine Learning\n", + "- An Azure account with an active subscription. [Create an account for free](https://azure.microsoft.com/free/?WT.mc_id=A261C142F)\n", + "- An Azure ML workspace. [Check this notebook for creating a workspace](../../../resources/workspace/workspace.ipynb) \n", + "- This notebook leverages **serverless compute** to run the job. There is no need for the user to create and manage compute. \n", + "- A python environment\n", + "- Installed Azure Machine Learning Python SDK v2 - [install instructions](../../../README.md) - check the getting started section\n", + "\n", + "\n", + "**Learning Objectives** - By the end of this tutorial, you should be able to:\n", + "- Connect to your AML workspace from the Python SDK\n", + "- Create an `AutoML classification Job` with the 'classification()' factory-function.\n", + "- Train the model using serverless compute by submitting/running the AutoML training job\n", + "- Obtain the model and score predictions with it\n", + "- Leverage the auto generated training code and use it for retraining on an updated dataset\n", + "\n", + "**Motivations** - This notebook explains how to set up and run an AutoML classification job. This is one of the nine ML-tasks supported by AutoML. Other ML-tasks are 'regression', 'time-series forecasting', 'image classification', 'image object detection', 'nlp text classification', etc.\n", + "\n", + "In this notebook, we go over how you can use AutoML for training a Classification model. We will use the UCI Bank Marketing dataset to train and deploy the model to use in inference scenarios. The classification goal is to predict if the client will subscribe to a term deposit with the bank. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 1. Connect to Azure Machine Learning Workspace\n", + "\n", + "The [workspace](https://docs.microsoft.com/en-us/azure/machine-learning/concept-workspace) is the top-level resource for Azure Machine Learning, providing a centralized place to work with all the artifacts you create when you use Azure Machine Learning. In this section we will connect to the workspace in which the job will be run.\n", + "\n", + "## 1.1. 
Import the required libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "gather": { + "logged": 1634852261599 + }, + "name": "automl-import" + }, + "outputs": [], + "source": [ + "# Import required libraries\n", + "import os\n", + "from azure.identity import DefaultAzureCredential\n", + "from azure.identity import AzureCliCredential\n", + "from azure.ai.ml import automl, Input, MLClient, command\n", + "\n", + "from azure.ai.ml.constants import AssetTypes\n", + "from azure.ai.ml.entities import Data\n", + "from azure.ai.ml.automl import (\n", + " classification,\n", + " ClassificationPrimaryMetrics,\n", + " ClassificationModels,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1.2. Configure workspace details and get a handle to the workspace\n", + "\n", + "To connect to a workspace, we need identifier parameters - a subscription, resource group and workspace name. We will use these details in the `MLClient` from `azure.ai.ml` to get a handle to the required Azure Machine Learning workspace. We use the [default azure authentication](https://docs.microsoft.com/en-us/python/api/azure-identity/azure.identity.defaultazurecredential?view=azure-python) for this tutorial. Check the [configuration notebook](../../configuration.ipynb) for more details on how to configure credentials and connect to a workspace." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "gather": { + "logged": 1634852261744 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "name": "mlclient-setup", + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "credential = DefaultAzureCredential()\n", + "ml_client = None\n", + "try:\n", + " ml_client = MLClient.from_config(credential)\n", + "except Exception as ex:\n", + " print(ex)\n", + " # Enter details of your AML workspace\n", + " subscription_id = \"\"\n", + " resource_group = \"\"\n", + " workspace = \"\"\n", + " ml_client = MLClient(credential, subscription_id, resource_group, workspace)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Show Azure ML Workspace information" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "workspace = ml_client.workspaces.get(name=ml_client.workspace_name)\n", + "\n", + "subscription_id = ml_client.connections._subscription_id\n", + "resource_group = workspace.resource_group\n", + "workspace_name = ml_client.workspace_name\n", + "\n", + "output = {}\n", + "output[\"Workspace\"] = workspace_name\n", + "output[\"Subscription ID\"] = subscription_id\n", + "output[\"Resource Group\"] = resource_group\n", + "output[\"Location\"] = workspace.location\n", + "output" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 2. MLTable with input Training Data\n", + "\n", + "## 2.1. 
Create MLTable data input" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "data-load" + }, + "outputs": [], + "source": [ + "# Training MLTable defined locally, with local data to be uploaded\n", + "my_training_data_input = Input(\n", + " type=AssetTypes.MLTABLE, path=\"./data/training-mltable-folder\"\n", + ")\n", + "\n", + "# WITH REMOTE PATH: If available already in the cloud/workspace-blob-store\n", + "# my_training_data_input = Input(type=AssetTypes.MLTABLE, path=\"azureml://datastores/workspaceblobstore/paths/Classification/Train\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 3. Configure and run the AutoML classification job\n", + "In this section we will configure and run the AutoML classification job.\n", + "\n", + "## 3.1 Configure the job through the classification() factory function\n", + "\n", + "### classification() parameters:\n", + "\n", + "The `classification()` factory function allows user to configure AutoML for the classification task for the most common scenarios with the following properties.\n", + "\n", + "- `target_column_name` - The name of the column to target for predictions. It must always be specified. This parameter is applicable to 'training_data', 'validation_data' and 'test_data'.\n", + "- `primary_metric` - The metric that AutoML will optimize for Classification model selection.\n", + "- `training_data` - The data to be used for training. It should contain both training feature columns and a target column. Optionally, this data can be split for segregating a validation or test dataset. \n", + "You can use a registered MLTable in the workspace using the format ':' OR you can use a local file or folder as a MLTable. For e.g Input(mltable='my_mltable:1') OR Input(mltable=MLTable(local_path=\"./data\"))\n", + "The parameter 'training_data' must always be provided.\n", + "- `name` - The name of the Job/Run. This is an optional property. If not specified, a random name will be generated.\n", + "- `experiment_name` - The name of the Experiment. An Experiment is like a folder with multiple runs in Azure ML Workspace that should be related to the same logical machine learning experiment.\n", + "\n", + "### set_limits() function parameters:\n", + "This is an optional configuration method to configure limits parameters such as timeouts. \n", + " \n", + "- `timeout_minutes` - Maximum amount of time in minutes that the whole AutoML job can take before the job terminates. This timeout includes setup, featurization and training runs but does not include the ensembling and model explainability runs at the end of the process since those actions need to happen once all the trials (children jobs) are done. If not specified, the default job's total timeout is 6 days (8,640 minutes). To specify a timeout less than or equal to 1 hour (60 minutes), make sure your dataset's size is not greater than 10,000,000 (rows times column) or an error results.\n", + "\n", + "- `trial_timeout_minutes` - Maximum time in minutes that each trial (child job) can run for before it terminates. If not specified, a value of 1 month or 43200 minutes is used.\n", + " \n", + "- `max_trials` - The maximum number of trials/runs each with a different combination of algorithm and hyperparameters to try during an AutoML job. If not specified, the default is 1000 trials. 
If using 'enable_early_termination' the number of trials used can be smaller.\n", + " \n", + "- `max_concurrent_trials` - Represents the maximum number of trials (children jobs) that would be executed in parallel. It's a good practice to match this number with the number of nodes in your cluster.\n", + " \n", + "- `enable_early_termination` - Whether to enable early termination if the score is not improving in the short term. \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# General job parameters\n", + "max_trials = 5\n", + "exp_name = \"dpv2-classifier-experiment\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "gather": { + "logged": 1634852262026 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "name": "classification-configuration", + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "# Create the AutoML classification job with the related factory-function.\n", + "from azure.ai.ml.entities import ResourceConfiguration\n", + "\n", + "classification_job = automl.classification(\n", + " experiment_name=exp_name,\n", + " training_data=my_training_data_input,\n", + " target_column_name=\"y\",\n", + " primary_metric=\"accuracy\",\n", + " n_cross_validations=5,\n", + " enable_model_explainability=True,\n", + " tags={\"my_custom_tag\": \"My custom value\"},\n", + ")\n", + "\n", + "# Limits are all optional\n", + "classification_job.set_limits(\n", + " timeout_minutes=600,\n", + " trial_timeout_minutes=20,\n", + " max_trials=max_trials,\n", + " # max_concurrent_trials = 4,\n", + " # max_cores_per_trial: -1,\n", + " enable_early_termination=True,\n", + ")\n", + "\n", + "# Training properties are optional\n", + "classification_job.set_training(\n", + " blocked_training_algorithms=[ClassificationModels.LOGISTIC_REGRESSION],\n", + " enable_onnx_compatible_models=True,\n", + ")\n", + "\n", + "# Serverless compute resources used to run the job\n", + "classification_job.resources = ResourceConfiguration(\n", + " instance_type=\"Standard_E4s_v3\", instance_count=6)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3.2 Run the Command\n", + "Using the `MLClient` created earlier, we will now run this Command in the workspace."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "gather": { + "logged": 1634852267930 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "name": "job-submit", + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "# Submit the AutoML job\n", + "returned_job = ml_client.jobs.create_or_update(\n", + " classification_job\n", + ") # submit the job to the backend\n", + "\n", + "print(f\"Created job: {returned_job}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Wait until the AutoML job is finished\n", + "ml_client.jobs.stream(returned_job.name) waits until the specified job is finished" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ml_client.jobs.stream(returned_job.name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get a URL for the status of the job\n", + "returned_job.services[\"Studio\"].endpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(returned_job.name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 4. Retrieve the Best Trial (Best Model's trial/run)\n", + "Use the MLFLowClient to access the results (such as Models, Artifacts, Metrics) of a previously completed AutoML Trial." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize MLFlow Client\n", + "The models and artifacts that are produced by AutoML can be accessed via the MLFlow interface. \n", + "Initialize the MLFlow client here, and set the backend as Azure ML, via. the MLFlow Client.\n", + "\n", + "*IMPORTANT*, you need to have installed the latest MLFlow packages with:\n", + "\n", + " pip install azureml-mlflow\n", + "\n", + " pip install mlflow" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Obtain the tracking URI for MLFlow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import mlflow\n", + "\n", + "# Obtain the tracking URL from MLClient\n", + "MLFLOW_TRACKING_URI = ml_client.workspaces.get(\n", + " name=ml_client.workspace_name\n", + ").mlflow_tracking_uri\n", + "\n", + "print(MLFLOW_TRACKING_URI)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set the MLFLOW TRACKING URI\n", + "\n", + "mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)\n", + "\n", + "print(\"\\nCurrent tracking uri: {}\".format(mlflow.get_tracking_uri()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from mlflow.tracking.client import MlflowClient\n", + "from mlflow.artifacts import download_artifacts\n", + "\n", + "# Initialize MLFlow client\n", + "mlflow_client = MlflowClient()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get the AutoML parent Job" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "job_name = returned_job.name\n", + "\n", + "# Example if providing an specific Job name/ID\n", + "# job_name = \"b4e95546-0aa1-448e-9ad6-002e3207b4fc\"\n", + "\n", + "# Get the parent run\n", + "mlflow_parent_run = mlflow_client.get_run(job_name)\n", + "\n", + "print(\"Parent Run: \")\n", + 
"print(mlflow_parent_run)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Print parent run tags. 'automl_best_child_run_id' tag should be there.\n", + "print(mlflow_parent_run.data.tags)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get the AutoML best child run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the best model's child run\n", + "\n", + "best_child_run_id = mlflow_parent_run.data.tags[\"automl_best_child_run_id\"]\n", + "print(\"Found best child run id: \", best_child_run_id)\n", + "\n", + "best_run = mlflow_client.get_run(best_child_run_id)\n", + "\n", + "print(\"Best child run: \")\n", + "print(best_run)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get best model run's metrics\n", + "\n", + "Access the results (such as Models, Artifacts, Metrics) of a previously completed AutoML Run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_run.data.metrics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download the best model locally\n", + "\n", + "Access the results (such as Models, Artifacts, Metrics) of a previously completed AutoML Run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# Create local folder\n", + "local_dir = \"./artifact_downloads\"\n", + "if not os.path.exists(local_dir):\n", + " os.mkdir(local_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Download run's artifacts/outputs\n", + "local_path = download_artifacts(\n", + " run_id=best_run.info.run_id, artifact_path=\"outputs\", dst_path=local_dir\n", + ")\n", + "print(\"Artifacts downloaded in: {}\".format(local_path))\n", + "print(\"Artifacts: {}\".format(os.listdir(local_path)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Show the contents of the MLFlow model folder\n", + "os.listdir(\"./artifact_downloads/outputs/mlflow-model\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 5. 
Register Best Model and Deploy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5.1 Create managed online endpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import required libraries\n", + "from azure.ai.ml.entities import (\n", + " ManagedOnlineEndpoint,\n", + " ManagedOnlineDeployment,\n", + " Model,\n", + " Environment,\n", + " CodeConfiguration,\n", + " ProbeSettings,\n", + ")\n", + "from azure.ai.ml.constants import ModelType" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Creating a unique endpoint name with current datetime to avoid conflicts\n", + "import datetime\n", + "\n", + "online_endpoint_name = \"bankmarketing-\" + datetime.datetime.now().strftime(\"%m%d%H%M%f\")\n", + "\n", + "# create an online endpoint\n", + "endpoint = ManagedOnlineEndpoint(\n", + " name=online_endpoint_name,\n", + " description=\"this is a sample online endpoint for mlflow model\",\n", + " auth_mode=\"key\",\n", + " tags={\"foo\": \"bar\"},\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ml_client.begin_create_or_update(endpoint).result()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5.2 Register best model and deploy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Register model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_name = \"bankmarketing-model\"\n", + "model = Model(\n", + " path=f\"azureml://jobs/{best_run.info.run_id}/outputs/artifacts/outputs/mlflow-model/\",\n", + " name=model_name,\n", + " description=\"my sample classification model\",\n", + " type=AssetTypes.MLFLOW_MODEL,\n", + ")\n", + "\n", + "# for downloaded file\n", + "# model = Model(path=\"artifact_downloads/outputs/model.pkl\", name=model_name)\n", + "\n", + "registered_model = ml_client.models.create_or_update(model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "registered_model.id" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Deploy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "deployment = ManagedOnlineDeployment(\n", + " name=\"bankmarketing-deploy\",\n", + " endpoint_name=online_endpoint_name,\n", + " model=registered_model.id,\n", + " instance_type=\"Standard_DS2_V2\",\n", + " instance_count=1,\n", + " liveness_probe=ProbeSettings(\n", + " failure_threshold=30,\n", + " success_threshold=1,\n", + " timeout=2,\n", + " period=10,\n", + " initial_delay=2000,\n", + " ),\n", + " readiness_probe=ProbeSettings(\n", + " failure_threshold=10,\n", + " success_threshold=1,\n", + " timeout=10,\n", + " period=10,\n", + " initial_delay=2000,\n", + " ),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ml_client.online_deployments.begin_create_or_update(deployment).result()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# bankmarketing deployment to take 100% traffic\n", + "endpoint.traffic = {\"bankmarketing-deploy\": 100}\n", + "ml_client.begin_create_or_update(endpoint)" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "### Test the deployment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# test the blue deployment with some sample data\n", + "import pandas as pd\n", + "\n", + "test_data = pd.read_csv(\"./data/test-mltable-folder/bank_marketing_test_data.csv\")\n", + "\n", + "test_data = test_data.drop(\"y\", axis=1)\n", + "\n", + "test_data_json = test_data.to_json(orient=\"records\", indent=4)\n", + "data = (\n", + " '{ \\\n", + " \"input_data\": {\"data\": '\n", + " + test_data_json\n", + " + \"}}\"\n", + ")\n", + "\n", + "request_file_name = \"sample-request-bankmarketing.json\"\n", + "\n", + "with open(request_file_name, \"w\") as request_file:\n", + " request_file.write(data)\n", + "\n", + "ml_client.online_endpoints.invoke(\n", + " endpoint_name=online_endpoint_name,\n", + " deployment_name=\"bankmarketing-deploy\",\n", + " request_file=request_file_name,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# wait and delete endpoint\n", + "import time\n", + "\n", + "time.sleep(60)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get endpoint details" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the details for online endpoint\n", + "endpoint = ml_client.online_endpoints.get(name=online_endpoint_name)\n", + "\n", + "# existing traffic details\n", + "print(endpoint.traffic)\n", + "\n", + "# Get the scoring URI\n", + "print(endpoint.scoring_uri)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Delete the deployment and endpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ml_client.online_endpoints.begin_delete(name=online_endpoint_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Next Step: Load the best model and try predictions\n", + "\n", + "Loading the models locally assume that you are running the notebook in an environment compatible with the model. The list of dependencies that is expected by the model is specified in the MLFlow model produced by AutoML (in the 'conda.yaml' file within the mlflow-model folder).\n", + "\n", + "Since the AutoML model was trained remotelly in a different environment with different dependencies to your current local conda environment where you are running this notebook, if you want to load the model you have several options:\n", + "\n", + "1. A recommended way to locally load the model in memory and try predictions is to create a new/clean conda environment with the dependencies specified in the conda.yaml file within the MLFlow model's folder, then use MLFlow to load the model and call .predict() as explained in the notebook **mlflow-model-local-inference-test.ipynb** in this same folder.\n", + "\n", + "2. You can install all the packages/dependencies specified in conda.yaml into your current conda environment you used for using Azure ML SDK and AutoML. MLflow SDK also have a method to install the dependencies in the current environment. However, this option could have risks of package version conflicts depending on what's installed in your current environment.\n", + "\n", + "3. You can also use: mlflow models serve -m 'xxxxxxx'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 6. 
Using the auto generated model training code for retraining on new data\n", + "Code generation is automatically enabled on all sdk v2 jobs, so we now have access to the code that was used to generate any of the AutoML tried models. Below we'll be using the generated training script of the best model to retrain on a new dataset.\n", + "\n", + "## 6.1 Register Dataset\n", + "For this demo, we'll begin by registering a dataset to use for retraining and setting the MLFlow tracking uri. It should be noted that this is not new data, just a combination of the Training and Validation datasets.\n", + "\n", + "*IMPORTANT*, MLFlow is used to access the results for the above run, so make sure you have installed the latest MLFlow packages with:\n", + "\n", + " pip install azureml-mlflow\n", + "\n", + " pip install mlflow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "retrain_data = Data(\n", + " path=\"./data/retrained-mltable-folder/\",\n", + " type=AssetTypes.MLTABLE,\n", + " description=\"Updated training dataset, includes validation data.\",\n", + " name=\"bankmarketing-retrain-data\",\n", + ")\n", + "retrain_data = ml_client.data.create_or_update(retrain_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import mlflow\n", + "\n", + "# Obtain the tracking URL from MLClient\n", + "MLFLOW_TRACKING_URI = ml_client.workspaces.get(\n", + " name=ml_client.workspace_name\n", + ").mlflow_tracking_uri\n", + "\n", + "print(MLFLOW_TRACKING_URI)\n", + "\n", + "# Set the MLFLOW TRACKING URI\n", + "mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)\n", + "\n", + "print(\"\\nCurrent tracking uri: {}\".format(mlflow.get_tracking_uri()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6.2 Download Script\n", + "Next, we'll download the generated script for the best run and use it for retraining. For more advanced scenarios, you can customize the training script as you need: change the featurization pipeline, change the learner algorithm or its hyperparameters, etc.\n", + "\n", + "For this exercise, we'll leave the script as it was generated." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from mlflow.tracking.client import MlflowClient\n", + "\n", + "# Initialize MLFlow client\n", + "mlflow_client = MlflowClient()\n", + "\n", + "# Get the parent run\n", + "job_name = returned_job.name\n", + "mlflow_parent_run = mlflow_client.get_run(job_name)\n", + "print(\"Parent Run: \")\n", + "print(mlflow_parent_run)\n", + "\n", + "# Get the best model's child run\n", + "best_child_run_id = mlflow_parent_run.data.tags[\"automl_best_child_run_id\"]\n", + "print(\"Found best child run id: \", best_child_run_id)\n", + "best_run = mlflow_client.get_run(best_child_run_id)\n", + "\n", + "# Download run's artifacts/outputs\n", + "local_dir = \"./artifact_downloads/\"\n", + "if not os.path.exists(local_dir):\n", + " os.mkdir(local_dir)\n", + "local_path = download_artifacts(\n", + " run_id=best_run.info.run_id, artifact_path=\"outputs\", dst_path=local_dir\n", + ")\n", + "print(\"Artifacts downloaded in: {}\".format(local_path))\n", + "print(\"Artifacts: {}\".format(os.listdir(local_path)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6.3 Create and Run the Command\n", + "Finally, we will create a command and submit the job." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "command_str = f\"python script.py --training_dataset_uri {retrain_data.path}\"\n", + "command_job = command(\n", + " code=\"./artifact_downloads/outputs/generated_code/\",\n", + " command=command_str,\n", + " tags=dict(automl_child_run_id=best_run.info.run_id),\n", + " environment=\"AzureML-AutoML@latest\",\n", + " # compute is omitted so that the retraining job also runs on serverless compute\n", + " experiment_name=exp_name,\n", + ")\n", + "\n", + "script_job = ml_client.create_or_update(command_job)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After the job completes, we can download/test/deploy the model it has built." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "script_job.studio_url\n", + "ml_client.jobs.stream(script_job.name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Next Steps\n", + "You can see further examples of other AutoML tasks such as Image-Classification, Image-Object-Detection, NLP-Text-Classification, Time-Series-Forecasting, etc." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernel_info": { + "name": "python3-azureml" + }, + "kernelspec": { + "display_name": "Python 3.10 - SDK V2", + "language": "python", + "name": "python310-sdkv2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + }, + "microsoft": { + "host": { + "AzureML": { + "notebookHasBeenCompleted": true + } + } + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "vscode": { + "interpreter": { + "hash": "a0fdce2a800559117d1dfcce1e32c89fae80776a81d787167901ec651db4530b" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-github-dau/auto-ml-forecasting-github-dau.ipynb b/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-github-dau/auto-ml-forecasting-github-dau.ipynb index 34f401d310..8830ce0092 100644 --- a/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-github-dau/auto-ml-forecasting-github-dau.ipynb +++ b/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-github-dau/auto-ml-forecasting-github-dau.ipynb @@ -38,14 +38,14 @@ "- A basic understanding of Machine Learning\n", "- An Azure account with an active subscription. [Create an account for free](https://azure.microsoft.com/free/?WT.mc_id=A261C142F)\n", "- An Azure ML workspace. [Check this notebook for creating a workspace](../../../resources/workspace/workspace.ipynb) \n", - "- A Compute Cluster. 
[Check this notebook to create a compute cluster](../../../resources/compute/compute.ipynb)\n", + "- Serverless compute to run the job\n", "- A python environment\n", "- Installation instructions - [install instructions](../../../README.md)\n", "\n", "**Learning Objectives** - By the end of this tutorial, you should be able to:\n", "- Connect to your AML workspace from the Python SDK\n", "- Create an `AutoML time-series forecasting Job` with the 'forecasting()' factory-fuction\n", - "- Train the model using AmlCompute by submitting/running the AutoML forecasting training job\n", + "- Train the model using serverless compute by submitting/running the AutoML forecasting training job\n", "- Obtain the model and use it to generate forecast\n", "\n", "**Motivations** - This notebook explains how to setup and run an AutoML forecasting job. This is one of the nine ML-tasks supported by AutoML. Other ML-tasks are 'regression', 'classification', 'image classification', 'image object detection', 'nlp text classification', etc.\n", @@ -296,49 +296,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# 3. Create or Attach existing AmlCompute.\n", - "Azure Machine Learning Compute is a managed-compute infrastructure that allows the user to easily create a single or multi-node compute. In this tutorial, you will create and an AmlCompute cluster as your training compute resource.\n", - "\n", - "### Creation of AmlCompute takes approximately 5 minutes.\n", - "If the AmlCompute with that name is already in your workspace this code will skip the creation process. As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read this article on the default limits and how to request more quota." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.core.exceptions import ResourceNotFoundError\n", - "from azure.ai.ml.entities import AmlCompute\n", - "\n", - "compute_name = \"github-cluster-sdkv2\"\n", - "\n", - "try:\n", - " # Retrieve an already attached Azure Machine Learning Compute.\n", - " compute = ml_client.compute.get(compute_name)\n", - "except ResourceNotFoundError as e:\n", - " compute = AmlCompute(\n", - " name=compute_name,\n", - " size=\"STANDARD_DS12_V2\",\n", - " type=\"amlcompute\",\n", - " min_instances=0,\n", - " max_instances=4,\n", - " idle_time_before_scale_down=120,\n", - " )\n", - " poller = ml_client.begin_create_or_update(compute)\n", - " poller.wait()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 4. Configure and run the AutoML Forecasting training job\n", + "# 3. Configure and run the AutoML Forecasting training job\n", "In this section we will configure and run the AutoML job, for training the model.\n", "\n", - "## 4.1 Configure the job through the forecasting() factory function\n", + "## 3.1 Configure the job through the forecasting() factory function\n", "\n", "### forecasting() function parameters:\n", "\n", @@ -349,7 +310,6 @@ "|**target_column_name**|The name of the column to target for predictions. It must always be specified. This parameter is applicable to 'training_data', 'validation_data' and 'test_data'.|\n", "|**primary_metric**|The metric that AutoML will optimize for model selection.|\n", "|**training_data**|The data to be used for training. It should contain both training feature columns and a target column. 
Optionally, this data can be split for segregating a validation or test dataset. You can use a registered MLTable in the workspace using the format ':' OR you can use a local file or folder as a MLTable. For e.g Input(mltable='my_mltable:1') OR Input(mltable=MLTable(local_path=\"./data\")). The parameter 'training_data' must always be provided.\n", - "|**compute**|The compute on which the AutoML job will run. In this example we are using a compute called 'github-cluster-sdkv2' present in the workspace. You can replace it any other compute in the workspace.|\n", "|**name**|The name of the Job/Run. This is an optional property. If not specified, a random name will be generated.|\n", "|**experiment_name**|The name of the Experiment. An Experiment is like a folder with multiple runs in Azure ML Workspace that should be related to the same logical machine learning experiment.|\n", "\n", @@ -417,9 +377,9 @@ "outputs": [], "source": [ "# Create the AutoML forecasting job with the related factory-function.\n", + "from azure.ai.ml.entities import ResourceConfiguration\n", "\n", "forecasting_job = automl.forecasting(\n", - " compute=compute_name,\n", " experiment_name=exp_name,\n", " training_data=my_training_data_input,\n", " # validation_data = my_validation_data_input,\n", @@ -444,6 +404,10 @@ "# Enable Dnn training and allow only TCNForecaster model\n", "forecasting_job.set_training(\n", " allowed_training_algorithms=[\"TCNForecaster\"], enable_dnn_training=True\n", + ")\n", + "# Serverless compute resources used to run the job\n", + "forecasting_job.resources = ResourceConfiguration(\n", + " instance_type=\"Standard_E4s_v3\", instance_count=4\n", ")" ] }, @@ -452,7 +416,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 4.2 Train the AutoML model\n", + "## 3.2 Train the AutoML model\n", "Using the `MLClient` created earlier, we will now run this Command in the workspace." ] }, @@ -484,7 +448,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# 5. Retrieve the Best Trial (Best Model's trial/run)\n", + "# 4. Retrieve the Best Trial (Best Model's trial/run)\n", "Use the MLFLowClient to access the results (such as Models, Artifacts, Metrics) of a previously completed AutoML Trial." ] }, @@ -493,7 +457,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 5.1 Initialize MLFlow Client\n", + "## 4.1 Initialize MLFlow Client\n", "The models and artifacts that are produced by AutoML can be accessed via the MLFlow interface. \n", "Initialize the MLFlow client here, and set the backend as Azure ML, via. the MLFlow Client.\n", "\n", @@ -620,7 +584,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 5.2 Get best model run's validation metrics" + "## 4.2 Get best model run's validation metrics" ] }, { @@ -637,9 +601,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# 6 Model Evaluation and Deployment\n", + "# 5 Model Evaluation and Deployment\n", "\n", - "## 6.1 Download the best model\n", + "## 5.1 Download the best model\n", "\n", "Access the results (such as Models, Artifacts, Metrics) of a previously completed AutoML Run." ] @@ -677,15 +641,51 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 6.2 Forecasting using batch endpoint\n", + "## 5.2 Forecasting using batch endpoint\n", "Now that we have retrieved the best pipeline/model, it can be used to make predictions on test data. We will do batch scoring on the test dataset which must have the same schema as training dataset.\n", "\n", - "The inference will run on a remote compute. 
In this example, it will re-use the training compute. First we will load model and environment from the local file.\n", + "The inference will run on a remote compute. First we need to create compute and then load model and environment from the local file.\n", "\n", - "### Create a model endpoint\n", - "First we need to register the model, environment and batch endpoint." + "### Creation of AmlCompute takes approximately 5 minutes.\n", + "If the AmlCompute with that name is already in your workspace this code will skip the creation process. As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read this article on the default limits and how to request more quota." ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.core.exceptions import ResourceNotFoundError\n", + "from azure.ai.ml.entities import AmlCompute\n", + "\n", + "compute_name = \"github-cluster-sdkv2\"\n", + "\n", + "try:\n", + " # Retrieve an already attached Azure Machine Learning Compute.\n", + " compute = ml_client.compute.get(compute_name)\n", + "except ResourceNotFoundError as e:\n", + " compute = AmlCompute(\n", + " name=compute_name,\n", + " size=\"STANDARD_DS12_V2\",\n", + " type=\"amlcompute\",\n", + " min_instances=0,\n", + " max_instances=4,\n", + " idle_time_before_scale_down=120,\n", + " )\n", + " poller = ml_client.begin_create_or_update(compute)\n", + " poller.wait()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a model endpoint\n", + "We need to register the model, environment and batch endpoint." + ] + }, { "cell_type": "code", "execution_count": null, @@ -948,7 +948,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 6.3 Deployment\n", + "## 5.3 Deployment\n", "\n", "After we have tested our model on the batch endpoint, we may want to deploy it as a service. Currently no code deployment using mlflow is not supported for forecasting tasks and we will use the workaround which is described in the Deployment section of the [automl-forecasting-task-energy-demand](https://github.com/Azure/azureml-examples/blob/main/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-task-energy-demand/automl-forecasting-task-energy-demand-advanced-mlflow.ipynb) notebook." ] diff --git a/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-orange-juice-sales/automl-forecasting-orange-juice-sales-mlflow.ipynb b/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-orange-juice-sales/automl-forecasting-orange-juice-sales-mlflow.ipynb index 20776c96d3..d4a9c4f510 100644 --- a/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-orange-juice-sales/automl-forecasting-orange-juice-sales-mlflow.ipynb +++ b/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-orange-juice-sales/automl-forecasting-orange-juice-sales-mlflow.ipynb @@ -11,7 +11,6 @@ "- A basic understanding of Machine Learning\n", "- An Azure account with an active subscription. [Create an account for free](https://azure.microsoft.com/free/?WT.mc_id=A261C142F)\n", "- An Azure ML workspace. [Check this notebook for creating a workspace](../../../resources/workspace/workspace.ipynb)\n", - "- A Compute Cluster. 
[Check this notebook to create a compute cluster](../../../resources/compute/compute.ipynb)\n", "- A python environment\n", "- Installed Azure Machine Learning Python SDK v2 - [install instructions](../../../README.md) - check the getting started section\n", "\n", @@ -389,46 +388,6 @@ "- https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-data-assets?tabs=Python-SDK covers how to work with them in the v2 CLI/SDK." ] }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 3 Create or Attach existing AmlCompute.\n", - "[Azure Machine Learning Compute](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute) is a managed-compute infrastructure that allows the user to easily create a single or multi-node compute. In this tutorial, you create AmlCompute as your training compute resource.\n", - "\n", - "#### Creation of AmlCompute takes approximately 5 minutes.\n", - "If the AmlCompute with that name is already in your workspace this code will skip the creation process.\n", - "As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.core.exceptions import ResourceNotFoundError\n", - "from azure.ai.ml.entities import AmlCompute\n", - "\n", - "cluster_name = \"oj-cluster\"\n", - "\n", - "try:\n", - " # Retrieve an already attached Azure Machine Learning Compute.\n", - " compute = ml_client.compute.get(cluster_name)\n", - "except ResourceNotFoundError as e:\n", - " compute = AmlCompute(\n", - " name=cluster_name,\n", - " size=\"STANDARD_DS12_V2\",\n", - " type=\"amlcompute\",\n", - " min_instances=0,\n", - " max_instances=4,\n", - " idle_time_before_scale_down=120,\n", - " )\n", - " poller = ml_client.begin_create_or_update(compute)\n", - " poller.wait()" - ] - }, { "attachments": {}, "cell_type": "markdown", @@ -448,7 +407,6 @@ "|**target_column_name**|The name of the label column.|\n", "|**primary_metric**|This is the metric that you want to optimize.
Forecasting supports the following primary metrics <br> spearman_correlation <br> normalized_root_mean_squared_error <br> r2_score <br>
normalized_mean_absolute_error|\n", "|**training_data**|The training data to be used within the experiment. You can use a registered MLTable in the workspace using the format '<mltable_name>:<version/>' OR you can use a local file or folder as a MLTable. For e.g Input(mltable='my_mltable:1') OR Input(mltable=MLTable(local_path=\"./data\")) The parameter 'training_data' must always be provided.|\n", - "|**compute**|The compute on which the AutoML job will run. In this example we are using a compute called 'oj-cluster' present in the workspace. You can replace it any other compute in the workspace.|\n", "|**n_cross_validations**|Number of cross-validation folds to use for model/pipeline selection. This can be set to \"auto\", in which case AutoMl determines the number of cross-validations automatically, if a validation set is not provided. Or users could specify an integer value.|\n", "|**name**|The name of the Job/Run. This is an optional property. If not specified, a random name will be generated.\n", "|**experiment_name**|The name of the Experiment. An Experiment is like a folder with multiple runs in Azure ML Workspace that should be related to the same logical machine learning experiment.|\n", @@ -583,8 +541,9 @@ }, "outputs": [], "source": [ + "from azure.ai.ml.entities import ResourceConfiguration\n", + "\n", "forecasting_job = automl.forecasting(\n", - " compute=cluster_name,\n", " experiment_name=exp_name,\n", " training_data=my_training_data_input,\n", " target_column_name=target_column_name,\n", @@ -617,7 +576,11 @@ ")\n", "\n", "# Training properties are optional\n", - "forecasting_job.set_training(blocked_training_algorithms=[\"ExtremeRandomTrees\"])" + "forecasting_job.set_training(blocked_training_algorithms=[\"ExtremeRandomTrees\"])\n", + "# Serverless compute resources used to run the job\n", + "forecasting_job.resources = ResourceConfiguration(\n", + " instance_type=\"Standard_E4s_v3\", instance_count=4\n", + ")" ] }, { @@ -978,6 +941,45 @@ "To create a batch deployment, we will use the forecasting_script.py which will load the model and will call the forecast method each time we will envoke the endpoint." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create or Attach existing AmlCompute.\n", + "[Azure Machine Learning Compute](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute) is a managed-compute infrastructure that allows the user to easily create a single or multi-node compute. In this tutorial, you create AmlCompute as your training compute resource.\n", + "\n", + "#### Creation of AmlCompute takes approximately 5 minutes.\n", + "If the AmlCompute with that name is already in your workspace this code will skip the creation process.\n", + "As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota." 
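For readers skimming this diff: the net effect of the changes to this notebook is that AutoML *training* now runs on serverless compute (configured through `ResourceConfiguration` on the job), while the AmlCompute cluster created below is only needed for the batch-inference endpoint. A minimal sketch of the training side, reusing the `forecasting_job` and `ml_client` objects already defined in this notebook:

```python
# Serverless training: no compute target is passed to automl.forecasting();
# the VM size and node count are instead set on the job's resources.
from azure.ai.ml.entities import ResourceConfiguration

forecasting_job.resources = ResourceConfiguration(
    instance_type="Standard_E4s_v3", instance_count=4
)

# Submit the job; Azure ML provisions and tears down the serverless nodes for you.
returned_job = ml_client.jobs.create_or_update(forecasting_job)
ml_client.jobs.stream(returned_job.name)
```

The cluster-creation cell that follows is unchanged in purpose; it is exercised only when the batch endpoint for inference is deployed.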
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.core.exceptions import ResourceNotFoundError\n", + "from azure.ai.ml.entities import AmlCompute\n", + "\n", + "cluster_name = \"oj-cluster\"\n", + "\n", + "try:\n", + " # Retrieve an already attached Azure Machine Learning Compute.\n", + " compute = ml_client.compute.get(cluster_name)\n", + "except ResourceNotFoundError as e:\n", + " compute = AmlCompute(\n", + " name=cluster_name,\n", + " size=\"STANDARD_DS12_V2\",\n", + " type=\"amlcompute\",\n", + " min_instances=0,\n", + " max_instances=4,\n", + " idle_time_before_scale_down=120,\n", + " )\n", + " poller = ml_client.begin_create_or_update(compute)\n", + " poller.wait()" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/sdk/python/jobs/pipelines/1a_pipeline_with_components_from_yaml/pipeline_with_components_from_yaml_serverless.ipynb b/sdk/python/jobs/pipelines/1a_pipeline_with_components_from_yaml/pipeline_with_components_from_yaml_serverless.ipynb new file mode 100644 index 0000000000..df42d3b85e --- /dev/null +++ b/sdk/python/jobs/pipelines/1a_pipeline_with_components_from_yaml/pipeline_with_components_from_yaml_serverless.ipynb @@ -0,0 +1,275 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Build Pipeline with Components from yaml\n", + "\n", + "**Requirements** - In order to benefit from this tutorial, you will need:\n", + "- A basic understanding of Machine Learning\n", + "- An Azure account with an active subscription - [Create an account for free](https://azure.microsoft.com/free/?WT.mc_id=A261C142F)\n", + "- This pipeline job will use **Serverless Compute** to execute the job. You do not need to create and manage compute anymore to run the job\n", + "- A python environment\n", + "- Installed Azure Machine Learning Python SDK v2 - [install instructions](../../../README.md) - check the getting started section\n", + "\n", + "**Learning Objectives** - By the end of this tutorial, you should be able to:\n", + "- Connect to your AML workspace from the Python SDK\n", + "- Define and load `CommandComponent` from YAML\n", + "- Create `Pipeline` using loaded component.\n", + "\n", + "**Motivations** - This notebook covers the scenario that user define components using yaml then use these components to build pipeline." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 1. Connect to Azure Machine Learning Workspace\n", + "\n", + "The [workspace](https://docs.microsoft.com/en-us/azure/machine-learning/concept-workspace) is the top-level resource for Azure Machine Learning, providing a centralized place to work with all the artifacts you create when you use Azure Machine Learning. In this section we will connect to the workspace in which the job will be run.\n", + "\n", + "## 1.1 Import the required libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import required libraries\n", + "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n", + "\n", + "from azure.ai.ml import MLClient, Input\n", + "from azure.ai.ml.dsl import pipeline\n", + "from azure.ai.ml import load_component" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1.2 Configure credential\n", + "\n", + "We are using `DefaultAzureCredential` to get access to workspace. 
\n", + "`DefaultAzureCredential` should be capable of handling most Azure SDK authentication scenarios. \n", + "\n", + "Reference for more available credentials if it does not work for you: [configure credential example](../../configuration.ipynb), [azure-identity reference doc](https://docs.microsoft.com/en-us/python/api/azure-identity/azure.identity?view=azure-python)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " credential = DefaultAzureCredential()\n", + " # Check if given credential can get token successfully.\n", + " credential.get_token(\"https://management.azure.com/.default\")\n", + "except Exception as ex:\n", + " # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential not work\n", + " credential = InteractiveBrowserCredential()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1.3 Get a handle to the workspace\n", + "\n", + "We use config file to connect to a workspace. The Azure ML workspace should be configured with computer cluster. [Check this notebook for configure a workspace](../../configuration.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 2. Define and create components into workspace\n", + "## 2.1 Load components from YAML" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "parent_dir = \".\"\n", + "train_model = load_component(source=parent_dir + \"/train_model.yml\")\n", + "score_data = load_component(source=parent_dir + \"/score_data.yml\")\n", + "eval_model = load_component(source=parent_dir + \"/eval_model.yml\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2.2 Inspect loaded component" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Print the component as yaml\n", + "print(train_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Inspect more information\n", + "print(type(train_model))\n", + "help(train_model._func)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 3. 
Sample pipeline job\n", + "## 3.1 Build pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {"name": "build-pipeline"}, + "outputs": [], + "source": [ + "# Construct pipeline\n", + "@pipeline()\n", + "def pipeline_with_components_from_yaml(\n", + " training_input,\n", + " test_input,\n", + " training_max_epochs=20,\n", + " training_learning_rate=1.8,\n", + " learning_rate_schedule=\"time-based\",\n", + "):\n", + " \"\"\"E2E dummy train-score-eval pipeline with components defined via yaml.\"\"\"\n", + " # Call component obj as function: apply given inputs & parameters to create a node in pipeline\n", + " train_with_sample_data = train_model(\n", + " training_data=training_input,\n", + " max_epochs=training_max_epochs,\n", + " learning_rate=training_learning_rate,\n", + " learning_rate_schedule=learning_rate_schedule,\n", + " )\n", + "\n", + " score_with_sample_data = score_data(\n", + " model_input=train_with_sample_data.outputs.model_output, test_data=test_input\n", + " )\n", + " score_with_sample_data.outputs.score_output.mode = \"upload\"\n", + "\n", + " eval_with_sample_data = eval_model(\n", + " scoring_result=score_with_sample_data.outputs.score_output\n", + " )\n", + "\n", + " # Return: pipeline outputs\n", + " return {\n", + " \"trained_model\": train_with_sample_data.outputs.model_output,\n", + " \"scored_data\": score_with_sample_data.outputs.score_output,\n", + " \"evaluation_report\": eval_with_sample_data.outputs.eval_output,\n", + " }\n", + "\n", + "\n", + "pipeline_job = pipeline_with_components_from_yaml(\n", + " training_input=Input(type=\"uri_folder\", path=parent_dir + \"/data/\"),\n", + " test_input=Input(type=\"uri_folder\", path=parent_dir + \"/data/\"),\n", + " training_max_epochs=20,\n", + " training_learning_rate=1.8,\n", + " learning_rate_schedule=\"time-based\",\n", + ")\n", + "\n", + "# set pipeline to use serverless compute\n", + "pipeline_job.settings.default_compute = \"serverless\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Inspect built pipeline\n", + "print(pipeline_job)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3.2 Submit pipeline job" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Submit pipeline job to workspace\n", + "pipeline_job = ml_client.jobs.create_or_update(\n", + " pipeline_job, experiment_name=\"pipeline_samples\"\n", + ")\n", + "pipeline_job" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Wait until the job completes\n", + "ml_client.jobs.stream(pipeline_job.name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Next Steps\n", + "You can see further examples of running a pipeline job [here](../README.md)" + ] + } + ], + "metadata": { + "description": { + "description": "Create pipeline with CommandComponents from local YAML file" + }, + "interpreter": { + "hash": "3e9e0e270b75c5e6da2e22113ba4f77b864d68f95da6601809c29e46c73ae6bb" + }, + "kernelspec": { + "display_name": "Python 3.10 - SDK V2", + "language": "python", + "name": "python310-sdkv2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + 
"nbformat_minor": 2 +} diff --git a/tutorials/azureml-in-a-day/azureml-in-a-day.ipynb b/tutorials/azureml-in-a-day/azureml-in-a-day.ipynb index f7c93d2741..89184d99c9 100644 --- a/tutorials/azureml-in-a-day/azureml-in-a-day.ipynb +++ b/tutorials/azureml-in-a-day/azureml-in-a-day.ipynb @@ -8,16 +8,16 @@ "\n", "Learn how a data scientist uses Azure Machine Learning (Azure ML) to train a model, then use the model for prediction. This tutorial will help you become familiar with the core concepts of Azure ML and their most common usage. \n", "\n", - "You'll learn how to submit a *command job* to run your *training script* on a specified *compute resource*, configured with the *job environment* necessary to run the script.\n", + "You'll learn how to submit a *command job* to run your *training script*, configured with the *job environment* necessary to run the script.\n", "\n", "The training script handles the data preparation, then trains and registers a model. Once you have the model, you'll *deploy* it as an *endpoint*, then call the endpoint for *inferencing*.\n", "\n", "The steps you'll take are:\n", "\n", "> * Connect to your Azure ML workspace\n", - "> * Create your compute resource and job environment\n", + "> * Create your job environment\n", "> * Create your training script\n", - "> * Create and run your command job to run the training script on the compute resource, configured with the appropriate job environment\n", + "> * Create and run your command job to run the training script, configured with the appropriate job environment\n", "> * View the output of your training script\n", "> * Deploy the newly-trained model as an endpoint\n", "> * Call the Azure ML endpoint for inferencing" @@ -125,66 +125,7 @@ "The result is a handler to the workspace that you'll use to manage other resources and jobs.\n", "\n", "> [!IMPORTANT]\n", - "> Creating MLClient will not connect to the workspace. The client initialization is lazy, it will wait for the first time it needs to make a call (in the notebook below, that will happen during compute creation)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create a compute resource to run your job\n", - "\n", - "You already have a compute resource you're using to run the notebook. But now you'll add another type, a **compute cluster** that you'll use to run your training job. The compute cluster can be single or multi-node machines with Linux or Windows OS, or a specific compute fabric like Spark.\n", - "\n", - "You'll provision a Linux compute cluster. See the [full list on VM sizes and prices](https://azure.microsoft.com/pricing/details/machine-learning/) .\n", - "\n", - "For this example, you only need a basic cluster, so you'll use a Standard_DS3_v2 model with 2 vCPU cores, 7-GB RAM." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "name": "cpu_compute_target" - }, - "outputs": [], - "source": [ - "from azure.ai.ml.entities import AmlCompute\n", - "\n", - "# Name assigned to the compute cluster\n", - "cpu_compute_target = \"cpu-cluster\"\n", - "\n", - "try:\n", - " # let's see if the compute target already exists\n", - " cpu_cluster = ml_client.compute.get(cpu_compute_target)\n", - " print(\n", - " f\"You already have a cluster named {cpu_compute_target}, we'll reuse it as is.\"\n", - " )\n", - "\n", - "except Exception:\n", - " print(\"Creating a new cpu compute target...\")\n", - "\n", - " # Let's create the Azure ML compute object with the intended parameters\n", - " cpu_cluster = AmlCompute(\n", - " name=cpu_compute_target,\n", - " # Azure ML Compute is the on-demand VM service\n", - " type=\"amlcompute\",\n", - " # VM Family\n", - " size=\"STANDARD_DS3_V2\",\n", - " # Minimum running nodes when there is no job running\n", - " min_instances=0,\n", - " # Nodes in cluster\n", - " max_instances=4,\n", - " # How many seconds will the node running after the job termination\n", - " idle_time_before_scale_down=180,\n", - " # Dedicated or LowPriority. The latter is cheaper but there is a chance of job termination\n", - " tier=\"Dedicated\",\n", - " )\n", - " print(\n", - " f\"AMLCompute with name {cpu_cluster.name} will be created, with compute size {cpu_cluster.size}\"\n", - " )\n", - " # Now, we pass the object to MLClient's create_or_update method\n", - " cpu_cluster = ml_client.compute.begin_create_or_update(cpu_cluster)" + "> Creating MLClient will not connect to the workspace. The client initialization is lazy, it will wait for the first time it needs to make a call (in the notebook below, that will happen during job environment creation)." ] }, { @@ -193,7 +134,7 @@ "source": [ "## Create a job environment\n", "\n", - "To run your AzureML job on your compute cluster, you'll need an [environment](https://docs.microsoft.com/azure/machine-learning/concept-environments). An environment lists the software runtime and libraries that you want installed on the compute where you’ll be training. It's similar to your Python environment on your local machine.\n", + "To run your AzureML job, you'll need an [environment](https://docs.microsoft.com/azure/machine-learning/concept-environments). An environment lists the software runtime and libraries that you want installed on the compute where you’ll be training. It's similar to your Python environment on your local machine.\n", "\n", "AzureML provides many curated or ready-made environments, which are useful for common training and inference scenarios. You can also create your own custom environments using a docker image, or a conda configuration.\n", "\n", @@ -295,7 +236,7 @@ "source": [ "## What is a command job?\n", "\n", - "You'll create an Azure ML *command job* to train a model for credit default prediction. The command job is used to run a *training script* in a specified environment on a specified compute resource. You've already created the environment and the compute resource. Next you'll create the training script.\n", + "You'll create an Azure ML *command job* to train a model for credit default prediction. The command job is used to run a *training script* in a specified environment on serverless compute. You've already created the environment. 
Next you'll create the training script.\n", "\n", "The *training script* handles the data preparation, training and registering of the trained model. In this tutorial, you'll create a Python training script.\n", "\n", @@ -470,7 +411,6 @@ "Now that you have a script that can perform the desired tasks, you'll use the general purpose **command** that can run command line actions. This command line action can be directly calling system commands or by running a script. \n", "\n", "Here, you'll create input variables to specify the input data, split ratio, learning rate and registered model name. The command script will:\n", - "* Use the compute created earlier to run this command.\n", "* Use the environment created earlier - you can use the `@latest` notation to indicate the latest version of the environment when the command is run.\n", "* Configure some metadata like display name, experiment name etc. An *experiment* is a container for all the iterations you do on a certain project. All the jobs submitted under the same experiment name would be listed next to each other in Azure ML studio.\n", "* Configure the command line action itself - `python main.py` in this case. The inputs/outputs are accessible in the command via the `${{ ... }}` notation.\n", @@ -502,8 +442,7 @@ " ),\n", " code=\"./src/\", # location of source code\n", " command=\"python main.py --data ${{inputs.data}} --test_train_ratio ${{inputs.test_train_ratio}} --learning_rate ${{inputs.learning_rate}} --registered_model_name ${{inputs.registered_model_name}}\",\n", - " environment=\"aml-scikit-learn@latest\",\n", - " compute=\"cpu-cluster\",\n", + " environment=\"aml-scikit-learn@latest\",\n", " experiment_name=\"train_model_credit_default_prediction\",\n", " display_name=\"credit_default_prediction\",\n", ")" @@ -542,7 +481,7 @@ "![Screenshot that shows the job overview](media/view-job.gif \"View the job in studio\")\n", "\n", "> [!IMPORTANT]\n", - "> Wait until the status of the job is complete before returning to this notebook to continue. The job will take 2 to 3 minutes to run. It could take longer (up to 10 minutes) if the compute cluster has been scaled down to zero nodes and custom environment is still building.\n", + "> Wait until the status of the job is complete before returning to this notebook to continue. The job will take 2 to 3 minutes to run. 
It could take longer (up to 10 minutes) if the compute has been scaled down to zero nodes and custom environment is still building.\n", "\n" ] }, @@ -822,4 +761,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/tutorials/e2e-ds-experience/e2e-ml-workflow.ipynb b/tutorials/e2e-ds-experience/e2e-ml-workflow.ipynb index 63542f4498..31f640cd72 100644 --- a/tutorials/e2e-ds-experience/e2e-ml-workflow.ipynb +++ b/tutorials/e2e-ds-experience/e2e-ml-workflow.ipynb @@ -84,8 +84,7 @@ "Before creating the pipeline, you'll set up the resources the pipeline will use:\n", "\n", "* The dataset for training\n", - "* The software environment to run the pipeline\n", - "* A compute resource to where the job will run\n", + "* The software environment to run the pipeline\n", "\n", "## Connect to the workspace\n", "\n", @@ -237,69 +236,6 @@ ")" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the future, you can fetch the same dataset from the workspace using `credit_dataset = ml_client.data.get(\"\", version='')`.\n", - "\n", - "## Create a Compute Resource to run our pipeline\n", - "\n", - "Each step of an Azure ML pipeline can use a different compute resource for running the specific job of that step. It can be single or multi-node machines with Linux or Windows OS, or a specific compute fabric like Spark.\n", - "\n", - "In this section, we provision a Linux [compute cluster](https://docs.microsoft.com/azure/machine-learning/how-to-create-attach-compute-cluster?tabs=python). See the [full list on VM sizes and prices](https://azure.microsoft.com/en-ca/pricing/details/machine-learning/) .\n", - "\n", - "For this tutorial we only need a basic cluster, let's pick a Standard_DS3_v2 model with 2 vCPU cores, 7 GB RAM and create an Azure ML Compute" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "name": "cpu_cluster" - }, - "outputs": [], - "source": [ - "from azure.ai.ml.entities import AmlCompute\n", - "\n", - "cpu_compute_target = \"cpu-cluster\"\n", - "\n", - "try:\n", - " # let's see if the compute target already exists\n", - " cpu_cluster = ml_client.compute.get(cpu_compute_target)\n", - " print(\n", - " f\"You already have a cluster named {cpu_compute_target}, we'll reuse it as is.\"\n", - " )\n", - "\n", - "except Exception:\n", - " print(\"Creating a new cpu compute target...\")\n", - "\n", - " # Let's create the Azure ML compute object with the intended parameters\n", - " cpu_cluster = AmlCompute(\n", - " # Name assigned to the compute cluster\n", - " name=\"cpu-cluster\",\n", - " # Azure ML Compute is the on-demand VM service\n", - " type=\"amlcompute\",\n", - " # VM Family\n", - " size=\"STANDARD_DS3_V2\",\n", - " # Minimum running nodes when there is no job running\n", - " min_instances=0,\n", - " # Nodes in cluster\n", - " max_instances=4,\n", - " # How many seconds will the node running after the job termination\n", - " idle_time_before_scale_down=180,\n", - " # Dedicated or LowPriority. 
The latter is cheaper but there is a chance of job termination\n", - " tier=\"Dedicated\",\n", - " )\n", - "\n", - " # Now, we pass the object to MLClient's create_or_update method\n", - " cpu_cluster = ml_client.begin_create_or_update(cpu_cluster)\n", - "\n", - "print(\n", - " f\"AMLCompute with name {cpu_cluster.name} is created, the compute size is {cpu_cluster.size}\"\n", - ")" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -889,7 +825,7 @@ "source": [ "The python functions returned by `load_component()` work as any regular python function that we'll use within a pipeline to call each step.\n", "\n", - "To code the pipeline, we use a specific `@dsl.pipeline` decorator that identifies the Azure ML pipelines. In the decorator, we can specify the pipeline description and default resources like compute and storage. Like a python function, pipelines can have inputs, you can then create multiple instances of a single pipeline with different inputs.\n", + "To code the pipeline, we use a specific `@dsl.pipeline` decorator that identifies the Azure ML pipelines. In the decorator, we can specify the pipeline description and default resources like compute (serverless is used here) and storage. Like a python function, pipelines can have inputs, you can then create multiple instances of a single pipeline with different inputs.\n", "\n", "Here, we used *input data*, *split ratio* and *registered model name* as input variables. We then call the components and connect them via their inputs /outputs identifiers. The outputs of each step can be accessed via the `.outputs` property." ] @@ -913,7 +849,7 @@ "\n", "\n", "@dsl.pipeline(\n", - " compute=cpu_compute_target,\n", + " compute=\"serverless\",\n", " description=\"E2E data_perp-train pipeline\",\n", ")\n", "def credit_defaults_pipeline(\n", @@ -1353,4 +1289,4 @@ }, "nbformat": 4, "nbformat_minor": 1 -} \ No newline at end of file +} diff --git a/tutorials/get-started-notebooks/pipeline.ipynb b/tutorials/get-started-notebooks/pipeline.ipynb index 1172935705..a721afd057 100644 --- a/tutorials/get-started-notebooks/pipeline.ipynb +++ b/tutorials/get-started-notebooks/pipeline.ipynb @@ -1,1011 +1,1013 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Tutorial: Create production machine learning pipelines\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The core of a machine learning pipeline is to split a complete machine learning task into a multistep workflow. Each step is a manageable component that can be developed, optimized, configured, and automated individually. Steps are connected through well-defined interfaces. The Azure Machine Learning pipeline service automatically orchestrates all the dependencies between pipeline steps. The benefits of using a pipeline are standardized the MLOps practice, scalable team collaboration, training efficiency and cost reduction. 
To learn more about the benefits of pipelines, see [What are Azure Machine Learning pipelines](https://learn.microsoft.comazure/machine-learning/concept-ml-pipelines).\n", - "\n", - "In this tutorial, you use Azure Machine Learning to create a production ready machine learning project, using Azure Machine Learning Python SDK v2.\n", - "\n", - "This means you will be able to leverage the AzureML Python SDK to:\n", - "\n", - "- Get a handle to your Azure Machine Learning workspace\n", - "- Create Azure Machine Learning data assets\n", - "- Create reusable Azure Machine Learning components\n", - "- Create, validate and run Azure Machine Learning pipelines\n", - "\n", - "During this tutorial, you create an Azure Machine Learning pipeline to train a model for credit default prediction. The pipeline handles two steps: \n", - "\n", - "1. Data preparation\n", - "1. Training and registering the trained model\n", - "\n", - "The next image shows a simple pipeline as you'll see it in the Azure studio once submitted.\n", - "\n", - "![Screenshot that shows the AML Pipeline](./media/pipeline-overview.jpg \"Overview of the pipeline\")\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Prerequisites\n", - "\n", - "* If you opened this notebook from Azure Machine Learning studio, you need a compute instance to run the code. If you don't have a compute instance, select **Create compute** on the toolbar to first create one. You can use all the default settings. \n", - "\n", - " ![Create compute](./media/create-compute.png)\n", - "\n", - "* If you're seeing this notebook elsewhere, complete [Create resources you need to get started](https://docs.microsoft.com/azure/machine-learning/quickstart-create-resources) to create an Azure Machine Learning workspace and a compute instance.\n", - "\n", - "## Set your kernel\n", - "\n", - "* If your compute instance is stopped, start it now. \n", - " \n", - " ![Start compute](./media/start-compute.png)\n", - "\n", - "* Once your compute instance is running, make sure the that the kernel, found on the top right, is `Python 3.10 - SDK v2`. If not, use the dropdown to select this kernel.\n", - "\n", - " ![Set the kernel](./media/set-kernel.png)\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Set up the pipeline resources\n", - "\n", - "The Azure Machine Learning framework can be used from CLI, Python SDK, or studio interface. In this example, you use the Azure Machine Learning Python SDK v2 to create a pipeline. \n", - "\n", - "Before creating the pipeline, you need the following resources:\n", - "\n", - "* The data asset for training\n", - "* The software environment to run the pipeline\n", - "* A compute resource to where the job runs\n", - "\n", - "## Create handle to workspace\n", - "\n", - "Before we dive in the code, you need a way to reference your workspace. You'll create `ml_client` for a handle to the workspace. You'll then use `ml_client` to manage resources and jobs.\n", - "\n", - "In the next cell, enter your Subscription ID, Resource Group name and Workspace name. To find these values:\n", - "\n", - "1. In the upper right Azure Machine Learning studio toolbar, select your workspace name.\n", - "1. Copy the value for workspace, resource group and subscription ID into the code.\n", - "1. 
You'll need to copy one value, close the area and paste, then come back for the next one.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "attributes": { - "classes": [ - "Python" - ], - "id": "" - }, - "name": "ml_client" - }, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import DefaultAzureCredential\n", - "\n", - "# authenticate\n", - "credential = DefaultAzureCredential()\n", - "# # Get a handle to the workspace\n", - "ml_client = MLClient(\n", - " credential=credential,\n", - " subscription_id=\"\",\n", - " resource_group_name=\"\",\n", - " workspace_name=\"\",\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "> [!NOTE]\n", - "> Creating MLClient will not connect to the workspace. The client initialization is lazy, it will wait for the first time it needs to make a call (this will happen when creating the `credit_data` data asset, two code cells from here).\n", - "\n", - "## Register data from an external url\n", - "\n", - "If you have been following along with the other tutorials in this series and already registered the data, you can fetch the same dataset from the workspace using `credit_dataset = ml_client.data.get(\"\", version='')`. Then you may skip this section. To learn about data more in depth or if you would rather complete the data tutorial first, see [Upload, access and explore your data in Azure Machine Learning](https://learn.microsoft.com/azure/machine-learning/tutorial-explore-data).\n", - "\n", - "* Azure Machine Learning uses a `Data` object to register a reusable definition of data, and consume data within a pipeline. In the next section, you consume some data from web url as one example. Data from other sources can be created as well. `Data` assets from other sources can be created as well.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "name": "credit_data" - }, - "outputs": [], - "source": [ - "from azure.ai.ml.entities import Data\n", - "from azure.ai.ml.constants import AssetTypes\n", - "\n", - "web_path = \"https://archive.ics.uci.edu/ml/machine-learning-databases/00350/default%20of%20credit%20card%20clients.xls\"\n", - "\n", - "credit_data = Data(\n", - " name=\"creditcard_defaults\",\n", - " path=web_path,\n", - " type=AssetTypes.URI_FILE,\n", - " description=\"Dataset for credit card defaults\",\n", - " tags={\"source_type\": \"web\", \"source\": \"UCI ML Repo\"},\n", - " version=\"1.0.0\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This code just created a `Data` asset, ready to be consumed as an input by the pipeline that you'll define in the next sections. In addition, you can register the data to your workspace so it becomes reusable across pipelines.\n", - "\n", - "Since this is the first time that you're making a call to the workspace, you may be asked to authenticate. 
Once the authentication is complete, you then see the dataset registration completion message.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "attributes": { - "classes": [ - "Python" - ], - "id": "" - }, - "name": "update-credit_data" - }, - "outputs": [], - "source": [ - "credit_data = ml_client.data.create_or_update(credit_data)\n", - "print(\n", - " f\"Dataset with name {credit_data.name} was registered to workspace, the dataset version is {credit_data.version}\"\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the future, you can fetch the same dataset from the workspace using `credit_dataset = ml_client.data.get(\"\", version='')`.\n", - "\n", - "## Create a compute resource to run your pipeline\n", - "\n", - "Each step of an Azure Machine Learning pipeline can use a different compute resource for running the specific job of that step. It can be single or multi-node machines with Linux or Windows OS, or a specific compute fabric like Spark.\n", - "\n", - "In this section, you provision a Linux [compute cluster](https://docs.microsoft.com/azure/machine-learning/how-to-create-attach-compute-cluster?tabs=python). See the [full list on VM sizes and prices](https://azure.microsoft.com/en-ca/pricing/details/machine-learning/) .\n", - "\n", - "For this tutorial, you only need a basic cluster so use a Standard_DS3_v2 model with 2 vCPU cores, 7-GB RAM and create an Azure Machine Learning Compute.\n", - "> [!TIP]\n", - "> If you already have a compute cluster, replace \"cpu-cluster\" in the next code block with the name of your cluster. This will keep you from creating another one.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "name": "cpu_cluster" - }, - "outputs": [], - "source": [ - "from azure.ai.ml.entities import AmlCompute\n", - "\n", - "# Name assigned to the compute cluster\n", - "cpu_compute_target = \"cpu-cluster\"\n", - "\n", - "try:\n", - " # let's see if the compute target already exists\n", - " cpu_cluster = ml_client.compute.get(cpu_compute_target)\n", - " print(\n", - " f\"You already have a cluster named {cpu_compute_target}, we'll reuse it as is.\"\n", - " )\n", - "\n", - "except Exception:\n", - " print(\"Creating a new cpu compute target...\")\n", - "\n", - " # Let's create the Azure Machine Learning compute object with the intended parameters\n", - " # if you run into an out of quota error, change the size to a comparable VM that is available.\n", - " # Learn more on https://azure.microsoft.com/en-us/pricing/details/machine-learning/.\n", - " cpu_cluster = AmlCompute(\n", - " name=cpu_compute_target,\n", - " # Azure Machine Learning Compute is the on-demand VM service\n", - " type=\"amlcompute\",\n", - " # VM Family\n", - " size=\"STANDARD_DS3_V2\",\n", - " # Minimum running nodes when there is no job running\n", - " min_instances=0,\n", - " # Nodes in cluster\n", - " max_instances=4,\n", - " # How many seconds will the node running after the job termination\n", - " idle_time_before_scale_down=180,\n", - " # Dedicated or LowPriority. 
The latter is cheaper but there is a chance of job termination\n", - " tier=\"Dedicated\",\n", - " )\n", - " print(\n", - " f\"AMLCompute with name {cpu_cluster.name} will be created, with compute size {cpu_cluster.size}\"\n", - " )\n", - " # Now, we pass the object to MLClient's create_or_update method\n", - " cpu_cluster = ml_client.compute.begin_create_or_update(cpu_cluster)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create a job environment for pipeline steps\n", - "\n", - "So far, you've created a development environment on the compute instance, your development machine. You also need an environment to use for each step of the pipeline. Each step can have its own environment, or you can use some common environments for multiple steps.\n", - "\n", - "In this example, you create a conda environment for your jobs, using a conda yaml file.\n", - "First, create a directory to store the file in." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "name": "dependencies_dir" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "dependencies_dir = \"./dependencies\"\n", - "os.makedirs(dependencies_dir, exist_ok=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, create the file in the dependencies directory." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "attributes": { - "classes": [ - "Python" - ], - "id": "" - }, - "name": "conda.yaml" - }, - "outputs": [], - "source": [ - "%%writefile {dependencies_dir}/conda.yaml\n", - "name: model-env\n", - "channels:\n", - " - conda-forge\n", - "dependencies:\n", - " - python=3.8\n", - " - numpy=1.21.2\n", - " - pip=21.2.4\n", - " - scikit-learn=0.24.2\n", - " - scipy=1.7.1\n", - " - pandas>=1.1,<1.2\n", - " - pip:\n", - " - inference-schema[numpy-support]==1.3.0\n", - " - xlrd==2.0.1\n", - " - mlflow== 1.26.1\n", - " - azureml-mlflow==1.42.0" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The specification contains some usual packages, that you use in your pipeline (numpy, pip), together with some Azure Machine Learning specific packages (azureml-mlflow).\n", - "\n", - "The Azure Machine Learning packages aren't mandatory to run Azure Machine Learning jobs. However, adding these packages let you interact with Azure Machine Learning for logging metrics and registering models, all inside the Azure Machine Learning job. 
You use them in the training script later in this tutorial.\n", - "\n", - "Use the *yaml* file to create and register this custom environment in your workspace:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "attributes": { - "classes": [ - "Python" - ], - "id": "" - }, - "name": "custom_env_name" - }, - "outputs": [], - "source": [ - "from azure.ai.ml.entities import Environment\n", - "\n", - "custom_env_name = \"aml-scikit-learn\"\n", - "\n", - "pipeline_job_env = Environment(\n", - " name=custom_env_name,\n", - " description=\"Custom environment for Credit Card Defaults pipeline\",\n", - " tags={\"scikit-learn\": \"0.24.2\"},\n", - " conda_file=os.path.join(dependencies_dir, \"conda.yaml\"),\n", - " image=\"mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest\",\n", - " version=\"0.1.0\",\n", - ")\n", - "pipeline_job_env = ml_client.environments.create_or_update(pipeline_job_env)\n", - "\n", - "print(\n", - " f\"Environment with name {pipeline_job_env.name} is registered to workspace, the environment version is {pipeline_job_env.version}\"\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Build the training pipeline\n", - "\n", - "Now that you have all assets required to run your pipeline, it's time to build the pipeline itself.\n", - "\n", - "Azure Machine Learning pipelines are reusable ML workflows that usually consist of several components. The typical life of a component is:\n", - "\n", - "- Write the yaml specification of the component, or create it programmatically using `ComponentMethod`.\n", - "- Optionally, register the component with a name and version in your workspace, to make it reusable and shareable.\n", - "- Load that component from the pipeline code.\n", - "- Implement the pipeline using the component's inputs, outputs and parameters.\n", - "- Submit the pipeline.\n", - "\n", - "There are two ways to create a component, programmatic and yaml definition. The next two sections walk you through creating a component both ways. You can either create the two components trying both options or pick your preferred method.\n", - "\n", - "> [!NOTE]\n", - "> In this tutorial for simplicity we are using the same compute for all components. However, you can set different computes for each component, for example by adding a line like `train_step.compute = \"cpu-cluster\"`. To view an example of building a pipeline with different computes for each component, see the [Basic pipeline job section in the cifar-10 pipeline tutorial](https://github.com/Azure/azureml-examples/blob/main/sdk/python/jobs/pipelines/2b_train_cifar_10_with_pytorch/train_cifar_10_with_pytorch.ipynb).\n", - "\n", - "### Create component 1: data prep (using programmatic definition)\n", - "\n", - "Let's start by creating the first component. This component handles the preprocessing of the data. 
The preprocessing task is performed in the *data_prep.py* Python file.\n", - "\n", - "First create a source folder for the data_prep component:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "attributes": { - "classes": [ - "Python" - ], - "id": "" - }, - "name": "data_prep_src_dir" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "data_prep_src_dir = \"./components/data_prep\"\n", - "os.makedirs(data_prep_src_dir, exist_ok=True)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This script performs the simple task of splitting the data into train and test datasets. Azure Machine Learning mounts datasets as folders to the computes, therefore, we created an auxiliary `select_first_file` function to access the data file inside the mounted input folder. \n", - "\n", - "[MLFlow](https://learn.microsoft.com/articles/machine-learning/concept-mlflow) is used to log the parameters and metrics during our pipeline run." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "attributes": { - "classes": [ - "Python" - ], - "id": "" - }, - "name": "def-main" - }, - "outputs": [], - "source": [ - "%%writefile {data_prep_src_dir}/data_prep.py\n", - "import os\n", - "import argparse\n", - "import pandas as pd\n", - "from sklearn.model_selection import train_test_split\n", - "import logging\n", - "import mlflow\n", - "\n", - "\n", - "def main():\n", - " \"\"\"Main function of the script.\"\"\"\n", - "\n", - " # input and output arguments\n", - " parser = argparse.ArgumentParser()\n", - " parser.add_argument(\"--data\", type=str, help=\"path to input data\")\n", - " parser.add_argument(\"--test_train_ratio\", type=float, required=False, default=0.25)\n", - " parser.add_argument(\"--train_data\", type=str, help=\"path to train data\")\n", - " parser.add_argument(\"--test_data\", type=str, help=\"path to test data\")\n", - " args = parser.parse_args()\n", - "\n", - " # Start Logging\n", - " mlflow.start_run()\n", - "\n", - " print(\" \".join(f\"{k}={v}\" for k, v in vars(args).items()))\n", - "\n", - " print(\"input data:\", args.data)\n", - "\n", - " credit_df = pd.read_excel(args.data, header=1, index_col=0)\n", - "\n", - " mlflow.log_metric(\"num_samples\", credit_df.shape[0])\n", - " mlflow.log_metric(\"num_features\", credit_df.shape[1] - 1)\n", - "\n", - " credit_train_df, credit_test_df = train_test_split(\n", - " credit_df,\n", - " test_size=args.test_train_ratio,\n", - " )\n", - "\n", - " # output paths are mounted as folder, therefore, we are adding a filename to the path\n", - " credit_train_df.to_csv(os.path.join(args.train_data, \"data.csv\"), index=False)\n", - "\n", - " credit_test_df.to_csv(os.path.join(args.test_data, \"data.csv\"), index=False)\n", - "\n", - " # Stop Logging\n", - " mlflow.end_run()\n", - "\n", - "\n", - "if __name__ == \"__main__\":\n", - " main()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now that you have a script that can perform the desired task, create an Azure Machine Learning Component from it.\n", - "\n", - "Use the general purpose `CommandComponent` that can run command line actions. This command line action can directly call system commands or run a script. The inputs/outputs are specified on the command line via the `${{ ... 
}}` notation.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "name": "data_prep_component" - }, - "outputs": [], - "source": [ - "from azure.ai.ml import command\n", - "from azure.ai.ml import Input, Output\n", - "\n", - "data_prep_component = command(\n", - " name=\"data_prep_credit_defaults\",\n", - " display_name=\"Data preparation for training\",\n", - " description=\"reads a .xl input, split the input to train and test\",\n", - " inputs={\n", - " \"data\": Input(type=\"uri_folder\"),\n", - " \"test_train_ratio\": Input(type=\"number\"),\n", - " },\n", - " outputs=dict(\n", - " train_data=Output(type=\"uri_folder\", mode=\"rw_mount\"),\n", - " test_data=Output(type=\"uri_folder\", mode=\"rw_mount\"),\n", - " ),\n", - " # The source folder of the component\n", - " code=data_prep_src_dir,\n", - " command=\"\"\"python data_prep.py \\\n", - " --data ${{inputs.data}} --test_train_ratio ${{inputs.test_train_ratio}} \\\n", - " --train_data ${{outputs.train_data}} --test_data ${{outputs.test_data}} \\\n", - " \"\"\",\n", - " environment=f\"{pipeline_job_env.name}:{pipeline_job_env.version}\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "Optionally, register the component in the workspace for future reuse.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Now we register the component to the workspace\n", - "data_prep_component = ml_client.create_or_update(data_prep_component.component)\n", - "\n", - "# Create (register) the component in your workspace\n", - "print(\n", - " f\"Component {data_prep_component.name} with Version {data_prep_component.version} is registered\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create component 2: training (using yaml definition)\n", - "\n", - "The second component that you create consumes the training and test data, train a tree based model and return the output model. Use Azure Machine Learning logging capabilities to record and visualize the learning progress.\n", - "\n", - "You used the `CommandComponent` class to create your first component. This time you use the yaml definition to define the second component. Each method has its own advantages. A yaml definition can actually be checked-in along the code, and would provide a readable history tracking. 
The programmatic method using `CommandComponent` can be easier with built-in class documentation and code completion.\n", - "\n", - "Create the directory for this component:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "attributes": { - "classes": [ - "Python" - ], - "id": "" - }, - "name": "train_src_dir" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "train_src_dir = \"./components/train\"\n", - "os.makedirs(train_src_dir, exist_ok=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create the training script in the directory:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "attributes": { - "classes": [ - "Python" - ], - "id": "" - }, - "name": "train.py" - }, - "outputs": [], - "source": [ - "%%writefile {train_src_dir}/train.py\n", - "import argparse\n", - "from sklearn.ensemble import GradientBoostingClassifier\n", - "from sklearn.metrics import classification_report\n", - "import os\n", - "import pandas as pd\n", - "import mlflow\n", - "\n", - "\n", - "def select_first_file(path):\n", - " \"\"\"Selects first file in folder, use under assumption there is only one file in folder\n", - " Args:\n", - " path (str): path to directory or file to choose\n", - " Returns:\n", - " str: full path of selected file\n", - " \"\"\"\n", - " files = os.listdir(path)\n", - " return os.path.join(path, files[0])\n", - "\n", - "\n", - "# Start Logging\n", - "mlflow.start_run()\n", - "\n", - "# enable autologging\n", - "mlflow.sklearn.autolog()\n", - "\n", - "os.makedirs(\"./outputs\", exist_ok=True)\n", - "\n", - "\n", - "def main():\n", - " \"\"\"Main function of the script.\"\"\"\n", - "\n", - " # input and output arguments\n", - " parser = argparse.ArgumentParser()\n", - " parser.add_argument(\"--train_data\", type=str, help=\"path to train data\")\n", - " parser.add_argument(\"--test_data\", type=str, help=\"path to test data\")\n", - " parser.add_argument(\"--n_estimators\", required=False, default=100, type=int)\n", - " parser.add_argument(\"--learning_rate\", required=False, default=0.1, type=float)\n", - " parser.add_argument(\"--registered_model_name\", type=str, help=\"model name\")\n", - " parser.add_argument(\"--model\", type=str, help=\"path to model file\")\n", - " args = parser.parse_args()\n", - "\n", - " # paths are mounted as folder, therefore, we are selecting the file from folder\n", - " train_df = pd.read_csv(select_first_file(args.train_data))\n", - "\n", - " # Extracting the label column\n", - " y_train = train_df.pop(\"default payment next month\")\n", - "\n", - " # convert the dataframe values to array\n", - " X_train = train_df.values\n", - "\n", - " # paths are mounted as folder, therefore, we are selecting the file from folder\n", - " test_df = pd.read_csv(select_first_file(args.test_data))\n", - "\n", - " # Extracting the label column\n", - " y_test = test_df.pop(\"default payment next month\")\n", - "\n", - " # convert the dataframe values to array\n", - " X_test = test_df.values\n", - "\n", - " print(f\"Training with data of shape {X_train.shape}\")\n", - "\n", - " clf = GradientBoostingClassifier(\n", - " n_estimators=args.n_estimators, learning_rate=args.learning_rate\n", - " )\n", - " clf.fit(X_train, y_train)\n", - "\n", - " y_pred = clf.predict(X_test)\n", - "\n", - " print(classification_report(y_test, y_pred))\n", - "\n", - " # Registering the model to the workspace\n", - " print(\"Registering the model via MLFlow\")\n", - " 
mlflow.sklearn.log_model(\n", - " sk_model=clf,\n", - " registered_model_name=args.registered_model_name,\n", - " artifact_path=args.registered_model_name,\n", - " )\n", - "\n", - " # Saving the model to a file\n", - " mlflow.sklearn.save_model(\n", - " sk_model=clf,\n", - " path=os.path.join(args.model, \"trained_model\"),\n", - " )\n", - "\n", - " # Stop Logging\n", - " mlflow.end_run()\n", - "\n", - "\n", - "if __name__ == \"__main__\":\n", - " main()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As you can see in this training script, once the model is trained, the model file is saved and registered to the workspace. Now you can use the registered model in inferencing endpoints.\n", - "\n", - "For the environment of this step, you use one of the built-in (curated) Azure Machine Learning environments. The tag `azureml`, tells the system to use look for the name in curated environments.\n", - "First, create the *yaml* file describing the component:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "attributes": { - "classes": [ - "Python" - ], - "id": "" - }, - "name": "train.yml" - }, - "outputs": [], - "source": [ - "%%writefile {train_src_dir}/train.yml\n", - "# \n", - "name: train_credit_defaults_model\n", - "display_name: Train Credit Defaults Model\n", - "# version: 1 # Not specifying a version will automatically update the version\n", - "type: command\n", - "inputs:\n", - " train_data: \n", - " type: uri_folder\n", - " test_data: \n", - " type: uri_folder\n", - " learning_rate:\n", - " type: number \n", - " registered_model_name:\n", - " type: string\n", - "outputs:\n", - " model:\n", - " type: uri_folder\n", - "code: .\n", - "environment:\n", - " # for this step, we'll use an AzureML curate environment\n", - " azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu:1\n", - "command: >-\n", - " python train.py \n", - " --train_data ${{inputs.train_data}} \n", - " --test_data ${{inputs.test_data}} \n", - " --learning_rate ${{inputs.learning_rate}}\n", - " --registered_model_name ${{inputs.registered_model_name}} \n", - " --model ${{outputs.model}}\n", - "# \n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now create and register the component. Registering it allows you to re-use it in other pipelines. Also, anyone else with access to your workspace can use the registered component." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "attributes": { - "classes": [ - "Python" - ], - "id": "" - }, - "name": "train_component" - }, - "outputs": [], - "source": [ - "# importing the Component Package\n", - "from azure.ai.ml import load_component\n", - "\n", - "# Loading the component from the yml file\n", - "train_component = load_component(source=os.path.join(train_src_dir, \"train.yml\"))\n", - "\n", - "# Now we register the component to the workspace\n", - "train_component = ml_client.create_or_update(train_component)\n", - "\n", - "# Create (register) the component in your workspace\n", - "print(\n", - " f\"Component {train_component.name} with Version {train_component.version} is registered\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create the pipeline from components\n", - "\n", - "Now that both your components are defined and registered, you can start implementing the pipeline.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here, you use *input data*, *split ratio* and *registered model name* as input variables. Then call the components and connect them via their inputs/outputs identifiers. The outputs of each step can be accessed via the `.outputs` property.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "attributes": { - "classes": [ - "Python" - ], - "id": "" - } - }, - "source": [ - "The Python functions returned by `load_component()` work as any regular Python function that we use within a pipeline to call each step.\n", - "\n", - "To code the pipeline, you use a specific `@dsl.pipeline` decorator that identifies the Azure Machine Learning pipelines. In the decorator, we can specify the pipeline description and default resources like compute and storage. Like a Python function, pipelines can have inputs. You can then create multiple instances of a single pipeline with different inputs.\n", - "\n", - "Here, we used *input data*, *split ratio* and *registered model name* as input variables. We then call the components and connect them via their inputs/outputs identifiers. The outputs of each step can be accessed via the `.outputs` property." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "attributes": { - "classes": [ - "Python" - ], - "id": "" - }, - "name": "pipeline" - }, - "outputs": [], - "source": [ - "# the dsl decorator tells the sdk that we are defining an Azure Machine Learning pipeline\n", - "from azure.ai.ml import dsl, Input, Output\n", - "\n", - "\n", - "@dsl.pipeline(\n", - " compute=cpu_compute_target,\n", - " description=\"E2E data_perp-train pipeline\",\n", - ")\n", - "def credit_defaults_pipeline(\n", - " pipeline_job_data_input,\n", - " pipeline_job_test_train_ratio,\n", - " pipeline_job_learning_rate,\n", - " pipeline_job_registered_model_name,\n", - "):\n", - " # using data_prep_function like a python call with its own inputs\n", - " data_prep_job = data_prep_component(\n", - " data=pipeline_job_data_input,\n", - " test_train_ratio=pipeline_job_test_train_ratio,\n", - " )\n", - "\n", - " # using train_func like a python call with its own inputs\n", - " train_job = train_component(\n", - " train_data=data_prep_job.outputs.train_data, # note: using outputs from previous step\n", - " test_data=data_prep_job.outputs.test_data, # note: using outputs from previous step\n", - " learning_rate=pipeline_job_learning_rate, # note: using a pipeline input as parameter\n", - " registered_model_name=pipeline_job_registered_model_name,\n", - " )\n", - "\n", - " # a pipeline returns a dictionary of outputs\n", - " # keys will code for the pipeline output identifier\n", - " return {\n", - " \"pipeline_job_train_data\": data_prep_job.outputs.train_data,\n", - " \"pipeline_job_test_data\": data_prep_job.outputs.test_data,\n", - " }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now use your pipeline definition to instantiate a pipeline with your dataset, split rate of choice and the name you picked for your model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "attributes": { - "classes": [ - "Python" - ], - "id": "" - }, - "name": "registered_model_name" - }, - "outputs": [], - "source": [ - "registered_model_name = \"credit_defaults_model\"\n", - "\n", - "# Let's instantiate the pipeline with the parameters of our choice\n", - "pipeline = credit_defaults_pipeline(\n", - " pipeline_job_data_input=Input(type=\"uri_file\", path=credit_data.path),\n", - " pipeline_job_test_train_ratio=0.25,\n", - " pipeline_job_learning_rate=0.05,\n", - " pipeline_job_registered_model_name=registered_model_name,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Submit the job \n", - "\n", - "It's now time to submit the job to run in Azure Machine Learning. This time you use `create_or_update` on `ml_client.jobs`.\n", - "\n", - "Here you also pass an experiment name. An experiment is a container for all the iterations one does on a certain project. All the jobs submitted under the same experiment name would be listed next to each other in Azure Machine Learning studio.\n", - "\n", - "Once completed, the pipeline registers a model in your workspace as a result of training." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "name": "returned_job" - }, - "outputs": [], - "source": [ - "# submit the pipeline job\n", - "pipeline_job = ml_client.jobs.create_or_update(\n", - " pipeline,\n", - " # Project's name\n", - " experiment_name=\"e2e_registered_components\",\n", - ")\n", - "ml_client.jobs.stream(pipeline_job.name)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can track the progress of your pipeline, by using the link generated in the previous cell. When you first select this link, you may see that the pipeline is still running. Once it's complete, you can examine each component's results.\n", - "\n", - "Double-click the **Train Credit Defaults Model** component. \n", - "\n", - "There are two important results you'll want to see about training:\n", - "\n", - "* View your logs:\n", - " 1. Select the **Outputs+logs** tab.\n", - " 1. Open the folders to `user_logs` > `std_log.txt`\n", - " This section shows the script run stdout.\n", - " ![Screenshot of std_log.txt.](media/user-logs.jpg)\n", - "\n", - "* View your metrics: Select the **Metrics** tab. This section shows different logged metrics. In this example. mlflow `autologging`, has automatically logged the training metrics.\n", - " \n", - " ![Screenshot shows logged metrics.txt.](./media/metrics.jpg)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Deploy the model as an online endpoint\n", - "To learn how to deploy your model to an online endpoint, see [Deploy a model as an online endpoint tutorial](https://learn.microsoft.com/en-us/azure/machine-learning/tutorial-deploy-model).\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Next Steps\n", - "\n", - "Learn how to [Schedule machine learning pipeline jobs](https://learn.microsoft.com/azure/machine-learning/how-to-schedule-pipeline-job)" - ] - } - ], - "metadata": { - "description": { - "description": "Create production ML pipelines with Python SDK v2 in a Jupyter notebook" - }, - "kernel_info": { - "name": "python310-sdkv2" - }, - "kernelspec": { - "display_name": "Python 3.10 - SDK v2", - "language": "python", - "name": "python310-sdkv2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.6" - }, - "nteract": { - "version": "nteract-front-end@1.0.0" - } + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Tutorial: Create production machine learning pipelines\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The core of a machine learning pipeline is to split a complete machine learning task into a multistep workflow. Each step is a manageable component that can be developed, optimized, configured, and automated individually. Steps are connected through well-defined interfaces. The Azure Machine Learning pipeline service automatically orchestrates all the dependencies between pipeline steps. The benefits of using a pipeline are standardized the MLOps practice, scalable team collaboration, training efficiency and cost reduction. 
To learn more about the benefits of pipelines, see [What are Azure Machine Learning pipelines](https://learn.microsoft.comazure/machine-learning/concept-ml-pipelines).\n", + "\n", + "In this tutorial, you use Azure Machine Learning to create a production ready machine learning project, using Azure Machine Learning Python SDK v2.\n", + "\n", + "This means you will be able to leverage the AzureML Python SDK to:\n", + "\n", + "- Get a handle to your Azure Machine Learning workspace\n", + "- Create Azure Machine Learning data assets\n", + "- Create reusable Azure Machine Learning components\n", + "- Create, validate and run Azure Machine Learning pipelines\n", + "\n", + "During this tutorial, you create an Azure Machine Learning pipeline to train a model for credit default prediction. The pipeline handles two steps: \n", + "\n", + "1. Data preparation\n", + "1. Training and registering the trained model\n", + "\n", + "The next image shows a simple pipeline as you'll see it in the Azure studio once submitted.\n", + "\n", + "![Screenshot that shows the AML Pipeline](./media/pipeline-overview.jpg \"Overview of the pipeline\")\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "\n", + "* If you opened this notebook from Azure Machine Learning studio, you need a compute instance to run the code. If you don't have a compute instance, select **Create compute** on the toolbar to first create one. You can use all the default settings. \n", + "\n", + " ![Create compute](./media/create-compute.png)\n", + "\n", + "* If you're seeing this notebook elsewhere, complete [Create resources you need to get started](https://docs.microsoft.com/azure/machine-learning/quickstart-create-resources) to create an Azure Machine Learning workspace and a compute instance.\n", + "\n", + "## Set your kernel\n", + "\n", + "* If your compute instance is stopped, start it now. \n", + " \n", + " ![Start compute](./media/start-compute.png)\n", + "\n", + "* Once your compute instance is running, make sure the that the kernel, found on the top right, is `Python 3.10 - SDK v2`. If not, use the dropdown to select this kernel.\n", + "\n", + " ![Set the kernel](./media/set-kernel.png)\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set up the pipeline resources\n", + "\n", + "The Azure Machine Learning framework can be used from CLI, Python SDK, or studio interface. In this example, you use the Azure Machine Learning Python SDK v2 to create a pipeline. \n", + "\n", + "Before creating the pipeline, you need the following resources:\n", + "\n", + "* The data asset for training\n", + "* The software environment to run the pipeline\n", + "* A compute resource to where the job runs\n", + "\n", + "## Create handle to workspace\n", + "\n", + "Before we dive in the code, you need a way to reference your workspace. You'll create `ml_client` for a handle to the workspace. You'll then use `ml_client` to manage resources and jobs.\n", + "\n", + "In the next cell, enter your Subscription ID, Resource Group name and Workspace name. To find these values:\n", + "\n", + "1. In the upper right Azure Machine Learning studio toolbar, select your workspace name.\n", + "1. Copy the value for workspace, resource group and subscription ID into the code.\n", + "1. 
You'll need to copy one value, close the area and paste, then come back for the next one.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "attributes": { + "classes": [ + "Python" + ], + "id": "" }, - "nbformat": 4, - "nbformat_minor": 1 + "name": "ml_client" + }, + "outputs": [], + "source": [ + "from azure.ai.ml import MLClient\n", + "from azure.identity import DefaultAzureCredential\n", + "\n", + "# authenticate\n", + "credential = DefaultAzureCredential()\n", + "# # Get a handle to the workspace\n", + "ml_client = MLClient(\n", + " credential=credential,\n", + " subscription_id=\"\",\n", + " resource_group_name=\"\",\n", + " workspace_name=\"\",\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> [!NOTE]\n", + "> Creating MLClient will not connect to the workspace. The client initialization is lazy, it will wait for the first time it needs to make a call (this will happen when creating the `credit_data` data asset, two code cells from here).\n", + "\n", + "## Register data from an external url\n", + "\n", + "If you have been following along with the other tutorials in this series and already registered the data, you can fetch the same dataset from the workspace using `credit_dataset = ml_client.data.get(\"\", version='')`. Then you may skip this section. To learn about data more in depth or if you would rather complete the data tutorial first, see [Upload, access and explore your data in Azure Machine Learning](https://learn.microsoft.com/azure/machine-learning/tutorial-explore-data).\n", + "\n", + "* Azure Machine Learning uses a `Data` object to register a reusable definition of data, and consume data within a pipeline. In the next section, you consume some data from web url as one example. Data from other sources can be created as well. `Data` assets from other sources can be created as well.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "credit_data" + }, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import Data\n", + "from azure.ai.ml.constants import AssetTypes\n", + "\n", + "web_path = \"https://archive.ics.uci.edu/ml/machine-learning-databases/00350/default%20of%20credit%20card%20clients.xls\"\n", + "\n", + "credit_data = Data(\n", + " name=\"creditcard_defaults\",\n", + " path=web_path,\n", + " type=AssetTypes.URI_FILE,\n", + " description=\"Dataset for credit card defaults\",\n", + " tags={\"source_type\": \"web\", \"source\": \"UCI ML Repo\"},\n", + " version=\"1.0.0\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This code just created a `Data` asset, ready to be consumed as an input by the pipeline that you'll define in the next sections. In addition, you can register the data to your workspace so it becomes reusable across pipelines.\n", + "\n", + "Since this is the first time that you're making a call to the workspace, you may be asked to authenticate. 
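If `DefaultAzureCredential` can't obtain a token in your environment (for example, no Azure CLI login or managed identity is available), a common fallback is an interactive browser sign-in. The following is only an illustrative sketch, not one of the tutorial's cells; it uses the `azure-identity` package already imported above:

```python
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential

try:
    credential = DefaultAzureCredential()
    # Check up front that a token can be acquired, so any failure surfaces here
    # rather than on the first workspace call.
    credential.get_token("https://management.azure.com/.default")
except Exception:
    # Fall back to an interactive browser sign-in when the default chain fails.
    credential = InteractiveBrowserCredential()
```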
Once the authentication is complete, you then see the dataset registration completion message.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "attributes": { + "classes": [ + "Python" + ], + "id": "" + }, + "name": "update-credit_data" + }, + "outputs": [], + "source": [ + "credit_data = ml_client.data.create_or_update(credit_data)\n", + "print(\n", + " f\"Dataset with name {credit_data.name} was registered to workspace, the dataset version is {credit_data.version}\"\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the future, you can fetch the same dataset from the workspace using `credit_dataset = ml_client.data.get(\"\", version='')`.\n", + "\n", + "## Create a compute resource to run your pipeline\n", + "\n", + "You can **skip this step** if you want to use **serverless compute (preview)** to run the training job. Through serverless compute, Azure Machine Learning takes care of creating, scaling, deleting, patching and managing compute, along with providing managed network isolation, reducing the burden on you. \n", + "\n", + "Each step of an Azure Machine Learning pipeline can use a different compute resource for running the specific job of that step. It can be single or multi-node machines with Linux or Windows OS, or a specific compute fabric like Spark.\n", + "\n", + "In this section, you provision a Linux [compute cluster](https://docs.microsoft.com/azure/machine-learning/how-to-create-attach-compute-cluster?tabs=python). See the [full list on VM sizes and prices](https://azure.microsoft.com/en-ca/pricing/details/machine-learning/) .\n", + "\n", + "For this tutorial, you only need a basic cluster so use a Standard_DS3_v2 model with 2 vCPU cores, 7-GB RAM and create an Azure Machine Learning Compute.\n", + "> [!TIP]\n", + "> If you already have a compute cluster, replace \"cpu-cluster\" in the next code block with the name of your cluster. This will keep you from creating another one.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "cpu_cluster" + }, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import AmlCompute\n", + "\n", + "# Name assigned to the compute cluster\n", + "cpu_compute_target = \"cpu-cluster\"\n", + "\n", + "try:\n", + " # let's see if the compute target already exists\n", + " cpu_cluster = ml_client.compute.get(cpu_compute_target)\n", + " print(\n", + " f\"You already have a cluster named {cpu_compute_target}, we'll reuse it as is.\"\n", + " )\n", + "\n", + "except Exception:\n", + " print(\"Creating a new cpu compute target...\")\n", + "\n", + " # Let's create the Azure Machine Learning compute object with the intended parameters\n", + " # if you run into an out of quota error, change the size to a comparable VM that is available.\n", + " # Learn more on https://azure.microsoft.com/en-us/pricing/details/machine-learning/.\n", + " cpu_cluster = AmlCompute(\n", + " name=cpu_compute_target,\n", + " # Azure Machine Learning Compute is the on-demand VM service\n", + " type=\"amlcompute\",\n", + " # VM Family\n", + " size=\"STANDARD_DS3_V2\",\n", + " # Minimum running nodes when there is no job running\n", + " min_instances=0,\n", + " # Nodes in cluster\n", + " max_instances=4,\n", + " # How many seconds will the node running after the job termination\n", + " idle_time_before_scale_down=180,\n", + " # Dedicated or LowPriority. 
The latter is cheaper but there is a chance of job termination\n", + " tier=\"Dedicated\",\n", + " )\n", + " print(\n", + " f\"AMLCompute with name {cpu_cluster.name} will be created, with compute size {cpu_cluster.size}\"\n", + " )\n", + " # Now, we pass the object to MLClient's create_or_update method\n", + " cpu_cluster = ml_client.compute.begin_create_or_update(cpu_cluster)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a job environment for pipeline steps\n", + "\n", + "So far, you've created a development environment on the compute instance, your development machine. You also need an environment to use for each step of the pipeline. Each step can have its own environment, or you can use some common environments for multiple steps.\n", + "\n", + "In this example, you create a conda environment for your jobs, using a conda yaml file.\n", + "First, create a directory to store the file in." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "dependencies_dir" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "dependencies_dir = \"./dependencies\"\n", + "os.makedirs(dependencies_dir, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, create the file in the dependencies directory." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "attributes": { + "classes": [ + "Python" + ], + "id": "" + }, + "name": "conda.yaml" + }, + "outputs": [], + "source": [ + "%%writefile {dependencies_dir}/conda.yaml\n", + "name: model-env\n", + "channels:\n", + " - conda-forge\n", + "dependencies:\n", + " - python=3.8\n", + " - numpy=1.21.2\n", + " - pip=21.2.4\n", + " - scikit-learn=0.24.2\n", + " - scipy=1.7.1\n", + " - pandas>=1.1,<1.2\n", + " - pip:\n", + " - inference-schema[numpy-support]==1.3.0\n", + " - xlrd==2.0.1\n", + " - mlflow== 1.26.1\n", + " - azureml-mlflow==1.42.0" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The specification contains some usual packages, that you use in your pipeline (numpy, pip), together with some Azure Machine Learning specific packages (azureml-mlflow).\n", + "\n", + "The Azure Machine Learning packages aren't mandatory to run Azure Machine Learning jobs. However, adding these packages let you interact with Azure Machine Learning for logging metrics and registering models, all inside the Azure Machine Learning job. 
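For orientation only, the kind of MLflow calls these packages enable inside a job is sketched below; the real calls appear in the training scripts later in this tutorial, so treat this as an illustration rather than a cell to run:

```python
import mlflow

mlflow.start_run()                      # open a run so metrics and models are captured
mlflow.log_metric("num_samples", 1000)  # example metric value, purely illustrative
mlflow.end_run()                        # close the run and flush the logged data
```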
You use them in the training script later in this tutorial.\n", + "\n", + "Use the *yaml* file to create and register this custom environment in your workspace:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "attributes": { + "classes": [ + "Python" + ], + "id": "" + }, + "name": "custom_env_name" + }, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import Environment\n", + "\n", + "custom_env_name = \"aml-scikit-learn\"\n", + "\n", + "pipeline_job_env = Environment(\n", + " name=custom_env_name,\n", + " description=\"Custom environment for Credit Card Defaults pipeline\",\n", + " tags={\"scikit-learn\": \"0.24.2\"},\n", + " conda_file=os.path.join(dependencies_dir, \"conda.yaml\"),\n", + " image=\"mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest\",\n", + " version=\"0.1.0\",\n", + ")\n", + "pipeline_job_env = ml_client.environments.create_or_update(pipeline_job_env)\n", + "\n", + "print(\n", + " f\"Environment with name {pipeline_job_env.name} is registered to workspace, the environment version is {pipeline_job_env.version}\"\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Build the training pipeline\n", + "\n", + "Now that you have all assets required to run your pipeline, it's time to build the pipeline itself.\n", + "\n", + "Azure Machine Learning pipelines are reusable ML workflows that usually consist of several components. The typical life of a component is:\n", + "\n", + "- Write the yaml specification of the component, or create it programmatically using `ComponentMethod`.\n", + "- Optionally, register the component with a name and version in your workspace, to make it reusable and shareable.\n", + "- Load that component from the pipeline code.\n", + "- Implement the pipeline using the component's inputs, outputs and parameters.\n", + "- Submit the pipeline.\n", + "\n", + "There are two ways to create a component, programmatic and yaml definition. The next two sections walk you through creating a component both ways. You can either create the two components trying both options or pick your preferred method.\n", + "\n", + "> [!NOTE]\n", + "> In this tutorial for simplicity we are using the same compute for all components. However, you can set different computes for each component, for example by adding a line like `train_step.compute = \"cpu-cluster\"`. To view an example of building a pipeline with different computes for each component, see the [Basic pipeline job section in the cifar-10 pipeline tutorial](https://github.com/Azure/azureml-examples/blob/main/sdk/python/jobs/pipelines/2b_train_cifar_10_with_pytorch/train_cifar_10_with_pytorch.ipynb).\n", + "\n", + "### Create component 1: data prep (using programmatic definition)\n", + "\n", + "Let's start by creating the first component. This component handles the preprocessing of the data. 
The preprocessing task is performed in the *data_prep.py* Python file.\n", + "\n", + "First create a source folder for the data_prep component:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "attributes": { + "classes": [ + "Python" + ], + "id": "" + }, + "name": "data_prep_src_dir" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "data_prep_src_dir = \"./components/data_prep\"\n", + "os.makedirs(data_prep_src_dir, exist_ok=True)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This script performs the simple task of splitting the data into train and test datasets. Azure Machine Learning mounts datasets as folders to the computes, therefore, we created an auxiliary `select_first_file` function to access the data file inside the mounted input folder. \n", + "\n", + "[MLFlow](https://learn.microsoft.com/articles/machine-learning/concept-mlflow) is used to log the parameters and metrics during our pipeline run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "attributes": { + "classes": [ + "Python" + ], + "id": "" + }, + "name": "def-main" + }, + "outputs": [], + "source": [ + "%%writefile {data_prep_src_dir}/data_prep.py\n", + "import os\n", + "import argparse\n", + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "import logging\n", + "import mlflow\n", + "\n", + "\n", + "def main():\n", + " \"\"\"Main function of the script.\"\"\"\n", + "\n", + " # input and output arguments\n", + " parser = argparse.ArgumentParser()\n", + " parser.add_argument(\"--data\", type=str, help=\"path to input data\")\n", + " parser.add_argument(\"--test_train_ratio\", type=float, required=False, default=0.25)\n", + " parser.add_argument(\"--train_data\", type=str, help=\"path to train data\")\n", + " parser.add_argument(\"--test_data\", type=str, help=\"path to test data\")\n", + " args = parser.parse_args()\n", + "\n", + " # Start Logging\n", + " mlflow.start_run()\n", + "\n", + " print(\" \".join(f\"{k}={v}\" for k, v in vars(args).items()))\n", + "\n", + " print(\"input data:\", args.data)\n", + "\n", + " credit_df = pd.read_excel(args.data, header=1, index_col=0)\n", + "\n", + " mlflow.log_metric(\"num_samples\", credit_df.shape[0])\n", + " mlflow.log_metric(\"num_features\", credit_df.shape[1] - 1)\n", + "\n", + " credit_train_df, credit_test_df = train_test_split(\n", + " credit_df,\n", + " test_size=args.test_train_ratio,\n", + " )\n", + "\n", + " # output paths are mounted as folder, therefore, we are adding a filename to the path\n", + " credit_train_df.to_csv(os.path.join(args.train_data, \"data.csv\"), index=False)\n", + "\n", + " credit_test_df.to_csv(os.path.join(args.test_data, \"data.csv\"), index=False)\n", + "\n", + " # Stop Logging\n", + " mlflow.end_run()\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " main()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that you have a script that can perform the desired task, create an Azure Machine Learning Component from it.\n", + "\n", + "Use the general purpose `CommandComponent` that can run command line actions. This command line action can directly call system commands or run a script. The inputs/outputs are specified on the command line via the `${{ ... 
}}` notation.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "data_prep_component" + }, + "outputs": [], + "source": [ + "from azure.ai.ml import command\n", + "from azure.ai.ml import Input, Output\n", + "\n", + "data_prep_component = command(\n", + " name=\"data_prep_credit_defaults\",\n", + " display_name=\"Data preparation for training\",\n", + " description=\"reads a .xl input, split the input to train and test\",\n", + " inputs={\n", + " \"data\": Input(type=\"uri_folder\"),\n", + " \"test_train_ratio\": Input(type=\"number\"),\n", + " },\n", + " outputs=dict(\n", + " train_data=Output(type=\"uri_folder\", mode=\"rw_mount\"),\n", + " test_data=Output(type=\"uri_folder\", mode=\"rw_mount\"),\n", + " ),\n", + " # The source folder of the component\n", + " code=data_prep_src_dir,\n", + " command=\"\"\"python data_prep.py \\\n", + " --data ${{inputs.data}} --test_train_ratio ${{inputs.test_train_ratio}} \\\n", + " --train_data ${{outputs.train_data}} --test_data ${{outputs.test_data}} \\\n", + " \"\"\",\n", + " environment=f\"{pipeline_job_env.name}:{pipeline_job_env.version}\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "Optionally, register the component in the workspace for future reuse.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Now we register the component to the workspace\n", + "data_prep_component = ml_client.create_or_update(data_prep_component.component)\n", + "\n", + "# Create (register) the component in your workspace\n", + "print(\n", + " f\"Component {data_prep_component.name} with Version {data_prep_component.version} is registered\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create component 2: training (using yaml definition)\n", + "\n", + "The second component that you create consumes the training and test data, train a tree based model and return the output model. Use Azure Machine Learning logging capabilities to record and visualize the learning progress.\n", + "\n", + "You used the `CommandComponent` class to create your first component. This time you use the yaml definition to define the second component. Each method has its own advantages. A yaml definition can actually be checked-in along the code, and would provide a readable history tracking. 
The programmatic method using `CommandComponent` can be easier with built-in class documentation and code completion.\n", + "\n", + "Create the directory for this component:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "attributes": { + "classes": [ + "Python" + ], + "id": "" + }, + "name": "train_src_dir" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "train_src_dir = \"./components/train\"\n", + "os.makedirs(train_src_dir, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create the training script in the directory:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "attributes": { + "classes": [ + "Python" + ], + "id": "" + }, + "name": "train.py" + }, + "outputs": [], + "source": [ + "%%writefile {train_src_dir}/train.py\n", + "import argparse\n", + "from sklearn.ensemble import GradientBoostingClassifier\n", + "from sklearn.metrics import classification_report\n", + "import os\n", + "import pandas as pd\n", + "import mlflow\n", + "\n", + "\n", + "def select_first_file(path):\n", + " \"\"\"Selects first file in folder, use under assumption there is only one file in folder\n", + " Args:\n", + " path (str): path to directory or file to choose\n", + " Returns:\n", + " str: full path of selected file\n", + " \"\"\"\n", + " files = os.listdir(path)\n", + " return os.path.join(path, files[0])\n", + "\n", + "\n", + "# Start Logging\n", + "mlflow.start_run()\n", + "\n", + "# enable autologging\n", + "mlflow.sklearn.autolog()\n", + "\n", + "os.makedirs(\"./outputs\", exist_ok=True)\n", + "\n", + "\n", + "def main():\n", + " \"\"\"Main function of the script.\"\"\"\n", + "\n", + " # input and output arguments\n", + " parser = argparse.ArgumentParser()\n", + " parser.add_argument(\"--train_data\", type=str, help=\"path to train data\")\n", + " parser.add_argument(\"--test_data\", type=str, help=\"path to test data\")\n", + " parser.add_argument(\"--n_estimators\", required=False, default=100, type=int)\n", + " parser.add_argument(\"--learning_rate\", required=False, default=0.1, type=float)\n", + " parser.add_argument(\"--registered_model_name\", type=str, help=\"model name\")\n", + " parser.add_argument(\"--model\", type=str, help=\"path to model file\")\n", + " args = parser.parse_args()\n", + "\n", + " # paths are mounted as folder, therefore, we are selecting the file from folder\n", + " train_df = pd.read_csv(select_first_file(args.train_data))\n", + "\n", + " # Extracting the label column\n", + " y_train = train_df.pop(\"default payment next month\")\n", + "\n", + " # convert the dataframe values to array\n", + " X_train = train_df.values\n", + "\n", + " # paths are mounted as folder, therefore, we are selecting the file from folder\n", + " test_df = pd.read_csv(select_first_file(args.test_data))\n", + "\n", + " # Extracting the label column\n", + " y_test = test_df.pop(\"default payment next month\")\n", + "\n", + " # convert the dataframe values to array\n", + " X_test = test_df.values\n", + "\n", + " print(f\"Training with data of shape {X_train.shape}\")\n", + "\n", + " clf = GradientBoostingClassifier(\n", + " n_estimators=args.n_estimators, learning_rate=args.learning_rate\n", + " )\n", + " clf.fit(X_train, y_train)\n", + "\n", + " y_pred = clf.predict(X_test)\n", + "\n", + " print(classification_report(y_test, y_pred))\n", + "\n", + " # Registering the model to the workspace\n", + " print(\"Registering the model via MLFlow\")\n", + " 
mlflow.sklearn.log_model(\n", + " sk_model=clf,\n", + " registered_model_name=args.registered_model_name,\n", + " artifact_path=args.registered_model_name,\n", + " )\n", + "\n", + " # Saving the model to a file\n", + " mlflow.sklearn.save_model(\n", + " sk_model=clf,\n", + " path=os.path.join(args.model, \"trained_model\"),\n", + " )\n", + "\n", + " # Stop Logging\n", + " mlflow.end_run()\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " main()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As you can see in this training script, once the model is trained, the model file is saved and registered to the workspace. Now you can use the registered model in inferencing endpoints.\n", + "\n", + "For the environment of this step, you use one of the built-in (curated) Azure Machine Learning environments. The tag `azureml`, tells the system to use look for the name in curated environments.\n", + "First, create the *yaml* file describing the component:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "attributes": { + "classes": [ + "Python" + ], + "id": "" + }, + "name": "train.yml" + }, + "outputs": [], + "source": [ + "%%writefile {train_src_dir}/train.yml\n", + "# \n", + "name: train_credit_defaults_model\n", + "display_name: Train Credit Defaults Model\n", + "# version: 1 # Not specifying a version will automatically update the version\n", + "type: command\n", + "inputs:\n", + " train_data: \n", + " type: uri_folder\n", + " test_data: \n", + " type: uri_folder\n", + " learning_rate:\n", + " type: number \n", + " registered_model_name:\n", + " type: string\n", + "outputs:\n", + " model:\n", + " type: uri_folder\n", + "code: .\n", + "environment:\n", + " # for this step, we'll use an AzureML curate environment\n", + " azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu:1\n", + "command: >-\n", + " python train.py \n", + " --train_data ${{inputs.train_data}} \n", + " --test_data ${{inputs.test_data}} \n", + " --learning_rate ${{inputs.learning_rate}}\n", + " --registered_model_name ${{inputs.registered_model_name}} \n", + " --model ${{outputs.model}}\n", + "# \n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now create and register the component. Registering it allows you to re-use it in other pipelines. Also, anyone else with access to your workspace can use the registered component." 
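Registration also makes the component retrievable by name in later sessions or from other pipelines. As a rough sketch (not one of the tutorial's cells), after the registration cell below has run you could load the component back from the workspace like this; the version string is a placeholder for whatever version the registration prints:

```python
# Load the registered component from the workspace instead of the local yaml file.
registered_train_component = ml_client.components.get(
    name="train_credit_defaults_model",  # the name defined in train.yml
    version="1",  # placeholder; use the version printed by the registration cell
)
print(
    f"Loaded {registered_train_component.name}, version {registered_train_component.version}"
)
```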
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "attributes": { + "classes": [ + "Python" + ], + "id": "" + }, + "name": "train_component" + }, + "outputs": [], + "source": [ + "# importing the Component Package\n", + "from azure.ai.ml import load_component\n", + "\n", + "# Loading the component from the yml file\n", + "train_component = load_component(source=os.path.join(train_src_dir, \"train.yml\"))\n", + "\n", + "# Now we register the component to the workspace\n", + "train_component = ml_client.create_or_update(train_component)\n", + "\n", + "# Create (register) the component in your workspace\n", + "print(\n", + " f\"Component {train_component.name} with Version {train_component.version} is registered\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create the pipeline from components\n", + "\n", + "Now that both your components are defined and registered, you can start implementing the pipeline.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here, you use *input data*, *split ratio* and *registered model name* as input variables. Then call the components and connect them via their inputs/outputs identifiers. The outputs of each step can be accessed via the `.outputs` property.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "attributes": { + "classes": [ + "Python" + ], + "id": "" + } + }, + "source": [ + "The Python functions returned by `load_component()` work as any regular Python function that we use within a pipeline to call each step.\n", + "\n", + "To code the pipeline, you use a specific `@dsl.pipeline` decorator that identifies the Azure Machine Learning pipelines. In the decorator, we can specify the pipeline description and default resources like compute and storage. Like a Python function, pipelines can have inputs. You can then create multiple instances of a single pipeline with different inputs.\n", + "\n", + "Here, we used *input data*, *split ratio* and *registered model name* as input variables. We then call the components and connect them via their inputs/outputs identifiers. The outputs of each step can be accessed via the `.outputs` property." 
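The decorator in the next cell pins the pipeline to the compute cluster created earlier (its comment notes that `"serverless"` can be used instead). As an alternative, the default compute can also be changed on the pipeline object after it's instantiated later in the tutorial; this is only a sketch, assuming the `pipeline` variable from the instantiation cell:

```python
# Illustrative alternative to editing the decorator: override the default compute on the
# instantiated pipeline job. "serverless" asks Azure Machine Learning to provision compute
# for the run on your behalf (serverless compute is in preview).
pipeline.settings.default_compute = "serverless"
```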
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "attributes": { + "classes": [ + "Python" + ], + "id": "" + }, + "name": "pipeline" + }, + "outputs": [], + "source": [ + "# the dsl decorator tells the sdk that we are defining an Azure Machine Learning pipeline\n", + "from azure.ai.ml import dsl, Input, Output\n", + "\n", + "\n", + "@dsl.pipeline(\n", + " compute=cpu_compute_target, # replace cpu_compute_target with \"serverless\" to run pipeline on serverless compute\n", + " description=\"E2E data_perp-train pipeline\",\n", + ")\n", + "def credit_defaults_pipeline(\n", + " pipeline_job_data_input,\n", + " pipeline_job_test_train_ratio,\n", + " pipeline_job_learning_rate,\n", + " pipeline_job_registered_model_name,\n", + "):\n", + " # using data_prep_function like a python call with its own inputs\n", + " data_prep_job = data_prep_component(\n", + " data=pipeline_job_data_input,\n", + " test_train_ratio=pipeline_job_test_train_ratio,\n", + " )\n", + "\n", + " # using train_func like a python call with its own inputs\n", + " train_job = train_component(\n", + " train_data=data_prep_job.outputs.train_data, # note: using outputs from previous step\n", + " test_data=data_prep_job.outputs.test_data, # note: using outputs from previous step\n", + " learning_rate=pipeline_job_learning_rate, # note: using a pipeline input as parameter\n", + " registered_model_name=pipeline_job_registered_model_name,\n", + " )\n", + "\n", + " # a pipeline returns a dictionary of outputs\n", + " # keys will code for the pipeline output identifier\n", + " return {\n", + " \"pipeline_job_train_data\": data_prep_job.outputs.train_data,\n", + " \"pipeline_job_test_data\": data_prep_job.outputs.test_data,\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now use your pipeline definition to instantiate a pipeline with your dataset, split rate of choice and the name you picked for your model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "attributes": { + "classes": [ + "Python" + ], + "id": "" + }, + "name": "registered_model_name" + }, + "outputs": [], + "source": [ + "registered_model_name = \"credit_defaults_model\"\n", + "\n", + "# Let's instantiate the pipeline with the parameters of our choice\n", + "pipeline = credit_defaults_pipeline(\n", + " pipeline_job_data_input=Input(type=\"uri_file\", path=credit_data.path),\n", + " pipeline_job_test_train_ratio=0.25,\n", + " pipeline_job_learning_rate=0.05,\n", + " pipeline_job_registered_model_name=registered_model_name,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Submit the job \n", + "\n", + "It's now time to submit the job to run in Azure Machine Learning. This time you use `create_or_update` on `ml_client.jobs`.\n", + "\n", + "Here you also pass an experiment name. An experiment is a container for all the iterations one does on a certain project. All the jobs submitted under the same experiment name would be listed next to each other in Azure Machine Learning studio.\n", + "\n", + "Once completed, the pipeline registers a model in your workspace as a result of training." 
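One optional way to confirm that registration happened is to list the versions stored under the chosen model name once the job submitted in the next cell has finished. This is an illustrative check, reusing the `ml_client` and `registered_model_name` variables defined earlier:

```python
# After the pipeline run completes, list the versions registered under the model name.
for model in ml_client.models.list(name=registered_model_name):
    print(f"{model.name} - version {model.version}")
```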
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "returned_job" + }, + "outputs": [], + "source": [ + "# submit the pipeline job\n", + "pipeline_job = ml_client.jobs.create_or_update(\n", + " pipeline,\n", + " # Project's name\n", + " experiment_name=\"e2e_registered_components\",\n", + ")\n", + "ml_client.jobs.stream(pipeline_job.name)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can track the progress of your pipeline, by using the link generated in the previous cell. When you first select this link, you may see that the pipeline is still running. Once it's complete, you can examine each component's results.\n", + "\n", + "Double-click the **Train Credit Defaults Model** component. \n", + "\n", + "There are two important results you'll want to see about training:\n", + "\n", + "* View your logs:\n", + " 1. Select the **Outputs+logs** tab.\n", + " 1. Open the folders to `user_logs` > `std_log.txt`\n", + " This section shows the script run stdout.\n", + " ![Screenshot of std_log.txt.](media/user-logs.jpg)\n", + "\n", + "* View your metrics: Select the **Metrics** tab. This section shows different logged metrics. In this example. mlflow `autologging`, has automatically logged the training metrics.\n", + " \n", + " ![Screenshot shows logged metrics.txt.](./media/metrics.jpg)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Deploy the model as an online endpoint\n", + "To learn how to deploy your model to an online endpoint, see [Deploy a model as an online endpoint tutorial](https://learn.microsoft.com/en-us/azure/machine-learning/tutorial-deploy-model).\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Next Steps\n", + "\n", + "Learn how to [Schedule machine learning pipeline jobs](https://learn.microsoft.com/azure/machine-learning/how-to-schedule-pipeline-job)" + ] + } + ], + "metadata": { + "description": { + "description": "Create production ML pipelines with Python SDK v2 in a Jupyter notebook" + }, + "kernel_info": { + "name": "python310-sdkv2" + }, + "kernelspec": { + "display_name": "Python 3.10 - SDK v2", + "language": "python", + "name": "python310-sdkv2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + } + }, + "nbformat": 4, + "nbformat_minor": 1 } diff --git a/tutorials/get-started-notebooks/quickstart.ipynb b/tutorials/get-started-notebooks/quickstart.ipynb index 3e724070df..2c7441871f 100644 --- a/tutorials/get-started-notebooks/quickstart.ipynb +++ b/tutorials/get-started-notebooks/quickstart.ipynb @@ -17,7 +17,7 @@ "\n", "> * Set up a handle to your Azure Machine Learning workspace\n", "> * Create your training script\n", - "> * Create a scalable compute resource, a compute cluster \n", + "> * Create a scalable compute resource, a compute cluster or use **serverless compute (preview)** instead\n", "> * Create and run a command job that will run the training script on the compute cluster, configured with the appropriate job environment\n", "> * View the output of your training script\n", "> * Deploy the newly-trained model as an endpoint\n", @@ -273,6 +273,8 @@ "\n", "## Create a compute cluster, a scalable way to run a training job\n", 
"\n", + "You can **skip this step** if you want to use **serverless compute** to run the training job. Through serverless compute, Azure Machine Learning takes care of creating, scaling, deleting, patching and managing compute, along with providing managed network isolation, reducing the burden on you. \n", + "\n", "You already have a compute instance, which you're using to run the notebook. Now you'll add a second type of compute, a **compute cluster** that you'll use to run your training job. While a compute instance is a single node machine, a compute cluster can be single or multi-node machines with Linux or Windows OS, or a specific compute fabric like Spark.\n", "\n", "You'll provision a Linux compute cluster. See the [full list on VM sizes and prices](https://azure.microsoft.com/pricing/details/machine-learning/) .\n", @@ -336,7 +338,7 @@ "Now that you have a script that can perform the desired tasks, and a compute cluster to run the script, you'll use a general purpose **command** that can run command line actions. This command line action can directly call system commands or run a script. \n", "\n", "Here, you'll create input variables to specify the input data, split ratio, learning rate and registered model name. The command script will:\n", - "* Use the compute cluster to run the command.\n", + "* Use the compute cluster to run the command or just **remove the compute line to use serverless compute**.\n", "* Use an *environment* that defines software and runtime libraries needed for the training script. Azure Machine Learning provides many curated or ready-made environments, which are useful for common training and inference scenarios. You'll use one of those environments here. In the [Train a model](train-model.ipynb) tutorial, you'll learn how to create a custom environment. \n", "* Configure the command line action itself - `python main.py` in this case. The inputs/outputs are accessible in the command via the `${{ ... }}` notation.\n", "* In this sample, we access the data from a file on the internet. " @@ -371,7 +373,7 @@ " code=\"./src/\", # location of source code\n", " command=\"python main.py --data ${{inputs.data}} --test_train_ratio ${{inputs.test_train_ratio}} --learning_rate ${{inputs.learning_rate}} --registered_model_name ${{inputs.registered_model_name}}\",\n", " environment=\"AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest\",\n", - " compute=\"cpu-cluster\",\n", + " compute=\"cpu-cluster\", # you can remove this line to use serverless compute\n", " display_name=\"credit_default_prediction\",\n", ")" ] diff --git a/tutorials/get-started-notebooks/train-model.ipynb b/tutorials/get-started-notebooks/train-model.ipynb index 4dde0095b2..d4fed7732e 100644 --- a/tutorials/get-started-notebooks/train-model.ipynb +++ b/tutorials/get-started-notebooks/train-model.ipynb @@ -1,641 +1,643 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Day 1: Train a model\n", - "\n", - "Learn how a data scientist uses Azure Machine Learning to train a model. In this example, we use the associated credit card dataset to show how you can use Azure Machine Learning for a classification problem. The goal is to predict if a customer has a high likelihood of defaulting on a credit card payment.\n", - "\n", - "The training script handles the data preparation, then trains and registers a model. This tutorial takes you through steps to submit a cloud-based training job (command job). 
If you would like to learn more about how to load your data into Azure, see [Create data assets](how-to-create-data-assets.md). \n", - "\n", - "The steps are:\n", - "\n", - " * Get a handle to your Azure Machine Learning workspace\n", - " * Create your compute resource and job environment\n", - " * Create your training script\n", - " * Create and run your command job to run the training script on the compute resource, configured with the appropriate job environment and the data source\n", - " * View the output of your training script\n", - " * Deploy the newly-trained model as an endpoint\n", - " * Call the Azure Machine Learning endpoint for inferencing" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Prerequisites\n", - "\n", - "* If you opened this notebook from Azure Machine Learning studio, you need a compute instance to run the code. If you don't have a compute instance, select **Create compute** on the toolbar to first create one. You can use all the default settings. \n", - "\n", - " ![Create compute](./media/create-compute.png)\n", - "\n", - "* If you're seeing this notebook elsewhere, complete [Create resources you need to get started](https://docs.microsoft.com/azure/machine-learning/quickstart-create-resources) to create an Azure Machine Learning workspace and a compute instance.\n", - "\n", - "## Set your kernel\n", - "\n", - "* If your compute instance is stopped, start it now. \n", - " \n", - " ![Start compute](./media/start-compute.png)\n", - "\n", - "* Once your compute instance is running, make sure the that the kernel, found on the top right, is `Python 3.10 - SDK v2`. If not, use the dropdown to select this kernel.\n", - "\n", - " ![Set the kernel](./media/set-kernel.png)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "## Use a command job to train a model in Azure Machine Learning\n", - "\n", - "To train a model, you need to submit a *job*. The type of job you'll submit in this tutorial is a *command job*. Azure Machine Learning offers several different types of jobs to train models. Users can select their method of training based on complexity of the model, data size, and training speed requirements. In this tutorial, you'll learn how to submit a *command job* to run a *training script*. \n", - "\n", - "A command job is a function that allows you to submit a custom training script to train your model. This can also be defined as a custom training job. A command job in Azure Machine Learning is a type of job that runs a script or command in a specified environment. You can use command jobs to train models, process data, or any other custom code you want to execute in the cloud. \n", - "\n", - "In this tutorial, we'll focus on using a command job to create a custom training job that we'll use to train a model. 
For any custom training job, the below items are required:\n", - "\n", - "* compute resource (usually a compute cluster, which we recommend for scalability)\n", - "* environment\n", - "* data\n", - "* command job \n", - "* training script\n", - "\n", - "\n", - "In this tutorial we'll provide all these items for our example: creating a classifier to predict customers who have a high likelihood of defaulting on credit card payments.\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create handle to workspace\n", - "\n", - "Before we dive in the code, you need a way to reference your workspace. You'll create `ml_client` for a handle to the workspace. You'll then use `ml_client` to manage resources and jobs.\n", - "\n", - "In the next cell, enter your Subscription ID, Resource Group name and Workspace name. To find these values:\n", - "\n", - "1. In the upper right Azure Machine Learning studio toolbar, select your workspace name.\n", - "1. Copy the value for workspace, resource group and subscription ID into the code.\n", - "1. You'll need to copy one value, close the area and paste, then come back for the next one." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1677262283435 - }, - "name": "credential" - }, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import DefaultAzureCredential\n", - "\n", - "# authenticate\n", - "credential = DefaultAzureCredential()\n", - "# # Get a handle to the workspace\n", - "ml_client = MLClient(\n", - " credential=credential,\n", - " subscription_id=\"\",\n", - " resource_group_name=\"\",\n", - " workspace_name=\"\",\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "> [!NOTE]\n", - "> Creating MLClient will not connect to the workspace. The client initialization is lazy, it will wait for the first time it needs to make a call (this will happen in the next code cell)." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create a compute cluster to run your job\n", - "\n", - "In Azure, a job can refer to several tasks that Azure allows its users to do: training, pipeline creation, deployment, etc. For this tutorial and our purpose of training a machine learning model, we'll use *job* as a reference to running training computations (*training job*).\n", - "\n", - "You need a compute resource for running any job in Azure Machine Learning. It can be single or multi-node machines with Linux or Windows OS, or a specific compute fabric like Spark. In Azure, there are two compute resources that you can choose from: instance and cluster. A compute instance contains one node of computation resources while a *compute cluster* contains several. A *compute cluster* contains more memory for the computation task. For training, we recommend using a compute cluster because it allows the user to distribute calculations on multiple nodes of computation, which results in a faster training experience. \n", - "\n", - "You provision a Linux compute cluster. See the [full list on VM sizes and prices](https://azure.microsoft.com/pricing/details/machine-learning/) .\n", - "\n", - "For this example, you only need a basic cluster, so you use a Standard_DS3_v2 model with 2 vCPU cores, 7-GB RAM." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1677262287630 - }, - "name": "cpu_compute_target" - }, - "outputs": [], - "source": [ - "from azure.ai.ml.entities import AmlCompute\n", - "\n", - "# Name assigned to the compute cluster\n", - "cpu_compute_target = \"cpu-cluster\"\n", - "\n", - "try:\n", - " # let's see if the compute target already exists\n", - " cpu_cluster = ml_client.compute.get(cpu_compute_target)\n", - " print(\n", - " f\"You already have a cluster named {cpu_compute_target}, we'll reuse it as is.\"\n", - " )\n", - "\n", - "except Exception:\n", - " print(\"Creating a new cpu compute target...\")\n", - "\n", - " # Let's create the Azure Machine Learning compute object with the intended parameters\n", - " # if you run into an out of quota error, change the size to a comparable VM that is available.\n", - " # Learn more on https://azure.microsoft.com/en-us/pricing/details/machine-learning/.\n", - " cpu_cluster = AmlCompute(\n", - " name=cpu_compute_target,\n", - " # Azure Machine Learning Compute is the on-demand VM service\n", - " type=\"amlcompute\",\n", - " # VM Family\n", - " size=\"STANDARD_DS3_V2\",\n", - " # Minimum running nodes when there is no job running\n", - " min_instances=0,\n", - " # Nodes in cluster\n", - " max_instances=4,\n", - " # How many seconds will the node running after the job termination\n", - " idle_time_before_scale_down=180,\n", - " # Dedicated or LowPriority. The latter is cheaper but there is a chance of job termination\n", - " tier=\"Dedicated\",\n", - " )\n", - " print(\n", - " f\"AMLCompute with name {cpu_cluster.name} will be created, with compute size {cpu_cluster.size}\"\n", - " )\n", - " # Now, we pass the object to MLClient's create_or_update method\n", - " cpu_cluster = ml_client.compute.begin_create_or_update(cpu_cluster)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create a job environment\n", - "\n", - "To run your Azure Machine Learning job on your compute resource, you need an [environment](https://learn.microsoft.com/articles/machine-learning/concept-environments). An environment lists the software runtime and libraries that you want installed on the compute where you’ll be training. It's similar to your python environment on your local machine.\n", - "\n", - "Azure Machine Learning provides many curated or ready-made environments, which are useful for common training and inference scenarios. \n", - "\n", - "In this example, you'll create a custom conda environment for your jobs, using a conda yaml file.\n", - "\n", - "First, create a directory to store the file in." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1677262301389 - }, - "name": "dependencies_dir" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "dependencies_dir = \"./dependencies\"\n", - "os.makedirs(dependencies_dir, exist_ok=True)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The cell below uses IPython magic to write the conda file into the directory you just created." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "name": "write_model" - }, - "outputs": [], - "source": [ - "%%writefile {dependencies_dir}/conda.yaml\n", - "name: model-env\n", - "channels:\n", - " - conda-forge\n", - "dependencies:\n", - " - python=3.8\n", - " - numpy=1.21.2\n", - " - pip=21.2.4\n", - " - scikit-learn=0.24.2\n", - " - scipy=1.7.1\n", - " - pandas>=1.1,<1.2\n", - " - pip:\n", - " - inference-schema[numpy-support]==1.3.0\n", - " - mlflow== 1.26.1\n", - " - azureml-mlflow==1.42.0\n", - " - psutil>=5.8,<5.9\n", - " - tqdm>=4.59,<4.60\n", - " - ipykernel~=6.0\n", - " - matplotlib" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "The specification contains some usual packages, that you'll use in your job (numpy, pip).\n", - "\n", - "Reference this *yaml* file to create and register this custom environment in your workspace:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1677262314695 - }, - "name": "custom_env_name" - }, - "outputs": [], - "source": [ - "from azure.ai.ml.entities import Environment\n", - "\n", - "custom_env_name = \"aml-scikit-learn\"\n", - "\n", - "custom_job_env = Environment(\n", - " name=custom_env_name,\n", - " description=\"Custom environment for Credit Card Defaults job\",\n", - " tags={\"scikit-learn\": \"0.24.2\"},\n", - " conda_file=os.path.join(dependencies_dir, \"conda.yaml\"),\n", - " image=\"mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:latest\",\n", - ")\n", - "custom_job_env = ml_client.environments.create_or_update(custom_job_env)\n", - "\n", - "print(\n", - " f\"Environment with name {custom_job_env.name} is registered to workspace, the environment version is {custom_job_env.version}\"\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Configure a training job using the command function\n", - "\n", - "You create an Azure Machine Learning *command job* to train a model for credit default prediction. The command job runs a *training script* in a specified environment on a specified compute resource. You've already created the environment and the compute cluster. Next you'll create the training script. In our specific case, we're training our dataset to produce a classifier using the `GradientBoostingClassifier` model. \n", - "\n", - "The *training script* handles the data preparation, training and registering of the trained model. The method `train_test_split` handles splitting the dataset into test and training data. In this tutorial, you'll create a Python training script. \n", - "\n", - "Command jobs can be run from CLI, Python SDK, or studio interface. In this tutorial, you'll use the Azure Machine Learning Python SDK v2 to create and run the command job.\n", - "\n", - "## Create training script\n", - "\n", - "Let's start by creating the training script - the *main.py* python file.\n", - "\n", - "First create a source folder for the script:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1677262322022 - }, - "name": "train_src_dir" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "train_src_dir = \"./src\"\n", - "os.makedirs(train_src_dir, exist_ok=True)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This script handles the preprocessing of the data, splitting it into test and train data. 
It then consumes this data to train a tree based model and return the output model. \n", - "\n", - "[MLFlow](https://learn.microsoft.com/articles/machine-learning/concept-mlflow) is used to log the parameters and metrics during our job. The MLFlow package allows you to keep track of metrics and results for each model Azure trains. We'll be using MLFlow to first get the best model for our data, then we'll view the model's metrics on the Azure studio. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "name": "write_main" - }, - "outputs": [], - "source": [ - "%%writefile {train_src_dir}/main.py\n", - "import os\n", - "import argparse\n", - "import pandas as pd\n", - "import mlflow\n", - "import mlflow.sklearn\n", - "from sklearn.ensemble import GradientBoostingClassifier\n", - "from sklearn.metrics import classification_report\n", - "from sklearn.model_selection import train_test_split\n", - "\n", - "def main():\n", - " \"\"\"Main function of the script.\"\"\"\n", - "\n", - " # input and output arguments\n", - " parser = argparse.ArgumentParser()\n", - " parser.add_argument(\"--data\", type=str, help=\"path to input data\")\n", - " parser.add_argument(\"--test_train_ratio\", type=float, required=False, default=0.25)\n", - " parser.add_argument(\"--n_estimators\", required=False, default=100, type=int)\n", - " parser.add_argument(\"--learning_rate\", required=False, default=0.1, type=float)\n", - " parser.add_argument(\"--registered_model_name\", type=str, help=\"model name\")\n", - " args = parser.parse_args()\n", - " \n", - " # Start Logging\n", - " mlflow.start_run()\n", - "\n", - " # enable autologging\n", - " mlflow.sklearn.autolog()\n", - "\n", - " ###################\n", - " #\n", - " ###################\n", - " print(\" \".join(f\"{k}={v}\" for k, v in vars(args).items()))\n", - "\n", - " print(\"input data:\", args.data)\n", - " \n", - " credit_df = pd.read_csv(args.data, header=1, index_col=0)\n", - "\n", - " mlflow.log_metric(\"num_samples\", credit_df.shape[0])\n", - " mlflow.log_metric(\"num_features\", credit_df.shape[1] - 1)\n", - "\n", - " #Split train and test datasets\n", - " train_df, test_df = train_test_split(\n", - " credit_df,\n", - " test_size=args.test_train_ratio,\n", - " )\n", - " ####################\n", - " #\n", - " ####################\n", - "\n", - " ##################\n", - " #\n", - " ##################\n", - " # Extracting the label column\n", - " y_train = train_df.pop(\"default payment next month\")\n", - "\n", - " # convert the dataframe values to array\n", - " X_train = train_df.values\n", - "\n", - " # Extracting the label column\n", - " y_test = test_df.pop(\"default payment next month\")\n", - "\n", - " # convert the dataframe values to array\n", - " X_test = test_df.values\n", - "\n", - " print(f\"Training with data of shape {X_train.shape}\")\n", - "\n", - " clf = GradientBoostingClassifier(\n", - " n_estimators=args.n_estimators, learning_rate=args.learning_rate\n", - " )\n", - " clf.fit(X_train, y_train)\n", - "\n", - " y_pred = clf.predict(X_test)\n", - "\n", - " print(classification_report(y_test, y_pred))\n", - " ###################\n", - " #\n", - " ###################\n", - "\n", - " ##########################\n", - " #\n", - " ##########################\n", - " # Registering the model to the workspace\n", - " print(\"Registering the model via MLFlow\")\n", - " mlflow.sklearn.log_model(\n", - " sk_model=clf,\n", - " registered_model_name=args.registered_model_name,\n", - " 
artifact_path=args.registered_model_name,\n", - " )\n", - "\n", - " # Saving the model to a file\n", - " mlflow.sklearn.save_model(\n", - " sk_model=clf,\n", - " path=os.path.join(args.registered_model_name, \"trained_model\"),\n", - " )\n", - " ###########################\n", - " #\n", - " ###########################\n", - " \n", - " # Stop Logging\n", - " mlflow.end_run()\n", - "\n", - "if __name__ == \"__main__\":\n", - " main()" - ] + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Day 1: Train a model\n", + "\n", + "Learn how a data scientist uses Azure Machine Learning to train a model. In this example, we use the associated credit card dataset to show how you can use Azure Machine Learning for a classification problem. The goal is to predict if a customer has a high likelihood of defaulting on a credit card payment.\n", + "\n", + "The training script handles the data preparation, then trains and registers a model. This tutorial takes you through steps to submit a cloud-based training job (command job). If you would like to learn more about how to load your data into Azure, see [Create data assets](how-to-create-data-assets.md). \n", + "\n", + "The steps are:\n", + "\n", + " * Get a handle to your Azure Machine Learning workspace\n", + " * Create your compute resource (or simply use serverless compute) and job environment\n", + " * Create your training script\n", + " * Create and run your command job to run the training script on the compute resource, configured with the appropriate job environment and the data source\n", + " * View the output of your training script\n", + " * Deploy the newly-trained model as an endpoint\n", + " * Call the Azure Machine Learning endpoint for inferencing" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "\n", + "* If you opened this notebook from Azure Machine Learning studio, you need a compute instance to run the code. If you don't have a compute instance, select **Create compute** on the toolbar to first create one. You can use all the default settings. \n", + "\n", + " ![Create compute](./media/create-compute.png)\n", + "\n", + "* If you're seeing this notebook elsewhere, complete [Create resources you need to get started](https://docs.microsoft.com/azure/machine-learning/quickstart-create-resources) to create an Azure Machine Learning workspace and a compute instance.\n", + "\n", + "## Set your kernel\n", + "\n", + "* If your compute instance is stopped, start it now. \n", + " \n", + " ![Start compute](./media/start-compute.png)\n", + "\n", + "* Once your compute instance is running, make sure the that the kernel, found on the top right, is `Python 3.10 - SDK v2`. If not, use the dropdown to select this kernel.\n", + "\n", + " ![Set the kernel](./media/set-kernel.png)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "## Use a command job to train a model in Azure Machine Learning\n", + "\n", + "To train a model, you need to submit a *job*. The type of job you'll submit in this tutorial is a *command job*. Azure Machine Learning offers several different types of jobs to train models. Users can select their method of training based on complexity of the model, data size, and training speed requirements. In this tutorial, you'll learn how to submit a *command job* to run a *training script*. 
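To make that definition concrete, here is a minimal sketch, not taken from this notebook, of what a bare command job can look like with the Python SDK v2. The echo command, the `hello_job` variable and the display name are placeholders invented for illustration; the curated environment name is one that appears elsewhere in these samples.

```python
from azure.ai.ml import command

# A bare-bones command job: a shell command to run, an environment to run it in,
# and optionally a compute target. Omitting compute runs the job on serverless compute.
hello_job = command(
    command="echo 'hello from a command job'",
    environment="AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest",  # curated environment used elsewhere in these samples
    display_name="hello-command-job",  # hypothetical name
)

# Submitting it would just be: ml_client.create_or_update(hello_job)
```

Submitting it uses the same `ml_client.create_or_update(...)` call this tutorial makes later.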
\n", + "\n", + "A command job is a function that allows you to submit a custom training script to train your model. This can also be defined as a custom training job. A command job in Azure Machine Learning is a type of job that runs a script or command in a specified environment. You can use command jobs to train models, process data, or any other custom code you want to execute in the cloud. \n", + "\n", + "In this tutorial, we'll focus on using a command job to create a custom training job that we'll use to train a model. For any custom training job, the below items are required:\n", + "\n", + "* compute resource (usually a compute cluster, which we recommend for scalability)\n", + "* environment\n", + "* data\n", + "* command job \n", + "* training script\n", + "\n", + "\n", + "In this tutorial we'll provide all these items for our example: creating a classifier to predict customers who have a high likelihood of defaulting on credit card payments.\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create handle to workspace\n", + "\n", + "Before we dive in the code, you need a way to reference your workspace. You'll create `ml_client` for a handle to the workspace. You'll then use `ml_client` to manage resources and jobs.\n", + "\n", + "In the next cell, enter your Subscription ID, Resource Group name and Workspace name. To find these values:\n", + "\n", + "1. In the upper right Azure Machine Learning studio toolbar, select your workspace name.\n", + "1. Copy the value for workspace, resource group and subscription ID into the code.\n", + "1. You'll need to copy one value, close the area and paste, then come back for the next one." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "gather": { + "logged": 1677262283435 }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In this script, once the model is trained, the model file is saved and registered to the workspace. Registering your model allows you to store and version your models in the Azure cloud, in your workspace. Once you register a model, you can find all other registered model in one place in the Azure Studio called the model registry. The model registry helps you organize and keep track of your trained models. \n", - "\n", - "## Configure the command\n", - "\n", - "Now that you have a script that can perform the classification task, use the general purpose **command** that can run command line actions. This command line action can be directly calling system commands or by running a script. \n", - "\n", - "Here, create input variables to specify the input data, split ratio, learning rate and registered model name. The command script will:\n", - "* Use the compute created earlier to run this command.\n", - "* Use the environment created earlier - you can use the `@latest` notation to indicate the latest version of the environment when the command is run.\n", - "* Configure the command line action itself - `python main.py` in this case. The inputs/outputs are accessible in the command via the `${{ ... }}` notation." 
- ] + "name": "credential" + }, + "outputs": [], + "source": [ + "from azure.ai.ml import MLClient\n", + "from azure.identity import DefaultAzureCredential\n", + "\n", + "# authenticate\n", + "credential = DefaultAzureCredential()\n", + "# # Get a handle to the workspace\n", + "ml_client = MLClient(\n", + " credential=credential,\n", + " subscription_id=\"\",\n", + " resource_group_name=\"\",\n", + " workspace_name=\"\",\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> [!NOTE]\n", + "> Creating MLClient will not connect to the workspace. The client initialization is lazy, it will wait for the first time it needs to make a call (this will happen in the next code cell)." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a compute cluster to run your job\n", + "\n", + "You can **skip this step** if you want to use **serverless compute (preview)** to run the training job. Through serverless compute, Azure Machine Learning takes care of creating, scaling, deleting, patching and managing compute, along with providing managed network isolation, reducing the burden on you. \n", + "\n", + "In Azure, a job can refer to several tasks that Azure allows its users to do: training, pipeline creation, deployment, etc. For this tutorial and our purpose of training a machine learning model, we'll use *job* as a reference to running training computations (*training job*).\n", + "\n", + "You need a compute resource for running any job in Azure Machine Learning. It can be single or multi-node machines with Linux or Windows OS, or a specific compute fabric like Spark. In Azure Machine Learning, there are two compute resources that you can choose from: instance and cluster. A compute instance contains one node of computation resources while a *compute cluster* contains several. A *compute cluster* contains more memory for the computation task. For training, we recommend using a compute cluster because it allows the user to distribute calculations on multiple nodes of computation, which results in a faster training experience. \n", + "\n", + "You provision a Linux compute cluster. See the [full list on VM sizes and prices](https://azure.microsoft.com/pricing/details/machine-learning/) .\n", + "\n", + "For this example, you only need a basic cluster, so you use a Standard_DS3_v2 model with 2 vCPU cores, 7-GB RAM." 
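If you do skip this step and rely on serverless compute, the only later change is to leave the `compute` line out of the `command(...)` call. Below is a minimal sketch of that variant, reusing the inputs, script, command string and registered environment that this tutorial sets up later; the commented-out `ResourceConfiguration` lines are an assumption about your azure-ai-ml version and are only needed if you want to pin the serverless VM size.

```python
from azure.ai.ml import command, Input

# The command job configured later in this tutorial, sketched without a compute target:
# with no compute= argument, the job runs on serverless compute.
job = command(
    inputs=dict(
        data=Input(
            type="uri_file",
            path="https://azuremlexamples.blob.core.windows.net/datasets/credit_card/default_of_credit_card_clients.csv",
        ),
        test_train_ratio=0.2,
        learning_rate=0.25,
        registered_model_name="credit_defaults_model",
    ),
    code="./src/",
    command="python main.py --data ${{inputs.data}} --test_train_ratio ${{inputs.test_train_ratio}} --learning_rate ${{inputs.learning_rate}} --registered_model_name ${{inputs.registered_model_name}}",
    environment="aml-scikit-learn@latest",
    # note: no compute="cpu-cluster" line here
    display_name="credit_default_prediction",
)

# Optional, and an assumption about your azure-ai-ml version: pin the serverless VM size.
# from azure.ai.ml.entities import ResourceConfiguration
# job.resources = ResourceConfiguration(instance_type="Standard_DS3_v2", instance_count=1)
```

Submission itself is unchanged: `ml_client.create_or_update(job)`.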
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "gather": { + "logged": 1677262287630 }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1677262332367 - }, - "name": "registered_model_name" - }, - "outputs": [], - "source": [ - "from azure.ai.ml import command\n", - "from azure.ai.ml import Input\n", - "\n", - "registered_model_name = \"credit_defaults_model\"\n", - "\n", - "job = command(\n", - " inputs=dict(\n", - " data=Input(\n", - " type=\"uri_file\",\n", - " path=\"https://azuremlexamples.blob.core.windows.net/datasets/credit_card/default_of_credit_card_clients.csv\",\n", - " ),\n", - " test_train_ratio=0.2,\n", - " learning_rate=0.25,\n", - " registered_model_name=registered_model_name,\n", - " ),\n", - " code=\"./src/\", # location of source code\n", - " command=\"python main.py --data ${{inputs.data}} --test_train_ratio ${{inputs.test_train_ratio}} --learning_rate ${{inputs.learning_rate}} --registered_model_name ${{inputs.registered_model_name}}\",\n", - " environment=\"aml-scikit-learn@latest\",\n", - " compute=\"cpu-cluster\",\n", - " display_name=\"credit_default_prediction\",\n", - ")" - ] + "name": "cpu_compute_target" + }, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import AmlCompute\n", + "\n", + "# Name assigned to the compute cluster\n", + "cpu_compute_target = \"cpu-cluster\"\n", + "\n", + "try:\n", + " # let's see if the compute target already exists\n", + " cpu_cluster = ml_client.compute.get(cpu_compute_target)\n", + " print(\n", + " f\"You already have a cluster named {cpu_compute_target}, we'll reuse it as is.\"\n", + " )\n", + "\n", + "except Exception:\n", + " print(\"Creating a new cpu compute target...\")\n", + "\n", + " # Let's create the Azure Machine Learning compute object with the intended parameters\n", + " # if you run into an out of quota error, change the size to a comparable VM that is available.\n", + " # Learn more on https://azure.microsoft.com/en-us/pricing/details/machine-learning/.\n", + " cpu_cluster = AmlCompute(\n", + " name=cpu_compute_target,\n", + " # Azure Machine Learning Compute is the on-demand VM service\n", + " type=\"amlcompute\",\n", + " # VM Family\n", + " size=\"STANDARD_DS3_V2\",\n", + " # Minimum running nodes when there is no job running\n", + " min_instances=0,\n", + " # Nodes in cluster\n", + " max_instances=4,\n", + " # How many seconds will the node running after the job termination\n", + " idle_time_before_scale_down=180,\n", + " # Dedicated or LowPriority. The latter is cheaper but there is a chance of job termination\n", + " tier=\"Dedicated\",\n", + " )\n", + " print(\n", + " f\"AMLCompute with name {cpu_cluster.name} will be created, with compute size {cpu_cluster.size}\"\n", + " )\n", + " # Now, we pass the object to MLClient's create_or_update method\n", + " cpu_cluster = ml_client.compute.begin_create_or_update(cpu_cluster)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a job environment\n", + "\n", + "To run your Azure Machine Learning job on your compute resource, you need an [environment](https://learn.microsoft.com/articles/machine-learning/concept-environments). An environment lists the software runtime and libraries that you want installed on the compute where you’ll be training. 
It's similar to your python environment on your local machine.\n", + "\n", + "Azure Machine Learning provides many curated or ready-made environments, which are useful for common training and inference scenarios. \n", + "\n", + "In this example, you'll create a custom conda environment for your jobs, using a conda yaml file.\n", + "\n", + "First, create a directory to store the file in." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "gather": { + "logged": 1677262301389 }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Submit the job \n", - "\n", - "It's now time to submit the job to run in Azure Machine Learning studio. This time you'll use `create_or_update` on `ml_client`. `ml_client` is a client class that allows you to connect to your Azure subscription using Python and interact with Azure Machine Learning services. `ml_client` allows you to submit your jobs using Python." - ] + "name": "dependencies_dir" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "dependencies_dir = \"./dependencies\"\n", + "os.makedirs(dependencies_dir, exist_ok=True)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The cell below uses IPython magic to write the conda file into the directory you just created." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "write_model" + }, + "outputs": [], + "source": [ + "%%writefile {dependencies_dir}/conda.yaml\n", + "name: model-env\n", + "channels:\n", + " - conda-forge\n", + "dependencies:\n", + " - python=3.8\n", + " - numpy=1.21.2\n", + " - pip=21.2.4\n", + " - scikit-learn=0.24.2\n", + " - scipy=1.7.1\n", + " - pandas>=1.1,<1.2\n", + " - pip:\n", + " - inference-schema[numpy-support]==1.3.0\n", + " - mlflow== 1.26.1\n", + " - azureml-mlflow==1.42.0\n", + " - psutil>=5.8,<5.9\n", + " - tqdm>=4.59,<4.60\n", + " - ipykernel~=6.0\n", + " - matplotlib" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "The specification contains some usual packages, that you'll use in your job (numpy, pip).\n", + "\n", + "Reference this *yaml* file to create and register this custom environment in your workspace:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "gather": { + "logged": 1677262314695 }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1677262345449 - }, - "name": "create_job" - }, - "outputs": [], - "source": [ - "ml_client.create_or_update(job)" - ] + "name": "custom_env_name" + }, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import Environment\n", + "\n", + "custom_env_name = \"aml-scikit-learn\"\n", + "\n", + "custom_job_env = Environment(\n", + " name=custom_env_name,\n", + " description=\"Custom environment for Credit Card Defaults job\",\n", + " tags={\"scikit-learn\": \"0.24.2\"},\n", + " conda_file=os.path.join(dependencies_dir, \"conda.yaml\"),\n", + " image=\"mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:latest\",\n", + ")\n", + "custom_job_env = ml_client.environments.create_or_update(custom_job_env)\n", + "\n", + "print(\n", + " f\"Environment with name {custom_job_env.name} is registered to workspace, the environment version is {custom_job_env.version}\"\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configure a training job using the command function\n", + "\n", + "You create an Azure 
Machine Learning *command job* to train a model for credit default prediction. The command job runs a *training script* in a specified environment on a specified compute resource. You've already created the environment and the compute cluster. Next you'll create the training script. In our specific case, we're training our dataset to produce a classifier using the `GradientBoostingClassifier` model. \n", + "\n", + "The *training script* handles the data preparation, training and registering of the trained model. The method `train_test_split` handles splitting the dataset into test and training data. In this tutorial, you'll create a Python training script. \n", + "\n", + "Command jobs can be run from CLI, Python SDK, or studio interface. In this tutorial, you'll use the Azure Machine Learning Python SDK v2 to create and run the command job.\n", + "\n", + "## Create training script\n", + "\n", + "Let's start by creating the training script - the *main.py* python file.\n", + "\n", + "First create a source folder for the script:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "gather": { + "logged": 1677262322022 }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## View job output and wait for job completion\n", - "\n", - "View the job in Azure Machine Learning studio by selecting the link in the output of the previous cell. The output of this job will look like this in the Azure Machine Learning studio. Explore the tabs for various details like metrics, outputs etc. Once completed, the job will register a model in your workspace as a result of training. \n", - "\n", - "![Screenshot shows the overview page for the job.](./media/view-job.gif)\n", - "\n", - "> [!IMPORTANT]\n", - "> Wait until the status of the job is complete before returning to this notebook to continue. The job will take 2 to 3 minutes to run. It could take longer (up to 10 minutes) if the compute cluster has been scaled down to zero nodes and custom environment is still building.\n", - "\n", - "When you run the cell, the notebook output shows a link to the job's details page on Azure Studio. Alternatively, you can also select Jobs on the left navigation menu. A job is a grouping of many runs from a specified script or piece of code. Information for the run is stored under that job. The details page gives an overview of the job, the time it took to run, when it was created, etc. The page also has tabs to other information about the job such as metrics, Outputs + logs, and code. Listed below are the tabs available in the job's details page:\n", - "\n", - "* Overview: The overview section provides basic information about the job, including its status, start and end times, and the type of job that was run\n", - "* Inputs: The input section lists the data and code that were used as inputs for the job. This section can include datasets, scripts, environment configurations, and other resources that were used during training. \n", - "* Outputs + logs: The Outputs + logs tab contains logs generated while the job was running. This tab assists in troubleshooting if anything goes wrong with your training script or model creation.\n", - "* Metrics: The metrics tab showcases key performance metrics from your model such as training score, f1 score, and precision score. 
" - ] + "name": "train_src_dir" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "train_src_dir = \"./src\"\n", + "os.makedirs(train_src_dir, exist_ok=True)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This script handles the preprocessing of the data, splitting it into test and train data. It then consumes this data to train a tree based model and return the output model. \n", + "\n", + "[MLFlow](https://learn.microsoft.com/articles/machine-learning/concept-mlflow) is used to log the parameters and metrics during our job. The MLFlow package allows you to keep track of metrics and results for each model Azure trains. We'll be using MLFlow to first get the best model for our data, then we'll view the model's metrics on the Azure studio. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "write_main" + }, + "outputs": [], + "source": [ + "%%writefile {train_src_dir}/main.py\n", + "import os\n", + "import argparse\n", + "import pandas as pd\n", + "import mlflow\n", + "import mlflow.sklearn\n", + "from sklearn.ensemble import GradientBoostingClassifier\n", + "from sklearn.metrics import classification_report\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "def main():\n", + " \"\"\"Main function of the script.\"\"\"\n", + "\n", + " # input and output arguments\n", + " parser = argparse.ArgumentParser()\n", + " parser.add_argument(\"--data\", type=str, help=\"path to input data\")\n", + " parser.add_argument(\"--test_train_ratio\", type=float, required=False, default=0.25)\n", + " parser.add_argument(\"--n_estimators\", required=False, default=100, type=int)\n", + " parser.add_argument(\"--learning_rate\", required=False, default=0.1, type=float)\n", + " parser.add_argument(\"--registered_model_name\", type=str, help=\"model name\")\n", + " args = parser.parse_args()\n", + " \n", + " # Start Logging\n", + " mlflow.start_run()\n", + "\n", + " # enable autologging\n", + " mlflow.sklearn.autolog()\n", + "\n", + " ###################\n", + " #\n", + " ###################\n", + " print(\" \".join(f\"{k}={v}\" for k, v in vars(args).items()))\n", + "\n", + " print(\"input data:\", args.data)\n", + " \n", + " credit_df = pd.read_csv(args.data, header=1, index_col=0)\n", + "\n", + " mlflow.log_metric(\"num_samples\", credit_df.shape[0])\n", + " mlflow.log_metric(\"num_features\", credit_df.shape[1] - 1)\n", + "\n", + " #Split train and test datasets\n", + " train_df, test_df = train_test_split(\n", + " credit_df,\n", + " test_size=args.test_train_ratio,\n", + " )\n", + " ####################\n", + " #\n", + " ####################\n", + "\n", + " ##################\n", + " #\n", + " ##################\n", + " # Extracting the label column\n", + " y_train = train_df.pop(\"default payment next month\")\n", + "\n", + " # convert the dataframe values to array\n", + " X_train = train_df.values\n", + "\n", + " # Extracting the label column\n", + " y_test = test_df.pop(\"default payment next month\")\n", + "\n", + " # convert the dataframe values to array\n", + " X_test = test_df.values\n", + "\n", + " print(f\"Training with data of shape {X_train.shape}\")\n", + "\n", + " clf = GradientBoostingClassifier(\n", + " n_estimators=args.n_estimators, learning_rate=args.learning_rate\n", + " )\n", + " clf.fit(X_train, y_train)\n", + "\n", + " y_pred = clf.predict(X_test)\n", + "\n", + " print(classification_report(y_test, y_pred))\n", + " ###################\n", + " #\n", + " 
###################\n", + "\n", + " ##########################\n", + " #\n", + " ##########################\n", + " # Registering the model to the workspace\n", + " print(\"Registering the model via MLFlow\")\n", + " mlflow.sklearn.log_model(\n", + " sk_model=clf,\n", + " registered_model_name=args.registered_model_name,\n", + " artifact_path=args.registered_model_name,\n", + " )\n", + "\n", + " # Saving the model to a file\n", + " mlflow.sklearn.save_model(\n", + " sk_model=clf,\n", + " path=os.path.join(args.registered_model_name, \"trained_model\"),\n", + " )\n", + " ###########################\n", + " #\n", + " ###########################\n", + " \n", + " # Stop Logging\n", + " mlflow.end_run()\n", + "\n", + "if __name__ == \"__main__\":\n", + " main()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this script, once the model is trained, the model file is saved and registered to the workspace. Registering your model allows you to store and version your models in the Azure cloud, in your workspace. Once you register a model, you can find all other registered model in one place in the Azure Studio called the model registry. The model registry helps you organize and keep track of your trained models. \n", + "\n", + "## Configure the command\n", + "\n", + "Now that you have a script that can perform the classification task, use the general purpose **command** that can run command line actions. This command line action can be directly calling system commands or by running a script. \n", + "\n", + "Here, create input variables to specify the input data, split ratio, learning rate and registered model name. The command script will:\n", + "* Use the compute created earlier to run this command or simply use serverless compute.\n", + "* Use the environment created earlier - you can use the `@latest` notation to indicate the latest version of the environment when the command is run.\n", + "* Configure the command line action itself - `python main.py` in this case. The inputs/outputs are accessible in the command via the `${{ ... }}` notation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "gather": { + "logged": 1677262332367 }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Clean up resources\n", - "\n", - "If you plan to continue now to other tutorials, skip to [Next steps](#next-steps).\n", - "\n", - "### Stop compute instance\n", - "\n", - "If you're not going to use it now, stop the compute instance:\n", - "\n", - "1. In the studio, in the left navigation area, select **Compute**.\n", - "1. In the top tabs, select **Compute instances**\n", - "1. Select the compute instance in the list.\n", - "1. 
On the top toolbar, select **Stop**.\n" - ] + "name": "registered_model_name" + }, + "outputs": [], + "source": [ + "from azure.ai.ml import command\n", + "from azure.ai.ml import Input\n", + "\n", + "registered_model_name = \"credit_defaults_model\"\n", + "\n", + "job = command(\n", + " inputs=dict(\n", + " data=Input(\n", + " type=\"uri_file\",\n", + " path=\"https://azuremlexamples.blob.core.windows.net/datasets/credit_card/default_of_credit_card_clients.csv\",\n", + " ),\n", + " test_train_ratio=0.2,\n", + " learning_rate=0.25,\n", + " registered_model_name=registered_model_name,\n", + " ),\n", + " code=\"./src/\", # location of source code\n", + " command=\"python main.py --data ${{inputs.data}} --test_train_ratio ${{inputs.test_train_ratio}} --learning_rate ${{inputs.learning_rate}} --registered_model_name ${{inputs.registered_model_name}}\",\n", + " environment=\"aml-scikit-learn@latest\",\n", + " compute=\"cpu-cluster\", # Remove this line to use serverless compute\n", + " display_name=\"credit_default_prediction\",\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Submit the job \n", + "\n", + "It's now time to submit the job to run in Azure Machine Learning studio. This time you'll use `create_or_update` on `ml_client`. `ml_client` is a client class that allows you to connect to your Azure subscription using Python and interact with Azure Machine Learning services. `ml_client` allows you to submit your jobs using Python." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "gather": { + "logged": 1677262345449 }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Next Steps\n", - "Learn about deploying a model \n", - "\n", - "[Deploy a model](https://learn.microsoft.com/articles/machine-learning/tutorial-deploy-model).\n", - "\n", - "This tutorial used an online data file. To learn more about other ways to access data, see [Tutorial: Upload, access and explore your data in Azure Machine Learning](https://learn.microsoft.com/articles/machine-learning/tutorial-explore-data).\n", - "\n", - "If you would like to learn more about different ways to train models in Azure Machine Learning, see [What is automated machine learning (AutoML)?](https://learn.microsoft.com/articles/machine-learning/concept-automated-ml). Automated ML is a supplemental tool to reduce the amount of time a data scientist spends finding a model that works best with their data.\n", - "\n", - "If you would like more examples similar to this tutorial, see [**Samples**](https://learn.microsoft.com/articles/machine-learning/quickstart-create-resources#learn-from-sample-notebooks) section of studio. These same samples are available at our [GitHub examples page.](https://github.com/Azure/azureml-examples) The examples include complete Python Notebooks that you can run code and learn to train a model. You can modify and run existing scripts from the samples, containing scenarios including classification, natural language processing, and anomaly detection. \n", - "\n", - "To train models by creating your own custom environments using a [docker image,](how-to-manage-environments-v2.md#create-an-environment-from-a-docker-build-context)." 
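That last sentence only links to the docs; as a rough sketch under stated assumptions, a custom environment can also be registered from a Docker build context rather than from a conda file. Here `docker-context/` is a hypothetical local folder containing a Dockerfile, the environment name is invented, `BuildContext` is assumed to be available in your azure-ai-ml version, and `ml_client` is the workspace handle created earlier in the tutorial.

```python
from azure.ai.ml.entities import Environment, BuildContext

# Rough sketch: register an environment from a local Docker build context instead of a
# conda file. "docker-context/" is a hypothetical folder containing a Dockerfile, and
# BuildContext is assumed to be available in your azure-ai-ml version.
docker_env = Environment(
    name="aml-docker-context-env",  # hypothetical name
    description="Custom environment built from a Docker build context",
    build=BuildContext(path="docker-context/"),
)

# ml_client is the workspace handle created earlier in the tutorial.
docker_env = ml_client.environments.create_or_update(docker_env)
print(f"Registered {docker_env.name}, version {docker_env.version}")
```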
- ] - } + "name": "create_job" + }, + "outputs": [], + "source": [ + "ml_client.create_or_update(job)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## View job output and wait for job completion\n", + "\n", + "View the job in Azure Machine Learning studio by selecting the link in the output of the previous cell. The output of this job will look like this in the Azure Machine Learning studio. Explore the tabs for various details like metrics, outputs etc. Once completed, the job will register a model in your workspace as a result of training. \n", + "\n", + "![Screenshot shows the overview page for the job.](./media/view-job.gif)\n", + "\n", + "> [!IMPORTANT]\n", + "> Wait until the status of the job is complete before returning to this notebook to continue. The job will take 2 to 3 minutes to run. It could take longer (up to 10 minutes) if the compute cluster has been scaled down to zero nodes and custom environment is still building.\n", + "\n", + "When you run the cell, the notebook output shows a link to the job's details page on Azure Studio. Alternatively, you can also select Jobs on the left navigation menu. A job is a grouping of many runs from a specified script or piece of code. Information for the run is stored under that job. The details page gives an overview of the job, the time it took to run, when it was created, etc. The page also has tabs to other information about the job such as metrics, Outputs + logs, and code. Listed below are the tabs available in the job's details page:\n", + "\n", + "* Overview: The overview section provides basic information about the job, including its status, start and end times, and the type of job that was run\n", + "* Inputs: The input section lists the data and code that were used as inputs for the job. This section can include datasets, scripts, environment configurations, and other resources that were used during training. \n", + "* Outputs + logs: The Outputs + logs tab contains logs generated while the job was running. This tab assists in troubleshooting if anything goes wrong with your training script or model creation.\n", + "* Metrics: The metrics tab showcases key performance metrics from your model such as training score, f1 score, and precision score. " + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clean up resources\n", + "\n", + "If you plan to continue now to other tutorials, skip to [Next steps](#next-steps).\n", + "\n", + "### Stop compute instance\n", + "\n", + "If you're not going to use it now, stop the compute instance:\n", + "\n", + "1. In the studio, in the left navigation area, select **Compute**.\n", + "1. In the top tabs, select **Compute instances**\n", + "1. Select the compute instance in the list.\n", + "1. On the top toolbar, select **Stop**.\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Next Steps\n", + "Learn about deploying a model \n", + "\n", + "[Deploy a model](https://learn.microsoft.com/articles/machine-learning/tutorial-deploy-model).\n", + "\n", + "This tutorial used an online data file. 
To learn more about other ways to access data, see [Tutorial: Upload, access and explore your data in Azure Machine Learning](https://learn.microsoft.com/articles/machine-learning/tutorial-explore-data).\n", + "\n", + "If you would like to learn more about different ways to train models in Azure Machine Learning, see [What is automated machine learning (AutoML)?](https://learn.microsoft.com/articles/machine-learning/concept-automated-ml). Automated ML is a supplemental tool to reduce the amount of time a data scientist spends finding a model that works best with their data.\n", + "\n", + "If you would like more examples similar to this tutorial, see [**Samples**](https://learn.microsoft.com/articles/machine-learning/quickstart-create-resources#learn-from-sample-notebooks) section of studio. These same samples are available at our [GitHub examples page.](https://github.com/Azure/azureml-examples) The examples include complete Python Notebooks that you can run code and learn to train a model. You can modify and run existing scripts from the samples, containing scenarios including classification, natural language processing, and anomaly detection. \n", + "\n", + "To train models by creating your own custom environments using a [docker image,](how-to-manage-environments-v2.md#create-an-environment-from-a-docker-build-context)." + ] + } + ], + "metadata": { + "categories": [ + "SDK v2", + "tutorials" ], - "metadata": { - "categories": [ - "SDK v2", - "tutorials" - ], - "kernel_info": { - "name": "python310-sdkv2" - }, - "kernelspec": { - "display_name": "Python 3.10 - SDK v2", - "language": "python", - "name": "python310-sdkv2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.6" - }, - "microsoft": { - "host": { - "AzureML": { - "notebookHasBeenCompleted": true - } - }, - "ms_spell_check": { - "ms_spell_check_language": "en" - } - }, - "nteract": { - "version": "nteract-front-end@1.0.0" + "kernel_info": { + "name": "python310-sdkv2" + }, + "kernelspec": { + "display_name": "Python 3.10 - SDK v2", + "language": "python", + "name": "python310-sdkv2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + }, + "microsoft": { + "host": { + "AzureML": { + "notebookHasBeenCompleted": true } + }, + "ms_spell_check": { + "ms_spell_check_language": "en" + } }, - "nbformat": 4, - "nbformat_minor": 2 + "nteract": { + "version": "nteract-front-end@1.0.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 }
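A final sketch, assuming `ml_client` and `job` are the objects built in train-model.ipynb above: rather than watching the studio page until the job completes, you can block in the notebook by streaming the job's logs and then checking its terminal status. The `returned_job` name is hypothetical, and `studio_url` is assumed to be the attribute carrying the details-page link.

```python
# Sketch only - assumes ml_client and job are the objects created earlier in train-model.ipynb.
returned_job = ml_client.create_or_update(job)      # submit, exactly as in the tutorial
print(f"Details page: {returned_job.studio_url}")   # same link the notebook output shows

# Block until the run reaches a terminal state, echoing its logs into the notebook output.
ml_client.jobs.stream(returned_job.name)

# Confirm the final status (for example "Completed") before moving on to deployment.
print(ml_client.jobs.get(returned_job.name).status)
```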