Added validation for forecast automl notebooks #2132

Merged · 6 commits · Mar 27, 2023
@@ -0,0 +1,10 @@
{
    "validations": [
        {
            "name": "check notebook output",
            "params": {
                "check": "warning"
            }
        }
    ]
}
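Each of these new JSON files declares the validations to run against the matching notebook's executed output. As a rough sketch, a small runner could map each entry onto the validation scripts that the workflow steps below invoke (the runner itself is hypothetical; the script paths and flag names are taken from the workflow diff):

import json
import subprocess
import sys

# Hypothetical runner; the script paths mirror the workflow steps below.
SCRIPTS = {
    "check notebook output": "v1/scripts/validation/check_notebook_output.py",
    "check v2 experiment result": "v1/scripts/validation/check_v2_experiment_result.py",
}

def run_validations(config_path, notebook_name, folder="."):
    with open(config_path) as f:
        config = json.load(f)
    for validation in config["validations"]:
        cmd = [sys.executable, SCRIPTS[validation["name"]],
               "--file_name", notebook_name, "--folder", folder]
        for key, value in validation["params"].items():
            # A space-separated value becomes multiple CLI tokens,
            # e.g. "check": "warning stderr" -> --check warning stderr
            cmd += ["--" + key] + str(value).split()
        subprocess.run(cmd, check=True)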
@@ -0,0 +1,21 @@
{
    "validations": [
        {
            "name": "check notebook output",
            "params": {
                "check": "warning stderr"
            }
        },
        {
            "name": "check v2 experiment result",
            "params": {
                "experiment_name": "dpv2-forecasting-experiment",
                "minimum_median_score": "0.01",
                "maximum_median_score": "0.3",
                "metric_name": "normalized_root_mean_squared_error",
                "absolute_minimum_score": "0.0",
                "absolute_maximum_score": "1.0"
            }
        }
    ]
}
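The bounds in this config suggest a two-level check: every child run's normalized_root_mean_squared_error must fall inside the absolute bounds, while the median across runs must land in the tighter expected band. A minimal sketch of that assertion logic, inferred from the parameter names (the authoritative implementation is check_v2_experiment_result.py):

import statistics

def check_scores(scores, minimum_median_score, maximum_median_score,
                 absolute_minimum_score, absolute_maximum_score):
    # Every individual score must stay inside the hard absolute bounds.
    for score in scores:
        assert absolute_minimum_score <= score <= absolute_maximum_score
    # The median across child runs must land in the tighter expected band.
    assert minimum_median_score <= statistics.median(scores) <= maximum_median_score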
@@ -0,0 +1,22 @@
{
    "validations": [
        {
            "name": "check notebook output",
            "params": {
                "check": "warning"
            }
        },
        {
            "name": "check v2 experiment result",
            "params": {
                "experiment_name": "dpv2-bike-test",
                "minimum_median_score": "0.01",
                "maximum_median_score": "0.3",
                "metric_name": "normalized_root_mean_squared_error",
                "absolute_minimum_score": "0.0",
                "absolute_maximum_score": "1.0"
            }
        }
    ]
}

@@ -69,6 +69,13 @@ jobs:
[ -f "../../.azureml/config" ] && cat "../../.azureml/config";
papermill -k python -p compute_name automl-cpu-cluster auto-ml-forecasting-github-dau.ipynb auto-ml-forecasting-github-dau.output.ipynb
working-directory: sdk/python/jobs/automl-standalone-jobs/automl-forecasting-github-dau
- name: check notebook output
run: |
python ${{ github.workspace }}/v1/scripts/validation/check_notebook_output.py \
--file_name auto-ml-forecasting-github-dau.output.ipynb \
--folder . \
--check warning \
working-directory: sdk/python/jobs/automl-standalone-jobs/automl-forecasting-github-dau
- name: upload notebook's working folder as an artifact
if: ${{ always() }}
uses: actions/upload-artifact@v2
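Each workflow follows an execute-then-validate pattern: papermill runs the notebook and writes an executed .output.ipynb copy, which the new validation steps then inspect. The same flow can be reproduced locally through papermill's Python API (notebook and compute names taken from the step above):

import papermill as pm

# Execute the notebook with the same parameter the workflow passes;
# the .output.ipynb copy is what the validation script then checks.
pm.execute_notebook(
    "auto-ml-forecasting-github-dau.ipynb",
    "auto-ml-forecasting-github-dau.output.ipynb",
    kernel_name="python",
    parameters={"compute_name": "automl-cpu-cluster"},
)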
@@ -71,6 +71,25 @@ jobs:
[ -f "../../.azureml/config" ] && cat "../../.azureml/config";
papermill -k python -p compute_name automl-cpu-cluster automl-forecasting-orange-juice-sales-mlflow.ipynb automl-forecasting-orange-juice-sales-mlflow.output.ipynb
working-directory: sdk/python/jobs/automl-standalone-jobs/automl-forecasting-orange-juice-sales
- name: check notebook output
run: |
python ${{ github.workspace }}/v1/scripts/validation/check_notebook_output.py \
--file_name automl-forecasting-orange-juice-sales-mlflow.output.ipynb \
--folder . \
--check warning stderr \
working-directory: sdk/python/jobs/automl-standalone-jobs/automl-forecasting-orange-juice-sales
- name: check v2 experiment result
run: |
python ${{ github.workspace }}/v1/scripts/validation/check_v2_experiment_result.py \
--file_name automl-forecasting-orange-juice-sales-mlflow.output.ipynb \
--folder . \
--experiment_name dpv2-forecasting-experiment \
--minimum_median_score 0.01 \
--maximum_median_score 0.3 \
--metric_name normalized_root_mean_squared_error \
--absolute_minimum_score 0.0 \
--absolute_maximum_score 1.0 \
working-directory: sdk/python/jobs/automl-standalone-jobs/automl-forecasting-orange-juice-sales
- name: upload notebook's working folder as an artifact
if: ${{ always() }}
uses: actions/upload-artifact@v2
@@ -71,6 +71,25 @@ jobs:
[ -f "../../.azureml/config" ] && cat "../../.azureml/config";
papermill -k python -p compute_name automl-cpu-cluster auto-ml-forecasting-bike-share.ipynb auto-ml-forecasting-bike-share.output.ipynb
working-directory: sdk/python/jobs/automl-standalone-jobs/automl-forecasting-task-bike-share
- name: check notebook output
run: |
python ${{ github.workspace }}/v1/scripts/validation/check_notebook_output.py \
--file_name auto-ml-forecasting-bike-share.output.ipynb \
--folder . \
--check warning \
working-directory: sdk/python/jobs/automl-standalone-jobs/automl-forecasting-task-bike-share
- name: check v2 experiment result
run: |
python ${{ github.workspace }}/v1/scripts/validation/check_v2_experiment_result.py \
--file_name auto-ml-forecasting-bike-share.output.ipynb \
--folder . \
--experiment_name dpv2-bike-test \
--minimum_median_score 0.01 \
--maximum_median_score 0.3 \
--metric_name normalized_root_mean_squared_error \
--absolute_minimum_score 0.0 \
--absolute_maximum_score 1.0 \
working-directory: sdk/python/jobs/automl-standalone-jobs/automl-forecasting-task-bike-share
- name: upload notebook's working folder as an artifact
if: ${{ always() }}
uses: actions/upload-artifact@v2
@@ -445,6 +445,7 @@
"outputs": [],
"source": [
"from mlflow.tracking.client import MlflowClient\n",
"from mlflow.artifacts import download_artifacts\n",
"\n",
"# Initialize MLFlow client\n",
"mlflow_client = MlflowClient()"
@@ -559,8 +560,8 @@
"outputs": [],
"source": [
"# Download run's artifacts/outputs\n",
"local_path = mlflow_client.download_artifacts(\n",
" best_run.info.run_id, \"outputs\", local_dir\n",
"local_path = download_artifacts(\n",
" run_id=best_run.info.run_id, artifact_path=\"outputs\", dst_path=local_dir\n",
")\n",
"print(\"Artifacts downloaded in: {}\".format(local_path))\n",
"print(\"Artifacts: {}\".format(os.listdir(local_path)))"
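The notebook edits above track an MLflow API change: MlflowClient.download_artifacts was deprecated in MLflow 2.x in favor of the module-level mlflow.artifacts.download_artifacts, which takes keyword arguments. A self-contained sketch of the new call (the run ID below is a placeholder):

from mlflow.artifacts import download_artifacts

run_id = "0123456789abcdef"  # placeholder; in the notebooks this is best_run.info.run_id
# Download the run's "outputs" artifact folder into the current directory.
local_path = download_artifacts(run_id=run_id, artifact_path="outputs", dst_path=".")
print("Artifacts downloaded in: {}".format(local_path))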
@@ -632,6 +632,7 @@
"outputs": [],
"source": [
"from mlflow.tracking.client import MlflowClient\n",
"from mlflow.artifacts import download_artifacts\n",
"\n",
"# Initialize MLFlow client\n",
"mlflow_client = MlflowClient()"
@@ -744,8 +745,8 @@
"outputs": [],
"source": [
"# Download run's artifacts/outputs\n",
"local_path = mlflow_client.download_artifacts(\n",
" best_run.info.run_id, \"outputs\", local_dir\n",
"local_path = download_artifacts(\n",
" run_id=best_run.info.run_id, artifact_path=\"outputs\", dst_path=local_dir\n",
")\n",
"print(\"Artifacts downloaded in: {}\".format(local_path))\n",
"print(\"Artifacts: {}\".format(os.listdir(local_path)))"
@@ -500,6 +500,7 @@
"outputs": [],
"source": [
"from mlflow.tracking.client import MlflowClient\n",
"from mlflow.artifacts import download_artifacts\n",
"\n",
"# Initialize MLFlow client\n",
"mlflow_client = MlflowClient()"
@@ -613,8 +614,8 @@
"outputs": [],
"source": [
"# Download run's artifacts/outputs\n",
"local_path = mlflow_client.download_artifacts(\n",
" best_run.info.run_id, \"outputs\", local_dir\n",
"local_path = download_artifacts(\n",
" run_id=best_run.info.run_id, artifact_path=\"outputs\", dst_path=local_dir\n",
")\n",
"print(\"Artifacts downloaded in: {}\".format(local_path))\n",
"print(\"Artifacts: {}\".format(os.listdir(local_path)))"
2 changes: 2 additions & 0 deletions v1/scripts/validation/check_notebook_output.py
@@ -50,6 +50,8 @@
"Check: endpoint",
"data_collector is not a known attribute of class",
"Readonly attribute primary_metric will be ignored",
"Downloading artifact ",
"Warnings:",
]

with open(full_name, "r") as notebook_file:
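The two new allow-list entries plausibly cover benign output introduced by the notebook changes: MLflow's new download API reports artifact-download progress on stderr, and AutoML runs can print aggregated warning summaries. A sketch of how such an allow-list filters flagged lines (structure assumed; the real logic lives in check_notebook_output.py):

ALLOWED = [
    "Downloading artifact ",
    "Warnings:",
]

def find_violations(flagged_lines):
    # Keep only lines that match no known-benign pattern.
    return [
        line for line in flagged_lines
        if not any(allowed in line for allowed in ALLOWED)
    ]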
2 changes: 1 addition & 1 deletion v1/scripts/validation/check_v2_experiment_result.py
@@ -103,7 +103,7 @@ def checkExperimentResult(
     for iteration in children:
         iteration_status = iteration.info.status
         print(iteration.info.run_id + ": " + iteration_status)
-        assert iteration_status == "FINISHED" or iteration_status == "CANCELED"
+        assert iteration_status in ["FINISHED", "CANCELED", "KILLED"]
         if iteration_status == "FINISHED":
             metrics = iteration.data.metrics
             print(metric_name + " = " + str(metrics[metric_name]))
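Accepting KILLED alongside CANCELED keeps the check from failing when AutoML terminates child runs early (for example when an exit criterion is met). Only FINISHED iterations contribute metrics either way, so in the loop above the metric collection effectively reduces to:

scores = [
    iteration.data.metrics[metric_name]
    for iteration in children
    if iteration.info.status == "FINISHED"
]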