Skip to content

Commit 4de475a

Browse files
authored
Remove BuildId parameter (#214)
Remove the need to add the BuildId parameter and tagging logic to the train/evaluate scripts. The BuildId and BuildUri tags are instead specified when the experiment is submitted. The register script reads BuildId and BuildUri from the tags on its parent pipeline run.
1 parent c773db9 commit 4de475a

File tree

8 files changed

+50
-85
lines changed

8 files changed

+50
-85
lines changed

Diff for: .pipelines/diabetes_regression-ci.yml

+3-1
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ stages:
5050
- stage: 'Trigger_AML_Pipeline'
5151
displayName: 'Train model'
5252
condition: and(succeeded(), not(variables['MODEL_BUILD_ID']))
53+
variables:
54+
BUILD_URI: '$(SYSTEM.COLLECTIONURI)$(SYSTEM.TEAMPROJECT)/_build/results?buildId=$(BUILD.BUILDID)'
5355
jobs:
5456
- job: "Get_Pipeline_ID"
5557
condition: and(succeeded(), eq(coalesce(variables['auto-trigger-training'], 'true'), 'true'))
@@ -85,7 +87,7 @@ stages:
8587
azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
8688
PipelineId: '$(AMLPIPELINE_ID)'
8789
ExperimentName: '$(EXPERIMENT_NAME)'
88-
PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)"}'
90+
PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)"}, "tags": {"BuildId": "$(Build.BuildId)", "BuildUri": "$(BUILD_URI)"}, "StepTags": {"BuildId": "$(Build.BuildId)", "BuildUri": "$(BUILD_URI)"}'
8991
- job: "Training_Run_Report"
9092
dependsOn: "Run_ML_Pipeline"
9193
condition: always()

Diff for: diabetes_regression/evaluate/evaluate_model.py

+2-13
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
2424
POSSIBILITY OF SUCH DAMAGE.
2525
"""
26-
import os
2726
from azureml.core import Run
2827
import argparse
2928
import traceback
@@ -74,11 +73,7 @@
7473
run_id = 'amlcompute'
7574

7675
parser = argparse.ArgumentParser("evaluate")
77-
parser.add_argument(
78-
"--build_id",
79-
type=str,
80-
help="The Build ID of the build triggering this pipeline run",
81-
)
76+
8277
parser.add_argument(
8378
"--run_id",
8479
type=str,
@@ -99,19 +94,13 @@
9994
)
10095

10196
args = parser.parse_args()
102-
if (args.build_id is not None):
103-
build_id = args.build_id
10497
if (args.run_id is not None):
10598
run_id = args.run_id
10699
if (run_id == 'amlcompute'):
107100
run_id = run.parent.id
108101
model_name = args.model_name
109102
metric_eval = "mse"
110-
run.tag("BuildId", value=build_id)
111-
builduri_base = os.environ.get("BUILDURI_BASE")
112-
if (builduri_base is not None):
113-
build_uri = builduri_base + build_id
114-
run.tag("BuildUri", value=build_uri)
103+
115104
allow_run_cancel = args.allow_run_cancel
116105
# Parameterize the matrices on which the models should be compared
117106
# Add golden data set on which all the model performance can be evaluated

Diff for: diabetes_regression/register/register_model.py

+30-30
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ def main():
4343
experiment_name = os.environ.get("EXPERIMENT_NAME")
4444
resource_group = os.environ.get("RESOURCE_GROUP")
4545
subscription_id = os.environ.get("SUBSCRIPTION_ID")
46-
build_id = os.environ.get('BUILD_BUILDID')
4746
# run_id useful to query previous runs
4847
run_id = "bd184a18-2ac8-4951-8e78-e290bef3b012"
4948
aml_workspace = Workspace.get(
@@ -59,11 +58,6 @@ def main():
5958
run_id = 'amlcompute'
6059

6160
parser = argparse.ArgumentParser("register")
62-
parser.add_argument(
63-
"--build_id",
64-
type=str,
65-
help="The Build ID of the build triggering this pipeline run",
66-
)
6761

6862
parser.add_argument(
6963
"--run_id",
@@ -84,8 +78,6 @@ def main():
8478
)
8579

8680
args = parser.parse_args()
87-
if (args.build_id is not None):
88-
build_id = args.build_id
8981
if (args.run_id is not None):
9082
run_id = args.run_id
9183
if (run_id == 'amlcompute'):
@@ -98,32 +90,40 @@ def main():
9890
model_file = os.path.join(model_path, model_name)
9991
model = joblib.load(model_file)
10092
model_mse = run.parent.get_metrics()["mse"]
93+
parent_tags = run.parent.get_tags()
94+
try:
95+
build_id = parent_tags["BuildId"]
96+
except KeyError:
97+
build_id = None
98+
print("BuildId tag not found on parent run.")
99+
print("Tags present: {parent_tags}")
100+
try:
101+
build_uri = parent_tags["BuildUri"]
102+
except KeyError:
103+
build_uri = None
104+
print("BuildUri tag not found on parent run.")
105+
print("Tags present: {parent_tags}")
101106

102107
if (model is not None):
103108
if (build_id is None):
104-
register_aml_model(model_file, model_name, exp, run_id)
109+
register_aml_model(model_file, model_name, model_mse, exp, run_id)
110+
elif (build_uri is None):
111+
register_aml_model(
112+
model_file,
113+
model_name,
114+
model_mse,
115+
exp,
116+
run_id,
117+
build_id)
105118
else:
106-
run.tag("BuildId", value=build_id)
107-
builduri_base = os.environ.get("BUILDURI_BASE")
108-
if (builduri_base is not None):
109-
build_uri = builduri_base + build_id
110-
run.tag("BuildUri", value=build_uri)
111-
register_aml_model(
112-
model_file,
113-
model_name,
114-
model_mse,
115-
exp,
116-
run_id,
117-
build_id,
118-
build_uri)
119-
else:
120-
register_aml_model(
121-
model_file,
122-
model_name,
123-
model_mse,
124-
exp,
125-
run_id,
126-
build_id)
119+
register_aml_model(
120+
model_file,
121+
model_name,
122+
model_mse,
123+
exp,
124+
run_id,
125+
build_id,
126+
build_uri)
127127
else:
128128
print("Model not found. Skipping model registration.")
129129
sys.exit(0)

Diff for: diabetes_regression/training/train.py

+1-15
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,7 @@ def main():
5050
print("Running train.py")
5151

5252
parser = argparse.ArgumentParser("train")
53-
parser.add_argument(
54-
"--build_id",
55-
type=str,
56-
help="The build ID of the build triggering this pipeline run",
57-
)
53+
5854
parser.add_argument(
5955
"--model_name",
6056
type=str,
@@ -70,12 +66,10 @@ def main():
7066

7167
args = parser.parse_args()
7268

73-
print("Argument [build_id]: %s" % args.build_id)
7469
print("Argument [model_name]: %s" % args.model_name)
7570
print("Argument [step_output]: %s" % args.step_output)
7671

7772
model_name = args.model_name
78-
build_id = args.build_id
7973
step_output_path = args.step_output
8074

8175
print("Getting training parameters")
@@ -119,15 +113,7 @@ def main():
119113
output_path = os.path.join('outputs', model_name)
120114
joblib.dump(value=reg, filename=output_path)
121115

122-
# Add properties to identify this specific training run
123-
run.parent.tag("BuildId", value=build_id)
124-
run.tag("BuildId", value=build_id)
125116
run.tag("run_type", value="train")
126-
builduri_base = os.environ.get("BUILDURI_BASE")
127-
if (builduri_base is not None):
128-
build_uri = builduri_base + build_id
129-
run.tag("BuildUri", value=build_uri)
130-
run.parent.tag("BuildUri", value=build_uri)
131117
print(f"tags now present for run: {run.tags}")
132118

133119
run.complete()

Diff for: ml_service/pipelines/diabetes_regression_build_train_pipeline.py

-9
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,9 @@ def main():
3838

3939
run_config = RunConfiguration()
4040
run_config.environment = environment
41-
if (e.collection_uri is not None and e.teamproject_name is not None):
42-
builduri_base = e.collection_uri + e.teamproject_name
43-
builduri_base = builduri_base + "/_build/results?buildId="
44-
run_config.environment.environment_variables["BUILDURI_BASE"] = builduri_base # NOQA: E501
4541

4642
model_name_param = PipelineParameter(
4743
name="model_name", default_value=e.model_name)
48-
build_id_param = PipelineParameter(
49-
name="build_id", default_value=e.build_id)
5044

5145
# Get dataset name
5246
dataset_name = e.dataset_name
@@ -98,7 +92,6 @@ def main():
9892
inputs=[dataset.as_named_input('training_data')],
9993
outputs=[pipeline_data],
10094
arguments=[
101-
"--build_id", build_id_param,
10295
"--model_name", model_name_param,
10396
"--step_output", pipeline_data
10497
],
@@ -113,7 +106,6 @@ def main():
113106
compute_target=aml_compute,
114107
source_directory=e.sources_directory_train,
115108
arguments=[
116-
"--build_id", build_id_param,
117109
"--model_name", model_name_param,
118110
"--allow_run_cancel", e.allow_run_cancel,
119111
],
@@ -129,7 +121,6 @@ def main():
129121
source_directory=e.sources_directory_train,
130122
inputs=[pipeline_data],
131123
arguments=[
132-
"--build_id", build_id_param,
133124
"--model_name", model_name_param,
134125
"--step_input", pipeline_data,
135126
],

Diff for: ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py

-4
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,6 @@ def main():
3434
aml_workspace, e.aml_env_name, create_new=False) # NOQA: E501
3535
run_config = RunConfiguration()
3636
run_config.environment = environment
37-
if (e.collection_uri is not None and e.teamproject_name is not None):
38-
builduri_base = e.collection_uri + e.teamproject_name
39-
builduri_base = builduri_base + "/_build/results?buildId="
40-
run_config.environment.environment_variables["BUILDURI_BASE"] = builduri_base # NOQA: E501
4137

4238
train_step = PythonScriptStep(
4339
name="Train Model",

Diff for: ml_service/pipelines/run_train_pipeline.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from azureml.pipeline.core import PublishedPipeline
2-
from azureml.core import Workspace
2+
from azureml.core import Experiment, Workspace
33
import argparse
44
from ml_service.util.env_variables import Env
55

@@ -55,10 +55,16 @@ def main():
5555

5656
if(args.skip_train_execution is False):
5757
pipeline_parameters = {"model_name": e.model_name}
58-
run = published_pipeline.submit(
59-
aml_workspace,
60-
e.experiment_name,
61-
pipeline_parameters)
58+
tags = {"BuildId": e.build_id}
59+
if (e.build_uri is not None):
60+
tags["BuildUri"] = e.build_uri
61+
experiment = Experiment(
62+
workspace=aml_workspace,
63+
name=e.experiment_name)
64+
run = experiment.submit(
65+
published_pipeline,
66+
tags=tags,
67+
pipeline_parameters=pipeline_parameters)
6268

6369
print("Pipeline run initiated ", run.id)
6470

Diff for: ml_service/util/env_variables.py

+3-8
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,7 @@ def __init__(self):
3838
self._image_name = os.environ.get('IMAGE_NAME')
3939
self._db_cluster_id = os.environ.get("DB_CLUSTER_ID")
4040
self._score_script = os.environ.get("SCORE_SCRIPT")
41-
self._collection_uri = os.environ.get("SYSTEM_COLLECTIONURI")
42-
self._teamproject_name = os.environ.get("SYSTEM_TEAMPROJECT")
41+
self._build_uri = os.environ.get("BUILD_URI")
4342
self._dataset_name = os.environ.get("DATASET_NAME")
4443
self._run_evaluation = os.environ.get("RUN_EVALUATION", "true")
4544
self._allow_run_cancel = os.environ.get(
@@ -139,12 +138,8 @@ def score_script(self):
139138
return self._score_script
140139

141140
@property
142-
def collection_uri(self):
143-
return self._collection_uri
144-
145-
@property
146-
def teamproject_name(self):
147-
return self._teamproject_name
141+
def build_uri(self):
142+
return self._build_uri
148143

149144
@property
150145
def dataset_name(self):

0 commit comments

Comments
 (0)