Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions bootstrap/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,7 @@ def rename_dir(self):
def delete_dir(self):
# Delete unwanted directories
dirs = ["docs", r"diabetes_regression"]
if (platform.system() == "Windows"):
cmd = 'rmdir /S /Q "{}"'
else:
cmd = 'rm -r "{}"'
cmd = 'rmdir /S /Q "{}"' if (platform.system() == "Windows") else 'rm -r "{}"'
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function Helper.delete_dir refactored with the following changes:

for dir in dirs:
os.system(cmd.format(os.path.join(self._project_directory, os.path.normpath(dir)))) # NOQA: E501

Expand Down Expand Up @@ -151,5 +148,5 @@ def main(args):
return 0


if '__main__' == __name__:
if __name__ == '__main__':
Comment on lines -154 to +151
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lines 154-154 refactored with the following changes:

sys.exit(main(sys.argv))
13 changes: 4 additions & 9 deletions diabetes_regression/evaluate/evaluate_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
"""

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lines 73-136 refactored with the following changes:

from azureml.core import Run
import argparse
import traceback
Expand Down Expand Up @@ -70,8 +71,6 @@
# if you would like to use Offline mode
exp = run.experiment
ws = run.experiment.workspace
run_id = 'amlcompute'

parser = argparse.ArgumentParser("evaluate")

parser.add_argument(
Expand All @@ -94,8 +93,7 @@
)

args = parser.parse_args()
if (args.run_id is not None):
run_id = args.run_id
run_id = args.run_id if (args.run_id is not None) else 'amlcompute'
if (run_id == 'amlcompute'):
run_id = run.parent.id
model_name = args.model_name
Expand Down Expand Up @@ -129,11 +127,8 @@
run.parent.cancel()
else:
print(
"Current Production model {}: {}, ".format(
metric_eval, production_model_mse) +
"New trained model {}: {}".format(
metric_eval, new_model_mse
)
f"Current Production model {metric_eval}: {production_model_mse}, "
+ f"New trained model {metric_eval}: {new_model_mse}"
)

if (new_model_mse < production_model_mse):
Expand Down
9 changes: 3 additions & 6 deletions diabetes_regression/register/register_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def main():
print(f"Could not find {tag} metric on parent run.")

# load the model
print("Loading model from " + model_path)
print(f"Loading model from {model_path}")
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function main refactored with the following changes:

model_file = os.path.join(model_path, model_name)
model = joblib.load(model_file)
parent_tags = run.parent.get_tags()
Expand Down Expand Up @@ -183,7 +183,7 @@ def register_aml_model(
tagsValue = {"area": "diabetes_regression",
"run_id": run_id,
"experiment_name": exp.name}
tagsValue.update(model_tags)
tagsValue |= model_tags
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function register_aml_model refactored with the following changes:

if (build_id != 'none'):
model_already_registered(model_name, exp, run_id)
tagsValue["BuildId"] = build_id
Expand All @@ -199,10 +199,7 @@ def register_aml_model(
Dataset.get_by_id(exp.workspace, dataset_id))])
os.chdir("..")
print(
"Model registered: {} \nModel Description: {} "
"\nModel Version: {}".format(
model.name, model.description, model.version
)
f"Model registered: {model.name} \nModel Description: {model.description} \nModel Version: {model.version}"
)
except Exception:
traceback.print_exc(limit=None, file=None, chain=True)
Expand Down
16 changes: 8 additions & 8 deletions diabetes_regression/scoring/parallel_batchscore.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def parse_args() -> List[str]:
if itm == "--model_name"
]

if len(model_name_param) == 0:
if not model_name_param:
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function parse_args refactored with the following changes:

raise ValueError(
"Model name is required but no model name parameter was passed to the script" # NOQA: E501
)
Expand All @@ -67,8 +67,8 @@ def parse_args() -> List[str]:
]
model_version = (
None
if len(model_version_param) < 1
or len(model_version_param[0][1].strip()) == 0 # NOQA: E501
if not model_version_param
or len(model_version_param[0][1].strip()) == 0
else model_version_param[0][1]
)

Expand All @@ -79,8 +79,8 @@ def parse_args() -> List[str]:
]
model_tag_name = (
None
if len(model_tag_name_param) < 1
or len(model_tag_name_param[0][1].strip()) == 0 # NOQA: E501
if not model_tag_name_param
or len(model_tag_name_param[0][1].strip()) == 0
else model_tag_name_param[0][1]
)

Expand All @@ -91,7 +91,7 @@ def parse_args() -> List[str]:
]
model_tag_value = (
None
if len(model_tag_value_param) < 1
if not model_tag_value_param
or len(model_tag_name_param[0][1].strip()) == 0
else model_tag_value_param[0][1]
)
Expand Down Expand Up @@ -120,9 +120,9 @@ def init():
modelpath = Model.get_model_path(
model_name=amlmodel.name, version=amlmodel.version)
model = joblib.load(modelpath)
print("Loaded model {}".format(model_filter[0]))
print(f"Loaded model {model_filter[0]}")
except Exception as ex:
print("Error: {}".format(ex))
print(f"Error: {ex}")
Comment on lines -123 to +125
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function init refactored with the following changes:



def run(mini_batch: pd.DataFrame) -> pd.DataFrame:
Expand Down
10 changes: 3 additions & 7 deletions diabetes_regression/scoring/parallel_batchscore_copyoutput.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,9 @@ def parse_args():


def copy_output(args):
print("Output : {}".format(args.output_path))
print(f"Output : {args.output_path}")

accounturl = "https://{}.blob.core.windows.net".format(
args.scoring_datastore
) # NOQA E501
accounturl = f"https://{args.scoring_datastore}.blob.core.windows.net"
Comment on lines -45 to +47
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function copy_output refactored with the following changes:

This removes the following comments ( why? ):

# NOQA E501


containerclient = ContainerClient(
accounturl, args.score_container, args.scoring_datastore_key
Expand All @@ -61,9 +59,7 @@ def copy_output(args):
.replace(".", "_")
) # noqa E501
destfilenameparts = args.scoring_output_filename.split(".")
destblobname = "{}/{}_{}.{}".format(
destfolder, destfilenameparts[0], filetime, destfilenameparts[1]
)
destblobname = f"{destfolder}/{destfilenameparts[0]}_{filetime}.{destfilenameparts[1]}"

destblobclient = containerclient.get_blob_client(destblobname)
with open(
Expand Down
10 changes: 5 additions & 5 deletions diabetes_regression/training/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,10 @@ def split_data(df):

X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=0)
data = {"train": {"X": X_train, "y": y_train},
"test": {"X": X_test, "y": y_test}}
return data
return {
"train": {"X": X_train, "y": y_train},
"test": {"X": X_test, "y": y_test},
}
Comment on lines -41 to +44
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function split_data refactored with the following changes:



# Train the model, return the model
Expand All @@ -54,8 +55,7 @@ def train_model(data, ridge_args):
def get_model_metrics(model, data):
preds = model.predict(data["test"]["X"])
mse = mean_squared_error(preds, data["test"]["y"])
metrics = {"mse": mse}
return metrics
return {"mse": mse}
Comment on lines -57 to +58
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function get_model_metrics refactored with the following changes:



def main():
Expand Down
12 changes: 6 additions & 6 deletions diabetes_regression/training/train_aml.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,12 @@ def main():

args = parser.parse_args()

print("Argument [model_name]: %s" % args.model_name)
print("Argument [step_output]: %s" % args.step_output)
print("Argument [dataset_version]: %s" % args.dataset_version)
print("Argument [data_file_path]: %s" % args.data_file_path)
print("Argument [caller_run_id]: %s" % args.caller_run_id)
print("Argument [dataset_name]: %s" % args.dataset_name)
print(f"Argument [model_name]: {args.model_name}")
print(f"Argument [step_output]: {args.step_output}")
print(f"Argument [dataset_version]: {args.dataset_version}")
print(f"Argument [data_file_path]: {args.data_file_path}")
print(f"Argument [caller_run_id]: {args.caller_run_id}")
print(f"Argument [dataset_name]: {args.dataset_name}")
Comment on lines -96 to +101
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function main refactored with the following changes:


model_name = args.model_name
step_output_path = args.step_output
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ def get_or_create_datastore(

datastore = ws.datastores[datastorename]

# the datastore is not registered but we have all details to register it
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function get_or_create_datastore refactored with the following changes:

This removes the following comments ( why? ):

# NOQA: E501
# the datastore is not registered but we have all details to register it

elif (
env.scoring_datastore_access_key is not None
and containername is not None # NOQA: E501
Expand All @@ -86,9 +85,7 @@ def get_or_create_datastore(
)
else:
raise ValueError(
"No existing datastore named {} nor was enough information supplied to create one.".format( # NOQA: E501
datastorename
)
f"No existing datastore named {datastorename} nor was enough information supplied to create one."
)

return datastore
Expand Down Expand Up @@ -161,14 +158,12 @@ def get_fallback_input_dataset(ws: Workspace, env: Env) -> Dataset:
overwrite=False,
)

scoringinputds = (
return (
Dataset.Tabular.from_delimited_files(scoreinputdataref)
.register(ws, env.scoring_dataset_name, create_new_version=True)
.as_named_input(env.scoring_dataset_name)
)

return scoringinputds
Comment on lines -164 to -170
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function get_fallback_input_dataset refactored with the following changes:



def get_output_location(
ws: Workspace, env: Env, outputdatastore: Datastore = None
Expand All @@ -187,21 +182,18 @@ def get_output_location(
:returns: PipelineData wrapping the output datastore
"""

if outputdatastore is None:
output_loc = PipelineData(
return (
PipelineData(
name="defaultoutput", datastore=ws.get_default_datastore()
)
else:
output_loc = PipelineData(
name=outputdatastore.name, datastore=outputdatastore
) # NOQA: E501

return output_loc
if outputdatastore is None
else PipelineData(name=outputdatastore.name, datastore=outputdatastore)
)
Comment on lines -190 to +191
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function get_output_location refactored with the following changes:

This removes the following comments ( why? ):

# NOQA: E501



def get_inputds_outputloc(
ws: Workspace, env: Env
) -> Tuple[Dataset, PipelineData]: # NOQA: E501
) -> Tuple[Dataset, PipelineData]: # NOQA: E501
Comment on lines -204 to +196
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function get_inputds_outputloc refactored with the following changes:

"""
Prepare the input and output for the scoring step. Input is a tabular
dataset wrapped around the scoring data. Output is PipelineData
Expand All @@ -219,13 +211,10 @@ def get_inputds_outputloc(
output_loc = get_output_location(ws, env)
else:
inputdatastore = get_or_create_datastore(
"{}_in".format(env.scoring_datastore_storage_name), ws, env
f"{env.scoring_datastore_storage_name}_in", ws, env
)
outputdatastore = get_or_create_datastore(
"{}_out".format(env.scoring_datastore_storage_name),
ws,
env,
input=False, # NOQA: E501
f"{env.scoring_datastore_storage_name}_out", ws, env, input=False
)
scoringinputds = get_input_dataset(ws, inputdatastore, env)
output_loc = get_output_location(ws, env, outputdatastore)
Expand Down Expand Up @@ -415,9 +404,7 @@ def build_batchscore_pipeline():
name=env.scoring_pipeline_name,
description="Diabetes Batch Scoring Pipeline",
)
pipeline_id_string = "##vso[task.setvariable variable=pipeline_id;isOutput=true]{}".format( # NOQA: E501
published_pipeline.id
)
pipeline_id_string = f"##vso[task.setvariable variable=pipeline_id;isOutput=true]{published_pipeline.id}"
Comment on lines -418 to +407
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function build_batchscore_pipeline refactored with the following changes:

print(pipeline_id_string)
except Exception as e:
print(e)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,8 @@ def main():

if not os.path.exists(file_name):
raise Exception(
'Could not find CSV dataset at "%s". If you have bootstrapped your project, you will need to provide a CSV.' # NOQA: E501
% file_name
) # NOQA: E501
f'Could not find CSV dataset at "{file_name}". If you have bootstrapped your project, you will need to provide a CSV.'
)
Comment on lines -72 to +73
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function main refactored with the following changes:


# Upload file to default datastore in workspace
datatstore = Datastore.get(aml_workspace, datastore_name)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ def main():
train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
train_pipeline.validate()
published_pipeline = train_pipeline.publish(
name=e.pipeline_name + "_with_R_on_DB",
name=f"{e.pipeline_name}_with_R_on_DB",
description="Model training/retraining pipeline",
version=e.build_id
version=e.build_id,
Comment on lines -46 to +48
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function main refactored with the following changes:

)
print(f'Published pipeline: {published_pipeline.name}')
print(f'for build {published_pipeline.version}')
Expand Down
22 changes: 8 additions & 14 deletions ml_service/pipelines/run_parallel_batchscore_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,7 @@ def get_pipeline(pipeline_id, ws: Workspace, env: Env):
] # noqa E501

if scoringpipelinelist.count == 0:
raise Exception(
"No pipeline found matching name:{}".format(env.scoring_pipeline_name) # NOQA: E501
)
raise Exception(f"No pipeline found matching name:{env.scoring_pipeline_name}")
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function get_pipeline refactored with the following changes:

This removes the following comments ( why? ):

# NOQA: E501

else:
# latest published
scoringpipeline = scoringpipelinelist[0]
Expand All @@ -58,16 +56,14 @@ def get_pipeline(pipeline_id, ws: Workspace, env: Env):


def copy_output(step_id: str, env: Env):
accounturl = "https://{}.blob.core.windows.net".format(
env.scoring_datastore_storage_name
accounturl = (
f"https://{env.scoring_datastore_storage_name}.blob.core.windows.net"
)

srcblobname = "azureml/{}/{}_out/parallel_run_step.txt".format(
step_id, env.scoring_datastore_storage_name
)
srcblobname = f"azureml/{step_id}/{env.scoring_datastore_storage_name}_out/parallel_run_step.txt"

srcbloburl = "{}/{}/{}".format(
accounturl, env.scoring_datastore_output_container, srcblobname
srcbloburl = (
f"{accounturl}/{env.scoring_datastore_output_container}/{srcblobname}"
Comment on lines -61 to +66
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function copy_output refactored with the following changes:

)

containerclient = ContainerClient(
Expand All @@ -87,9 +83,7 @@ def copy_output(step_id: str, env: Env):
.replace(".", "_")
) # noqa E501
destfilenameparts = env.scoring_datastore_output_filename.split(".")
destblobname = "{}/{}_{}.{}".format(
destfolder, destfilenameparts[0], filetime, destfilenameparts[1]
)
destblobname = f"{destfolder}/{destfilenameparts[0]}_{filetime}.{destfilenameparts[1]}"

destblobclient = containerclient.get_blob_client(destblobname)
destblobclient.start_copy_from_url(srcbloburl)
Expand Down Expand Up @@ -127,7 +121,7 @@ def run_batchscore_pipeline():
copy_output(list(run.get_steps())[0].id, env)

except Exception as ex:
print("Error: {}".format(ex))
print(f"Error: {ex}")
Comment on lines -130 to +124
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function run_batchscore_pipeline refactored with the following changes:



if __name__ == "__main__":
Expand Down
4 changes: 2 additions & 2 deletions ml_service/pipelines/run_train_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@ def main():
if p.version == e.build_id:
matched_pipes.append(p)

if(len(matched_pipes) > 1):
if (len(matched_pipes) > 1):
published_pipeline = None
raise Exception(f"Multiple active pipelines are published for build {e.build_id}.") # NOQA: E501
elif(len(matched_pipes) == 0):
elif not matched_pipes:
Comment on lines -41 to +44
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function main refactored with the following changes:

published_pipeline = None
raise KeyError(f"Unable to find a published pipeline for this build {e.build_id}") # NOQA: E501
else:
Expand Down
2 changes: 1 addition & 1 deletion ml_service/util/attach_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def get_compute(workspace: Workspace, compute_name: str, vm_size: str, for_batch
if compute_name in workspace.compute_targets:
compute_target = workspace.compute_targets[compute_name]
if compute_target and type(compute_target) is AmlCompute:
print("Found existing compute target " + compute_name + " so using it.") # NOQA
print(f"Found existing compute target {compute_name} so using it.")
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function get_compute refactored with the following changes:

This removes the following comments ( why? ):

# NOQA

else:
e = Env()
compute_config = AmlCompute.provisioning_configuration(
Expand Down
Loading