From dfdcdeab6e3b1909d0476f71f2f1bcc75639f720 Mon Sep 17 00:00:00 2001 From: Sourcery AI <> Date: Thu, 25 May 2023 13:22:19 +0000 Subject: [PATCH] 'Refactored by Sourcery' --- bootstrap/bootstrap.py | 7 ++-- .../evaluate/evaluate_model.py | 13 +++---- .../register/register_model.py | 9 ++--- .../scoring/parallel_batchscore.py | 16 ++++----- .../scoring/parallel_batchscore_copyoutput.py | 10 ++---- diabetes_regression/training/train.py | 10 +++--- diabetes_regression/training/train_aml.py | 12 +++---- ...sion_build_parallel_batchscore_pipeline.py | 35 ++++++------------- ...iabetes_regression_build_train_pipeline.py | 5 ++- ..._build_train_pipeline_with_r_on_dbricks.py | 4 +-- .../run_parallel_batchscore_pipeline.py | 22 +++++------- ml_service/pipelines/run_train_pipeline.py | 4 +-- ml_service/util/attach_compute.py | 2 +- ml_service/util/create_scoring_image.py | 6 ++-- ml_service/util/smoke_test_scoring_service.py | 8 ++--- 15 files changed, 64 insertions(+), 99 deletions(-) diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py index 02f51bbc..53566aa7 100644 --- a/bootstrap/bootstrap.py +++ b/bootstrap/bootstrap.py @@ -54,10 +54,7 @@ def rename_dir(self): def delete_dir(self): # Delete unwanted directories dirs = ["docs", r"diabetes_regression"] - if (platform.system() == "Windows"): - cmd = 'rmdir /S /Q "{}"' - else: - cmd = 'rm -r "{}"' + cmd = 'rmdir /S /Q "{}"' if (platform.system() == "Windows") else 'rm -r "{}"' for dir in dirs: os.system(cmd.format(os.path.join(self._project_directory, os.path.normpath(dir)))) # NOQA: E501 @@ -151,5 +148,5 @@ def main(args): return 0 -if '__main__' == __name__: +if __name__ == '__main__': sys.exit(main(sys.argv)) diff --git a/diabetes_regression/evaluate/evaluate_model.py b/diabetes_regression/evaluate/evaluate_model.py index d1ff3c6a..30486eb3 100644 --- a/diabetes_regression/evaluate/evaluate_model.py +++ b/diabetes_regression/evaluate/evaluate_model.py @@ -23,6 +23,7 @@ ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ + from azureml.core import Run import argparse import traceback @@ -70,8 +71,6 @@ # if you would like to use Offline mode exp = run.experiment ws = run.experiment.workspace -run_id = 'amlcompute' - parser = argparse.ArgumentParser("evaluate") parser.add_argument( @@ -94,8 +93,7 @@ ) args = parser.parse_args() -if (args.run_id is not None): - run_id = args.run_id +run_id = args.run_id if (args.run_id is not None) else 'amlcompute' if (run_id == 'amlcompute'): run_id = run.parent.id model_name = args.model_name @@ -129,11 +127,8 @@ run.parent.cancel() else: print( - "Current Production model {}: {}, ".format( - metric_eval, production_model_mse) + - "New trained model {}: {}".format( - metric_eval, new_model_mse - ) + f"Current Production model {metric_eval}: {production_model_mse}, " + + f"New trained model {metric_eval}: {new_model_mse}" ) if (new_model_mse < production_model_mse): diff --git a/diabetes_regression/register/register_model.py b/diabetes_regression/register/register_model.py index bca55a83..fae336f9 100644 --- a/diabetes_regression/register/register_model.py +++ b/diabetes_regression/register/register_model.py @@ -107,7 +107,7 @@ def main(): print(f"Could not find {tag} metric on parent run.") # load the model - print("Loading model from " + model_path) + print(f"Loading model from {model_path}") model_file = os.path.join(model_path, model_name) model = joblib.load(model_file) parent_tags = run.parent.get_tags() @@ -183,7 +183,7 @@ def register_aml_model( tagsValue = {"area": "diabetes_regression", "run_id": run_id, "experiment_name": exp.name} - tagsValue.update(model_tags) + tagsValue |= model_tags if (build_id != 'none'): model_already_registered(model_name, exp, run_id) tagsValue["BuildId"] = build_id @@ -199,10 +199,7 @@ def register_aml_model( Dataset.get_by_id(exp.workspace, dataset_id))]) os.chdir("..") print( - "Model registered: {} \nModel Description: {} " - "\nModel Version: {}".format( - model.name, model.description, model.version - ) + f"Model registered: {model.name} \nModel Description: {model.description} \nModel Version: {model.version}" ) except Exception: traceback.print_exc(limit=None, file=None, chain=True) diff --git a/diabetes_regression/scoring/parallel_batchscore.py b/diabetes_regression/scoring/parallel_batchscore.py index cd42c79c..f0b9cf35 100644 --- a/diabetes_regression/scoring/parallel_batchscore.py +++ b/diabetes_regression/scoring/parallel_batchscore.py @@ -53,7 +53,7 @@ def parse_args() -> List[str]: if itm == "--model_name" ] - if len(model_name_param) == 0: + if not model_name_param: raise ValueError( "Model name is required but no model name parameter was passed to the script" # NOQA: E501 ) @@ -67,8 +67,8 @@ def parse_args() -> List[str]: ] model_version = ( None - if len(model_version_param) < 1 - or len(model_version_param[0][1].strip()) == 0 # NOQA: E501 + if not model_version_param + or len(model_version_param[0][1].strip()) == 0 else model_version_param[0][1] ) @@ -79,8 +79,8 @@ def parse_args() -> List[str]: ] model_tag_name = ( None - if len(model_tag_name_param) < 1 - or len(model_tag_name_param[0][1].strip()) == 0 # NOQA: E501 + if not model_tag_name_param + or len(model_tag_name_param[0][1].strip()) == 0 else model_tag_name_param[0][1] ) @@ -91,7 +91,7 @@ def parse_args() -> List[str]: ] model_tag_value = ( None - if len(model_tag_value_param) < 1 + if not model_tag_value_param or len(model_tag_name_param[0][1].strip()) == 0 else model_tag_value_param[0][1] ) @@ -120,9 +120,9 @@ def init(): modelpath = Model.get_model_path( model_name=amlmodel.name, version=amlmodel.version) model = joblib.load(modelpath) - print("Loaded model {}".format(model_filter[0])) + print(f"Loaded model {model_filter[0]}") except Exception as ex: - print("Error: {}".format(ex)) + print(f"Error: {ex}") def run(mini_batch: pd.DataFrame) -> pd.DataFrame: diff --git a/diabetes_regression/scoring/parallel_batchscore_copyoutput.py b/diabetes_regression/scoring/parallel_batchscore_copyoutput.py index 1bcde4b6..e99af345 100644 --- a/diabetes_regression/scoring/parallel_batchscore_copyoutput.py +++ b/diabetes_regression/scoring/parallel_batchscore_copyoutput.py @@ -42,11 +42,9 @@ def parse_args(): def copy_output(args): - print("Output : {}".format(args.output_path)) + print(f"Output : {args.output_path}") - accounturl = "https://{}.blob.core.windows.net".format( - args.scoring_datastore - ) # NOQA E501 + accounturl = f"https://{args.scoring_datastore}.blob.core.windows.net" containerclient = ContainerClient( accounturl, args.score_container, args.scoring_datastore_key @@ -61,9 +59,7 @@ def copy_output(args): .replace(".", "_") ) # noqa E501 destfilenameparts = args.scoring_output_filename.split(".") - destblobname = "{}/{}_{}.{}".format( - destfolder, destfilenameparts[0], filetime, destfilenameparts[1] - ) + destblobname = f"{destfolder}/{destfilenameparts[0]}_{filetime}.{destfilenameparts[1]}" destblobclient = containerclient.get_blob_client(destblobname) with open( diff --git a/diabetes_regression/training/train.py b/diabetes_regression/training/train.py index 22258042..ec97a6ea 100644 --- a/diabetes_regression/training/train.py +++ b/diabetes_regression/training/train.py @@ -38,9 +38,10 @@ def split_data(df): X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=0) - data = {"train": {"X": X_train, "y": y_train}, - "test": {"X": X_test, "y": y_test}} - return data + return { + "train": {"X": X_train, "y": y_train}, + "test": {"X": X_test, "y": y_test}, + } # Train the model, return the model @@ -54,8 +55,7 @@ def train_model(data, ridge_args): def get_model_metrics(model, data): preds = model.predict(data["test"]["X"]) mse = mean_squared_error(preds, data["test"]["y"]) - metrics = {"mse": mse} - return metrics + return {"mse": mse} def main(): diff --git a/diabetes_regression/training/train_aml.py b/diabetes_regression/training/train_aml.py index 9303198b..c9c66669 100644 --- a/diabetes_regression/training/train_aml.py +++ b/diabetes_regression/training/train_aml.py @@ -93,12 +93,12 @@ def main(): args = parser.parse_args() - print("Argument [model_name]: %s" % args.model_name) - print("Argument [step_output]: %s" % args.step_output) - print("Argument [dataset_version]: %s" % args.dataset_version) - print("Argument [data_file_path]: %s" % args.data_file_path) - print("Argument [caller_run_id]: %s" % args.caller_run_id) - print("Argument [dataset_name]: %s" % args.dataset_name) + print(f"Argument [model_name]: {args.model_name}") + print(f"Argument [step_output]: {args.step_output}") + print(f"Argument [dataset_version]: {args.dataset_version}") + print(f"Argument [data_file_path]: {args.data_file_path}") + print(f"Argument [caller_run_id]: {args.caller_run_id}") + print(f"Argument [dataset_name]: {args.dataset_name}") model_name = args.model_name step_output_path = args.step_output diff --git a/ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py b/ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py index 5a0f0125..059cd586 100644 --- a/ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py +++ b/ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py @@ -71,7 +71,6 @@ def get_or_create_datastore( datastore = ws.datastores[datastorename] - # the datastore is not registered but we have all details to register it elif ( env.scoring_datastore_access_key is not None and containername is not None # NOQA: E501 @@ -86,9 +85,7 @@ def get_or_create_datastore( ) else: raise ValueError( - "No existing datastore named {} nor was enough information supplied to create one.".format( # NOQA: E501 - datastorename - ) + f"No existing datastore named {datastorename} nor was enough information supplied to create one." ) return datastore @@ -161,14 +158,12 @@ def get_fallback_input_dataset(ws: Workspace, env: Env) -> Dataset: overwrite=False, ) - scoringinputds = ( + return ( Dataset.Tabular.from_delimited_files(scoreinputdataref) .register(ws, env.scoring_dataset_name, create_new_version=True) .as_named_input(env.scoring_dataset_name) ) - return scoringinputds - def get_output_location( ws: Workspace, env: Env, outputdatastore: Datastore = None @@ -187,21 +182,18 @@ def get_output_location( :returns: PipelineData wrapping the output datastore """ - if outputdatastore is None: - output_loc = PipelineData( + return ( + PipelineData( name="defaultoutput", datastore=ws.get_default_datastore() ) - else: - output_loc = PipelineData( - name=outputdatastore.name, datastore=outputdatastore - ) # NOQA: E501 - - return output_loc + if outputdatastore is None + else PipelineData(name=outputdatastore.name, datastore=outputdatastore) + ) def get_inputds_outputloc( ws: Workspace, env: Env -) -> Tuple[Dataset, PipelineData]: # NOQA: E501 +) -> Tuple[Dataset, PipelineData]: # NOQA: E501 """ Prepare the input and output for the scoring step. Input is a tabular dataset wrapped around the scoring data. Output is PipelineData @@ -219,13 +211,10 @@ def get_inputds_outputloc( output_loc = get_output_location(ws, env) else: inputdatastore = get_or_create_datastore( - "{}_in".format(env.scoring_datastore_storage_name), ws, env + f"{env.scoring_datastore_storage_name}_in", ws, env ) outputdatastore = get_or_create_datastore( - "{}_out".format(env.scoring_datastore_storage_name), - ws, - env, - input=False, # NOQA: E501 + f"{env.scoring_datastore_storage_name}_out", ws, env, input=False ) scoringinputds = get_input_dataset(ws, inputdatastore, env) output_loc = get_output_location(ws, env, outputdatastore) @@ -415,9 +404,7 @@ def build_batchscore_pipeline(): name=env.scoring_pipeline_name, description="Diabetes Batch Scoring Pipeline", ) - pipeline_id_string = "##vso[task.setvariable variable=pipeline_id;isOutput=true]{}".format( # NOQA: E501 - published_pipeline.id - ) + pipeline_id_string = f"##vso[task.setvariable variable=pipeline_id;isOutput=true]{published_pipeline.id}" print(pipeline_id_string) except Exception as e: print(e) diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py index 03937186..91da7908 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py @@ -69,9 +69,8 @@ def main(): if not os.path.exists(file_name): raise Exception( - 'Could not find CSV dataset at "%s". If you have bootstrapped your project, you will need to provide a CSV.' # NOQA: E501 - % file_name - ) # NOQA: E501 + f'Could not find CSV dataset at "{file_name}". If you have bootstrapped your project, you will need to provide a CSV.' + ) # Upload file to default datastore in workspace datatstore = Datastore.get(aml_workspace, datastore_name) diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py index ae607b3b..35f2d5c7 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py @@ -43,9 +43,9 @@ def main(): train_pipeline = Pipeline(workspace=aml_workspace, steps=steps) train_pipeline.validate() published_pipeline = train_pipeline.publish( - name=e.pipeline_name + "_with_R_on_DB", + name=f"{e.pipeline_name}_with_R_on_DB", description="Model training/retraining pipeline", - version=e.build_id + version=e.build_id, ) print(f'Published pipeline: {published_pipeline.name}') print(f'for build {published_pipeline.version}') diff --git a/ml_service/pipelines/run_parallel_batchscore_pipeline.py b/ml_service/pipelines/run_parallel_batchscore_pipeline.py index c046eb9c..9474b989 100644 --- a/ml_service/pipelines/run_parallel_batchscore_pipeline.py +++ b/ml_service/pipelines/run_parallel_batchscore_pipeline.py @@ -47,9 +47,7 @@ def get_pipeline(pipeline_id, ws: Workspace, env: Env): ] # noqa E501 if scoringpipelinelist.count == 0: - raise Exception( - "No pipeline found matching name:{}".format(env.scoring_pipeline_name) # NOQA: E501 - ) + raise Exception(f"No pipeline found matching name:{env.scoring_pipeline_name}") else: # latest published scoringpipeline = scoringpipelinelist[0] @@ -58,16 +56,14 @@ def get_pipeline(pipeline_id, ws: Workspace, env: Env): def copy_output(step_id: str, env: Env): - accounturl = "https://{}.blob.core.windows.net".format( - env.scoring_datastore_storage_name + accounturl = ( + f"https://{env.scoring_datastore_storage_name}.blob.core.windows.net" ) - srcblobname = "azureml/{}/{}_out/parallel_run_step.txt".format( - step_id, env.scoring_datastore_storage_name - ) + srcblobname = f"azureml/{step_id}/{env.scoring_datastore_storage_name}_out/parallel_run_step.txt" - srcbloburl = "{}/{}/{}".format( - accounturl, env.scoring_datastore_output_container, srcblobname + srcbloburl = ( + f"{accounturl}/{env.scoring_datastore_output_container}/{srcblobname}" ) containerclient = ContainerClient( @@ -87,9 +83,7 @@ def copy_output(step_id: str, env: Env): .replace(".", "_") ) # noqa E501 destfilenameparts = env.scoring_datastore_output_filename.split(".") - destblobname = "{}/{}_{}.{}".format( - destfolder, destfilenameparts[0], filetime, destfilenameparts[1] - ) + destblobname = f"{destfolder}/{destfilenameparts[0]}_{filetime}.{destfilenameparts[1]}" destblobclient = containerclient.get_blob_client(destblobname) destblobclient.start_copy_from_url(srcbloburl) @@ -127,7 +121,7 @@ def run_batchscore_pipeline(): copy_output(list(run.get_steps())[0].id, env) except Exception as ex: - print("Error: {}".format(ex)) + print(f"Error: {ex}") if __name__ == "__main__": diff --git a/ml_service/pipelines/run_train_pipeline.py b/ml_service/pipelines/run_train_pipeline.py index b68b9a15..5231f2fa 100644 --- a/ml_service/pipelines/run_train_pipeline.py +++ b/ml_service/pipelines/run_train_pipeline.py @@ -38,10 +38,10 @@ def main(): if p.version == e.build_id: matched_pipes.append(p) - if(len(matched_pipes) > 1): + if (len(matched_pipes) > 1): published_pipeline = None raise Exception(f"Multiple active pipelines are published for build {e.build_id}.") # NOQA: E501 - elif(len(matched_pipes) == 0): + elif not matched_pipes: published_pipeline = None raise KeyError(f"Unable to find a published pipeline for this build {e.build_id}") # NOQA: E501 else: diff --git a/ml_service/util/attach_compute.py b/ml_service/util/attach_compute.py index cf8c07a6..aba49e92 100644 --- a/ml_service/util/attach_compute.py +++ b/ml_service/util/attach_compute.py @@ -12,7 +12,7 @@ def get_compute(workspace: Workspace, compute_name: str, vm_size: str, for_batch if compute_name in workspace.compute_targets: compute_target = workspace.compute_targets[compute_name] if compute_target and type(compute_target) is AmlCompute: - print("Found existing compute target " + compute_name + " so using it.") # NOQA + print(f"Found existing compute target {compute_name} so using it.") else: e = Env() compute_config = AmlCompute.provisioning_configuration( diff --git a/ml_service/util/create_scoring_image.py b/ml_service/util/create_scoring_image.py index 378cb3b4..126b7bca 100644 --- a/ml_service/util/create_scoring_image.py +++ b/ml_service/util/create_scoring_image.py @@ -50,10 +50,12 @@ if package.state != "Succeeded": raise Exception("Image creation status: {package.creation_state}") -print("Package stored at {} with build log {}".format(package.location, package.package_build_log_uri)) # NOQA: E501 +print( + f"Package stored at {package.location} with build log {package.package_build_log_uri}" +) # Save the Image Location for other AzDO jobs after script is complete if args.output_image_location_file is not None: - print("Writing image location to %s" % args.output_image_location_file) + print(f"Writing image location to {args.output_image_location_file}") with open(args.output_image_location_file, "w") as out_file: out_file.write(str(package.location)) diff --git a/ml_service/util/smoke_test_scoring_service.py b/ml_service/util/smoke_test_scoring_service.py index 0fa34b1e..ed520b01 100644 --- a/ml_service/util/smoke_test_scoring_service.py +++ b/ml_service/util/smoke_test_scoring_service.py @@ -26,12 +26,10 @@ def call_web_service(e, service_type, service_name): service = AksWebservice(aml_workspace, service_name) if service.auth_enabled: service_keys = service.get_keys() - headers['Authorization'] = 'Bearer ' + service_keys[0] + headers['Authorization'] = f'Bearer {service_keys[0]}' print("Testing service") - print(". url: %s" % service.scoring_uri) - output = call_web_app(service.scoring_uri, headers) - - return output + print(f". url: {service.scoring_uri}") + return call_web_app(service.scoring_uri, headers) def call_web_app(url, headers):