-
Notifications
You must be signed in to change notification settings - Fork 0
Sourcery refactored master branch #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -54,10 +54,7 @@ def rename_dir(self): | |
| def delete_dir(self): | ||
| # Delete unwanted directories | ||
| dirs = ["docs", r"diabetes_regression"] | ||
| if (platform.system() == "Windows"): | ||
| cmd = 'rmdir /S /Q "{}"' | ||
| else: | ||
| cmd = 'rm -r "{}"' | ||
| cmd = 'rmdir /S /Q "{}"' if (platform.system() == "Windows") else 'rm -r "{}"' | ||
| for dir in dirs: | ||
| os.system(cmd.format(os.path.join(self._project_directory, os.path.normpath(dir)))) # NOQA: E501 | ||
|
|
||
|
|
@@ -151,5 +148,5 @@ def main(args): | |
| return 0 | ||
|
|
||
|
|
||
| if '__main__' == __name__: | ||
| if __name__ == '__main__': | ||
|
Comment on lines
-154
to
+151
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Lines
|
||
| sys.exit(main(sys.argv)) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,7 @@ | |
| ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE | ||
| POSSIBILITY OF SUCH DAMAGE. | ||
| """ | ||
|
|
||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Lines
|
||
| from azureml.core import Run | ||
| import argparse | ||
| import traceback | ||
|
|
@@ -70,8 +71,6 @@ | |
| # if you would like to use Offline mode | ||
| exp = run.experiment | ||
| ws = run.experiment.workspace | ||
| run_id = 'amlcompute' | ||
|
|
||
| parser = argparse.ArgumentParser("evaluate") | ||
|
|
||
| parser.add_argument( | ||
|
|
@@ -94,8 +93,7 @@ | |
| ) | ||
|
|
||
| args = parser.parse_args() | ||
| if (args.run_id is not None): | ||
| run_id = args.run_id | ||
| run_id = args.run_id if (args.run_id is not None) else 'amlcompute' | ||
| if (run_id == 'amlcompute'): | ||
| run_id = run.parent.id | ||
| model_name = args.model_name | ||
|
|
@@ -129,11 +127,8 @@ | |
| run.parent.cancel() | ||
| else: | ||
| print( | ||
| "Current Production model {}: {}, ".format( | ||
| metric_eval, production_model_mse) + | ||
| "New trained model {}: {}".format( | ||
| metric_eval, new_model_mse | ||
| ) | ||
| f"Current Production model {metric_eval}: {production_model_mse}, " | ||
| + f"New trained model {metric_eval}: {new_model_mse}" | ||
| ) | ||
|
|
||
| if (new_model_mse < production_model_mse): | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -107,7 +107,7 @@ def main(): | |
| print(f"Could not find {tag} metric on parent run.") | ||
|
|
||
| # load the model | ||
| print("Loading model from " + model_path) | ||
| print(f"Loading model from {model_path}") | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function
|
||
| model_file = os.path.join(model_path, model_name) | ||
| model = joblib.load(model_file) | ||
| parent_tags = run.parent.get_tags() | ||
|
|
@@ -183,7 +183,7 @@ def register_aml_model( | |
| tagsValue = {"area": "diabetes_regression", | ||
| "run_id": run_id, | ||
| "experiment_name": exp.name} | ||
| tagsValue.update(model_tags) | ||
| tagsValue |= model_tags | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function
|
||
| if (build_id != 'none'): | ||
| model_already_registered(model_name, exp, run_id) | ||
| tagsValue["BuildId"] = build_id | ||
|
|
@@ -199,10 +199,7 @@ def register_aml_model( | |
| Dataset.get_by_id(exp.workspace, dataset_id))]) | ||
| os.chdir("..") | ||
| print( | ||
| "Model registered: {} \nModel Description: {} " | ||
| "\nModel Version: {}".format( | ||
| model.name, model.description, model.version | ||
| ) | ||
| f"Model registered: {model.name} \nModel Description: {model.description} \nModel Version: {model.version}" | ||
| ) | ||
| except Exception: | ||
| traceback.print_exc(limit=None, file=None, chain=True) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -53,7 +53,7 @@ def parse_args() -> List[str]: | |
| if itm == "--model_name" | ||
| ] | ||
|
|
||
| if len(model_name_param) == 0: | ||
| if not model_name_param: | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function
|
||
| raise ValueError( | ||
| "Model name is required but no model name parameter was passed to the script" # NOQA: E501 | ||
| ) | ||
|
|
@@ -67,8 +67,8 @@ def parse_args() -> List[str]: | |
| ] | ||
| model_version = ( | ||
| None | ||
| if len(model_version_param) < 1 | ||
| or len(model_version_param[0][1].strip()) == 0 # NOQA: E501 | ||
| if not model_version_param | ||
| or len(model_version_param[0][1].strip()) == 0 | ||
| else model_version_param[0][1] | ||
| ) | ||
|
|
||
|
|
@@ -79,8 +79,8 @@ def parse_args() -> List[str]: | |
| ] | ||
| model_tag_name = ( | ||
| None | ||
| if len(model_tag_name_param) < 1 | ||
| or len(model_tag_name_param[0][1].strip()) == 0 # NOQA: E501 | ||
| if not model_tag_name_param | ||
| or len(model_tag_name_param[0][1].strip()) == 0 | ||
| else model_tag_name_param[0][1] | ||
| ) | ||
|
|
||
|
|
@@ -91,7 +91,7 @@ def parse_args() -> List[str]: | |
| ] | ||
| model_tag_value = ( | ||
| None | ||
| if len(model_tag_value_param) < 1 | ||
| if not model_tag_value_param | ||
| or len(model_tag_name_param[0][1].strip()) == 0 | ||
| else model_tag_value_param[0][1] | ||
| ) | ||
|
|
@@ -120,9 +120,9 @@ def init(): | |
| modelpath = Model.get_model_path( | ||
| model_name=amlmodel.name, version=amlmodel.version) | ||
| model = joblib.load(modelpath) | ||
| print("Loaded model {}".format(model_filter[0])) | ||
| print(f"Loaded model {model_filter[0]}") | ||
| except Exception as ex: | ||
| print("Error: {}".format(ex)) | ||
| print(f"Error: {ex}") | ||
|
Comment on lines
-123
to
+125
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
|
|
||
| def run(mini_batch: pd.DataFrame) -> pd.DataFrame: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -42,11 +42,9 @@ def parse_args(): | |
|
|
||
|
|
||
| def copy_output(args): | ||
| print("Output : {}".format(args.output_path)) | ||
| print(f"Output : {args.output_path}") | ||
|
|
||
| accounturl = "https://{}.blob.core.windows.net".format( | ||
| args.scoring_datastore | ||
| ) # NOQA E501 | ||
| accounturl = f"https://{args.scoring_datastore}.blob.core.windows.net" | ||
|
Comment on lines
-45
to
+47
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function
This removes the following comments ( why? ): |
||
|
|
||
| containerclient = ContainerClient( | ||
| accounturl, args.score_container, args.scoring_datastore_key | ||
|
|
@@ -61,9 +59,7 @@ def copy_output(args): | |
| .replace(".", "_") | ||
| ) # noqa E501 | ||
| destfilenameparts = args.scoring_output_filename.split(".") | ||
| destblobname = "{}/{}_{}.{}".format( | ||
| destfolder, destfilenameparts[0], filetime, destfilenameparts[1] | ||
| ) | ||
| destblobname = f"{destfolder}/{destfilenameparts[0]}_{filetime}.{destfilenameparts[1]}" | ||
|
|
||
| destblobclient = containerclient.get_blob_client(destblobname) | ||
| with open( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -38,9 +38,10 @@ def split_data(df): | |
|
|
||
| X_train, X_test, y_train, y_test = train_test_split( | ||
| X, y, test_size=0.2, random_state=0) | ||
| data = {"train": {"X": X_train, "y": y_train}, | ||
| "test": {"X": X_test, "y": y_test}} | ||
| return data | ||
| return { | ||
| "train": {"X": X_train, "y": y_train}, | ||
| "test": {"X": X_test, "y": y_test}, | ||
| } | ||
|
Comment on lines
-41
to
+44
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
|
|
||
| # Train the model, return the model | ||
|
|
@@ -54,8 +55,7 @@ def train_model(data, ridge_args): | |
| def get_model_metrics(model, data): | ||
| preds = model.predict(data["test"]["X"]) | ||
| mse = mean_squared_error(preds, data["test"]["y"]) | ||
| metrics = {"mse": mse} | ||
| return metrics | ||
| return {"mse": mse} | ||
|
Comment on lines
-57
to
+58
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
|
|
||
| def main(): | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -93,12 +93,12 @@ def main(): | |
|
|
||
| args = parser.parse_args() | ||
|
|
||
| print("Argument [model_name]: %s" % args.model_name) | ||
| print("Argument [step_output]: %s" % args.step_output) | ||
| print("Argument [dataset_version]: %s" % args.dataset_version) | ||
| print("Argument [data_file_path]: %s" % args.data_file_path) | ||
| print("Argument [caller_run_id]: %s" % args.caller_run_id) | ||
| print("Argument [dataset_name]: %s" % args.dataset_name) | ||
| print(f"Argument [model_name]: {args.model_name}") | ||
| print(f"Argument [step_output]: {args.step_output}") | ||
| print(f"Argument [dataset_version]: {args.dataset_version}") | ||
| print(f"Argument [data_file_path]: {args.data_file_path}") | ||
| print(f"Argument [caller_run_id]: {args.caller_run_id}") | ||
| print(f"Argument [dataset_name]: {args.dataset_name}") | ||
|
Comment on lines
-96
to
+101
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
| model_name = args.model_name | ||
| step_output_path = args.step_output | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -71,7 +71,6 @@ def get_or_create_datastore( | |
|
|
||
| datastore = ws.datastores[datastorename] | ||
|
|
||
| # the datastore is not registered but we have all details to register it | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function
This removes the following comments ( why? ): |
||
| elif ( | ||
| env.scoring_datastore_access_key is not None | ||
| and containername is not None # NOQA: E501 | ||
|
|
@@ -86,9 +85,7 @@ def get_or_create_datastore( | |
| ) | ||
| else: | ||
| raise ValueError( | ||
| "No existing datastore named {} nor was enough information supplied to create one.".format( # NOQA: E501 | ||
| datastorename | ||
| ) | ||
| f"No existing datastore named {datastorename} nor was enough information supplied to create one." | ||
| ) | ||
|
|
||
| return datastore | ||
|
|
@@ -161,14 +158,12 @@ def get_fallback_input_dataset(ws: Workspace, env: Env) -> Dataset: | |
| overwrite=False, | ||
| ) | ||
|
|
||
| scoringinputds = ( | ||
| return ( | ||
| Dataset.Tabular.from_delimited_files(scoreinputdataref) | ||
| .register(ws, env.scoring_dataset_name, create_new_version=True) | ||
| .as_named_input(env.scoring_dataset_name) | ||
| ) | ||
|
|
||
| return scoringinputds | ||
|
Comment on lines
-164
to
-170
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
|
|
||
| def get_output_location( | ||
| ws: Workspace, env: Env, outputdatastore: Datastore = None | ||
|
|
@@ -187,21 +182,18 @@ def get_output_location( | |
| :returns: PipelineData wrapping the output datastore | ||
| """ | ||
|
|
||
| if outputdatastore is None: | ||
| output_loc = PipelineData( | ||
| return ( | ||
| PipelineData( | ||
| name="defaultoutput", datastore=ws.get_default_datastore() | ||
| ) | ||
| else: | ||
| output_loc = PipelineData( | ||
| name=outputdatastore.name, datastore=outputdatastore | ||
| ) # NOQA: E501 | ||
|
|
||
| return output_loc | ||
| if outputdatastore is None | ||
| else PipelineData(name=outputdatastore.name, datastore=outputdatastore) | ||
| ) | ||
|
Comment on lines
-190
to
+191
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function
This removes the following comments ( why? ): |
||
|
|
||
|
|
||
| def get_inputds_outputloc( | ||
| ws: Workspace, env: Env | ||
| ) -> Tuple[Dataset, PipelineData]: # NOQA: E501 | ||
| ) -> Tuple[Dataset, PipelineData]: # NOQA: E501 | ||
|
Comment on lines
-204
to
+196
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function
|
||
| """ | ||
| Prepare the input and output for the scoring step. Input is a tabular | ||
| dataset wrapped around the scoring data. Output is PipelineData | ||
|
|
@@ -219,13 +211,10 @@ def get_inputds_outputloc( | |
| output_loc = get_output_location(ws, env) | ||
| else: | ||
| inputdatastore = get_or_create_datastore( | ||
| "{}_in".format(env.scoring_datastore_storage_name), ws, env | ||
| f"{env.scoring_datastore_storage_name}_in", ws, env | ||
| ) | ||
| outputdatastore = get_or_create_datastore( | ||
| "{}_out".format(env.scoring_datastore_storage_name), | ||
| ws, | ||
| env, | ||
| input=False, # NOQA: E501 | ||
| f"{env.scoring_datastore_storage_name}_out", ws, env, input=False | ||
| ) | ||
| scoringinputds = get_input_dataset(ws, inputdatastore, env) | ||
| output_loc = get_output_location(ws, env, outputdatastore) | ||
|
|
@@ -415,9 +404,7 @@ def build_batchscore_pipeline(): | |
| name=env.scoring_pipeline_name, | ||
| description="Diabetes Batch Scoring Pipeline", | ||
| ) | ||
| pipeline_id_string = "##vso[task.setvariable variable=pipeline_id;isOutput=true]{}".format( # NOQA: E501 | ||
| published_pipeline.id | ||
| ) | ||
| pipeline_id_string = f"##vso[task.setvariable variable=pipeline_id;isOutput=true]{published_pipeline.id}" | ||
|
Comment on lines
-418
to
+407
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function
|
||
| print(pipeline_id_string) | ||
| except Exception as e: | ||
| print(e) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -69,9 +69,8 @@ def main(): | |
|
|
||
| if not os.path.exists(file_name): | ||
| raise Exception( | ||
| 'Could not find CSV dataset at "%s". If you have bootstrapped your project, you will need to provide a CSV.' # NOQA: E501 | ||
| % file_name | ||
| ) # NOQA: E501 | ||
| f'Could not find CSV dataset at "{file_name}". If you have bootstrapped your project, you will need to provide a CSV.' | ||
| ) | ||
|
Comment on lines
-72
to
+73
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
| # Upload file to default datastore in workspace | ||
| datatstore = Datastore.get(aml_workspace, datastore_name) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -43,9 +43,9 @@ def main(): | |
| train_pipeline = Pipeline(workspace=aml_workspace, steps=steps) | ||
| train_pipeline.validate() | ||
| published_pipeline = train_pipeline.publish( | ||
| name=e.pipeline_name + "_with_R_on_DB", | ||
| name=f"{e.pipeline_name}_with_R_on_DB", | ||
| description="Model training/retraining pipeline", | ||
| version=e.build_id | ||
| version=e.build_id, | ||
|
Comment on lines
-46
to
+48
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function
|
||
| ) | ||
| print(f'Published pipeline: {published_pipeline.name}') | ||
| print(f'for build {published_pipeline.version}') | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -47,9 +47,7 @@ def get_pipeline(pipeline_id, ws: Workspace, env: Env): | |
| ] # noqa E501 | ||
|
|
||
| if scoringpipelinelist.count == 0: | ||
| raise Exception( | ||
| "No pipeline found matching name:{}".format(env.scoring_pipeline_name) # NOQA: E501 | ||
| ) | ||
| raise Exception(f"No pipeline found matching name:{env.scoring_pipeline_name}") | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function
This removes the following comments ( why? ): |
||
| else: | ||
| # latest published | ||
| scoringpipeline = scoringpipelinelist[0] | ||
|
|
@@ -58,16 +56,14 @@ def get_pipeline(pipeline_id, ws: Workspace, env: Env): | |
|
|
||
|
|
||
| def copy_output(step_id: str, env: Env): | ||
| accounturl = "https://{}.blob.core.windows.net".format( | ||
| env.scoring_datastore_storage_name | ||
| accounturl = ( | ||
| f"https://{env.scoring_datastore_storage_name}.blob.core.windows.net" | ||
| ) | ||
|
|
||
| srcblobname = "azureml/{}/{}_out/parallel_run_step.txt".format( | ||
| step_id, env.scoring_datastore_storage_name | ||
| ) | ||
| srcblobname = f"azureml/{step_id}/{env.scoring_datastore_storage_name}_out/parallel_run_step.txt" | ||
|
|
||
| srcbloburl = "{}/{}/{}".format( | ||
| accounturl, env.scoring_datastore_output_container, srcblobname | ||
| srcbloburl = ( | ||
| f"{accounturl}/{env.scoring_datastore_output_container}/{srcblobname}" | ||
|
Comment on lines
-61
to
+66
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function
|
||
| ) | ||
|
|
||
| containerclient = ContainerClient( | ||
|
|
@@ -87,9 +83,7 @@ def copy_output(step_id: str, env: Env): | |
| .replace(".", "_") | ||
| ) # noqa E501 | ||
| destfilenameparts = env.scoring_datastore_output_filename.split(".") | ||
| destblobname = "{}/{}_{}.{}".format( | ||
| destfolder, destfilenameparts[0], filetime, destfilenameparts[1] | ||
| ) | ||
| destblobname = f"{destfolder}/{destfilenameparts[0]}_{filetime}.{destfilenameparts[1]}" | ||
|
|
||
| destblobclient = containerclient.get_blob_client(destblobname) | ||
| destblobclient.start_copy_from_url(srcbloburl) | ||
|
|
@@ -127,7 +121,7 @@ def run_batchscore_pipeline(): | |
| copy_output(list(run.get_steps())[0].id, env) | ||
|
|
||
| except Exception as ex: | ||
| print("Error: {}".format(ex)) | ||
| print(f"Error: {ex}") | ||
|
Comment on lines
-130
to
+124
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -38,10 +38,10 @@ def main(): | |
| if p.version == e.build_id: | ||
| matched_pipes.append(p) | ||
|
|
||
| if(len(matched_pipes) > 1): | ||
| if (len(matched_pipes) > 1): | ||
| published_pipeline = None | ||
| raise Exception(f"Multiple active pipelines are published for build {e.build_id}.") # NOQA: E501 | ||
| elif(len(matched_pipes) == 0): | ||
| elif not matched_pipes: | ||
|
Comment on lines
-41
to
+44
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function
|
||
| published_pipeline = None | ||
| raise KeyError(f"Unable to find a published pipeline for this build {e.build_id}") # NOQA: E501 | ||
| else: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,7 +12,7 @@ def get_compute(workspace: Workspace, compute_name: str, vm_size: str, for_batch | |
| if compute_name in workspace.compute_targets: | ||
| compute_target = workspace.compute_targets[compute_name] | ||
| if compute_target and type(compute_target) is AmlCompute: | ||
| print("Found existing compute target " + compute_name + " so using it.") # NOQA | ||
| print(f"Found existing compute target {compute_name} so using it.") | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function
This removes the following comments ( why? ): |
||
| else: | ||
| e = Env() | ||
| compute_config = AmlCompute.provisioning_configuration( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Function
Helper.delete_dir refactored with the following changes: (assign-if-exp)