Commit
Merge pull request #106 from Azure/fix/reinstate-mlflow
Fix/reinstate mlflow
maggiemhanna committed Nov 9, 2022
2 parents b4070fc + e68df26 commit d07ac0f
Showing 9 changed files with 287 additions and 278 deletions.
7 changes: 2 additions & 5 deletions classical/aml-cli-v2/data-science/environment/train-conda.yml
@@ -12,8 +12,5 @@ dependencies:
- pandas==1.2.1
- joblib==1.0.0
- matplotlib==3.3.3
- fairlearn==0.7.0
- azureml-contrib-fairness==1.38.0
- interpret-community==0.24.1
- interpret-core==0.2.7
- azureml-interpret==1.38.0
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
@@ -12,8 +12,5 @@ dependencies:
- pandas==1.2.1
- joblib==1.0.0
- matplotlib==3.3.3
- fairlearn==0.7.0
- azureml-contrib-fairness==1.38.0
- interpret-community==0.24.1
- interpret-core==0.2.7
- azureml-interpret==1.38.0
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
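
With the fairlearn/interpret packages dropped from the training environment, model hand-off between the pipeline steps relies on MLflow's model format. A minimal sketch of that hand-off, assuming mlflow and scikit-learn are available in this environment (the RandomForestRegressor and the "model_output" path are illustrative placeholders, not taken from the repo):

import mlflow.sklearn
from sklearn.ensemble import RandomForestRegressor

# Illustrative only: fit a model and persist it in MLflow's directory format.
model = RandomForestRegressor(n_estimators=100, random_state=0)
# model.fit(X_train, y_train)  # fitted on the prepared training data

# Writes an MLmodel descriptor plus the serialized estimator under "model_output/".
mlflow.sklearn.save_model(sk_model=model, path="model_output")

# A downstream step can reload the same artifact by path.
loaded_model = mlflow.sklearn.load_model("model_output")

This directory layout is what mlflow.sklearn.load_model(args.model_input) in evaluate.py (below) expects to receive from the training step.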
194 changes: 76 additions & 118 deletions classical/aml-cli-v2/data-science/src/evaluate.py
@@ -1,28 +1,23 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
"""
Evaluates trained ML model using test dataset.
Saves predictions, evaluation results and deploy flag.
"""

import argparse
from pathlib import Path
import os
import pickle

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

from azureml.core import Run, Model

from fairlearn.metrics._group_metric_set import _create_group_metric_set
from azureml.contrib.fairness import upload_dashboard_dictionary, download_dashboard_by_upload_id

from interpret_community import TabularExplainer
from azureml.interpret import ExplanationClient

import mlflow
import mlflow.sklearn

# current run
run = Run.get_context()
ws = run.experiment.workspace
import mlflow.pyfunc
from mlflow.tracking import MlflowClient

TARGET_COL = "cost"

@@ -55,56 +50,43 @@
CAT_ORD_COLS = [
]

SENSITIVE_COLS = ["vendor"] # for fairlearn dashboard


def parse_args():

'''Parse input arguments'''

parser = argparse.ArgumentParser("predict")
parser.add_argument("--model_name", type=str, help="Name of registered model")
parser.add_argument("--model_input", type=str, help="Path of input model")
parser.add_argument("--prepared_data", type=str, help="Path to transformed data")
parser.add_argument("--predictions", type=str, help="Path of predictions")
parser.add_argument("--score_report", type=str, help="Path to score report")
parser.add_argument('--deploy_flag', type=str, help='A deploy flag indicating whether to deploy the model or not')
parser.add_argument("--test_data", type=str, help="Path to test dataset")
parser.add_argument("--evaluation_output", type=str, help="Path of eval results")
parser.add_argument("--runner", type=str, help="Local or Cloud Runner", default="CloudRunner")

args = parser.parse_args()

return args

def main(args):
'''Read trained model and test dataset, evaluate model and save result'''

def main():

args = parse_args()
# Load the test data
test_data = pd.read_parquet(Path(args.test_data))

lines = [
f"Model path: {args.model_input}",
f"Test data path: {args.prepared_data}",
f"Predictions path: {args.predictions}",
f"Scoring output path: {args.score_report}",
]
# Split the data into inputs and outputs
y_test = test_data[TARGET_COL]
X_test = test_data[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS]

for line in lines:
print(line)
# Load the model from input port
model = mlflow.sklearn.load_model(args.model_input)

# ---------------- Model Evaluation ---------------- #
yhat_test, score = model_evaluation(X_test, y_test, model, args.evaluation_output)

# Load the test data

print("mounted_path files: ")
arr = os.listdir(args.prepared_data)

train_data = pd.read_csv((Path(args.prepared_data) / "train.csv"))
test_data = pd.read_csv((Path(args.prepared_data) / "test.csv"))
# ----------------- Model Promotion ---------------- #
if args.runner == "CloudRunner":
predictions, deploy_flag = model_promotion(args.model_name, args.evaluation_output, X_test, y_test, yhat_test, score)

y_train = train_data[TARGET_COL]
X_train = train_data[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS]

y_test = test_data[TARGET_COL]
X_test = test_data[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS]

# Load the model from input port
model = pickle.load(open((Path(args.model_input) / "model.pkl"), "rb"))
def model_evaluation(X_test, y_test, model, evaluation_output):

# Get predictions for the test set (yhat_test)
yhat_test = model.predict(X_test)
@@ -113,7 +95,7 @@ def main():
output_data = X_test.copy()
output_data["real_label"] = y_test
output_data["predicted_label"] = yhat_test
output_data.to_csv((Path(args.predictions) / "predictions.csv"))
output_data.to_csv((Path(evaluation_output) / "predictions.csv"))

# Evaluate Model performance with the test set
r2 = r2_score(y_test, yhat_test)
@@ -122,15 +104,14 @@
mae = mean_absolute_error(y_test, yhat_test)

# Print score report to a text file
(Path(args.score_report) / "score.txt").write_text(
"Scored with the following model:\n{}".format(model)
(Path(evaluation_output) / "score.txt").write_text(
f"Scored with the following model:\n{format(model)}"
)
with open((Path(args.score_report) / "score.txt"), "a") as f:
f.write("Mean squared error: %.2f \n" % mse)
f.write("Root mean squared error: %.2f \n" % rmse)
f.write("Mean absolute error: %.2f \n" % mae)
f.write("Coefficient of determination: %.2f \n" % r2)

with open((Path(evaluation_output) / "score.txt"), "a") as outfile:
outfile.write(f"Mean squared error: {mse:.2f} \n")
outfile.write(f"Root mean squared error: {rmse:.2f} \n")
outfile.write(f"Mean absolute error: {mae:.2f} \n")
outfile.write(f"Coefficient of determination: {r2:.2f} \n")

mlflow.log_metric("test r2", r2)
mlflow.log_metric("test mse", mse)
@@ -146,88 +127,65 @@ def main():
plt.savefig("predictions.png")
mlflow.log_artifact("predictions.png")

# -------------------- Promotion ------------------- #
return yhat_test, r2

def model_promotion(model_name, evaluation_output, X_test, y_test, yhat_test, score):

scores = {}
predictions = {}
score = r2_score(y_test, yhat_test) # current model
for model_run in Model.list(ws):
if model_run.name == args.model_name:
model_path = Model.download(model_run, exist_ok=True)
mdl = pickle.load(open((Path(model_path) / "model.pkl"), "rb"))
predictions[model_run.id] = mdl.predict(X_test)
scores[model_run.id] = r2_score(y_test, predictions[model_run.id])

print(scores)

client = MlflowClient()

for model_run in client.search_model_versions(f"name='{model_name}'"):
model_version = model_run.version
mdl = mlflow.pyfunc.load_model(
model_uri=f"models:/{model_name}/{model_version}")
predictions[f"{model_name}:{model_version}"] = mdl.predict(X_test)
scores[f"{model_name}:{model_version}"] = r2_score(
y_test, predictions[f"{model_name}:{model_version}"])

if scores:
if score >= max(list(scores.values())):
deploy_flag = 1
else:
deploy_flag = 0
else:
deploy_flag = 1
print("Deploy flag: ",deploy_flag)
print(f"Deploy flag: {deploy_flag}")

with open((Path(evaluation_output) / "deploy_flag"), 'w') as outfile:
outfile.write(f"{int(deploy_flag)}")

with open((Path(args.deploy_flag) / "deploy_flag"), 'w') as f:
f.write('%d' % int(deploy_flag))

# add current model score and predictions
scores["current model"] = score
perf_comparison_plot = pd.DataFrame(scores, index=["r2 score"]).plot(kind='bar', figsize=(15, 10))
predictions["currrent model"] = yhat_test

perf_comparison_plot = pd.DataFrame(
scores, index=["r2 score"]).plot(kind='bar', figsize=(15, 10))
perf_comparison_plot.figure.savefig("perf_comparison.png")
perf_comparison_plot.figure.savefig(Path(args.score_report) / "perf_comparison.png")
perf_comparison_plot.figure.savefig(Path(evaluation_output) / "perf_comparison.png")

mlflow.log_metric("deploy flag", bool(deploy_flag))
mlflow.log_artifact("perf_comparison.png")


# -------------------- FAIRNESS ------------------- #
# Calculate Fairness Metrics over Sensitive Features
# Create a dictionary of model(s) you want to assess for fairness

sf = { col: X_test[[col]] for col in SENSITIVE_COLS }
predictions["currrent model"] = [x for x in model.predict(X_test)]

dash_dict_all = _create_group_metric_set(y_true=y_test,
predictions=predictions,
sensitive_features=sf,
prediction_type='regression',
)

# Upload the dashboard to Azure Machine Learning
dashboard_title = "Fairness insights Comparison of Models"
return predictions, deploy_flag

# Set validate_model_ids parameter of upload_dashboard_dictionary to False
# if you have not registered your model(s)
upload_id = upload_dashboard_dictionary(run,
dash_dict_all,
dashboard_name=dashboard_title,
validate_model_ids=False)
print("\nUploaded to id: {0}\n".format(upload_id))
if __name__ == "__main__":


# -------------------- Explainability ------------------- #
tabular_explainer = TabularExplainer(model,
initialization_examples=X_train,
features=X_train.columns)

# save explainer
#joblib.dump(tabular_explainer, os.path.join(tabular_explainer, "explainer"))
mlflow.start_run()

# find global explanations for feature importance
# you can use the training data or the test data here,
# but test data would allow you to use Explanation Exploration
global_explanation = tabular_explainer.explain_global(X_test)
args = parse_args()

# sorted feature importance values and feature names
sorted_global_importance_values = global_explanation.get_ranked_global_values()
sorted_global_importance_names = global_explanation.get_ranked_global_names()
lines = [
f"Model name: {args.model_name}",
f"Model path: {args.model_input}",
f"Test data path: {args.test_data}",
f"Evaluation output path: {args.evaluation_output}",
]

print("Explainability feature importance:")
# alternatively, you can print out a dictionary that holds the top K feature names and values
global_explanation.get_feature_importance_dict()
for line in lines:
print(line)

client = ExplanationClient.from_run(run)
client.upload_model_explanation(global_explanation, comment='global explanation: all features')

main(args)

if __name__ == "__main__":
main()
mlflow.end_run()
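
For reference, the promotion rule that the new model_promotion function implements against the MLflow model registry can be distilled into a few lines. A minimal sketch, assuming an MLflow tracking URI is configured and versions of the model are registered under model_name; should_deploy is a hypothetical helper, not code from this commit:

import mlflow.pyfunc
from mlflow.tracking import MlflowClient
from sklearn.metrics import r2_score

def should_deploy(model_name, X_test, y_test, current_score):
    """Deploy only if the candidate's R2 is at least as good as every registered version."""
    client = MlflowClient()
    scores = {}
    for version in client.search_model_versions(f"name='{model_name}'"):
        registered = mlflow.pyfunc.load_model(f"models:/{model_name}/{version.version}")
        scores[version.version] = r2_score(y_test, registered.predict(X_test))
    # An empty registry means this is the first model: deploy unconditionally.
    return 1 if not scores or current_score >= max(scores.values()) else 0

As in evaluate.py, the candidate is promoted when the registry is empty or when its test R2 matches or beats the best registered version.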