Commit
Merge pull request #106 from Azure/fix/reinstate-mlflow
Fix/reinstate mlflow
maggiemhanna committed Nov 9, 2022
2 parents b4070fc + e68df26 commit d07ac0f
Showing 9 changed files with 287 additions and 278 deletions.
7 changes: 2 additions & 5 deletions classical/aml-cli-v2/data-science/environment/train-conda.yml
@@ -12,8 +12,5 @@ dependencies:
- pandas==1.2.1
- joblib==1.0.0
- matplotlib==3.3.3
- fairlearn==0.7.0
- azureml-contrib-fairness==1.38.0
- interpret-community==0.24.1
- interpret-core==0.2.7
- azureml-interpret==1.38.0
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
@@ -12,8 +12,5 @@ dependencies:
- pandas==1.2.1
- joblib==1.0.0
- matplotlib==3.3.3
- fairlearn==0.7.0
- azureml-contrib-fairness==1.38.0
- interpret-community==0.24.1
- interpret-core==0.2.7
- azureml-interpret==1.38.0
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
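
With the fairlearn/interpret packages dropped from the training environment, model hand-off between the pipeline steps relies on MLflow's model format. A minimal sketch of that hand-off, assuming mlflow and scikit-learn are available in this environment (the RandomForestRegressor and the "model_output" path are illustrative placeholders, not taken from the repo):

import mlflow.sklearn
from sklearn.ensemble import RandomForestRegressor

# Illustrative only: fit a model and persist it in MLflow's directory format.
model = RandomForestRegressor(n_estimators=100, random_state=0)
# model.fit(X_train, y_train)  # fitted on the prepared training data

# Writes an MLmodel descriptor plus the serialized estimator under "model_output/".
mlflow.sklearn.save_model(sk_model=model, path="model_output")

# A downstream step can reload the same artifact by path.
loaded_model = mlflow.sklearn.load_model("model_output")

This directory layout is what mlflow.sklearn.load_model(args.model_input) in evaluate.py (below) expects to receive from the training step.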
194 changes: 76 additions & 118 deletions classical/aml-cli-v2/data-science/src/evaluate.py
@@ -1,28 +1,23 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
"""
Evaluates trained ML model using test dataset.
Saves predictions, evaluation results and deploy flag.
"""

import argparse
from pathlib import Path
import os
import pickle

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

from azureml.core import Run, Model

from fairlearn.metrics._group_metric_set import _create_group_metric_set
from azureml.contrib.fairness import upload_dashboard_dictionary, download_dashboard_by_upload_id

from interpret_community import TabularExplainer
from azureml.interpret import ExplanationClient

import mlflow
import mlflow.sklearn

# current run
run = Run.get_context()
ws = run.experiment.workspace
import mlflow.pyfunc
from mlflow.tracking import MlflowClient

TARGET_COL = "cost"

@@ -55,56 +50,43 @@
CAT_ORD_COLS = [
]

SENSITIVE_COLS = ["vendor"] # for fairlearn dashboard


def parse_args():

'''Parse input arguments'''

parser = argparse.ArgumentParser("predict")
parser.add_argument("--model_name", type=str, help="Name of registered model")
parser.add_argument("--model_input", type=str, help="Path of input model")
parser.add_argument("--prepared_data", type=str, help="Path to transformed data")
parser.add_argument("--predictions", type=str, help="Path of predictions")
parser.add_argument("--score_report", type=str, help="Path to score report")
parser.add_argument('--deploy_flag', type=str, help='A deploy flag indicating whether to deploy the model or not')
parser.add_argument("--test_data", type=str, help="Path to test dataset")
parser.add_argument("--evaluation_output", type=str, help="Path of eval results")
parser.add_argument("--runner", type=str, help="Local or Cloud Runner", default="CloudRunner")

args = parser.parse_args()

return args

def main(args):
'''Read trained model and test dataset, evaluate model and save result'''

def main():

args = parse_args()
# Load the test data
test_data = pd.read_parquet(Path(args.test_data))

lines = [
f"Model path: {args.model_input}",
f"Test data path: {args.prepared_data}",
f"Predictions path: {args.predictions}",
f"Scoring output path: {args.score_report}",
]
# Split the data into inputs and outputs
y_test = test_data[TARGET_COL]
X_test = test_data[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS]

for line in lines:
print(line)
# Load the model from input port
model = mlflow.sklearn.load_model(args.model_input)

# ---------------- Model Evaluation ---------------- #
yhat_test, score = model_evaluation(X_test, y_test, model, args.evaluation_output)

# Load the test data

print("mounted_path files: ")
arr = os.listdir(args.prepared_data)

train_data = pd.read_csv((Path(args.prepared_data) / "train.csv"))
test_data = pd.read_csv((Path(args.prepared_data) / "test.csv"))
# ----------------- Model Promotion ---------------- #
if args.runner == "CloudRunner":
predictions, deploy_flag = model_promotion(args.model_name, args.evaluation_output, X_test, y_test, yhat_test, score)

y_train = train_data[TARGET_COL]
X_train = train_data[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS]

y_test = test_data[TARGET_COL]
X_test = test_data[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS]

# Load the model from input port
model = pickle.load(open((Path(args.model_input) / "model.pkl"), "rb"))
def model_evaluation(X_test, y_test, model, evaluation_output):

# Get predictions for the test set (yhat_test)
yhat_test = model.predict(X_test)
@@ -113,7 +95,7 @@ def main():
output_data = X_test.copy()
output_data["real_label"] = y_test
output_data["predicted_label"] = yhat_test
output_data.to_csv((Path(args.predictions) / "predictions.csv"))
output_data.to_csv((Path(evaluation_output) / "predictions.csv"))

# Evaluate Model performance with the test set
r2 = r2_score(y_test, yhat_test)
@@ -122,15 +104,14 @@
mae = mean_absolute_error(y_test, yhat_test)

# Print score report to a text file
(Path(args.score_report) / "score.txt").write_text(
"Scored with the following model:\n{}".format(model)
(Path(evaluation_output) / "score.txt").write_text(
f"Scored with the following model:\n{format(model)}"
)
with open((Path(args.score_report) / "score.txt"), "a") as f:
f.write("Mean squared error: %.2f \n" % mse)
f.write("Root mean squared error: %.2f \n" % rmse)
f.write("Mean absolute error: %.2f \n" % mae)
f.write("Coefficient of determination: %.2f \n" % r2)

with open((Path(evaluation_output) / "score.txt"), "a") as outfile:
outfile.write(f"Mean squared error: {mse:.2f} \n")
outfile.write(f"Root mean squared error: {rmse:.2f} \n")
outfile.write(f"Mean absolute error: {mae:.2f} \n")
outfile.write(f"Coefficient of determination: {r2:.2f} \n")

mlflow.log_metric("test r2", r2)
mlflow.log_metric("test mse", mse)
@@ -146,88 +127,65 @@ def main():
plt.savefig("predictions.png")
mlflow.log_artifact("predictions.png")

# -------------------- Promotion ------------------- #
return yhat_test, r2

def model_promotion(model_name, evaluation_output, X_test, y_test, yhat_test, score):

scores = {}
predictions = {}
score = r2_score(y_test, yhat_test) # current model
for model_run in Model.list(ws):
if model_run.name == args.model_name:
model_path = Model.download(model_run, exist_ok=True)
mdl = pickle.load(open((Path(model_path) / "model.pkl"), "rb"))
predictions[model_run.id] = mdl.predict(X_test)
scores[model_run.id] = r2_score(y_test, predictions[model_run.id])

print(scores)

client = MlflowClient()

for model_run in client.search_model_versions(f"name='{model_name}'"):
model_version = model_run.version
mdl = mlflow.pyfunc.load_model(
model_uri=f"models:/{model_name}/{model_version}")
predictions[f"{model_name}:{model_version}"] = mdl.predict(X_test)
scores[f"{model_name}:{model_version}"] = r2_score(
y_test, predictions[f"{model_name}:{model_version}"])

if scores:
if score >= max(list(scores.values())):
deploy_flag = 1
else:
deploy_flag = 0
else:
deploy_flag = 1
print("Deploy flag: ",deploy_flag)
print(f"Deploy flag: {deploy_flag}")

with open((Path(evaluation_output) / "deploy_flag"), 'w') as outfile:
outfile.write(f"{int(deploy_flag)}")

with open((Path(args.deploy_flag) / "deploy_flag"), 'w') as f:
f.write('%d' % int(deploy_flag))

# add current model score and predictions
scores["current model"] = score
perf_comparison_plot = pd.DataFrame(scores, index=["r2 score"]).plot(kind='bar', figsize=(15, 10))
predictions["currrent model"] = yhat_test

perf_comparison_plot = pd.DataFrame(
scores, index=["r2 score"]).plot(kind='bar', figsize=(15, 10))
perf_comparison_plot.figure.savefig("perf_comparison.png")
perf_comparison_plot.figure.savefig(Path(args.score_report) / "perf_comparison.png")
perf_comparison_plot.figure.savefig(Path(evaluation_output) / "perf_comparison.png")

mlflow.log_metric("deploy flag", bool(deploy_flag))
mlflow.log_artifact("perf_comparison.png")


# -------------------- FAIRNESS ------------------- #
# Calculate Fairness Metrics over Sensitive Features
# Create a dictionary of model(s) you want to assess for fairness

sf = { col: X_test[[col]] for col in SENSITIVE_COLS }
predictions["currrent model"] = [x for x in model.predict(X_test)]

dash_dict_all = _create_group_metric_set(y_true=y_test,
predictions=predictions,
sensitive_features=sf,
prediction_type='regression',
)

# Upload the dashboard to Azure Machine Learning
dashboard_title = "Fairness insights Comparison of Models"
return predictions, deploy_flag

# Set validate_model_ids parameter of upload_dashboard_dictionary to False
# if you have not registered your model(s)
upload_id = upload_dashboard_dictionary(run,
dash_dict_all,
dashboard_name=dashboard_title,
validate_model_ids=False)
print("\nUploaded to id: {0}\n".format(upload_id))
if __name__ == "__main__":


# -------------------- Explainability ------------------- #
tabular_explainer = TabularExplainer(model,
initialization_examples=X_train,
features=X_train.columns)

# save explainer
#joblib.dump(tabular_explainer, os.path.join(tabular_explainer, "explainer"))
mlflow.start_run()

# find global explanations for feature importance
# you can use the training data or the test data here,
# but test data would allow you to use Explanation Exploration
global_explanation = tabular_explainer.explain_global(X_test)
args = parse_args()

# sorted feature importance values and feature names
sorted_global_importance_values = global_explanation.get_ranked_global_values()
sorted_global_importance_names = global_explanation.get_ranked_global_names()
lines = [
f"Model name: {args.model_name}",
f"Model path: {args.model_input}",
f"Test data path: {args.test_data}",
f"Evaluation output path: {args.evaluation_output}",
]

print("Explainability feature importance:")
# alternatively, you can print out a dictionary that holds the top K feature names and values
global_explanation.get_feature_importance_dict()
for line in lines:
print(line)

client = ExplanationClient.from_run(run)
client.upload_model_explanation(global_explanation, comment='global explanation: all features')

main(args)

if __name__ == "__main__":
main()
mlflow.end_run()
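
For reference, the promotion rule that the new model_promotion function implements against the MLflow model registry can be distilled into a few lines. A minimal sketch, assuming an MLflow tracking URI is configured and versions of the model are registered under model_name; should_deploy is a hypothetical helper, not code from this commit:

import mlflow.pyfunc
from mlflow.tracking import MlflowClient
from sklearn.metrics import r2_score

def should_deploy(model_name, X_test, y_test, current_score):
    """Deploy only if the candidate's R2 is at least as good as every registered version."""
    client = MlflowClient()
    scores = {}
    for version in client.search_model_versions(f"name='{model_name}'"):
        registered = mlflow.pyfunc.load_model(f"models:/{model_name}/{version.version}")
        scores[version.version] = r2_score(y_test, registered.predict(X_test))
    # An empty registry means this is the first model: deploy unconditionally.
    return 1 if not scores or current_score >= max(scores.values()) else 0

As in evaluate.py, the candidate is promoted when the registry is empty or when its test R2 matches or beats the best registered version.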