In [None]:
from azureml.core.workspace import Workspace
# Workspace handle; used further down when registering the environment.
ws = Workspace.from_config()

In [None]:
%%writefile fairness_requirements.txt

azureml-core
scikit-learn
azureml-dataset-runtime
azureml-contrib-fairness
fairlearn==0.7.0
raiwidgets
numpy<=1.20.0
lightgbm

In [None]:
from azureml.core import Environment
from azureml.core.environment import CondaDependencies
# Build the environment from the pip requirements file written by the
# previous cell, then register it with the workspace.
myenv = Environment.from_pip_requirements(name="fairness-environment", file_path="fairness_requirements.txt")
myenv.register(workspace=ws)

In [None]:
%%writefile fairness_train.py

import joblib

from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
import sklearn

from azureml.core import Dataset
from azureml.core.run import Run
import numpy as np
import sklearn

from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration

from sklearn.model_selection import train_test_split


# Load the diabetes data as pandas objects: a feature frame and a target series.
dataset_x, dataset_y = load_diabetes(as_frame=True, return_X_y=True)

# Set the sensitive columns aside; they are only used for the fairness
# assessment at the end of the script.
sensitive = dataset_x[["age", "sex"]]

# DataFrame.drop returns a new frame rather than modifying in place, so the
# result has to be assigned back. Without the assignment the sensitive columns
# silently stay in the features the model is trained on.
dataset_x = dataset_x.drop(columns=["age", "sex"])

# Split features, targets and sensitive columns together so the rows stay
# aligned across the three; 30% is held out and the seed is fixed.
(X_train, X_test, y_train, y_test, A_train, A_test) = train_test_split(
    dataset_x, dataset_y, sensitive, test_size=0.3, random_state=12345
)

## Align indices
# Give every split a fresh 0..n-1 index (the old index is discarded).
X_train, X_test, y_train, y_test, A_train, A_test = (
    split.reset_index(drop=True)
    for split in (X_train, X_test, y_train, y_test, A_train, A_test)
)


# Fit a Ridge regressor with default settings on the training split.
model = Ridge()
model.fit(X_train, y_train)

# Persist the fitted model to the working directory for registration later.
joblib.dump(model, 'sklearn_regression_model.pkl')

print('Model trained')


## Store sample input and output datasets into Azure

# Write the training features and labels as comma-delimited text files.
np.savetxt('features.csv', X_train, delimiter=',')
np.savetxt('labels.csv', y_train, delimiter=',')

# Resolve the workspace and its default datastore from the current run context.
run = Run.get_context()
ws = run.experiment.workspace
datastore = ws.get_default_datastore()

# Upload both files under 'sklearn_regression/', replacing any existing copies.
datastore.upload_files(
    files=['./features.csv', './labels.csv'],
    target_path='sklearn_regression/',
    overwrite=True,
)

# Tabular datasets pointing at the uploaded files; passed to Model.register
# below as the sample input/output datasets.
input_dataset = Dataset.Tabular.from_delimited_files(
    path=[(datastore, 'sklearn_regression/features.csv')]
)
output_dataset = Dataset.Tabular.from_delimited_files(
    path=[(datastore, 'sklearn_regression/labels.csv')]
)
## Register model

registered_model = Model.register(
    workspace=ws,
    # Name of the registered model in your workspace.
    model_name='fair_model',
    # Local file to upload and register as a model.
    model_path='./sklearn_regression_model.pkl',
    # Framework used to create the model.
    model_framework=Model.Framework.SCIKITLEARN,
    # Version of scikit-learn used to create the model.
    model_framework_version=sklearn.__version__,
    sample_input_dataset=input_dataset,
    sample_output_dataset=output_dataset,
    resource_configuration=ResourceConfiguration(cpu=1, memory_in_gb=0.5),
    description='Ridge regression model to predict diabetes progression.',
    tags={'area': 'diabetes', 'type': 'regression'},
)

# Report the name and version of the registration.
print('Name:', registered_model.name)
print('Version:', registered_model.version)


## Check fairness
from fairlearn.metrics._group_metric_set import _create_group_metric_set
from azureml.contrib.fairness import upload_dashboard_dictionary, download_dashboard_by_upload_id

# Sensitive features to group by, and test-set predictions keyed by the
# registered model's id.
sf = {'Age': A_test['age'], 'Sex': A_test['sex']}
ys_pred = {registered_model.id: model.predict(X_test)}

dash_dict = _create_group_metric_set(
    y_true=y_test,
    predictions=ys_pred,
    sensitive_features=sf,
    prediction_type='regression',
)

# `run` is rebound here to the object returned by start_logging() on the same
# experiment; that object receives the dashboard upload below.
exp = run.experiment
run = exp.start_logging()

# Upload the dashboard to Azure Machine Learning
dashboard_title = "Fairness insights of Ridge Regression Diabetes"
try:
    # Set validate_model_ids parameter of upload_dashboard_dictionary to False
    # if you have not registered your model(s)
    upload_id = upload_dashboard_dictionary(run, dash_dict, dashboard_name=dashboard_title)
    print("\nUploaded to id: {0}\n".format(upload_id))
finally:
    # Complete the run whether or not the upload succeeded.
    run.complete()