## 1. Install packages

In [2]:
#verify packages
!pip install azureml-core #Contains core packages, modules, and classes for Azure Machine Learning.
!pip install azureml-interpret #Contains functionality for working with model interpretability in Azure Machine Learning.



## 2. Connect to Azure ML workspace to add explainability to a model training experiment

In [3]:
import azureml.core
from azureml.core import Workspace
from azureml.core.authentication import InteractiveLoginAuthentication

# Build the workspace handle from the saved config file
ws = Workspace.from_config()

# Report the SDK version and the name of the workspace we connected to
sdk_version = azureml.core.VERSION
workspace_name = ws.name
print('Ready to use Azure ML {} to work with {}'.format(sdk_version, workspace_name))

Ready to use Azure ML 1.18.0 to work with demoML


## 3. Create an experiment and a local folder containing files to run

In [4]:
import os, shutil
from azureml.core import Experiment

# Local folder that holds the experiment files; created only if it is missing
experiment_folder = 'hiring_train_and_explain'
os.makedirs(experiment_folder, exist_ok=True)

# Put a copy of the data file inside the experiment folder.
# shutil.copy returns the destination path, which is shown as the cell output.
data_file_name = 'studenthiring.csv'
shutil.copy(data_file_name, os.path.join(experiment_folder, data_file_name))

'hiring_train_and_explain/studenthiring.csv'

## 4. Load data, train and explain a model using an experiment

In [5]:
%%writefile $experiment_folder/hiring_training.py
# Import libraries
import pandas as pd
import numpy as np
import joblib
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

# Import Azure ML run library
from azureml.core.run import Run

# Import libraries for model explanation
from azureml.interpret import ExplanationClient
from interpret.ext.blackbox import TabularExplainer

# Get the experiment run context
run = Run.get_context()

# load the dataset
print("Loading Data...")
data = pd.read_csv('studenthiring.csv')

# Separate features and labels
features = data.columns[0:4]
labels = ['not-hired', 'hired']
X, y = data[features].values, data['hired'].values

# Split data into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=2)

# Train a logistic Regression Model
print('Training a logistic Regression Model')
model = LogisticRegression(penalty='l2', class_weight='balanced', C=0.1, solver='liblinear').fit(X_train, y_train)

# calculate accuracy
y_hat = model.predict(X_test)
acc = np.average(y_hat == y_test)
run.log('Accuracy', np.float(acc))

# calculate AUC
y_scores = model.predict_proba(X_test)
auc = roc_auc_score(y_test,y_scores[:,1])
run.log('AUC', np.float(auc))

os.makedirs('outputs', exist_ok=True)
# note file saved in the outputs folder is automatically uploaded into experiment record
joblib.dump(value=model, filename='outputs/hiring.pkl')

# Get explanation
explainer = TabularExplainer(model, X_train, features=features, classes=labels)
explanation = explainer.explain_global(X_test)

# Get an Explanation Client and upload the explanation
explain_client = ExplanationClient.from_run(run)
explain_client.upload_model_explanation(explanation, comment='Tabular Explanation Global')

# Complete the run
run.complete()

Overwriting hiring_train_and_explain/hiring_training.py


## 5. Create Python environment to run the script

In [6]:
%%writefile $experiment_folder/interpret_env.yml
# Conda specification for the environment in which the training script runs
name: batch_environment
dependencies:
  # Conda packages
  - python=3.8
  - scikit-learn
  - pandas
  - pip
  # Packages installed through pip
  - pip:
      - azureml-defaults
      - azureml-interpret

Overwriting hiring_train_and_explain/interpret_env.yml


## 6. Run the experiment 

In [7]:
from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.widgets import RunDetails


# Build the Python environment from the conda specification in the experiment folder
env_spec_path = experiment_folder + "/interpret_env.yml"
explain_env = Environment.from_conda_specification("explain_env", env_spec_path)

# Describe what to run: the training script, its source folder, and its environment
script_config = ScriptRunConfig(
    source_directory=experiment_folder,
    script='hiring_training.py',
    environment=explain_env,
)

# Create the experiment in the workspace and submit the script configuration
experiment_name = 'hiring-model-explain'
experiment = Experiment(workspace=ws, name=experiment_name)
run = experiment.submit(config=script_config)

# Show the run widget, then block until the run finishes;
# the value returned by wait_for_completion is shown as the cell output
RunDetails(run).show()
run.wait_for_completion()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

{'runId': 'hiring-model-explain_1627720118_d086bcf6',
 'target': 'local',
 'status': 'Finalizing',
 'startTimeUtc': '2021-07-31T08:28:43.536162Z',
 'properties': {'_azureml.ComputeTargetType': 'local',
  'ContentSnapshotId': 'b2f79f71-5688-43ff-8e41-0db708b7fce3'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'script': 'hiring_training.py',
  'useAbsolutePath': False,
  'arguments': [],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'local',
  'dataReferences': {},
  'data': {},
  'outputData': {},
  'datacaches': [],
  'jobName': None,
  'maxRunDurationSeconds': 2592000,
  'nodeCount': 1,
  'priority': None,
  'credentialPassthrough': False,
  'identity': None,
  'environment': {'name': 'explain_env',
   'version': 'Autosave_2021-07-13T13:18:42Z_95dab336',
   'python': {'interpreterPath': 'python',
    'userManagedDependencies': False,
    'condaDependencies': {'dependencies': ['python=3.8',
      'scikit-learn',
 

## 7. Get Feature Importance 

In [8]:
from azureml.interpret import ExplanationClient

# Download the explanation uploaded by the run and read its feature importances
client = ExplanationClient.from_run(run)
engineered_explanations = client.download_model_explanation()
feature_importances = engineered_explanations.get_feature_importance_dict()

# Print one line per feature under a tab-separated header
print('Feature\tImportance')
for feature_name in feature_importances:
    importance = feature_importances[feature_name]
    print(feature_name, '\t', importance)

Feature	Importance
hackathons 	 0.2947643352996714
marks 	 0.10081319860103687
github_score 	 0.07122275477404397
volunteer 	 0.005973753902841748
