In [1]:
pip show azureml-explain-model azureml-interpret

Name: azureml-explain-model
Version: 1.48.0
Summary: The package has been deprecated and might not receive future updates.
Home-page: https://docs.microsoft.com/python/api/overview/azure/ml/?view=azure-ml-py
Author: Microsoft Corp
Author-email: None
License: https://aka.ms/azureml-sdk-license
Location: /anaconda/envs/azureml_py38/lib/python3.8/site-packages
Requires: azureml-interpret
Required-by: 
---
Name: azureml-interpret
Version: 1.48.0
Summary: Machine Learning interpret package is used to interpret ML models
Home-page: https://docs.microsoft.com/python/api/overview/azure/ml/?view=azure-ml-py
Author: Microsoft Corp
Author-email: None
License: https://aka.ms/azureml-sdk-license
Location: /anaconda/envs/azureml_py38/lib/python3.8/site-packages
Requires: numpy, interpret-community, numba, azureml-core, shap
Required-by: azureml-train-automl-runtime, azureml-responsibleai, azureml-explain-model
Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

# Single seed shared by the data split and the tree so the reported metrics
# are the same on every Restart & Run All.
RANDOM_STATE = 42

# Load the churn dataset
print("Loading Data...")
data = pd.read_csv('data/telco-csv.csv')

# Work on a copy so the raw frame stays untouched
churn = data.copy()

# Drop the columns that have null values
churn = churn.drop(['loglong', 'logtoll', 'logequi', 'logcard', 'logwire', 'lninc'], axis=1)

# Convert every string (object) column to integer category codes.
# The codes are derived from the frame being modified, not from the raw copy.
for column in churn.columns:
    if churn[column].dtype == 'object':
        churn[column] = pd.Categorical(churn[column]).codes

# Separate features and labels
features = ['region', 'tenure', 'age', 'marital', 'address', 'income', 'ed',
       'employ', 'retire', 'gender', 'reside', 'tollfree', 'equip', 'callcard',
       'wireless', 'longmon', 'tollmon', 'equipmon', 'cardmon', 'wiremon',
       'longten', 'tollten', 'equipten', 'cardten', 'wireten', 'multline',
       'voice', 'pager', 'internet', 'callid', 'callwait', 'forward', 'confer',
       'ebill',
       'custcat']
labels = ['not-churn', 'churn']
X, y = churn[features].values, churn['churn'].values

# Split data into training set (70%) and test set (30%)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=RANDOM_STATE)

# Train a decision tree model. The tree is seeded because scikit-learn
# permutes the candidate features at every split, so an unseeded tree can
# differ from run to run even on identical data.
print('Training a decision tree model')
model = DecisionTreeClassifier(random_state=RANDOM_STATE).fit(X_train, y_train)

# Accuracy: fraction of test rows whose predicted class matches the label
y_hat = model.predict(X_test)
acc = np.average(y_hat == y_test)
print('Accuracy:', acc)

# AUC, computed from the predicted probability of the second class (column 1)
y_scores = model.predict_proba(X_test)
auc = roc_auc_score(y_test, y_scores[:, 1])
print('AUC: ' + str(auc))

print('Model trained.')

Loading Data...
Training a decision tree model
Accuracy: 0.65
AUC: 0.5722913178665392
Model trained.


In [4]:
from interpret.ext.blackbox import TabularExplainer

# Create an explainer for the trained model, initialised with the training
# data. Passing "features" and "classes" is optional.
tab_explainer = TabularExplainer(
    model,
    X_train,
    features=features,
    classes=labels,
)
print(tab_explainer, "ready!")

TabularExplainer ready!


In [5]:
# Global explanation: either the training data or the test data can be used here
global_tab_explanation = tab_explainer.explain_global(X_train)

# Print every feature together with its overall importance
global_tab_feature_importance = global_tab_explanation.get_feature_importance_dict()
for name in global_tab_feature_importance:
    print(name, ":", global_tab_feature_importance[name])

longten : 0.16034149715220142
equipten : 0.06448847731111285
equip : 0.05616902146624003
longmon : 0.04734919078399353
region : 0.04076351879687003
callcard : 0.039629152951262174
age : 0.03852103160131641
employ : 0.0323102231493754
tollmon : 0.026504871613724616
equipmon : 0.02551737692785027
address : 0.023619595550495576
wiremon : 0.02359603806343496
income : 0.023023066867985144
custcat : 0.02180416447424759
callid : 0.019837337576229516
cardten : 0.019329064289708618
internet : 0.01885969884418004
tollten : 0.018536839024908304
tenure : 0.017556632608931366
ed : 0.015474893437487916
confer : 0.009777950665846786
callwait : 0.009072596641836007
wireten : 0.008888407267985413
cardmon : 0.007295355308270335
pager : 0.006712934135668149
wireless : 0.006694489313310983
multline : 0.0031371111008208345
marital : 0.0023783535801189795
voice : 0.0019038673224118566
reside : 0.0016643553143474946
tollfree : 0.0
gender : 0.0
retire : 0.0
forward : 0.0
ebill : 0.0


In [6]:
# Local explanation: pick the observations to explain (the first two test rows)
X_explain = X_test[0:2]

# Predicted class index for each of those observations
predictions = model.predict(X_explain)

# Explain the individual predictions
local_tab_explanation = tab_explainer.explain_local(X_explain)

# Ranked feature names and matching importance values, indexed as
# [label][observation][rank]
local_tab_features = local_tab_explanation.get_ranked_local_names()
local_tab_importance = local_tab_explanation.get_ranked_local_values()

for class_idx, names_per_observation in enumerate(local_tab_features):
    print('Support for', labels[class_idx])
    for obs_idx, feature_names in enumerate(names_per_observation):
        print("\tObservation", obs_idx + 1)
        feature_values = local_tab_importance[class_idx][obs_idx]
        # Accumulate in ranked order so the printed total matches a plain running sum
        total_support = 0
        for feature_name, support in zip(feature_names, feature_values):
            print("\t\t", feature_name, ':', support)
            total_support += support
        print("\t\t ----------\n\t\t Total:", total_support, "Prediction:", labels[predictions[obs_idx]])

Support for not-churn
	Observation 1
		 longten : 0.10184878175735787
		 callid : 0.042536997055290376
		 age : 0.04075932103857574
		 equip : 0.03673256193804056
		 callcard : 0.034648238299235794
		 equipten : 0.02802349721802188
		 callwait : 0.0246454128562706
		 ed : 0.014885309164208205
		 tollten : 0.011499946975097734
		 tenure : 0.00905429430538997
		 cardten : 0.008605370856808042
		 address : 0.007333074279688879
		 region : 0.006777344108855881
		 internet : 0.006592581147760534
		 income : 0.00467056358857096
		 pager : 0.004313291285295525
		 cardmon : 0.003078764280724516
		 wireten : 0.001108619751540068
		 wiremon : 0.0010322222549905037
		 marital : 0.0006709750566893424
		 voice : 0.0004098350038112456
		 ebill : 0.0
		 tollfree : 0.0
		 gender : 0.0
		 retire : 0.0
		 forward : 0.0
		 reside : -0.0003300032802366781
		 wireless : -0.0013477531135199128
		 confer : -0.002049401640470432
		 tollmon : -0.0031414458336518885
		 employ : -0.003790257335827738
		 multline

In [7]:
import azureml.core
from azureml.core import Workspace

# Connect to the workspace described by the saved config file
ws = Workspace.from_config()
print(f'Ready to use Azure ML {azureml.core.VERSION} to work with {ws.name}')

Ready to use Azure ML 1.48.0 to work with project


In [8]:
import os, shutil
from azureml.core import Experiment

# Make sure the folder holding the experiment files exists
experiment_folder = 'churn_train_and_explain'
os.makedirs(experiment_folder, exist_ok=True)

# Place a copy of the data file next to the training script; the copy's
# path is the cell's displayed result
source_csv = 'data/telco-csv.csv'
target_csv = os.path.join(experiment_folder, "telco-csv.csv")
shutil.copy(source_csv, target_csv)

'churn_train_and_explain/telco-csv.csv'

In [9]:
%%writefile $experiment_folder/churn_training.py
# Import libraries
import pandas as pd
import numpy as np
import joblib
import os
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

# Import Azure ML run library
from azureml.core.run import Run

# Import libraries for model explanation
from azureml.interpret import ExplanationClient
from interpret.ext.blackbox import TabularExplainer

# Get the experiment run context
run = Run.get_context()

# Single seed shared by the data split and the tree so that repeated runs
# log the same metrics.
RANDOM_STATE = 42

# Load the churn dataset (copied next to this script in the experiment folder)
print("Loading Data...")
data = pd.read_csv('telco-csv.csv')

# Work on a copy so the raw frame stays untouched
churn = data.copy()

# Drop the columns that have null values
churn = churn.drop(['loglong', 'logtoll', 'logequi', 'logcard', 'logwire', 'lninc'], axis=1)

# Convert every string (object) column to integer category codes.
# The codes are derived from the frame being modified, not from the raw copy.
for column in churn.columns:
    if churn[column].dtype == 'object':
        churn[column] = pd.Categorical(churn[column]).codes

# Separate features and labels
features = ['region', 'tenure', 'age', 'marital', 'address', 'income', 'ed',
       'employ', 'retire', 'gender', 'reside', 'tollfree', 'equip', 'callcard',
       'wireless', 'longmon', 'tollmon', 'equipmon', 'cardmon', 'wiremon',
       'longten', 'tollten', 'equipten', 'cardten', 'wireten', 'multline',
       'voice', 'pager', 'internet', 'callid', 'callwait', 'forward', 'confer',
       'ebill',
       'custcat']
labels = ['not-churn', 'churn']
X, y = churn[features].values, churn['churn'].values

# Split data into training set (70%) and test set (30%)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=RANDOM_STATE)

# Train a decision tree model. The tree is seeded because scikit-learn
# permutes the candidate features at every split, so an unseeded tree can
# differ from run to run even on identical data.
print('Training a decision tree model')
model = DecisionTreeClassifier(random_state=RANDOM_STATE).fit(X_train, y_train)

# Accuracy: fraction of test rows whose predicted class matches the label.
# The builtin float() is used because the np.float alias was deprecated in
# NumPy 1.20 and removed in NumPy 1.24.
y_hat = model.predict(X_test)
acc = np.average(y_hat == y_test)
run.log('Accuracy', float(acc))

# AUC, computed from the predicted probability of the second class (column 1)
y_scores = model.predict_proba(X_test)
auc = roc_auc_score(y_test, y_scores[:, 1])
run.log('AUC', float(auc))

os.makedirs('outputs', exist_ok=True)
# note file saved in the outputs folder is automatically uploaded into experiment record
joblib.dump(value=model, filename='outputs/churn.pkl')

# Build the explainer from the training data and compute a global explanation
# on the test data
explainer = TabularExplainer(model, X_train, features=features, classes=labels)
explanation = explainer.explain_global(X_test)

# Get an Explanation Client and upload the explanation to the run
explain_client = ExplanationClient.from_run(run)
explain_client.upload_model_explanation(explanation, comment='Tabular Explanation')

# Complete the run
run.complete()

Writing churn_train_and_explain/churn_training.py


In [10]:
%%writefile $experiment_folder/interpret_env.yml
# Conda specification for the environment used by the churn training experiment
name: batch_environment
dependencies:
# NOTE(review): the notebook kernel path shows Python 3.8 while this pins 3.6.2 - confirm the older pin is intended
- python=3.6.2
- scikit-learn
- pandas
- pip
# Packages installed with pip rather than conda
- pip:
  - azureml-defaults
  - azureml-interpret

Writing churn_train_and_explain/interpret_env.yml


In [11]:
from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.core.runconfig import DockerConfiguration
from azureml.widgets import RunDetails


# Build the Python environment for the experiment from the conda specification
explain_env = Environment.from_conda_specification(
    "explain_env", f"{experiment_folder}/interpret_env.yml"
)

# Describe the script to run, the environment to run it in and the Docker setting
docker_config = DockerConfiguration(use_docker=True)
script_config = ScriptRunConfig(
    source_directory=experiment_folder,
    script='churn_training.py',
    environment=explain_env,
    docker_runtime_config=docker_config,
)

# Submit the experiment, show the run widget and wait for the run to finish
experiment_name = 'mslearn-churn-explain'
experiment = Experiment(workspace=ws, name=experiment_name)
run = experiment.submit(config=script_config)
RunDetails(run).show()
run.wait_for_completion()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

KeyboardInterrupt: 

In [None]:
from azureml.interpret import ExplanationClient

# Download the model explanation attached to the run
client = ExplanationClient.from_run(run)
engineered_explanations = client.download_model_explanation()
feature_importances = engineered_explanations.get_feature_importance_dict()

# Print the overall importance of every feature under a small header
print('Feature\tImportance')
for feature_name in feature_importances:
    print(feature_name, '\t', feature_importances[feature_name])