# Chapter 10 code snippets
This notebook contains all code snippets from chapter 10.

In [None]:
# Upgrade to latest the packages used in this notebook
!pip install --upgrade interpret-community
!pip install --upgrade raiwidgets
!pip install --upgrade fairlearn
raise Exception("Please comment out this cell and restart the Jupyter kernel")

## Interpreting the predictions of the model

In [None]:
from sklearn.datasets import make_classification
import pandas as pd
import numpy as np

# Synthetic binary-classification data: 500 samples with 3 feature columns
# (one of them redundant), class weights 0.7 / 0.3, and a fixed seed so the
# generated data is the same on every run.
features, target = make_classification(
    random_state=1337,
    n_samples=500,
    n_features=3,
    n_redundant=1,
    weights=[0.7, 0.3],
    shift=0,
    scale=1,
)

def fix_series(series, min_val, max_val):
    """Linearly rescale ``series`` into ``[min_val, max_val]`` and round.

    The values are shifted so the smallest becomes 0, divided by the largest
    shifted value, stretched to the requested range and rounded to zero
    decimals.

    Args:
        series: Array-like supporting element-wise arithmetic and the
            ``min`` / ``max`` / ``round`` methods (e.g. a NumPy array or a
            pandas Series).
        min_val: Value the smallest element is mapped to.
        max_val: Value the largest element is mapped to.

    Returns:
        A new object of the same kind as ``series`` holding the rescaled,
        rounded values. A constant input has no spread to rescale, so every
        element maps to ``min_val`` instead of dividing by zero.

    Raises:
        ValueError: If ``series`` is empty.
    """
    # Vectorised min/max instead of the Python builtins, which walk the
    # array one element at a time.
    shifted = series - series.min()
    span = shifted.max()
    if span == 0:
        # All values are identical: avoid 0 / 0 (which would produce NaN).
        return (shifted + min_val).round(0)
    return (shifted / span * (max_val - min_val) + min_val).round(0)

# Rescale each synthetic column into a human-readable range
# (fix_series also rounds the values to whole numbers).
features[:,0] = fix_series(features[:,0], 0, 10000)  # becomes 'income'
features[:,1] = fix_series(features[:,1], 0, 10)     # becomes 'credit_cards'
features[:,2] = fix_series(features[:,2], 18, 85)    # becomes 'age'

# Name the columns when the frame is built: the `inplace` argument of
# DataFrame.set_axis no longer exists in pandas 2.x, so the previous
# set_axis(..., inplace=True) call fails there.
classsification_df = pd.DataFrame(
    features,
    columns=['income', 'credit_cards', 'age'],
    dtype='int')

# Attach the generated class labels as the prediction target.
classsification_df['approved_loan'] = target
classsification_df.head()

In [None]:
from azureml.core import Workspace, Dataset

# Load the workspace from its config and fetch the workspace's default datastore.
ws = Workspace.from_config()
dstore = ws.get_default_datastore()

# Register the generated frame as the tabular dataset "loans", storing it
# under /samples/loans on that datastore.
loans_dataset = Dataset.Tabular.register_pandas_dataframe(
    name="loans",
    description="A genarated dataset for loans",
    dataframe=classsification_df,
    target=(dstore, "/samples/loans"),
)


In [None]:
from sklearn.model_selection import train_test_split
# Features are the three applicant columns; the label is the approval flag
# (taken as a plain array via .values).
x = classsification_df.loc[:, ['income', 'credit_cards', 'age']]
y = classsification_df['approved_loan'].values

# Hold out 20% of the rows for testing; the seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=42,
    test_size=0.2,
)


In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier


# Scale every training column with MinMaxScaler before it reaches the model.
datatransformer = ColumnTransformer(
    transformers=[
        ('scale', MinMaxScaler(), x_train.columns)
])

# The forest is seeded so that the predictions and every explanation derived
# from this model are reproducible across kernel restarts, like the data
# generation and the train/test split above.
model_pipeline = Pipeline(steps=[
                      ('datatransformer', datatransformer),
                      ('model', RandomForestClassifier(random_state=42))
])

# Could be as simple as the following line
# model_pipeline = RandomForestClassifier(random_state=42)
model_pipeline.fit(x_train, y_train)

In [None]:
# Three hand-written applicants of the same age: a baseline, one with more
# credit cards, and one with a higher income.
test_df = pd.DataFrame(
    columns=['income', 'credit_cards', 'age'],
    data=[
        [2000, 2, 45],
        [2000, 9, 45],
        [10000, 2, 45],
    ],
)

# Predicted class for each of the three rows.
test_pred = model_pipeline.predict(test_df)
print(test_pred)

In [None]:
# Show the feature_importances_ attribute of the classifier step
# of the fitted pipeline.
fitted_classifier = model_pipeline.named_steps['model']
fitted_classifier.feature_importances_

In [None]:
from interpret.ext.blackbox import TabularExplainer

# Explain the classifier step on its own and hand the pipeline's
# preprocessing step to the explainer through `transformations`.
explainer = TabularExplainer(
    model_pipeline.named_steps['model'],
    transformations=model_pipeline.named_steps['datatransformer'],
    initialization_examples=x_train,
    features=x_train.columns,
    classes=["Reject", "Approve"],
)

In [None]:
local_explanation = explainer.explain_local(test_df)

# Ranked importances and the matching feature names; both are indexed
# first by class and then by sample below.
sorted_local_values = local_explanation.get_ranked_local_values()
sorted_local_names = local_explanation.get_ranked_local_names()

for row_number in range(len(test_df)):
    predicted_class = test_pred[row_number]
    row_as_dict = test_df.iloc[[row_number]].to_dict(orient='list')

    print(f"Test sample number {row_number+1}")
    print("\t", row_as_dict)
    print("\t", f"The prediction was {predicted_class}")

    # Pair each feature name with its importance for the predicted class.
    ranked_names = sorted_local_names[predicted_class][row_number]
    ranked_values = sorted_local_values[predicted_class][row_number]
    local_importance = dict(zip(ranked_names, ranked_values))

    print("\t", "Local feature importance")
    print("\t", local_importance)


In [None]:
# Global explanation computed over the held-out test rows.
global_explanation = explainer.explain_global(x_test)

tabular_ranked_names = global_explanation.get_ranked_global_names()
print("Feature names:", tabular_ranked_names)

tabular_ranked_values = global_explanation.get_ranked_global_values()
print("Feature importances:", tabular_ranked_values)

# _method is read straight off the explainer to report which method it used.
print(f"Method used: {explainer._method}")

In [None]:
from raiwidgets import ExplanationDashboard
# Interactive dashboard over the global explanation, the full pipeline,
# and the test data with its true labels.
ExplanationDashboard(
    global_explanation,
    model_pipeline,
    true_y=y_test,
    dataset=x_test,
)

### Understanding the tabular data interpretation techniques

In [None]:
from interpret.ext.glassbox import (
    LGBMExplainableModel,
    LinearExplainableModel,
    SGDExplainableModel,
    DecisionTreeExplainableModel
)

from interpret.ext.blackbox import MimicExplainer
# MimicExplainer is given the whole pipeline as the model and uses
# DecisionTreeExplainableModel as its explainable model.
# NOTE(review): augment_data / max_num_of_augmentations presumably control
# augmentation of the initialization examples - confirm against the
# interpret-community docs.
mimic_explainer = MimicExplainer(
    model=model_pipeline,
    model_task='classification',
    explainable_model=DecisionTreeExplainableModel,
    initialization_examples=x_train,
    features=x_train.columns,
    classes=["Reject", "Approve"],
    augment_data=True,
    max_num_of_augmentations=10,
)

In [None]:
# Global explanation from the mimic explainer over the test rows.
mimic_global_explanation = mimic_explainer.explain_global(x_test)

mimic_ranked_names = mimic_global_explanation.get_ranked_global_names()
print("Feature names:", mimic_ranked_names)

mimic_ranked_values = mimic_global_explanation.get_ranked_global_values()
print("Feature importances:", mimic_ranked_values)

print(f"Method used: {mimic_explainer._method}")

In [None]:
mimic_local_explanation = mimic_explainer.explain_local(test_df)

# Ranked importances and feature names, indexed by class and then by sample.
mimic_sorted_local_values = mimic_local_explanation.get_ranked_local_values()
mimic_sorted_local_names = mimic_local_explanation.get_ranked_local_names()

for row_number in range(len(test_df)):
    predicted_class = test_pred[row_number]
    row_as_dict = test_df.iloc[[row_number]].to_dict(orient='list')

    print(f"Test sample number {row_number+1}")
    print("\t", row_as_dict)
    print("\t", f"The prediction was {predicted_class}")

    # Pair each feature name with its importance for the predicted class.
    mimic_ranked_local_names = \
        mimic_sorted_local_names[predicted_class][row_number]
    mimic_ranked_local_values = \
        mimic_sorted_local_values[predicted_class][row_number]
    mimic_local_importance = dict(zip(mimic_ranked_local_names,
                                      mimic_ranked_local_values))

    print("\t", "Local feature importance")
    print("\t", mimic_local_importance)

In [None]:
from interpret.ext.blackbox import PFIExplainer
# PFIExplainer wraps the whole pipeline; unlike the explainers above it is
# built without initialization examples.
pfi_explainer = PFIExplainer(
    model_pipeline,
    classes=["Reject", "Approve"],
    features=x_train.columns,
)

In [None]:
# This explainer is also given the true labels of the test rows.
pfi_global_explanation = pfi_explainer.explain_global(
    x_test, true_labels=y_test)

pfi_ranked_names = pfi_global_explanation.get_ranked_global_names()
print("Feature names:", pfi_ranked_names)

pfi_ranked_values = pfi_global_explanation.get_ranked_global_values()
print("Feature importances:", pfi_ranked_values)

print(f"Method used: {pfi_explainer._method}")

### Reviewing the interpretation results

In [None]:
from azureml.core import Workspace, Experiment
from azureml.interpret import ExplanationClient

# Start an interactive run (without a snapshot directory) in the
# "chapter10" experiment.
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="chapter10")
run = exp.start_logging(snapshot_directory=None)

# Upload the TabularExplainer global explanation, together with the true
# test labels, to that run.
client = ExplanationClient.from_run(run)
client.upload_model_explanation(
    global_explanation,
    comment='global explanation: TabularExplainer',
    true_ys=y_test,
)

# Mark the run as completed and print the link to it in the portal.
run.complete()
print(run.get_portal_url())

# If you get "ModuleNotFoundError: No module named 'shap.common'", run the following in a new cell to install a specific version of shap:
# !pip install --upgrade shap==0.34
# If the problem persists, restart the kernel and re-run all of the steps above.

In [None]:
from azureml.core import Workspace, Dataset, Experiment
from azureml.train.automl import AutoMLConfig

ws = Workspace.from_config()
compute_target = ws.compute_targets["cpu-sm-cluster"]

# Fetch the registered "loans" dataset and split it 80/20 with a fixed seed.
loans_dataset = Dataset.get_by_name(workspace=ws, name='loans')
train_ds, validate_ds = loans_dataset.random_split(
    percentage=0.8, seed=1337)

# Classification AutoML configuration, capped at 4 iterations and a quarter
# of an hour, with model explainability switched on.
experiment_config = AutoMLConfig(
    task="classification",
    primary_metric='accuracy',
    label_column_name="approved_loan",
    training_data=train_ds,
    validation_data=validate_ds,
    compute_target=compute_target,
    iterations=4,
    experiment_timeout_hours=0.25,
    model_explainability=True,
)

# Submit to the "loans-automl" experiment and block until it finishes,
# showing the output in the notebook.
automl_experiment = Experiment(ws, 'loans-automl')
automl_run = automl_experiment.submit(experiment_config)
automl_run.wait_for_completion(show_output=True)

### Analyzing model errors

In [None]:
from raiwidgets import ErrorAnalysisDashboard
# Error-analysis dashboard over the same explanation, pipeline, test data
# and true labels used for the explanation dashboard.
ErrorAnalysisDashboard(
    global_explanation,
    model_pipeline,
    true_y=y_test,
    dataset=x_test,
)

# If you don't see any graphics, open the notebook in Jupyter or JupyterLab
# and re-run the cell to generate the interactive widget.

### Detecting potential model fairness issues
You have already installed the `fairlearn` package at the top of this notebook.

In [None]:
from fairlearn.metrics import MetricFrame
from sklearn.metrics import accuracy_score
# Predictions of the full pipeline on the held-out test rows.
y_pred = model_pipeline.predict(x_test)

# Treat the applicant's age as the sensitive feature to group by.
age = x_test['age']

# MetricFrame arguments are passed by keyword: the notebook upgrades
# fairlearn to the latest release, where positional arguments are no longer
# accepted and the metric argument is named `metrics`.
model_metrics = MetricFrame(metrics=accuracy_score,
                            y_true=y_test,
                            y_pred=y_pred,
                            sensitive_features=age)

# Overall accuracy, followed by only the age groups whose accuracy is below 1.
print(model_metrics.overall)
print(model_metrics.by_group[model_metrics.by_group < 1])

In [None]:
from raiwidgets import FairnessDashboard

# Fairness dashboard comparing the true and predicted labels, grouped by
# the age column used as the sensitive feature.
FairnessDashboard(
    y_true=y_test,
    y_pred=y_pred,
    sensitive_features=age,
)

# Open the notebook in Jupyter or Jupyter Lab to run this cell