In [None]:
import os

import alibi
import dill
import joblib
import numpy as np
from alibi.datasets import adult
from google.cloud import storage
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Load data

In [None]:
# Call the loader on its own so the cell output shows what adult() returns.
# NOTE(review): the following cell calls adult() again, so the dataset is loaded twice.
adult()

In [None]:
# Unpack the loader's return value into the feature matrix, the labels,
# the column names and the category mapping used throughout the notebook.
(data,
 labels,
 feature_names,
 category_map) = adult()

In [None]:
# Show the label array as the cell output.
labels

In [None]:
# Show the feature (column) names as the cell output.
feature_names

In [None]:
# Show the category mapping as the cell output; its keys are used below as the
# indices of the categorical columns.
category_map

# Define the train and test sets

In [None]:
# Shuffle rows with a fixed seed. Labels are appended as the last column first,
# so features and labels are permuted together and stay aligned.
# NOTE: this cell rebinds `data` and `labels`; running it a second time shuffles
# the already-shuffled arrays.
np.random.seed(0)
data_perm = np.random.permutation(np.column_stack((data, labels)))

# Split the shuffled matrix back into features (all but last column) and labels (last column).
data, labels = data_perm[:, :-1], data_perm[:, -1]

In [None]:
# The first `idx` shuffled rows are used for training, the remainder for testing.
idx = 30000
X_train, Y_train = data[:idx, :], labels[:idx]
# The test slice starts at `idx` (not `idx + 1`); otherwise row `idx` would be
# silently dropped from both splits.
X_test, Y_test = data[idx:, :], labels[idx:]

# Every row must end up in exactly one of the two splits.
assert len(X_train) + len(X_test) == len(data)
assert len(Y_train) + len(Y_test) == len(labels)

# Feature transformation pipeline

In [None]:
# Column indices that are not keys of `category_map` are handled as numeric
# features: missing values are filled with the column median, then standardised.
ordinal_features = [x for x in range(len(feature_names)) if x not in category_map]
ordinal_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='median')),
                                      ('scaler', StandardScaler())])

# The keys of `category_map` are the categorical column indices.
# Missing values are filled with the most frequent category: a median of
# category codes has no meaning and is not guaranteed to be an existing category.
# The imputed codes are then one-hot encoded; unseen categories are ignored.
categorical_features = list(category_map.keys())
categorical_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='most_frequent')),
                                          ('onehot', OneHotEncoder(handle_unknown='ignore'))])

# Route each group of columns through its own transformer.
preprocessor = ColumnTransformer(transformers=[('num', ordinal_transformer, ordinal_features),
                                               ('cat', categorical_transformer, categorical_features)])

# Train a RandomForest model

In [None]:
print("Train random forest model")

# Re-seed NumPy's global RNG immediately before building the classifier
# (presumably so training is reproducible, since no random_state is passed).
np.random.seed(0)
clf = RandomForestClassifier(n_estimators=50)

# Chain preprocessing and the classifier so both are fitted with one call
# and can be saved together as a single object.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('clf', clf),
])
pipeline.fit(X_train, Y_train)

# Fit the explainer

In [None]:
print("Creating an explainer")


def predict_fn(x):
    """Preprocess raw rows `x` and return the classifier's predictions for them."""
    transformed = preprocessor.transform(x)
    return clf.predict(transformed)


# The explainer receives the prediction function plus the column names and the
# category mapping.
explainer = alibi.explainers.AnchorTabular(
    predict_fn,
    feature_names=feature_names,
    categorical_names=category_map,
)

In [None]:
# Fit the anchor explainer on the raw (un-preprocessed) training rows.
explainer.fit(X_train)

# Save model and explainer

In [None]:
# Writing into a missing directory fails, so make sure the target folder exists
# before serialising the fitted pipeline (preprocessor + classifier).
os.makedirs('model', exist_ok=True)
joblib.dump(pipeline, 'model/model.joblib')

In [None]:
# open(..., 'wb') fails when the parent directory is missing, so create it first.
os.makedirs('explainer', exist_ok=True)
# Serialise the fitted explainer with dill.
with open("explainer/explainer.dill", 'wb') as f:
    dill.dump(explainer, f)

# Save model and explainer to GCS

In [None]:
# The Google Cloud client needs GOOGLE_APPLICATION_CREDENTIALS to point at a
# service-account key file (or credentials must be created explicitly).
# For more information, see https://cloud.google.com/docs/authentication/getting-started
# Credentials already exported in the environment are respected; the local key
# file is only a fallback when the variable is unset.
if 'GOOGLE_APPLICATION_CREDENTIALS' not in os.environ:
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'tmp/gcp.json'

In [None]:
# Destination bucket and the object keys for the two artifacts.
bucket_name = 'deeploy-ml'
model_path, explainer_path = (
    'explainable-ml-engine/model/model.joblib',
    'explainable-ml-engine/explainer/explainer.dill',
)

# Look up the bucket through a storage client, then create one blob handle per
# artifact; the uploads themselves happen in the following cells.
client = storage.Client()
bucket = client.get_bucket(bucket_name)
model_blob, explainer_blob = (
    bucket.blob(object_key) for object_key in (model_path, explainer_path)
)

In [None]:
# Upload the locally saved pipeline to its blob in the bucket.
model_blob.upload_from_filename(filename='model/model.joblib')

In [None]:
# Upload the locally saved explainer to its blob in the bucket.
explainer_blob.upload_from_filename(filename='explainer/explainer.dill')
