In [1]:
# One-time environment setup: installs the pinned xgboost and scikit-learn
# versions into the user site-packages (--user). Skip this cell if both are
# already installed; otherwise restart the kernel afterwards so the freshly
# installed versions are the ones that get imported.
!pip install xgboost==0.82 --user
!pip install scikit-learn==0.20.4 --user



**Note**: You may need to restart the kernel to use updated packages.

In [None]:
%%writefile custom_transform.py
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np

class PositionalSelector(BaseEstimator, TransformerMixin):
    """Transformer that keeps only the columns at the configured positions."""

    def __init__(self, positions):
        # Stored unmodified under the constructor argument's own name.
        self.positions = positions

    def fit(self, X, y=None):
        """Nothing is learned from the data; returns the transformer itself."""
        return self

    def transform(self, X):
        """Return the columns of ``X`` listed in ``self.positions``."""
        as_array = np.array(X)
        selected = as_array[:, self.positions]
        return selected


class StripString(BaseEstimator, TransformerMixin):
    """Transformer that strips surrounding whitespace from every element."""

    def fit(self, X, y=None):
        """Stateless: there is nothing to fit, so the transformer is returned as is."""
        return self

    def transform(self, X):
        """Apply ``str.strip`` element-wise to ``X`` and return the resulting array."""
        cells = np.array(X)
        return np.vectorize(str.strip)(cells)

class SimpleOneHotEncoder(BaseEstimator, TransformerMixin):
    """One-hot encode every column of a 2-D array of categorical values.

    ``fit`` records, per column, the distinct values seen and the output slot
    assigned to each. ``transform`` emits one 0/1 indicator block per column
    and concatenates the blocks side by side. A value that was not present
    during ``fit`` leaves its row all zeros within that column's block.
    """

    def fit(self, X, y=None):
        """Learn the value -> slot mapping of each column.

        Args:
            X: 2-D array-like of shape (n_rows, n_columns).
            y: Ignored; accepted for scikit-learn API compatibility.

        Returns:
            self
        """
        # Coerce to an ndarray exactly as transform() does, so lists and
        # DataFrames can be fitted as well as ndarrays.
        X = np.array(X)
        self.values = []
        for c in range(X.shape[1]):
            # np.unique returns the distinct values sorted, which makes the
            # slot assignment deterministic for a given column.
            column_values = {v: i for i, v in enumerate(np.unique(X[:, c]))}
            self.values.append(column_values)
        return self

    def transform(self, X):
        """Encode ``X`` using the mappings learned by ``fit``.

        Args:
            X: 2-D array-like with the same number of columns as the fitted data.

        Returns:
            An ``int8`` ndarray with one row per input row and one column per
            (input column, fitted value) pair.
        """
        X = np.array(X)
        matrices = []
        for c in range(X.shape[1]):
            lookup = self.values[c]
            Y = X[:, c]
            matrix = np.zeros(shape=(len(Y), len(lookup)), dtype=np.int8)
            for i, x in enumerate(Y):
                # Values unseen during fit are skipped, leaving the row at zero.
                if x in lookup:
                    matrix[i][lookup[x]] = 1
            matrices.append(matrix)
        return np.concatenate(matrices, axis=1)

# Import Python packages

Execute the command below (__Shift + Enter__) to load all the python libraries we'll need for the lab.

In [2]:
import datetime
import pickle
import os

import pandas as pd
import xgboost as xgb
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import FeatureUnion, make_pipeline
from sklearn.utils import shuffle
from sklearn.base import clone
from sklearn.model_selection import train_test_split

from witwidget.notebook.visualization import WitWidget, WitConfigBuilder

import custom_transforms

import warnings
warnings.filterwarnings(action='ignore', category=DeprecationWarning)

Before we continue, note that we'll be using your Qwiklabs project id a lot in this notebook. For convenience, set it as an environment variable using the command below:

In [3]:
# Project ID read by the later gsutil/gcloud cells as $QWIKLABS_PROJECT_ID and by
# the What-If Tool cells via os.environ.
# NOTE(review): the value looks like a single lab session's project -- replace it with your own.
os.environ['QWIKLABS_PROJECT_ID'] = 'qwiklabs-gcp-01-8c7d7c737d80'

# Download and process data

The models you'll build will predict the income level, whether it's less than or equal to $50,000 per year, of individuals given 14 data points about each individual. You'll train your models on this UCI [Census Income Dataset](https://archive.ics.uci.edu/ml/datasets/Adult).

We'll read the data into a Pandas DataFrame to see what we'll be working with. It's important to shuffle our data in case the original dataset is ordered in a specific way. We use an sklearn utility called `shuffle` to do this, which we imported in the imports cell above:

In [4]:
# Training split of the UCI Census Income ("Adult") dataset.
train_csv_path = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data'

# Column names in file order; the last entry is the prediction target.
COLUMNS = (
    'age', 'workclass', 'fnlwgt', 'education', 'education-num',
    'marital-status', 'occupation', 'relationship', 'race', 'sex',
    'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
    'income-level',
)

# Column names are supplied explicitly via `names=`; skipinitialspace drops
# the blank that follows each delimiter. The rows are then shuffled with a
# fixed seed so the order is reproducible.
raw_train_data = shuffle(
    pd.read_csv(train_csv_path, names=COLUMNS, skipinitialspace=True),
    random_state=4,
)

`raw_train_data.head()` lets us preview the first five rows of our dataset in Pandas.

In [5]:
# Display-only cell: the bare expression renders the first five (shuffled) rows.
raw_train_data.head()

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income-level
28762,25,Private,307643,HS-grad,9,Married-civ-spouse,Transport-moving,Husband,White,Male,0,0,40,United-States,<=50K
4823,34,Private,424988,HS-grad,9,Married-civ-spouse,Craft-repair,Husband,White,Male,0,0,45,United-States,<=50K
3106,42,Local-gov,245307,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,1977,48,United-States,>50K
11293,44,Private,56483,Bachelors,13,Never-married,Adm-clerical,Own-child,White,Female,0,0,37,United-States,<=50K
7008,49,Private,215389,Bachelors,13,Married-civ-spouse,Machine-op-inspct,Husband,Black,Male,0,0,48,United-States,<=50K


The `income-level` column is the thing our model will predict. This is the binary outcome of whether the individual makes more than $50,000 per year. To see the distribution of income levels in the dataset, run the following:

In [6]:
# normalize=True reports each class as a fraction of all rows rather than a raw count.
print(raw_train_data['income-level'].value_counts(normalize=True))

<=50K    0.75919
>50K     0.24081
Name: income-level, dtype: float64


As explained in [this paper](http://cseweb.ucsd.edu/classes/sp15/cse190-c/reports/sp15/048.pdf), each entry in the dataset contains the following information
about an individual:

* __age__: the age of an individual
* __workclass__: a general term to represent the employment status of an individual
* __fnlwgt__: final weight. In other words, this is the number of people the census believes
the entry represents...
* __education__: the highest level of education achieved by an individual.
* __education-num__: the highest level of education achieved in numerical form.
* __marital-status__: marital status of an individual. 
* __occupation__: the general type of occupation of an individual
* __relationship__: represents what this individual is relative to others. For example an
individual could be a Husband. Each entry only has one relationship attribute and is
somewhat redundant with marital status. 
* __race__: Descriptions of an individual’s race
* __sex__: the biological sex of the individual
* __capital-gain__: capital gains for an individual
* __capital-loss__: capital loss for an individual
* __hours-per-week__: the hours an individual has reported to work per week
* __native-country__: country of origin for an individual
* __income-level__: whether or not an individual makes more than $50,000 annually

An important concept in machine learning is train / test split. We'll take the majority of our data and use it to train our model, and we'll set aside the rest for testing our model on data it's never seen before. There are many ways to create training and test datasets. Fortunately, for our census data we can simply download a pre-defined test set. 

In [7]:
test_csv_path = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test'

# Same parsing options as the training file, except that skiprows=1 discards
# the first line of the file before any row is parsed.
raw_test_data = pd.read_csv(
    test_csv_path,
    names=COLUMNS,
    skiprows=1,
    skipinitialspace=True,
)

In [8]:
# Display-only cell: previews the first five rows of the test split.
raw_test_data.head()

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income-level
0,25,Private,226802,11th,7,Never-married,Machine-op-inspct,Own-child,Black,Male,0,0,40,United-States,<=50K.
1,38,Private,89814,HS-grad,9,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,0,50,United-States,<=50K.
2,28,Local-gov,336951,Assoc-acdm,12,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K.
3,44,Private,160323,Some-college,10,Married-civ-spouse,Machine-op-inspct,Husband,Black,Male,7688,0,40,United-States,>50K.
4,18,?,103497,Some-college,10,Never-married,?,Own-child,White,Female,0,0,30,United-States,<=50K.


Since we don't want to train a model on our labels, we're going to separate them from the features in both the training and test datasets. Also, notice that `income-level` is a string datatype. For machine learning, it's better to convert this to a binary integer datatype. We do this in the next cell.

In [9]:
# Binary targets: 1 when the label is the ">50K" class, 0 otherwise.
# The test split spells its labels with a trailing period ('>50K.').
train_labels = raw_train_data['income-level'].eq('>50K').values.astype(int)
test_labels = raw_test_data['income-level'].eq('>50K.').values.astype(int)

# Feature matrices: every column except the target, as NumPy arrays.
raw_train_features = raw_train_data.drop(columns='income-level').values
raw_test_features = raw_test_data.drop(columns='income-level').values

Now you're ready to build and train your first model!

# Build a First Model

The model we build closely follows a template for the [census dataset found on AI Hub](https://aihub.cloud.google.com/p/products%2F526771c4-9b36-4022-b9c9-63629e9e3289). For our model we use an XGBoost classifier. However, before we train our model we have to pre-process the data a little bit. We build a processing pipeline using [Scikit-Learn's Pipeline constructor](https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html). We apply some custom transformations that are defined in `custom_transforms.py`. Open the file `custom_transforms.py` and inspect the code. Our features are either numerical or categorical. The numerical features are `age` and `hours-per-week`. These features will be processed by applying [Scikit-Learn's StandardScaler](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html). The categorical features are `workclass`, `education`, `marital-status`, and `relationship`. These features are [one-hot encoded](https://machinelearningmastery.com/why-one-hot-encode-data-in-machine-learning/).

In [10]:
# Column positions within the raw feature arrays (order follows COLUMNS).
numerical_indices = [0, 12]          # age, hours-per-week
categorical_indices = [1, 3, 5, 7]   # workclass, education, marital-status, relationship

# p1 -- categorical branch: select columns, strip whitespace, one-hot encode.
p1 = make_pipeline(
    custom_transforms.PositionalSelector(categorical_indices),
    custom_transforms.StripString(),
    custom_transforms.SimpleOneHotEncoder()
)
# p2 -- numerical branch: select columns, standardize.
p2 = make_pipeline(
    custom_transforms.PositionalSelector(numerical_indices),
    StandardScaler()
)
# Each label names the branch it is attached to. p1 stays first, so the
# combined output is the one-hot columns followed by the scaled numericals.
p3 = FeatureUnion([
    ('categoricals', p1),
    ('numericals', p2),
])

To finalize the pipeline we attach an XGBoost classifier at the end. The complete pipeline object takes the raw data we loaded from csv files, processes the categorical features, processes the numerical features, concatenates the two, and then passes the result through the XGBoost classifier.   

In [11]:
# Full model: the preprocessing union feeding an XGBoost classifier with max_depth=4.
pipeline = make_pipeline(p3, xgb.sklearn.XGBClassifier(max_depth=4))

We train our model with one function call using the fit() method. We pass the fit() method our training data.

In [12]:
# Fits the preprocessing steps and the classifier in one call; the fitted
# pipeline is also the cell's displayed value.
pipeline.fit(raw_train_features, train_labels)



Pipeline(memory=None,
     steps=[('featureunion', FeatureUnion(n_jobs=None,
       transformer_list=[('numericals', Pipeline(memory=None,
     steps=[('positionalselector', PositionalSelector(positions=[1, 3, 5, 7])), ('stripstring', StripString()), ('simpleonehotencoder', SimpleOneHotEncoder())])), ('categoricals', Pipeline...
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1))])

Let's go ahead and save our model as a pickle file. Executing the command below will save the trained model in the file `model.pkl` in the same directory as this notebook. 

In [13]:
# Serialize the fitted pipeline to model.pkl in the working directory.
with open('model.pkl', mode='wb') as pkl_out:
    pickle.dump(pipeline, pkl_out)

# Save Trained Model to AI Platform

We've got our model working locally, but it would be nice if we could make predictions on it from anywhere (not just this notebook!). In this step we'll deploy it to the cloud. For detailed instructions on how to do this visit [the official documentation](https://cloud.google.com/ai-platform/prediction/docs/exporting-for-prediction). Note that since we have custom components in our data pipeline we need to go through a few extra steps.

## Create a Cloud Storage bucket for the model

We first need to create a storage bucket to store our pickled model file. We'll point Cloud AI Platform at this file when we deploy. Run this gsutil command to create a bucket. This will ensure the name of the cloud storage bucket you create will be globally unique.

In [14]:
# Creates a bucket named after the project ID.
# NOTE(review): presumably errors if the bucket already exists when the cell is re-run -- confirm.
!gsutil mb gs://$QWIKLABS_PROJECT_ID

Creating gs://qwiklabs-gcp-01-8c7d7c737d80/...


## Package custom transform code

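Since we're using custom transformation code we need to package it up and direct AI Platform to it when we ask it to make predictions. To package our custom code we create a source distribution. The following code creates this distribution and then copies the distribution and the model file to the bucket we created. Note that `setup.py` lists `predictor.py`, so that file must already exist in the notebook directory before you build the distribution; if it does not, first run the `%%writefile predictor.py` cell in the __Create and Deploy Model__ section below. Ignore the warnings about missing metadata.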

In [None]:
%%writefile setup.py
# Packaging script for the custom code shipped alongside the pickled model.
# Both files listed in `scripts` are picked up by `python setup.py sdist`.
# NOTE(review): predictor.py is written by a later cell in this notebook; it
# presumably has to exist before the sdist step runs -- confirm cell order.
from setuptools import setup

setup(name='custom_transforms', version='0.1', scripts=['custom_transforms.py', 'predictor.py'])

In [15]:
%%bash

# Build a gzipped source distribution of the custom code (written under dist/).
python setup.py sdist --formats=gztar

# Upload the pickled model into the bucket's original/ folder, which the
# "original" model version uses as its origin directory.
gsutil cp model.pkl gs://$QWIKLABS_PROJECT_ID/original/
# Upload the code package to the bucket root; the deploy cells reference it via --package-uris.
gsutil cp dist/custom_transforms-0.1.tar.gz gs://$QWIKLABS_PROJECT_ID/

running sdist
running egg_info
writing custom_transforms.egg-info/PKG-INFO
writing dependency_links to custom_transforms.egg-info/dependency_links.txt
writing top-level names to custom_transforms.egg-info/top_level.txt
reading manifest file 'custom_transforms.egg-info/SOURCES.txt'
writing manifest file 'custom_transforms.egg-info/SOURCES.txt'
running check
creating custom_transforms-0.1
creating custom_transforms-0.1/custom_transforms.egg-info
copying files to custom_transforms-0.1...
copying custom_transforms.py -> custom_transforms-0.1
copying predictor.py -> custom_transforms-0.1
copying setup.py -> custom_transforms-0.1
copying custom_transforms.egg-info/PKG-INFO -> custom_transforms-0.1/custom_transforms.egg-info
copying custom_transforms.egg-info/SOURCES.txt -> custom_transforms-0.1/custom_transforms.egg-info
copying custom_transforms.egg-info/dependency_links.txt -> custom_transforms-0.1/custom_transforms.egg-info
copying custom_transforms.egg-info/top_level.txt -> custom_transf




Copying file://model.pkl [Content-Type=application/octet-stream]...
/ [1 files][117.3 KiB/117.3 KiB]                                                
Operation completed over 1 objects/117.3 KiB.                                    
Copying file://dist/custom_transforms-0.1.tar.gz [Content-Type=application/x-tar]...
/ [1 files][  1.7 KiB/  1.7 KiB]                                                
Operation completed over 1 objects/1.7 KiB.                                      


## Create and Deploy Model

The following ai-platform gcloud command will create a new model in your project. We'll call this one `census_income_classifier`.

In [16]:
# Registers the model resource; the version-creation cells below attach versions to it by name.
!gcloud ai-platform models create census_income_classifier --regions us-central1

Using endpoint [https://ml.googleapis.com/]
Created ai platform model [projects/qwiklabs-gcp-01-8c7d7c737d80/models/census_income_classifier].


Now it's time to deploy the model. The next cell writes the custom prediction class to `predictor.py`; the cell after it deploys a model version with a gcloud command:

In [None]:
%%writefile predictor.py
import os
import pickle
import numpy as np

class MyPredictor(object):
    """Custom prediction routine that serves class probabilities.

    Wraps an already-loaded model and exposes the model's ``predict_proba``
    output through the ``predict`` entry point.
    """

    def __init__(self, model):
        """Keep a reference to the loaded model.

        Instances are normally created through `from_path`.
        """
        self._model = model

    def predict(self, instances, **kwargs):
        """Return class probabilities for a batch of instances.

        Args:
            instances: A list of prediction input instances.
            **kwargs: Additional fields from the predict request body;
                accepted but not used here.

        Returns:
            The model's ``predict_proba`` output converted to nested lists.
        """
        return self._model.predict_proba(np.asarray(instances)).tolist()

    @classmethod
    def from_path(cls, model_dir):
        """Build a predictor from the ``model.pkl`` file inside ``model_dir``.

        Args:
            model_dir: Directory that contains the pickled model under the
                file name ``model.pkl``.

        Returns:
            An instance of `MyPredictor` wrapping the unpickled object.
        """
        # NOTE: unpickling runs code embedded in the file, so model_dir must
        # only ever point at trusted artifacts.
        with open(os.path.join(model_dir, 'model.pkl'), 'rb') as pickled:
            return cls(pickle.load(pickled))

In [17]:
%%bash

# Deployment parameters: the version name, the bucket folder holding model.pkl,
# and the uploaded source distribution containing the custom code.
MODEL_NAME="census_income_classifier"
VERSION_NAME="original"
MODEL_DIR="gs://$QWIKLABS_PROJECT_ID/original/"
CUSTOM_CODE_PATH="gs://$QWIKLABS_PROJECT_ID/custom_transforms-0.1.tar.gz"

# Create the model version. --prediction-class points at MyPredictor in
# predictor.py, so requests are answered with predict_proba output.
gcloud beta ai-platform versions create $VERSION_NAME \
  --model $MODEL_NAME \
  --runtime-version 1.15 \
  --python-version 3.7 \
  --origin $MODEL_DIR \
  --package-uris $CUSTOM_CODE_PATH \
  --prediction-class predictor.MyPredictor \
  --region=global

Using endpoint [https://ml.googleapis.com/]
Creating version (this might take a few minutes)......
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................done.


While this is running, check the [models section](https://console.cloud.google.com/ai-platform/models) of your AI Platform console. You should see your new version deploying there. When the deploy completes successfully you'll see a green check mark where the loading spinner is. The deploy should take 2-3 minutes. You will need to click on the model name in order to see the spinner/checkmark. In the command above, notice we specify `prediction-class`. The reason we must specify a prediction class is that by default, AI Platform prediction will call a Scikit-Learn model's `predict` method, which in this case returns either 0 or 1. However, the What-If Tool requires output from a model in line with a Scikit-Learn model's `predict_proba` method. This is because WIT wants the probabilities of the negative and positive classes, not just the final determination on which class a person belongs to; having the probabilities allows us to do more fine-grained exploration of the model. Consequently, we must write a [custom prediction routine](https://cloud.google.com/ai-platform/prediction/docs/custom-prediction-routines) that basically renames `predict_proba` as `predict`. The custom prediction method can be found in the file `predictor.py`. This file was packaged in the section __Package custom transform code__. By specifying `prediction-class` we're telling AI Platform to call our custom prediction method--basically, `predict_proba`--instead of the default `predict` method.

## Test the deployed model

To make sure your deployed model is working, test it out using gcloud to make a prediction. First, save a JSON file with one test instance for prediction:

In [18]:
%%writefile predictions.json
[25, "Private", 226802, "11th", 7, "Never-married", "Machine-op-inspct", "Own-child", "Black", "Male", 0, 0, 40, "United-States"]

Writing predictions.json


Test your model by running this code:

In [19]:
# Sends the instance in predictions.json to the deployed "original" version and prints the response.
!gcloud ai-platform predict --model=census_income_classifier --json-instances=predictions.json --version=original --region=global

Using endpoint [https://ml.googleapis.com/]
[[0.9930716156959534, 0.006928374525159597]]


You should see your model's prediction in the output. The first entry in the output is the model's probability that the individual makes under \\$50K while the second entry is the model's confidence that the individual makes over \\$50k. The two entries sum to 1. 

# What-If Tool

To connect the What-if Tool to your AI Platform models, you need to pass it a subset of your test examples along with the ground truth values for those examples. Let's create a Numpy array of 2000 of our test examples.

In [20]:
num_datapoints = 2000

# What-If Tool input: the first `num_datapoints` test rows, with each row's
# ground-truth label appended as a final column.
test_examples = np.concatenate(
    [
        raw_test_features[:num_datapoints],
        test_labels[:num_datapoints][:, np.newaxis],
    ],
    axis=1,
)

Instantiating the What-if Tool is as simple as creating a WitConfigBuilder object and passing it the AI Platform model we built. Note that it'll take a minute to load the visualization.

In [21]:
# Configure the What-If Tool: examples (features plus label column) named by
# COLUMNS, served by the deployed "original" version, with `income-level` as
# the ground-truth column and display names for classes 0 and 1.
config_builder = (
    WitConfigBuilder(test_examples.tolist(), COLUMNS)
    .set_ai_platform_model(os.environ['QWIKLABS_PROJECT_ID'], 'census_income_classifier', 'original')
    .set_target_feature('income-level')
    .set_model_type('classification')
    .set_label_vocab(['Under 50K', 'Over 50K'])
)

# Last expression of the cell, so the widget is rendered as the cell output.
WitWidget(config_builder, height=800)

WitWidget(config={'model_type': 'classification', 'label_vocab': ['Under 50K', 'Over 50K'], 'feature_names': (…

The default view on the What-if Tool is the __Datapoint editor__ tab. Here, you can click on any individual data point to see its features and even change feature values. Navigate to the __Performance & Fairness__ tab in the What-if Tool. By slicing on a feature you can view the model error for individual feature values. Finally, navigate to the __Features__ tab in the What-if Tool. This shows you the distribution of values for each feature in your dataset. You can use this tab to make sure your dataset is balanced. For example, if we only had Asians in a population, the model's predictions wouldn't necessarily reflect real world data. This tab gives us a good opportunity to see where our dataset might fall short, so that we can go back and collect more data to make it balanced.

In the __Features__ tab, we can look to see the distribution of values for each feature in the dataset. We can see that of the 2000 test datapoints, 1346 are from men and 1702 are from Caucasians. Women and minorities seem under-represented in this dataset. That may lead to the model not learning an accurate representation of the world in which it is trying to make predictions (of course, even if it does learn an accurate representation, is that what we want the model to perpetuate? This is a much deeper question still falling under the ML fairness umbrella and worthy of discussion outside of WIT). Predictions on those under-represented groups are more likely to be inaccurate than predictions on the over-represented groups.

The features in this visualization can be sorted by a number of different metrics, including non-uniformity. With this sorting, the features that have the most non-uniform distributions are shown first. For numeric features, capital gain is very non-uniform, with most datapoints having it set to 0, but a small number having non-zero capital gains, all the way up to a maximum of 100k. For categorical features, country is the most non-uniform with most datapoints being from the USA, but there is a long tail of 40 other countries which are not well represented.

Back in the __Performance & Fairness__ tab, we can set an input feature (or set of features) with which to slice the data. For example, setting this to `sex` allows us to see the breakdown of model performance on male datapoints versus female datapoints. We can see that the model is more accurate (has fewer false positives and false negatives) on females than males. We can also see that the model predicts high income for females much less often than it does for males (8.0% of the time for females vs 27.1% of the time for males). __Note, your numbers will be slightly different due to the random elements of model training__.

Imagine a scenario where this simple income classifier was used to approve or reject loan applications (not a realistic example but it illustrates the point). In this case, 28% of men from the test dataset have their loans approved but only 10% of women have theirs approved. If we wished to ensure that men and women get their loans approved the same percentage of the time, that is a fairness concept called "demographic parity". One way to achieve demographic parity would be to have different classification thresholds for males and females in our model.

In this case, demographic parity can be found with both groups getting loans 16% of the time by having the male threshold at 0.67 and the female threshold at 0.31. Because of the vast difference in the properties of the male and female training data in this 1994 census dataset, we need quite different thresholds to achieve demographic parity. Notice how with the high male threshold there are many more false negatives than before, and with the low female threshold there are many more false positives than before. This is necessary to get the percentage of positive predictions to be equal between the two groups. WIT has buttons to optimize for other fairness constraints as well, such as "equal opportunity" and "equal accuracy". Note that the demographic parity numbers may be different from the ones in your text as the trained models are always a bit different.

The use of these features can help shed light on subsets of your data on which your classifier is performing very differently. Understanding biases in your datasets and data slices on which your model has disparate performance are very important parts of analyzing a model for fairness. There are many approaches to improving fairness, including augmenting training data, building fairness-related loss functions into your model training procedure, and post-training inference adjustments like those seen in WIT. We think that WIT provides a great interface for furthering ML fairness learning, but of course there is no silver bullet to improving ML fairness.

# Training on a more balanced dataset

Using the What-If Tool we saw that the model we trained on the census dataset wouldn't be very considerate in a production environment. What if we retrained the model on a dataset that was more balanced? Fortunately, we have such a dataset. Let's train a new model on this balanced dataset and compare it to our original dataset using the What-If Tool. 

First, let's load the balanced dataset into a Pandas dataframe.

In [22]:
bal_data_path = 'https://storage.googleapis.com/cloud-training/dei/balanced_census_data.csv'

# skiprows=1 drops the file's first line; column names come from COLUMNS instead.
bal_data = pd.read_csv(
    bal_data_path,
    skiprows=1,
    names=COLUMNS,
)

In [23]:
# Display-only cell: previews the first five rows of the balanced dataset.
bal_data.head()

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income-level
0,28,Private,272913,10th,6,Married-civ-spouse,Handlers-cleaners,Husband,White,Male,0,0,30,Mexico,<=50K
1,49,Private,200949,10th,6,Never-married,Other-service,Unmarried,White,Female,0,0,38,Peru,<=50K
2,34,Private,275438,Some-college,10,Married-civ-spouse,Prof-specialty,Husband,White,Male,5178,0,40,United-States,>50K
3,70,Private,90245,HS-grad,9,Widowed,Adm-clerical,Unmarried,White,Female,0,0,5,United-States,<=50K
4,38,Private,199256,HS-grad,9,Divorced,Adm-clerical,Unmarried,White,Female,0,0,40,United-States,<=50K


Execute the command below to see the distribution of gender in the data.

In [24]:
# Share of rows per value of `sex`; normalize=True yields fractions instead of counts.
bal_data['sex'].value_counts(normalize=True)

Male      0.5
Female    0.5
Name: sex, dtype: float64

Unlike the original dataset, this dataset has an equal number of rows for both males and females. Execute the command below to see the distribution of rows in the dataset across both `sex` and `income-level`.

In [25]:
# Row count for every (sex, income-level) combination.
bal_data.groupby(['sex', 'income-level'])['sex'].count()

sex     income-level
Female  <=50K           15128
        >50K            15128
Male    <=50K           15128
        >50K            15128
Name: sex, dtype: int64

We see that not only is the dataset balanced across gender, it's also balanced across income. Let's train a model on this data. We'll use exactly the same model pipeline as in the previous section. Scikit-Learn has a convenient utility function for copying model pipelines, `clone`. The `clone` function copies a pipeline architecture without saving learned parameter values. 

In [26]:
# Derive the 0/1 labels without overwriting bal_data's 'income-level' column.
# Writing the integers back into the frame would make this cell non-idempotent:
# on a second run no value would match '>50K' / '>50K.' and every label would be 0.
bal_labels = bal_data['income-level'].isin(['>50K', '>50K.']).values.astype(int)

# Feature matrix: every column except the target, as a NumPy array.
raw_bal_features = bal_data.drop('income-level', axis=1).values

In [27]:
# Unfitted copy with the same steps and parameters, so the pipeline fitted on
# the original data is left untouched.
pipeline_bal = clone(pipeline)

In [28]:
# Fit the cloned pipeline on the balanced data; the fitted pipeline is the cell's displayed value.
pipeline_bal.fit(raw_bal_features, bal_labels)



Pipeline(memory=None,
     steps=[('featureunion', FeatureUnion(n_jobs=None,
       transformer_list=[('numericals', Pipeline(memory=None,
     steps=[('positionalselector', PositionalSelector(positions=[1, 3, 5, 7])), ('stripstring', StripString()), ('simpleonehotencoder', SimpleOneHotEncoder())])), ('categoricals', Pipeline...
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1))])

As before, we save our trained model to a pickle file. Note, when we version this model in AI Platform the model in this case must be named `model.pkl`. It's ok to overwrite the existing `model.pkl` file since we'll be uploading it to Cloud Storage anyway. 

In [29]:
# Overwrite model.pkl with the pipeline trained on the balanced data.
with open('model.pkl', mode='wb') as pkl_out:
    pickle.dump(pipeline_bal, pkl_out)

Deploy the model to AI Platform using the following bash script:

In [30]:
%%bash

# Upload the balanced model into its own bucket folder, separate from original/.
gsutil cp model.pkl gs://$QWIKLABS_PROJECT_ID/balanced/
    
# Same deployment parameters as the "original" version, except for the version
# name and the origin folder; the custom code package is reused as is.
MODEL_NAME="census_income_classifier"
VERSION_NAME="balanced"
MODEL_DIR="gs://$QWIKLABS_PROJECT_ID/balanced/"
CUSTOM_CODE_PATH="gs://$QWIKLABS_PROJECT_ID/custom_transforms-0.1.tar.gz"

# Create the "balanced" version under the existing model.
gcloud beta ai-platform versions create $VERSION_NAME \
  --model $MODEL_NAME \
  --runtime-version 1.15 \
  --python-version 3.7 \
  --origin $MODEL_DIR \
  --package-uris $CUSTOM_CODE_PATH \
  --prediction-class predictor.MyPredictor \
  --region=global

Copying file://model.pkl [Content-Type=application/octet-stream]...
/ [1 files][120.9 KiB/120.9 KiB]                                                
Operation completed over 1 objects/120.9 KiB.                                    
Using endpoint [https://ml.googleapis.com/]
Creating version (this might take a few minutes)......
...................................................................................................................................................................................................................done.


Now let's instantiate the What-if Tool by configuring a WitConfigBuilder. Here, we want to compare the original model we built with the one trained on the balanced census dataset. To achieve this we utilize the `set_compare_ai_platform_model` method. We want to compare the models on a balanced test set. The balanced test set is loaded and then passed to `WitConfigBuilder`.

In [31]:
bal_test_csv_path = 'https://storage.googleapis.com/cloud-training/dei/balanced_census_data_test.csv'
# NOTE(review): unlike the balanced training load, no skiprows is passed here,
# so a header line in the CSV (if present) is parsed as the first data row;
# the What-If Tool cell that consumes this frame drops row 0 with `[1:]`.
bal_test_data = pd.read_csv(bal_test_csv_path, names=COLUMNS, skipinitialspace=True)
# Replace the string label with 1 for '>50K' and 0 for anything else.
bal_test_data['income-level'] = (bal_test_data['income-level'] == '>50K').values.astype(int)

In [32]:
# Configure the What-If Tool to compare two deployed versions on the balanced
# test rows. `[1:]` skips the first row of bal_test_data before the rows are
# converted to plain lists.
config_builder = (
    WitConfigBuilder(bal_test_data.to_numpy()[1:].tolist(), COLUMNS)
    .set_ai_platform_model(os.environ['QWIKLABS_PROJECT_ID'], 'census_income_classifier', 'original')
    .set_compare_ai_platform_model(os.environ['QWIKLABS_PROJECT_ID'], 'census_income_classifier', 'balanced')
    .set_target_feature('income-level')
    .set_model_type('classification')
    .set_label_vocab(['Under 50K', 'Over 50K'])
)

# Last expression of the cell, so the widget is rendered as the cell output.
WitWidget(config_builder, height=800)

WitWidget(config={'model_type': 'classification', 'label_vocab': ['Under 50K', 'Over 50K'], 'feature_names': (…

Once the WIT widget loads, click on the __Performance & Fairness__ tab. In the __Slice by__ field select `sex` and wait a minute for the graphics to load. For females, the model trained on the balanced dataset is over two times more likely to predict an income of over 50k than the model trained on the original dataset. While this results in a higher false positive rate, the false negative rate is decreased by a factor of three. This results in an improved overall accuracy of some 10 percentage points. 

How else does the model trained on balanced data perform differently when compared to the original model? 