In [1]:
import numpy as np
import pandas as pd
import re
import dill as pickle
import seaborn as sns
import matplotlib.pyplot as plt
from eval import *
from pandas.io.json import json_normalize
from copy import deepcopy

# Plot styling, plus wider pandas display limits for inspecting long cells/tables
sns.set_style('whitegrid')
pd.set_option('display.max_colwidth', 200)
pd.set_option('display.max_rows', 300)

In [2]:
from keras.models import load_model

Using TensorFlow backend.


In [3]:
# Render matplotlib figures inline in the notebook output
%matplotlib inline
# Enable autoreload (mode 2) so edits to imported modules are picked up
# without restarting the kernel
%reload_ext autoreload
%autoreload 2
# Ask the inline backend for high-resolution ("retina") figures
%config InlineBackend.figure_format = 'retina'

## Apply trained model to the test data

In [4]:
# Load data
# NOTE(review): unpickling can execute arbitrary code -- only load
# 'data/feats.pkl' when it comes from a trusted source.
# The context manager guarantees the file handle is closed after loading.
with open('data/feats.pkl', 'rb') as feats_file:
    feats = pickle.load(feats_file)

# Categorical features (train / dev / test splits)
cat_ix_trn, cat_ix_dev, cat_ix_test = feats['cat_ix_trn'], feats['cat_ix_dev'], feats['cat_ix_test']

# Numerical features (train / dev / test splits)
cont_trn, cont_dev, cont_test = feats['cont_trn'], feats['cont_dev'], feats['cont_test']

# Labels (the `_oh` suffix suggests one-hot encoding -- TODO confirm)
y_trn_oh, y_dev_oh, y_test_oh = feats['y_trn_oh'], feats['y_dev_oh'], feats['y_test_oh']

# Supporting data: feature name lists, categorical mappers and `max_len`
cat_vars, cont_vars = feats['cat_vars'], feats['cont_vars']
cat_mappers = feats['cat_mappers']
max_len = feats['max_len']

In [5]:
# Load model
# Restores the saved Keras model from disk; it is used below for prediction.
# (The file name suggests a GRU network -- not verifiable from this notebook.)
rnn = load_model('models/gru.h5')

In [6]:
# Apply model to the test data and evaluate results
def predict_and_eval(x, y, model, track_itos, per_track=True):
    """Run `model` on `x` and score the positive-class predictions against `y`.

    Parameters
    ----------
    x : list of arrays
        Model inputs. The first element must hold the `track` indices, one
        per observation, aligned with `y`.
    y : ndarray
        Labels whose last axis enumerates the classes. Column 0 flags padded
        observations (non-zero means padding) and the last column is the
        positive class.
    model : object with a `predict(x)` method
        Must return probabilities with the same number of elements as `y`.
    track_itos : sequence or mapping
        Maps a track index to its name.
    per_track : bool, default True
        If True, compute metrics separately for every track; otherwise
        compute them over all non-padded observations at once.

    Returns
    -------
    pandas.DataFrame
        Columns `auroc` and `F1`; indexed by track when `per_track` is True,
        otherwise a single row.
    """
    # Generate predictions
    y_pred_prob = model.predict(x)

    # Reshape to 2D (thus concatenating all observations together).
    # The class count comes from the labels instead of being hard-coded.
    n_classes = y.shape[-1]
    y_rs = y.reshape(-1, n_classes)
    y_pred_prob_rs = y_pred_prob.reshape(-1, n_classes)

    # The first element of the feature list is `track`
    track_rs = x[0].flatten()

    # Keep only real observations: column 0 marks padded positions
    non_pad = y_rs[:, 0] == 0

    # Only evaluate on the positive class (last column)
    y_pred_df = pd.DataFrame({
        'track': [track_itos[t] for t in track_rs[non_pad]],
        'actual': y_rs[non_pad, -1],
        'predicted': y_pred_prob_rs[non_pad, -1],
    })

    if per_track:
        # One metrics dict per track, expanded into columns and re-indexed by track
        metrics_per_track = y_pred_df.groupby('track').apply(
            lambda grp: evaluate_metrics(grp['actual'].values, grp['predicted'].values)
        )
        return json_normalize(metrics_per_track).set_index(metrics_per_track.index)[['auroc', 'F1']]

    metrics = evaluate_metrics(y_pred_df['actual'].values, y_pred_df['predicted'].values)
    return json_normalize(metrics)[['auroc', 'F1']]

In [67]:
# Assemble model inputs: categorical index arrays first, then continuous arrays
x_test = [cat_ix_test[name] for name in cat_vars]
x_test += [cont_test[name] for name in cont_vars]

# Per-track metrics on the test split
predict_and_eval(x_test, y_test_oh, rnn, cat_mappers['track']['itos'])

Unnamed: 0_level_0,auroc,F1
track,Unnamed: 1_level_1,Unnamed: 2_level_1
en_es,0.81844,0.426078
es_en,0.785258,0.397108
fr_en,0.810768,0.458776


## Analyze feature importance

In [7]:
def estimate_feat_importance(x, y, model, track_itos, feats_order, seed=0):
    """Estimate per-feature importance by permuting one input at a time.

    For every feature, the rows (first axis) of its input array are permuted
    with one shared permutation, the model is re-scored, and the drop relative
    to the unshuffled baseline is recorded.

    Parameters
    ----------
    x : list of arrays
        Model inputs, ordered like `feats_order`. The list and its arrays are
        never modified, even if scoring raises.
    y : ndarray
        Labels, passed through to `predict_and_eval`; `len(y)` is the number
        of rows to permute.
    model, track_itos
        Passed through to `predict_and_eval`.
    feats_order : sequence of str
        Feature names; entry `i` names `x[i]`. Only the first
        `len(feats_order)` inputs are evaluated.
    seed : int, default 0
        Seed for the permutation. Note that this reseeds NumPy's global RNG.

    Returns
    -------
    dict
        Maps each feature name to `baseline - shuffled_score`, as returned by
        `predict_and_eval(..., per_track=False)`.
    """
    # Compute baseline metrics using unshuffled data
    baseline = predict_and_eval(x, y, model, track_itos, per_track=False)

    # Generate shuffled index (one permutation shared by all features)
    ix = np.arange(len(y))
    np.random.seed(seed)
    np.random.shuffle(ix)

    metrics_change = {}

    for i, f in enumerate(feats_order):
        # Work on a shallow copy of the input list so the caller's `x` is
        # never left in a shuffled state if scoring raises. Fancy indexing
        # already returns a new array, so no deep copy is needed.
        x_shuffled = list(x)
        x_shuffled[i] = x[i][ix]

        # Re-compute metrics with only feature `i` permuted
        new_score = predict_and_eval(x_shuffled, y, model, track_itos, per_track=False)

        # Positive values mean the metric dropped when the feature was shuffled
        metrics_change[f] = baseline - new_score

    return metrics_change

In [11]:
# Use validation data: categorical index arrays first, then continuous arrays
x_dev = [cat_ix_dev[name] for name in cat_vars]
x_dev.extend(cont_dev[name] for name in cont_vars)

# Feature names in the same order as the entries of `x_dev`
feats_order = cat_vars + cont_vars

In [None]:
# Estimate importance for each feature
# Runs one extra model evaluation per entry in `feats_order` on the dev split,
# using the default seed of `estimate_feat_importance`.
rnn_feat_imp = estimate_feat_importance(x_dev, y_dev_oh, rnn, cat_mappers['track']['itos'], feats_order)

In [10]:
# Display the per-feature metric changes (baseline minus shuffled score)
rnn_feat_imp

{'track':       auroc        F1
 0  0.080033  0.091811}

In [81]:
# Scratch setup: bind the arguments of `estimate_feat_importance` to module-level
# names so its body can be stepped through cell by cell.
# NOTE(review): `x` aliases `x_dev` (same list object), and `feats_order` is
# rebound to its first two entries, so later cells that use `feats_order`
# only see those two features.
x, y, model, track_itos, feats_order = x_dev, y_dev_oh, rnn, cat_mappers['track']['itos'], feats_order[:2]

In [87]:
# Baseline metrics over all tracks combined, on the unshuffled inputs
baseline = predict_and_eval(x, y, model, track_itos, per_track=False)

In [88]:
# Display the baseline `auroc` / `F1` row
baseline

Unnamed: 0,auroc,F1
0,0.806096,0.422265


In [90]:
# Build the row permutation with the same steps as `estimate_feat_importance`
seed = 0
ix = np.arange(len(y))
np.random.seed(seed)  # reseeds NumPy's global RNG
np.random.shuffle(ix)  # shuffles `ix` in place
ix

array([1746, 4065, 5236, ..., 1653, 2607, 2732])

In [91]:
# Pick the first input and keep an independent copy so it can be restored later
i = 0
save = deepcopy(x[i])

In [94]:
# Permute the rows of input `i`. Index the saved original rather than the
# current `x[i]`, so re-running this cell always yields the same single
# permutation instead of shuffling already-shuffled data again.
# NOTE(review): `x` is the same list object as `x_dev`, so this also changes
# `x_dev[i]` until it is restored from `save`.
x[i] = save[ix]

In [95]:
# Inspect input `i` after the row permutation
x[i]

array([[0, 0, 0, ..., 2, 2, 2],
       [0, 0, 0, ..., 4, 4, 4],
       [0, 0, 0, ..., 3, 3, 3],
       ..., 
       [0, 0, 0, ..., 2, 2, 2],
       [0, 0, 0, ..., 2, 2, 2],
       [0, 0, 0, ..., 3, 3, 3]], dtype=int32)

In [96]:
# Inspect the full input list (only entry `i` has been permuted at this point)
# NOTE(review): this prints every input array; consider showing shapes instead.
x

[array([[0, 0, 0, ..., 2, 2, 2],
        [0, 0, 0, ..., 4, 4, 4],
        [0, 0, 0, ..., 3, 3, 3],
        ..., 
        [0, 0, 0, ..., 2, 2, 2],
        [0, 0, 0, ..., 2, 2, 2],
        [0, 0, 0, ..., 3, 3, 3]], dtype=int32),
 array([[   0,    0,    0, ..., 3701, 3701, 3701],
        [   0,    0,    0, ..., 1217, 1217, 1217],
        [   0,    0,    0, ..., 2871, 2871, 2871],
        ..., 
        [   0,    0,    0, ..., 4994, 4994, 4994],
        [   0,    0,    0, ..., 4223, 4223, 4223],
        [   0,    0,    0, ..., 3681, 3681, 3681]], dtype=int32),
 array([[ 0,  0,  0, ..., 32, 32, 32],
        [ 0,  0,  0, ...,  3,  3,  3],
        [ 0,  0,  0, ...,  2,  2,  2],
        ..., 
        [ 0,  0,  0, ..., 14, 14, 14],
        [ 0,  0,  0, ..., 16, 16, 16],
        [ 0,  0,  0, ...,  2,  2,  2]], dtype=int32),
 array([[0, 0, 0, ..., 3, 3, 3],
        [0, 0, 0, ..., 2, 2, 2],
        [0, 0, 0, ..., 2, 2, 2],
        ..., 
        [0, 0, 0, ..., 2, 2, 2],
        [0, 0, 0, ..., 2, 2, 