# Anchors for Titanic

In [1]:
# load modules
import pandas as pd
import numpy as np
from alibi.explainers import AnchorTabular
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier

**load data**

In [2]:
#np.random.seed(0)
df_train = pd.read_pickle("data/titanic_train.pkl")
df_test_ = pd.read_pickle("data/titanic_test.pkl")

msk = np.random.rand(len(df_test_)) < 0.8

df_test = df_test_[msk]
df_val = df_test_[~msk]

X_train, y_train = df_train.drop(["Survived"], axis=1), df_train["Survived"]
X_test, y_test = df_test.drop(["Survived"], axis=1), df_test["Survived"]
X_val, y_val = df_val.drop(["Survived"], axis=1), df_val["Survived"]

In [3]:
X_test

Unnamed: 0,Pclass,Age,Fare,Familiy_Size,Sex=male,Embarked=Q,Embarked=S,Title=rareTitle,marital_status=no,marital_status=yes,Deck=B,Deck=C,Deck=D,Deck=E,Deck=F,Deck=G,Deck=T,Deck=unknown
709,3,25.0,15.2458,2,1,0,0,1,0,0,0,0,0,0,0,0,0,1
439,2,31.0,10.5000,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1
840,3,20.0,7.9250,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1
720,2,6.0,33.0000,1,0,0,1,0,1,0,0,0,0,0,0,0,0,1
39,3,14.0,11.2417,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
852,3,9.0,15.2458,2,0,0,0,0,1,0,0,0,0,0,0,0,0,1
433,3,17.0,7.1250,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1
773,3,25.0,7.2250,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1
25,3,38.0,31.3875,6,0,0,1,0,0,1,0,0,0,0,0,0,0,1


**construct knn model**

In [4]:
model = Pipeline([
    ("scaler", MinMaxScaler()),
    ("knn", KNeighborsClassifier(n_neighbors=3))
])

**fit model**

In [5]:
model.fit(X_train, y_train)

Pipeline(memory=None,
         steps=[('scaler', MinMaxScaler(copy=True, feature_range=(0, 1))),
                ('knn',
                 KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                      metric='minkowski', metric_params=None,
                                      n_jobs=None, n_neighbors=3, p=2,
                                      weights='uniform'))],
         verbose=False)

In [6]:
model.score(X_test, y_test)

0.8356164383561644

**setup explainer**

In [7]:
predict = lambda x: model.predict_proba(x)

In [8]:
explainer = AnchorTabular(predict, feature_names=X_train.columns.tolist(), seed=42)

In [9]:
explainer.fit(X_test.values)

AnchorTabular(meta={
  'name': 'AnchorTabular',
  'type': ['blackbox'],
  'explanations': ['local'],
  'params': {'disc_perc': (25, 50, 75), 'seed': 42}}
)

**Try to explain**

In [12]:
df_val.head()

Unnamed: 0,Survived,Pclass,Age,Fare,Familiy_Size,Sex=male,Embarked=Q,Embarked=S,Title=rareTitle,marital_status=no,marital_status=yes,Deck=B,Deck=C,Deck=D,Deck=E,Deck=F,Deck=G,Deck=T,Deck=unknown
290,1,1,26.0,78.85,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1
485,0,3,25.0,25.4667,4,0,0,1,0,1,0,0,0,0,0,0,0,0,1
682,0,3,20.0,9.225,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1
877,0,3,19.0,7.8958,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1
141,1,3,22.0,7.75,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1


In [17]:
explaination = explainer.explain(X_val.values[0,:], delta=0.1, threshold=0.95, beam_size=1)

In [14]:
model.predict(X_val)

array([1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1,
       0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1])

In [19]:
explaination

Explanation(meta={
  'name': 'AnchorTabular',
  'type': ['blackbox'],
  'explanations': ['local'],
  'params': {
              'seed': 42,
              'disc_perc': (25, 50, 75),
              'threshold': 0.95,
              'delta': 0.1,
              'tau': 0.15,
              'batch_size': 100,
              'coverage_samples': 10000,
              'beam_size': 1,
              'stop_on_first': False,
              'max_anchor_size': None,
              'min_samples_start': 100,
              'n_covered_ex': 10,
              'binary_cache_size': 10000,
              'cache_margin': 1000,
              'verbose': False,
              'verbose_every': 1,
              'kwargs': {}}
            }
, data={
  'anchor': ['Fare > 32.46', 'Sex=male <= 0.00', 'Pclass <= 1.00'],
  'precision': 0.9829268292682927,
  'coverage': 0.2808219178082192,
  'raw': {
           'feature': [2, 4, 0],
           'mean': [0.5633754697642638, 0.8709377901578459, 0.9829268292682927],
           'precisio