# Anchors

In [1]:
pip install anchor-exp

Note: you may need to restart the kernel to use updated packages.


In [2]:
from __future__ import print_function
import numpy as np
np.random.seed(1)
import sys
import sklearn
import sklearn.ensemble
%load_ext autoreload
%autoreload 2
from anchor import utils
from anchor import anchor_tabular
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# Load the full dataset, then keep only the rows in which every requirement
# column equals 1 and strip the requirement columns from the result.
dataset_folder = 'datasets/dataset500.csv'
df = pd.read_csv(dataset_folder)

req_columns = ['req_0', 'req_1', 'req_2', 'req_3']
# Row-wise mask: True only where all four requirement columns equal 1.
all_requirements_met = (df[req_columns] == 1).all(axis=1)
all_true_dataset = df.loc[all_requirements_met].drop(columns=req_columns)
print(all_true_dataset.shape)

(12, 9)


In [4]:
# Write one CSV per requirement: each file keeps every column of df except
# the three *other* requirement columns.
req_columns = ['req_0', 'req_1', 'req_2', 'req_3']
export_targets = [
    ('req_0', 'datasets/dataset500req0.csv'),
    ('req_1', 'datasets/dataset500req1.csv'),
    ('req_2', 'datasets/dataset500req2.csv'),
    ('req_3', 'datasets/dataset500req3.csv'),
]

single_req_frames = []
for kept_req, csv_path in export_targets:
    other_reqs = [name for name in req_columns if name != kept_req]
    frame = df.drop(columns=other_reqs)
    frame.to_csv(csv_path, index=False)
    single_req_frames.append(frame)

# Keep the individual frames available under their original names.
df0, df1, df2, df3 = single_req_frames

In [5]:
# For each of the four requirements: load its dataset, fit a random forest,
# report train/test accuracy, and compute an anchor explanation for every
# sample in all_true_dataset. The explanations are written to
# 'datasets/anchorsReq<i>.csv' and the anchor rules are collected in `dataframes`.
datasets = []
dataframes = []  # one entry per requirement: a list of anchor-rule lists (not DataFrames)
for i in range(4):
    dataset_folder = 'datasets/dataset500req' + str(i) + '.csv'
    # NOTE(review): the second argument (9) is presumably the index of the target
    # column in the CSV — confirm against anchor's utils.load_csv_dataset.
    datasets.append(utils.load_csv_dataset(dataset_folder, 9))
    dataset = datasets[i]

    c = sklearn.ensemble.RandomForestClassifier(n_estimators=50, n_jobs=5)
    c.fit(dataset.train, dataset.labels_train)
    print('Train', sklearn.metrics.accuracy_score(dataset.labels_train, c.predict(dataset.train)))
    print('Test', sklearn.metrics.accuracy_score(dataset.labels_test, c.predict(dataset.test)))

    explainer = anchor_tabular.AnchorTabularExplainer(
        dataset.class_names,  # maps the 0 and 1 in the dataset's requirements to the class names
        dataset.feature_names,
        dataset.train,
        dataset.categorical_names)

    names = []
    # Open the result file once per requirement, in write mode: the previous
    # per-sample open(..., 'a') appended to whatever an earlier run had left
    # behind, so re-running the cell duplicated every entry in the file.
    with open('datasets/anchorsReq' + str(i) + '.csv', 'w') as f:
        for j in range(all_true_dataset.shape[0]):
            # Single sample as a (1, n_features) array, shared by predict and explain.
            instance = all_true_dataset.iloc[j].values.reshape(1, -1)

            f.write('Prediction: %s\n' % (explainer.class_names[c.predict(instance)[0]]))
            exp = explainer.explain_instance(instance, c.predict, threshold=0.95)
            f.write('Anchor: %s\n' % (' AND '.join(exp.names())))
            f.write('Precision: %.2f\n' % exp.precision())
            f.write('Coverage: %.2f\n' % exp.coverage())
            f.write('\n')
            names.append(exp.names())

    dataframes.append(names)

    print(dataframes[i])




Train 1.0
Test 0.98
[["b'firm obstacle' = 1.0", "b'power' > 23.00", "b'cruise speed' <= 71.28", "b'image resolution' > 23.99", "b'smoke intensity' <= 49.24", "b'obstacle distance' <= 43.80"], ["b'firm obstacle' = 1.0", "b'image resolution' > 75.39", "b'cruise speed' <= 44.79", "b'power' > 23.00", "b'illuminance' > 51.56", "b'smoke intensity' <= 49.24"], ["b'firm obstacle' = 1.0", "b'power' > 23.00", "b'cruise speed' <= 44.79", "b'smoke intensity' <= 49.24", "b'image resolution' > 23.99"], ["b'firm obstacle' = 1.0", "b'cruise speed' <= 22.33", "b'power' > 23.00", "b'smoke intensity' <= 74.25", "b'image resolution' > 23.99"], ["b'firm obstacle' = 1.0", "b'image resolution' > 75.39", "b'cruise speed' <= 44.79", "b'power' > 23.00", "b'smoke intensity' <= 49.24", "b'illuminance' > 27.51"], ["b'firm obstacle' = 1.0", "b'smoke intensity' <= 23.26", "b'image resolution' > 51.33", "27.51 < b'illuminance' <= 75.57"], ["b'firm obstacle' = 1.0", "b'image resolution' > 75.39", "b'cruise speed' <= 4

The output lists, for each sample, the rules (the anchor) under which the sample's prediction is preserved (in this case False for requirement 2), together with the precision and coverage with which these rules hold.

The coverage tells us how much of the dataset we have explained with this rule.

In [6]:
# Collect (and print) every row of df that satisfies the hard-coded anchor rule below.
shown_columns = ['firm obstacle', 'power', 'cruise speed', 'smoke intensity',
                 'image resolution', 'obstacle distance',
                 'req_0', 'req_1', 'req_2', 'req_3']

lista = []
for row_idx in range(df.shape[0]):
    dat = df.iloc[row_idx]
    rule_holds = (
        dat['firm obstacle'] == 1
        and dat['power'] > 23.00
        and dat['cruise speed'] <= 71.28
        and dat['smoke intensity'] <= 49.24
        and dat['image resolution'] > 23.99
        and dat['obstacle distance'] <= 43.80
    )
    if not rule_holds:
        continue
    lista.append(dat)
    print(*[dat[column] for column in shown_columns])

# NOTE(review): 12 is presumably the number of samples in all_true_dataset — confirm.
print(len(lista) - 12)

1.0 69.0 22.7995 32.0775 47.5676 18.952 True False False True
1.0 72.0 18.0329 9.0105 88.1242 14.3371 True False True True
1.0 88.0 47.0146 11.4562 55.1652 28.0682 True False True True
1.0 72.0 61.9286 35.0041 80.1206 34.0424 True False False False
1.0 38.0 50.9071 45.0738 33.397 15.681 True True True True
1.0 30.0 35.8364 46.5315 49.6464 7.3371 True True True True
1.0 46.0 29.8661 45.8727 50.6223 17.2824 True False True True
1.0 97.0 12.8875 0.9936 49.3559 13.2518 True True True True
1.0 54.0 0.8053 6.5258 82.9626 12.8178 False False False False
1.0 38.0 0.5755 22.1285 61.4105 37.1102 True True False True
1.0 77.0 25.9089 31.7312 93.6499 22.3561 True True True True
1.0 31.0 43.2555 21.008 25.6881 34.9979 True False True True
1.0 80.0 38.3862 26.5053 75.4232 11.7324 True False False True
1.0 89.0 27.4022 15.124 76.5957 16.5379 True False False False
1.0 56.0 27.8734 13.3137 48.469 18.1208 True False False False
1.0 51.0 69.8011 15.0281 81.2153 15.2147 True False True True
1.0 89.0 67.3

In [8]:
# Get test examples where the anchor applies
#fit_anchor = np.where(np.all(new_dataset.test[:, exp.features()] == new_dataset.test[idx][exp.features()], axis=1))[0]
#print('Anchor test precision: %.2f' % (np.mean(c.predict(new_dataset.test[fit_anchor]) == c.predict(new_dataset.test[idx].reshape(1, -1)))))
#print('Anchor test coverage: %.2f' % (fit_anchor.shape[0] / float(new_dataset.test.shape[0])))