# **Anchors on all requirement**

In [1]:
from __future__ import print_function
import numpy as np
np.random.seed(1)
import sys
import sklearn
import sklearn.ensemble
from sklearn.metrics import accuracy_score
%load_ext autoreload
%autoreload 2
from anchor import utils
from anchor import anchor_tabular
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.neural_network import MLPClassifier

## Definition of useful data-wrangling functions

Function to separate the name of the feature from the ranges

In [2]:
def get_anchor(a):
    quoted_part = a.split("'")[1]
    rest = a.replace(f"'{quoted_part}'", '').replace("b", '').strip()

    return quoted_part, rest

Function creating the intervals

In [3]:
import re
from math import inf

def parse_range(expr: str):
    expr = expr.strip().replace(" ", "")
    
    patterns = [
        (r"^=(\-?\d+(\.\d+)?)$", 'equals'),
        (r"^(>=|>)\s*(-?\d+(\.\d+)?)$", 'lower'),
        (r"^(<=|<)\s*(-?\d+(\.\d+)?)$", 'upper'),
        (r"^(-?\d+(\.\d+)?)(<=|<){1,2}(<=|<)(-?\d+(\.\d+)?)$", 'between'),
        (r"^(-?\d+(\.\d+)?)(>=|>){1,2}(>=|>)(-?\d+(\.\d+)?)$", 'reverse_between'),
    ]
    
    for pattern, kind in patterns:
        match = re.match(pattern, expr)
        if match:
            if kind == 'equals':
                num = float(match.group(1))
                return (num, num, True, True)
            elif kind == 'lower':
                op, num = match.group(1), float(match.group(2))
                return (
                    num,
                    inf,
                    op == '>=',
                    False
                )
            elif kind == 'upper':
                op, num = match.group(1), float(match.group(2))
                return (
                    -inf,
                    num,
                    False,
                    op == '<='
                )
            elif kind == 'between':
                low = float(match.group(1))
                op1 = match.group(3)
                op2 = match.group(4)
                high = float(match.group(5))
                return (
                    low,
                    high,
                    op1 == '<=',
                    op2 == '<='
                )
            elif kind == 'reverse_between':
                high = float(match.group(1))
                op1 = match.group(3)
                op2 = match.group(4)
                low = float(match.group(5))
                return (
                    low,
                    high,
                    op2 == '>=',
                    op1 == '>='
                )

    raise ValueError(f"Unrecognized format: {expr}")

Function intersecting two given intervals

In [4]:
from typing import Optional, Tuple

def intersect(
    a: Tuple[float, float, bool, bool],
    b: Tuple[float, float, bool, bool]
) -> Optional[Tuple[float, float, bool, bool]]:
    
    a_low, a_high, a_li, a_ui = a
    b_low, b_high, b_li, b_ui = b

    # Compute max of lower bounds
    if a_low > b_low:
        low, li = a_low, a_li
    elif a_low < b_low:
        low, li = b_low, b_li
    else:
        low = a_low
        li = a_li and b_li

    # Compute min of upper bounds
    if a_high < b_high:
        high, ui = a_high, a_ui
    elif a_high > b_high:
        high, ui = b_high, b_ui
    else:
        high = a_high
        ui = a_ui and b_ui

    # Check for empty intersection
    if low > high:
        return None
    if low == high and not (li and ui):
        return None

    return (low, high, li, ui)

Function that returns the truth value of a num (val) being inside a given interval

In [5]:
def inside(val, interval):
    low, high, li, ui = interval
    if li and ui:
        return low <= val <= high
    elif li and not ui:
        return low <= val < high
    elif not li and ui:
        return low < val <= high
    else:
        return low < val < high

Function to classify an input using anchors

In [6]:
def classify_w_anchor(input, thresholds, feature_names):
    out = np.zeros(input.shape[0])
    
    for i in range(input.shape[0]):
        for j in range(len(thresholds)):
            flag = True
            out[i] = 1
            for nk,k in enumerate(feature_names):
                if k in thresholds[j]:
                    if not (inside(input[i,nk], thresholds[j][k])):
                        flag = False
                        out[i] = 0
                        break
            if flag:
                break
            else:
                flag = True
        
    return out

## DataFrame Preparation

In [7]:
#meta parameters
train_percentage = 80
val_percentage = 20

req_names = ['req_0', 'req_1', 'req_2', 'req_3']
req_number = len(req_names)
feature_names = ['cruise speed','image resolution','illuminance','controls responsiveness','power','smoke intensity','obstacle size','obstacle distance','firm obstacle']
feature_number = len(feature_names)

training_dataset = '../datasets/dataset5000.csv'

# Load the dataset
df = pd.read_csv(training_dataset)
n_samples = df.shape[0]
print("Number of samples: ", n_samples)

#Split 80 20 the training dataset in training and validation to have more similar data
indices = np.arange(0,n_samples)
np.random.seed(1234)
indices = np.random.permutation(indices)

training_indices = indices[0:int(n_samples*train_percentage/100)]
validation_indices = indices[int(n_samples*train_percentage/100):]

training_df = df.iloc[training_indices]
validation_df = df.iloc[validation_indices]
print('Training dataset size: ', training_df.shape)
print('Validation dataset size: ', validation_df.shape)

#select the samples that have all the requirements satisfied both in training and validation
# and drop the requirements columns
all_true_training = training_df[
    (training_df['req_0'] == 1) &
    (training_df['req_1'] == 1) &
    (training_df['req_2'] == 1) &
    (training_df['req_3'] == 1)
].drop(columns=req_names)

all_true_validation = validation_df[
    (validation_df['req_0'] == 1) &
    (validation_df['req_1'] == 1) &
    (validation_df['req_2'] == 1) &
    (validation_df['req_3'] == 1)
].drop(columns=req_names)

print('Training samples with all requirements satisfied: ', all_true_training.shape)
print('Validation samples with all requirements satisfied: ', all_true_validation.shape)

#select the samples that have one specific requirement satisfied
req_true_training = {}
for r in req_names:
    req_true_training[r] = training_df[training_df[r] == 1].drop(columns=req_names)
    print('Training samples with {} satisfied: '.format(r), req_true_training[r].shape)

req_true_validation = {}
for r in req_names:
    req_true_validation[r] = validation_df[validation_df[r] == 1].drop(columns=req_names)
    print('Validation samples with {} satisfied: '.format(r), req_true_validation[r].shape)

#create a csv with the new training data and save it
training_df.to_csv('../datasets/training_dataset.csv', index=False)
validation_df.to_csv('../datasets/validation_dataset.csv', index=False)

Number of samples:  5000
Training dataset size:  (4000, 13)
Validation dataset size:  (1000, 13)
Training samples with all requirements satisfied:  (156, 9)
Validation samples with all requirements satisfied:  (49, 9)
Training samples with req_0 satisfied:  (1382, 9)
Training samples with req_1 satisfied:  (723, 9)
Training samples with req_2 satisfied:  (908, 9)
Training samples with req_3 satisfied:  (1041, 9)
Validation samples with req_0 satisfied:  (342, 9)
Validation samples with req_1 satisfied:  (172, 9)
Validation samples with req_2 satisfied:  (235, 9)
Validation samples with req_3 satisfied:  (261, 9)


In [8]:
datasets = [] #will contain the datasets as needed by the anchor library
feature_to_use = [i for i in range(feature_number)] #contains the range of features to use
true_from_anchors_df = {}

for i,r in enumerate(req_names):
    #we load the dataset in anchors
    datasets.append(\
        utils.load_csv_dataset(\
            training_dataset, feature_number+i,\
            features_to_use=feature_to_use,\
            categorical_features=None))
    
    true_from_anchors_df[r] = np.nonzero(datasets[i].labels_train)[0]
    print('Training samples with {} satisfied: '.format(r), true_from_anchors_df[r].shape)


Training samples with req_0 satisfied:  (1365,)
Training samples with req_1 satisfied:  (725,)
Training samples with req_2 satisfied:  (903,)
Training samples with req_3 satisfied:  (1029,)


In [9]:
training_dataset = '../datasets/training_dataset.csv'
validation_dataset = '../datasets/validation_dataset.csv'

## Learning Phase

Create a model for each requirement and train it.

Initialize the anchor explainer.

In [10]:

models = [] #will contain the models (one per requirement)

explainer = []

# explanations = np.zeros((req_number, all_true_training.shape[0]), dtype=object) #will contain the explanations (objects)
# exp_txt = [] #will contain the textual explanations its structure is a matrix (list of lists) where each row corresponds to a requirement 
#              #and each column corresponds to the explanation for the corresponding row in all_true_training_dataset


for i in range(req_number):
    print(f"{i} out of {req_number-1}")
   
    models.append(\
            sklearn.ensemble.GradientBoostingClassifier(random_state=1234))
    models[i].fit(datasets[i].train, datasets[i].labels_train)
    
    #initialize the explainer
    explainer.append(anchor_tabular.AnchorTabularExplainer(
        datasets[i].class_names, #it maps the 0 and 1 in the dataset's requirements to the class names
        datasets[i].feature_names,
        datasets[i].train,
        datasets[i].categorical_names))

0 out of 3
1 out of 3
2 out of 3
3 out of 3


Accuracy of the trained models

In [11]:
for i in range(req_number):
    print(f"Model {i+1} training accuracy: {accuracy_score(datasets[i].labels_train, models[i].predict(datasets[i].train)):.4f}")

Model 1 training accuracy: 0.9390
Model 2 training accuracy: 0.9035
Model 3 training accuracy: 0.9437
Model 4 training accuracy: 0.9293


In [12]:
for i, req in enumerate(req_names):
    print(f"___________Requirement {i+1}: {req}___________")
    output = models[i].predict(datasets[i].train)

    #obtain the indices of the samples that have the requirement satisfied (truly in the dataset)
    real_values_single_req = np.where(datasets[i].labels_train == 1)[0]

    if(i == 0):
        final = np.where(output == 1)[0]
        real_values = real_values_single_req
    else:
        final = np.intersect1d(final, np.where(output == 1)[0]) 
        real_values = np.intersect1d(real_values, real_values_single_req)


positively_classified = final
print(f"Number of samples with all requirements satisfied (according to model): {positively_classified.shape[0]}")

print(f"Number of samples with all requirements satisfied (real data): {real_values.shape[0]}")
#calculate false positives
f_p = positively_classified.shape[0]- np.intersect1d(real_values, positively_classified).shape[0]
print(f"Number of false positives from model: {f_p}")
#calculate the missclassified real positive
m_r_p = real_values.shape[0] - np.intersect1d(real_values, positively_classified).shape[0]
print(f"Number of missclassified real positives: {m_r_p}")

___________Requirement 1: req_0___________
___________Requirement 2: req_1___________
___________Requirement 3: req_2___________
___________Requirement 4: req_3___________
Number of samples with all requirements satisfied (according to model): 21
Number of samples with all requirements satisfied (real data): 166
Number of false positives from model: 3
Number of missclassified real positives: 148


Now we will find all points in the dataset that have not satisfied each requirement.

In [13]:
for i, req in enumerate(req_names):
    print(f"___________Requirement {i+1}: {req}___________")
    output = models[i].predict(datasets[i].train)

    #obtain the indices of the samples that have the requirement satisfied (truly in the dataset)
    real_values_single_req = datasets[i].labels_train

    if(i == 0):
        final = output
        real_values = real_values_single_req
    else:
        final *= final
        real_values *= real_values_single_req

negatively_classified = np.where(final == 0)[0]
true_negative = np.where(real_values == 0)[0]

print(f"Number of samples with all requirements satisfied (according to model): {negatively_classified.shape[0]}")
print(f"Number of samples with all requirements satisfied (real data): {true_negative.shape[0]}")
#calculate false negatives
f_n = negatively_classified.shape[0]- np.intersect1d(true_negative, negatively_classified).shape[0]
print(f"Number of false negatives from model: {f_n}")
#calculate the missclassified real negative
m_r_n = true_negative.shape[0] - np.intersect1d(true_negative, negatively_classified).shape[0]
print(f"Number of missclassified real negatives: {m_r_n}")

___________Requirement 1: req_0___________
___________Requirement 2: req_1___________
___________Requirement 3: req_2___________
___________Requirement 4: req_3___________
Number of samples with all requirements satisfied (according to model): 2697
Number of samples with all requirements satisfied (real data): 3834
Number of false negatives from model: 6
Number of missclassified real negatives: 1143


## Explain the model using Anchor

In [14]:
explanations = []

for j, p_sample in enumerate(positively_classified):
    intersected_exp = {}
    for i in range(req_number):
        #get the sample
        sample = datasets[i].train[p_sample]
        #explain the sample
        exp = explainer[i].explain_instance(sample, models[i].predict, threshold=0.95)
        #get the textual explanation
        exp = exp.names()
        #transform the textual explanations in an interval
        for boundings in exp:
            quoted, rest = get_anchor(boundings)            
            if(quoted not in intersected_exp):
                intersected_exp[quoted] = parse_range(rest)
            else:
                intersected_exp[quoted] = intersect(intersected_exp[quoted], parse_range(rest))

    #prepare the data structure
    explanations.append(intersected_exp)

In [72]:
datasets[i].train[0]

array([61.2058, 53.6657, 78.9786, 96.9228, 82.    , 40.3708, 72.7148,
       98.1789,  1.    ])

Let's verify that the data structure is correctly built

In [15]:
print(len(explanations) == positively_classified.shape[0])

True


In [16]:
explanations

[{'firm obstacle': (1.0, 1.0, True, True),
  'smoke intensity': (-inf, 23.42, False, True),
  'illuminance': (75.91, inf, False, False),
  'cruise speed': (-inf, 25.21, False, True),
  'image resolution': (75.24, inf, False, False),
  'obstacle size': (26.74, 50.25, False, True),
  'power': (51.0, inf, False, False),
  'controls responsiveness': (73.9, inf, False, False),
  'obstacle distance': (25.34, 49.94, False, True)},
 {'firm obstacle': (1.0, 1.0, True, True),
  'image resolution': (75.24, inf, False, False),
  'smoke intensity': (-inf, 48.87, False, True),
  'cruise speed': (-inf, 25.21, False, True),
  'illuminance': (75.91, inf, False, False),
  'obstacle size': (74.61, inf, False, False),
  'controls responsiveness': (73.9, inf, False, False),
  'power': (25.0, 76.0, False, True),
  'obstacle distance': (25.34, inf, False, False)},
 {'firm obstacle': (1.0, 1.0, True, True),
  'image resolution': (75.24, inf, False, False),
  'illuminance': (75.91, inf, False, False),
  'smoke

## Reordering of anchors' features

In [80]:
feature_names
for exp in explanations:
    exp_reordered = {}
    for k in feature_names:
        if k in exp:
            exp_reordered[k] = exp[k]
    exp = exp_reordered

In [81]:
print(explanations)

[{'firm obstacle': (1.0, 1.0, True, True), 'smoke intensity': (-inf, 23.42, False, True), 'illuminance': (75.91, inf, False, False), 'cruise speed': (-inf, 25.21, False, True), 'image resolution': (75.24, inf, False, False), 'obstacle size': (26.74, 50.25, False, True), 'power': (51.0, inf, False, False), 'controls responsiveness': (73.9, inf, False, False), 'obstacle distance': (25.34, 49.94, False, True)}, {'firm obstacle': (1.0, 1.0, True, True), 'image resolution': (75.24, inf, False, False), 'smoke intensity': (-inf, 48.87, False, True), 'cruise speed': (-inf, 25.21, False, True), 'illuminance': (75.91, inf, False, False), 'obstacle size': (74.61, inf, False, False), 'controls responsiveness': (73.9, inf, False, False), 'power': (25.0, 76.0, False, True), 'obstacle distance': (25.34, inf, False, False)}, {'firm obstacle': (1.0, 1.0, True, True), 'image resolution': (75.24, inf, False, False), 'illuminance': (75.91, inf, False, False), 'smoke intensity': (-inf, 23.42, False, True),

### Anchors for negative points

In [17]:
# neg_explanations = []

# for j, p_sample in enumerate(negatively_classified):
#     final_exp = {}
#     for i in range(req_number):
#         #get the sample
#         sample = datasets[i].train[p_sample]
#         #explain the sample
#         exp = explainer[i].explain_instance(sample, models[i].predict, threshold=0.95)
#         #get the textual explanation
#         exp = exp.names()
#         #transform the textual explanations in an interval
#         for boundings in exp:
#             quoted, rest = get_anchor(boundings)            
#             final_exp[quoted] = parse_range(rest)
#         #update the 
#         neg_explanations.append(final_exp)

In [18]:
# print(len(neg_explanations) == negatively_classified.shape[0]*req_number)

In [19]:
# neg_explanations

Now a point will be classified as positive if it's simultaniously inside the area defined by explanations and not inside the are of negative_explanations

# Validation

Verify if the function works properly by submitting the positively classified samples in the training dataset, we should obtain that all the input are positively classified in this case.

In [82]:
idx = positively_classified

samples = datasets[0].train[idx]
#classify the samples with the anchor function
sat = classify_w_anchor(samples, explanations, feature_names)

#obtain the indices of the samples that have the requirement satisfied
anchors_positives = np.where(sat != 0)[0]
print(f"Number of samples with {req} classified as satisfied: {len(anchors_positives)}.\
      \nIf this number is {len(idx)} it means that the anchor function classifies correctly the samples classified true by the model.\
      \nIn this case it is {len(idx) == len(anchors_positives)}")

Number of samples with req_3 classified as satisfied: 21.      
If this number is 21 it means that the anchor function classifies correctly the samples classified true by the model.      
In this case it is True


Validate the anchors classifier on the validation set

In [83]:
val_set = validation_df.values
print(val_set.shape)

(1000, 13)


In [84]:
val_set[0]

array([64.2909, 16.3241, 65.5295, 55.7508, 25.0, 28.5735, 6.1418, 89.258,
       0.0, False, True, False, False], dtype=object)

In [85]:
#obtain the samples
samples = val_set[:, 0:feature_number]
print(samples.shape)
for r, req in enumerate(req_names):
    print(f"___________Requirement {req}___________")
    
    #classify the samples with the model
    tmp_output = models[r].predict(samples)
    if(r == 0):
        output = tmp_output
    else:
        output *= tmp_output

#classify the samples with the anchor function
sat = classify_w_anchor(samples, explanations, feature_names)
    
#obtain the indices of the samples that are classified as true by the model
models_positives = np.where(output != 0)[0]
    
#obtain the indices of the samples that are classified as true by anchors
anchors_positives = np.where(sat != 0)[0]

#obtain the samples classified correctly by anchors w.r.t. the model
correctly_classified = np.intersect1d(models_positives, anchors_positives)

print(f"Number of samples with all reqs classified as satisfied by the model: {len(models_positives)}")
print(f"Number of samples with all reqs classified as satisfied by the anchor function: {len(anchors_positives)}")
print(f"Number of samples with all reqs classified as satisfied by the model and the anchor function: {len(correctly_classified)}")
print("\n")
print(f"Number of samples with all reqs classified as satisfied: {len(anchors_positives)}.\
        \nIf this number is {len(models_positives)} it means that the anchor function classifies correctly the samples classified true by the model.\
        \nIn this case it is {len(models_positives) == len(anchors_positives)}")

#calculate the false positives
f_p = anchors_positives.shape[0] - correctly_classified.shape[0]
print(f"Number of false positives: {f_p}, ratio (over anchor_positives): {f_p/anchors_positives.shape[0]}")

#calculate the missclassified real positive
m_r_p = models_positives.shape[0] - correctly_classified.shape[0]
print(f"Number of missclassified real positives: {m_r_p}, ratio (over model_positives): {m_r_p/models_positives.shape[0]}")
print("\n")

(1000, 9)
___________Requirement req_0___________
___________Requirement req_1___________
___________Requirement req_2___________
___________Requirement req_3___________
Number of samples with all reqs classified as satisfied by the model: 7
Number of samples with all reqs classified as satisfied by the anchor function: 6
Number of samples with all reqs classified as satisfied by the model and the anchor function: 5


Number of samples with all reqs classified as satisfied: 6.        
If this number is 7 it means that the anchor function classifies correctly the samples classified true by the model.        
In this case it is False
Number of false positives: 1, ratio (over anchor_positives): 0.16666666666666666
Number of missclassified real positives: 2, ratio (over model_positives): 0.2857142857142857




In [86]:
observable_feature_names = feature_names[3:7]

s = samples[:, 3:7]
print(samples.shape)
print(s.shape)

cl = classify_w_anchor(s, explanations, observable_feature_names)

print(cl.shape)


(1000, 9)
(1000, 4)
(1000,)


In [87]:
print(np.where(cl != 0)[0].shape)

(219,)


# Validation using also negative area

In [88]:
##obtain the negatively classified inidces
#idx = negatively_classified

#samples = datasets[0].train[idx]
##classify the samples with the anchor function
#sat = classify_w_anchor(samples, neg_explanations, feature_names)

##obtain the indices of the samples that have the requirement satisfied
#anchors_negatives = np.where(sat != 0)[0]
#print(f"Number of samples with {req} classified as satisfied: {len(anchors_negatives)}.\
#      \nIf this number is {len(idx)} it means that the anchor function classifies correctly the samples classified true by the model.\
#      \nIn this case it is {len(idx) == len(anchors_negatives)}")

In [89]:
##obtain the samples
#samples = val_set[:, 0:feature_number]
#
#for r, req in enumerate(req_names):
#    print(f"___________Requirement {req}___________")
#    
#    #classify the samples with the model
#    tmp_output = models[r].predict(samples)
#    if(r == 0):
#        output = tmp_output
#    else:
#        output *= tmp_output
#
##classify the positive samples with the anchor function
#pos_anch_classif = classify_w_anchor(samples, explanations, feature_names)
#
##classify the negative samples with the anchor function
#neg_anch_classif = classify_w_anchor(samples, neg_explanations, feature_names)
#
#final_anch = pos_anch_classif * neg_anch_classif
#
##obtain the indices of the samples that are classified as true by the model
#models_positives = np.where(output != 0)[0]
#    
##obtain the indices of the samples that are classified as true by anchors
#anchors_positives = np.where(final_anch != 0)[0]
#
##obtain the samples classified correctly by anchors w.r.t. the model
#correctly_classified = np.intersect1d(models_positives, anchors_positives)
#
#print(f"Number of samples with all reqs classified as satisfied by the model: {len(models_positives)}")
#print(f"Number of samples with all reqs classified as satisfied by the anchor function: {len(anchors_positives)}")
#print(f"Number of samples with all reqs classified as satisfied by the model and the anchor function: {len(correctly_classified)}")
#print("\n")
#print(f"Number of samples with all reqs classified as satisfied: {len(anchors_positives)}.\
#        \nIf this number is {len(models_positives)} it means that the anchor function classifies correctly the samples classified true by the model.\
#        \nIn this case it is {len(models_positives) == len(anchors_positives)}")
#
##calculate the false positives
#f_p = anchors_positives.shape[0] - correctly_classified.shape[0]
#print(f"Number of false positives: {f_p}, ratio (over anchor_positives): {f_p/anchors_positives.shape[0]}")
#
##calculate the missclassified real positive
#m_r_p = models_positives.shape[0] - correctly_classified.shape[0]
#print(f"Number of missclassified real positives: {m_r_p}, ratio (over model_positives): {m_r_p/models_positives.shape[0]}")
#print("\n")

## Coverage over the non controllable features

In [90]:
from anchors_predictor import AnchorsPredictor

# Create the AnchorsPredictor object
predictor = AnchorsPredictor(anchors=explanations)

feature_names_NC = feature_names[3:7]
coverage = predictor.coverage(explanations, feature_names_NC)

In [91]:
print(f"Coverage of the anchor function: {coverage*100:.2f}%")

Coverage of the anchor function: 41.81%


In [92]:
predictor.classify(samples, explanations, feature_names_NC)

array([0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0.,
       0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 1., 0., 0., 1., 0., 0., 0.,
       1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1.,
       0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
       0., 0., 0., 0., 1., 1., 1., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0.,
       1., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
       1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 1., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0.,
       1., 1., 0., 0., 0.

## Augment data

In [93]:
idx = positively_classified

samples = datasets[0].train[idx]


In [94]:
explanations_aug = predictor.augment_coverage(datasets, explanations, feature_names_NC, np.zeros(len(feature_names_NC)), np.array([100 for _ in range(len(feature_names_NC))]), models, samples, req_names, explainer, req_number)
print(f"Coverage of the anchor function after augmentation: {predictor.coverage(explanations_aug, feature_names_NC) * 100:.2f}%")

4263
___________Requirement req_0___________
___________Requirement req_1___________
___________Requirement req_2___________
___________Requirement req_3___________
positive_merged_points: (2338, 9)
p_sample: [20.0201 82.9284 79.6514 90.5009 20.     20.     20.     20.      1.    ]
p_sample: [ 8.8596 89.0535 79.1326 89.8247 20.     20.     20.     20.      1.    ]
p_sample: [ 0.2506 76.5083 92.9881 39.1324 20.     20.     20.     20.      1.    ]
p_sample: [18.2964 89.6387 81.6923 96.2027 20.     20.     20.     20.      1.    ]
p_sample: [71.9996 99.8804 68.7445 55.082  20.     20.     20.     20.      1.    ]
p_sample: [27.5392 98.4133 76.1458 97.6327 20.     20.     20.     20.      1.    ]
p_sample: [25.1575 96.1266 85.6746 90.7216 20.     20.     20.     20.      1.    ]
p_sample: [22.9534 97.4496 98.9064 70.6659 20.     20.     20.     20.      1.    ]
p_sample: [16.1259 99.746  77.9488 67.2042 20.     20.     20.     20.      1.    ]
p_sample: [ 4.3656 86.2519 64.84   92.4682 20

KeyboardInterrupt: 

In [None]:
print(predictor.coverage(explanations, feature_names_NC))

0.41810389785521224


# Evaluate sample

In [95]:
print(explanations[10])

{'firm obstacle': (1.0, 1.0, True, True), 'image resolution': (75.24, inf, False, False), 'cruise speed': (-inf, 25.21, False, True), 'illuminance': (75.91, inf, False, False), 'smoke intensity': (-inf, 48.87, False, True), 'obstacle size': (50.25, inf, False, False), 'controls responsiveness': (50.29, inf, False, False), 'power': (25.0, inf, False, False), 'obstacle distance': (25.34, 74.78, False, True)}


In [96]:
sample = np.array([10, 80,50,35, 20,10,12,92,1])
print(feature_names)
sample_inside_10 = np.array([10,900,90,70,50,30,70,50,1])
print(sample_inside_10)

['cruise speed', 'image resolution', 'illuminance', 'controls responsiveness', 'power', 'smoke intensity', 'obstacle size', 'obstacle distance', 'firm obstacle']
[ 10 900  90  70  50  30  70  50   1]


In [100]:
min_dist_c, min_dist_o = predictor.evaluate_sample(sample_inside_10, explanations, feature_names[0:3], feature_names[3:7])

Sample:  [ 10 900  90  70  50  30  70  50   1]
x:  [ 10 900  90  70  50  30  70  50   1]
explanations_table:  24
i:  0
a:  0
b:  25.21
x[j]:  10
a:  75.24
b:  100
x[j]:  900
contr_f_dist[i]:  640000.0
a:  75.91
b:  100
x[j]:  90
min_dist_controllable:  640000.0
min_dist_index_controllable:  0
finito controllable features
contr_f_dist:  [640000.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.]
f_name controls responsiveness {'firm obstacle': (1.0, 1.0, True, True), 'smoke intensity': (-inf, 23.42, False, True), 'illuminance': (75.91, inf, False, False), 'cruise speed': (-inf, 25.21, False, True), 'image resolution': (75.24, inf, False, False), 'obstacle size': (26.74, 50.25, False, True), 'power': (51.0, inf, False, False), 'controls responsiveness': (73.9, inf, False, False), 'obstacle distance': (25.34, 49.94, False, True)}
a obs:  73.9
b obs:  100


KeyError: 'power'