In [None]:
cd ..

In [None]:
from eval import eraser_faithfulness
from explainli.config import AttributionMethods, AttributionConfig, AggregationMethods, ForwardScoringOptions
from explainli.explainli import NLIAttribution

# Model identifier handed to every NLIAttribution instance in this notebook.
# Replace the '<path_to_finetuned_model>' placeholder with a real location before running.
nli_model_name = '<path_to_finetuned_model>/bert-base-multilingual-finetuned-mnli'

## Additional Functions

In [None]:
import json
import torch

def save_outputs(name, comprehensiveness_score, sufficiency_score, all_compre_scores, all_suff_scores, compre_pairs_w_perturbations, suff_pairs_w_perturbations, compre_pred_out, suff_pred_out):
    """Persist the outputs of one faithfulness evaluation run to disk.

    Three files are written, each prefixed with ``name``:

    * ``{name}_all_scores_and_perturbations.json`` -- the aggregate scores,
      the per-item scores (all stored as strings) and the perturbation records.
    * ``{name}_compre_pred_out.pt`` -- ``compre_pred_out`` written with ``torch.save``.
    * ``{name}_suf_pred_out.pt`` -- ``suff_pred_out`` written with ``torch.save``.

    Args:
        name: Output path prefix (may include a directory component).
        comprehensiveness_score: Aggregate comprehensiveness score; stored as ``str(...)``.
        sufficiency_score: Aggregate sufficiency score; stored as ``str(...)``.
        all_compre_scores: Iterable of objects exposing ``.item()`` (e.g. 0-d tensors).
        all_suff_scores: Iterable of objects exposing ``.item()`` (e.g. 0-d tensors).
        compre_pairs_w_perturbations: JSON-serialisable perturbation records.
        suff_pairs_w_perturbations: JSON-serialisable perturbation records.
        compre_pred_out: Object passed unchanged to ``torch.save``.
        suff_pred_out: Object passed unchanged to ``torch.save``.

    Raises:
        AttributeError: If a per-item score has no ``.item()`` method.
        TypeError: If the perturbation records are not JSON-serialisable.
    """
    summary = {
        'compre_score': str(comprehensiveness_score),
        'suff_score': str(sufficiency_score),
        'all_compre_scores': [str(score.item()) for score in all_compre_scores],
        'all_suff_scores': [str(score.item()) for score in all_suff_scores],
        'compre_perturbations': compre_pairs_w_perturbations,
        'suff_perturbations': suff_pairs_w_perturbations,
    }

    # Serialise before opening the file: opening with "w" truncates it, so a
    # failure while building or encoding the payload must not clobber an
    # existing result file with an empty or partial one.
    encoded = json.dumps(summary)
    with open(f"{name}_all_scores_and_perturbations.json", "w") as f:
        f.write(encoded)

    # The tensor dumps do not need the JSON handle, so they run after it is closed.
    # NOTE: the "_suf_" spelling (single "f") is kept so existing files and loaders still match.
    torch.save(compre_pred_out, f"{name}_compre_pred_out.pt")
    torch.save(suff_pred_out, f"{name}_suf_pred_out.pt")
    

# InputxGradient

## wrt Top Prediction with MEAN

In [None]:
# Attribution setup: InputXGradient, TOP_PREDICTION forward scoring, MEAN aggregation.
attr_config = AttributionConfig(
    AttributionMethods.InputXGradient,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.MEAN,
    label_names=['entailment', 'neutral', 'contradiction'],
)

attribution = NLIAttribution(config=attr_config, model_name=nli_model_name)

In [None]:
# Evaluate faithfulness for the current `attribution`; unpack all eight returned outputs.
(
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=192,
    return_all_outputs=True,
)

In [None]:
# Report the aggregate scores and store all outputs under the 'inputXgradient_tp_mean' prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'inputXgradient_tp_mean',
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
)

comprehensiveness: 0.2848731577396393  sufficiency: 0.26655709743499756


## wrt Top Prediction with L2

In [None]:
# Attribution setup: InputXGradient, TOP_PREDICTION forward scoring, L2 aggregation.
attr_config = AttributionConfig(
    AttributionMethods.InputXGradient,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.L2,
    label_names=['entailment', 'neutral', 'contradiction'],
)

attribution = NLIAttribution(config=attr_config, model_name=nli_model_name)

In [None]:
# Evaluate faithfulness for the current `attribution`; unpack all eight returned outputs.
(
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=192,
    return_all_outputs=True,
)

In [None]:
# Report the aggregate scores and store all outputs under the 'inputXgradient_tp_l2' prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'inputXgradient_tp_l2',
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
)

comprehensiveness: 0.32222217321395874  sufficiency: 0.23578998446464539


## wrt Loss with MEAN

In [None]:
# Attribution setup: InputXGradient, LOSS forward scoring, MEAN aggregation.
attr_config = AttributionConfig(
    AttributionMethods.InputXGradient,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.MEAN,
    label_names=['entailment', 'neutral', 'contradiction'],
)

attribution = NLIAttribution(config=attr_config, model_name=nli_model_name)

In [None]:
# Evaluate faithfulness for the current `attribution`; unpack all eight returned outputs.
(
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=192,
    return_all_outputs=True,
)

In [None]:
# Report the aggregate scores and store all outputs under the 'inputXgradient_loss_mean' prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'inputXgradient_loss_mean',
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
)

comprehensiveness: 0.29643404483795166  sufficiency: 0.24230821430683136


## wrt Loss with L2

In [None]:
# Attribution setup: InputXGradient, LOSS forward scoring, L2 aggregation.
attr_config = AttributionConfig(
    AttributionMethods.InputXGradient,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.L2,
    label_names=['entailment', 'neutral', 'contradiction'],
)

attribution = NLIAttribution(config=attr_config, model_name=nli_model_name)

In [None]:
# Evaluate faithfulness for the current `attribution`; unpack all eight returned outputs.
(
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=192,
    return_all_outputs=True,
)

In [None]:
# Report the aggregate scores and store all outputs under the 'inputXgradient_loss_l2' prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'inputXgradient_loss_l2',
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
)

comprehensiveness: 0.3147645592689514  sufficiency: 0.26130351424217224


# Saliency

## wrt Top Prediction with MEAN

In [None]:
# Attribution setup: Saliency, TOP_PREDICTION forward scoring, MEAN aggregation.
attr_config = AttributionConfig(
    AttributionMethods.Saliency,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.MEAN,
    label_names=['entailment', 'neutral', 'contradiction'],
)

attribution = NLIAttribution(config=attr_config, model_name=nli_model_name)

In [None]:
# Evaluate faithfulness for the current `attribution`; unpack all eight returned outputs.
(
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=192,
    return_all_outputs=True,
)

In [None]:
# Report the aggregate scores and store all outputs under the 'saliency_tp_mean' prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'saliency_tp_mean',
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
)

comprehensiveness: 0.3138962984085083  sufficiency: 0.2259143590927124


## wrt Top Prediction with L2

In [None]:
# Attribution setup: Saliency, TOP_PREDICTION forward scoring, L2 aggregation.
attr_config = AttributionConfig(
    AttributionMethods.Saliency,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.L2,
    label_names=['entailment', 'neutral', 'contradiction'],
)

attribution = NLIAttribution(config=attr_config, model_name=nli_model_name)

In [None]:
# Evaluate faithfulness for the current `attribution`; unpack all eight returned outputs.
(
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=192,
    return_all_outputs=True,
)

In [None]:
# Report the aggregate scores and store all outputs under the 'saliency_tp_l2' prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'saliency_tp_l2',
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
)

comprehensiveness: 0.3098418116569519  sufficiency: 0.23827123641967773


## wrt Loss with MEAN

In [None]:
# Attribution setup: Saliency, LOSS forward scoring, MEAN aggregation.
attr_config = AttributionConfig(
    AttributionMethods.Saliency,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.MEAN,
    label_names=['entailment', 'neutral', 'contradiction'],
)

attribution = NLIAttribution(config=attr_config, model_name=nli_model_name)

In [None]:
# Evaluate faithfulness for the current `attribution`; unpack all eight returned outputs.
(
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=192,
    return_all_outputs=True,
)

In [None]:
# Report the aggregate scores and store all outputs under the 'saliency_loss_mean' prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'saliency_loss_mean',
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
)

comprehensiveness: 0.3183731436729431  sufficiency: 0.23191587626934052


## wrt Loss with L2

In [None]:
# Attribution setup: Saliency, LOSS forward scoring, L2 aggregation.
attr_config = AttributionConfig(
    AttributionMethods.Saliency,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.L2,
    label_names=['entailment', 'neutral', 'contradiction'],
)

attribution = NLIAttribution(config=attr_config, model_name=nli_model_name)

In [None]:
# Evaluate faithfulness for the current `attribution`; unpack all eight returned outputs.
(
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=192,
    return_all_outputs=True,
)

In [None]:
# Report the aggregate scores and store all outputs for the Saliency / LOSS / L2 run.
# The prefix must be 'saliency_loss_l2': reusing 'saliency_loss_mean' here would
# overwrite the files written by the Saliency / LOSS / MEAN run.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'saliency_loss_l2',
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
)

comprehensiveness: 0.3206157982349396  sufficiency: 0.23769353330135345


# Activation 

## wrt Top Prediction with MEAN

In [None]:
# Attribution setup: Activation, TOP_PREDICTION forward scoring, MEAN aggregation.
attr_config = AttributionConfig(
    AttributionMethods.Activation,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.MEAN,
    label_names=['entailment', 'neutral', 'contradiction'],
)

attribution = NLIAttribution(config=attr_config, model_name=nli_model_name)

In [None]:
# Evaluate faithfulness for the current `attribution`; unpack all eight returned outputs.
(
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=192,
    return_all_outputs=True,
)

In [None]:
# Report the aggregate scores and store all outputs under the 'activation_tp_mean' prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'activation_tp_mean',
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
)

comprehensiveness: 0.2402103692293167  sufficiency: 0.17900384962558746


## wrt Top Prediction with L2

In [None]:
# Attribution setup: Activation, TOP_PREDICTION forward scoring, L2 aggregation.
attr_config = AttributionConfig(
    AttributionMethods.Activation,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.L2,
    label_names=['entailment', 'neutral', 'contradiction'],
)

attribution = NLIAttribution(config=attr_config, model_name=nli_model_name)

In [None]:
# Evaluate faithfulness for the current `attribution`; unpack all eight returned outputs.
(
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=192,
    return_all_outputs=True,
)

In [None]:
# Report the aggregate scores and store all outputs under the 'activation_tp_l2' prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'activation_tp_l2',
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
)

comprehensiveness: 0.30651262402534485  sufficiency: 0.33295711874961853


## wrt Loss with MEAN

In [None]:
# Attribution setup: Activation, LOSS forward scoring, MEAN aggregation.
attr_config = AttributionConfig(
    AttributionMethods.Activation,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.MEAN,
    label_names=['entailment', 'neutral', 'contradiction'],
)

attribution = NLIAttribution(config=attr_config, model_name=nli_model_name)

In [None]:
# Evaluate faithfulness for the current `attribution`; unpack all eight returned outputs.
(
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=192,
    return_all_outputs=True,
)

In [None]:
# Report the aggregate scores and store all outputs under the 'activation_loss_mean' prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'activation_loss_mean',
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
)

comprehensiveness: 0.2402103692293167  sufficiency: 0.17900384962558746


## wrt Loss with L2

In [None]:
# Attribution setup: Activation, LOSS forward scoring, L2 aggregation.
attr_config = AttributionConfig(
    AttributionMethods.Activation,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.L2,
    label_names=['entailment', 'neutral', 'contradiction'],
)

attribution = NLIAttribution(config=attr_config, model_name=nli_model_name)

In [None]:
# Evaluate faithfulness for the current `attribution`; unpack all eight returned outputs.
(
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=192,
    return_all_outputs=True,
)

In [None]:
# Report the aggregate scores and store all outputs under the 'activation_loss_l2' prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'activation_loss_l2',
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
)

comprehensiveness: 0.30651262402534485  sufficiency: 0.33295711874961853


# GuidedBackprop 

## wrt Top Prediction with MEAN

In [None]:
# Attribution setup: GuidedBackprop, TOP_PREDICTION forward scoring, MEAN aggregation.
attr_config = AttributionConfig(
    AttributionMethods.GuidedBackprop,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.MEAN,
    label_names=['entailment', 'neutral', 'contradiction'],
)

attribution = NLIAttribution(config=attr_config, model_name=nli_model_name)

In [None]:
# Evaluate faithfulness for the current `attribution`; unpack all eight returned outputs.
(
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=192,
    return_all_outputs=True,
)

In [None]:
# Report the aggregate scores and store all outputs under the 'guided_bp_tp_mean' prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'guided_bp_tp_mean',
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
)

comprehensiveness: 0.2737332880496979  sufficiency: 0.28174301981925964


## wrt Top Prediction with L2

In [None]:
# Attribution setup: GuidedBackprop, TOP_PREDICTION forward scoring, L2 aggregation.
attr_config = AttributionConfig(
    AttributionMethods.GuidedBackprop,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.L2,
    label_names=['entailment', 'neutral', 'contradiction'],
)

attribution = NLIAttribution(config=attr_config, model_name=nli_model_name)

In [None]:
# Evaluate faithfulness for the current `attribution`; unpack all eight returned outputs.
(
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=192,
    return_all_outputs=True,
)

In [None]:
# Report the aggregate scores and store all outputs under the 'guided_bp_tp_l2' prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'guided_bp_tp_l2',
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
)

comprehensiveness: 0.3098418116569519  sufficiency: 0.23827123641967773


## wrt Loss with MEAN

In [None]:
# Attribution setup: GuidedBackprop, LOSS forward scoring, MEAN aggregation.
attr_config = AttributionConfig(
    AttributionMethods.GuidedBackprop,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.MEAN,
    label_names=['entailment', 'neutral', 'contradiction'],
)

attribution = NLIAttribution(config=attr_config, model_name=nli_model_name)

In [None]:
# Evaluate faithfulness for the current `attribution`; unpack all eight returned outputs.
(
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=192,
    return_all_outputs=True,
)

In [None]:
# Report the aggregate scores and store all outputs under the 'guided_bp_loss_mean' prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'guided_bp_loss_mean',
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
)

comprehensiveness: 0.20522020757198334  sufficiency: 0.2861844599246979


## wrt Loss with L2

In [None]:
# Attribution setup: GuidedBackprop, LOSS forward scoring, L2 aggregation.
attr_config = AttributionConfig(
    AttributionMethods.GuidedBackprop,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.L2,
    label_names=['entailment', 'neutral', 'contradiction'],
)

attribution = NLIAttribution(config=attr_config, model_name=nli_model_name)

In [None]:
# Evaluate faithfulness for the current `attribution`; unpack all eight returned outputs.
(
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=192,
    return_all_outputs=True,
)

In [None]:
# Report the aggregate scores and store all outputs under the 'guided_bp_loss_l2' prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'guided_bp_loss_l2',
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
)

comprehensiveness: 0.3206157982349396  sufficiency: 0.23769353330135345


# Occlusion

## wrt Top Prediction with SUM

In [None]:
# Attribution setup: Occlusion, TOP_PREDICTION forward scoring, SUM aggregation.
attr_config = AttributionConfig(
    AttributionMethods.Occlusion,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.SUM,
    label_names=['entailment', 'neutral', 'contradiction'],
)

attribution = NLIAttribution(config=attr_config, model_name=nli_model_name)

In [None]:
# Evaluate faithfulness for the current `attribution`; unpack all eight returned outputs.
# NOTE(review): sliding_window_shapes=(1, 768) presumably occludes one token across a
# 768-wide embedding -- confirm against the evaluate/occlusion implementation.
(
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=16,
    return_all_outputs=True,
    sliding_window_shapes=(1, 768),
)

In [None]:
# Report the aggregate scores and store all outputs under the 'occlusion_tp_sum' prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'occlusion_tp_sum',
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
)

comprehensiveness: 0.2985566556453705  sufficiency: 0.2890535593032837


## wrt Loss with SUM

In [None]:
# Attribution setup: Occlusion, LOSS forward scoring, SUM aggregation.
attr_config = AttributionConfig(
    AttributionMethods.Occlusion,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.SUM,
    label_names=['entailment', 'neutral', 'contradiction'],
)

attribution = NLIAttribution(config=attr_config, model_name=nli_model_name)

In [None]:
# Evaluate faithfulness for the current `attribution`; unpack all eight returned outputs.
# NOTE(review): sliding_window_shapes=(1, 768) presumably occludes one token across a
# 768-wide embedding -- confirm against the evaluate/occlusion implementation.
(
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=16,
    return_all_outputs=True,
    sliding_window_shapes=(1, 768),
)

In [None]:
# Report the aggregate scores and store all outputs under the 'occlusion_loss_sum' prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'occlusion_loss_sum',
    comprehensiveness_score, sufficiency_score,
    all_compre_scores, all_suff_scores,
    compre_pairs_w_perturbations, suff_pairs_w_perturbations,
    compre_pred_out, suff_pred_out,
)

comprehensiveness: 0.3070026934146881  sufficiency: 0.23816151916980743


# LIME

## wrt Top Prediction with SUM

In [None]:
# Attribution setup: LIME, TOP_PREDICTION forward scoring, SUM aggregation.
attr_config = AttributionConfig(
    AttributionMethods.LIME,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.SUM,
    label_names=['entailment', 'neutral', 'contradiction'],
)

attribution = NLIAttribution(config=attr_config, model_name=nli_model_name)

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Run eraser_faithfulness.evaluate on the current `attribution` object and
# unpack its eight return values.
(
    comprehensiveness_score,
    sufficiency_score,
    all_compre_scores,
    all_suff_scores,
    compre_pairs_w_perturbations,
    suff_pairs_w_perturbations,
    compre_pred_out,
    suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=8,
    return_all_outputs=True,
)

In [None]:
# Show the two aggregate scores, then persist every output under the
# 'LIME_tp_sum' file prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'LIME_tp_sum',
    comprehensiveness_score,
    sufficiency_score,
    all_compre_scores,
    all_suff_scores,
    compre_pairs_w_perturbations,
    suff_pairs_w_perturbations,
    compre_pred_out,
    suff_pred_out,
)

comprehensiveness: 0.24494752287864685  sufficiency: 0.24101042747497559


## wrt Loss with SUM

In [None]:
# LIME attribution with forward scoring set to LOSS and SUM aggregation.
attr_config = AttributionConfig(
    AttributionMethods.LIME,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.SUM,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Bind the fine-tuned NLI model to this configuration.
attribution = NLIAttribution(
    model_name=nli_model_name,
    config=attr_config,
)

In [None]:
# Run eraser_faithfulness.evaluate on the current `attribution` object and
# unpack its eight return values.
(
    comprehensiveness_score,
    sufficiency_score,
    all_compre_scores,
    all_suff_scores,
    compre_pairs_w_perturbations,
    suff_pairs_w_perturbations,
    compre_pred_out,
    suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=8,
    return_all_outputs=True,
)

In [None]:
# Show the two aggregate scores, then persist every output under the
# 'LIME_loss_sum' file prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'LIME_loss_sum',
    comprehensiveness_score,
    sufficiency_score,
    all_compre_scores,
    all_suff_scores,
    compre_pairs_w_perturbations,
    suff_pairs_w_perturbations,
    compre_pred_out,
    suff_pred_out,
)

comprehensiveness: 0.24925467371940613  sufficiency: 0.2260829359292984


# Integrated Gradients

## wrt Top Prediction with MEAN

In [None]:
# Integrated Gradients with forward scoring set to TOP_PREDICTION and MEAN aggregation.
attr_config = AttributionConfig(
    AttributionMethods.IntegratedGradients,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.MEAN,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Bind the fine-tuned NLI model to this configuration.
attribution = NLIAttribution(
    model_name=nli_model_name,
    config=attr_config,
)

In [None]:
# Run eraser_faithfulness.evaluate on the current `attribution` object and
# unpack its eight return values.
# NOTE(review): n_steps is presumably the Integrated Gradients approximation
# step count — confirm against evaluate()'s handling of extra kwargs.
(
    comprehensiveness_score,
    sufficiency_score,
    all_compre_scores,
    all_suff_scores,
    compre_pairs_w_perturbations,
    suff_pairs_w_perturbations,
    compre_pred_out,
    suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=2,
    return_all_outputs=True,
    n_steps=100,
)

In [None]:
# Show the two aggregate scores, then persist every output under the
# 'IG_tp_mean' file prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'IG_tp_mean',
    comprehensiveness_score,
    sufficiency_score,
    all_compre_scores,
    all_suff_scores,
    compre_pairs_w_perturbations,
    suff_pairs_w_perturbations,
    compre_pred_out,
    suff_pred_out,
)

comprehensiveness: 0.21276625990867615  sufficiency: 0.28814610838890076


## wrt Top Prediction with L2

In [None]:
# Integrated Gradients with forward scoring set to TOP_PREDICTION and L2 aggregation.
attr_config = AttributionConfig(
    AttributionMethods.IntegratedGradients,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.L2,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Bind the fine-tuned NLI model to this configuration.
attribution = NLIAttribution(
    model_name=nli_model_name,
    config=attr_config,
)

In [None]:
# Run eraser_faithfulness.evaluate on the current `attribution` object and
# unpack its eight return values.
# NOTE(review): n_steps is presumably the Integrated Gradients approximation
# step count — confirm against evaluate()'s handling of extra kwargs.
(
    comprehensiveness_score,
    sufficiency_score,
    all_compre_scores,
    all_suff_scores,
    compre_pairs_w_perturbations,
    suff_pairs_w_perturbations,
    compre_pred_out,
    suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=2,
    return_all_outputs=True,
    n_steps=100,
)

In [None]:
# Show the two aggregate scores, then persist every output under the
# 'IG_tp_L2' file prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'IG_tp_L2',
    comprehensiveness_score,
    sufficiency_score,
    all_compre_scores,
    all_suff_scores,
    compre_pairs_w_perturbations,
    suff_pairs_w_perturbations,
    compre_pred_out,
    suff_pred_out,
)

comprehensiveness: 0.3021012842655182  sufficiency: 0.29073989391326904


## wrt Loss with MEAN

In [None]:
# Integrated Gradients with forward scoring set to LOSS and MEAN aggregation.
attr_config = AttributionConfig(
    AttributionMethods.IntegratedGradients,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.MEAN,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Bind the fine-tuned NLI model to this configuration.
attribution = NLIAttribution(
    model_name=nli_model_name,
    config=attr_config,
)

In [None]:
# Run eraser_faithfulness.evaluate on the current `attribution` object and
# unpack its eight return values.
# NOTE(review): this run uses n_steps=75 with attr_batch_size=4, unlike the
# other Integrated Gradients runs (100 / 2) — confirm this is intentional.
(
    comprehensiveness_score,
    sufficiency_score,
    all_compre_scores,
    all_suff_scores,
    compre_pairs_w_perturbations,
    suff_pairs_w_perturbations,
    compre_pred_out,
    suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=4,
    return_all_outputs=True,
    n_steps=75,
)

In [None]:
# Show the two aggregate scores, then persist every output under the
# 'IG_loss_MEAN' file prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'IG_loss_MEAN',
    comprehensiveness_score,
    sufficiency_score,
    all_compre_scores,
    all_suff_scores,
    compre_pairs_w_perturbations,
    suff_pairs_w_perturbations,
    compre_pred_out,
    suff_pred_out,
)

comprehensiveness: 0.25857409834861755  sufficiency: 0.21338248252868652


## wrt Loss with L2

In [None]:
# Integrated Gradients with forward scoring set to LOSS and L2 aggregation.
attr_config = AttributionConfig(
    AttributionMethods.IntegratedGradients,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.L2,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Bind the fine-tuned NLI model to this configuration.
attribution = NLIAttribution(
    model_name=nli_model_name,
    config=attr_config,
)

In [None]:
# Run eraser_faithfulness.evaluate on the current `attribution` object and
# unpack its eight return values.
# NOTE(review): n_steps is presumably the Integrated Gradients approximation
# step count — confirm against evaluate()'s handling of extra kwargs.
(
    comprehensiveness_score,
    sufficiency_score,
    all_compre_scores,
    all_suff_scores,
    compre_pairs_w_perturbations,
    suff_pairs_w_perturbations,
    compre_pred_out,
    suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=2,
    return_all_outputs=True,
    n_steps=100,
)

In [None]:
# Show the two aggregate scores, then persist every output under the
# 'IG_loss_L2' file prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'IG_loss_L2',
    comprehensiveness_score,
    sufficiency_score,
    all_compre_scores,
    all_suff_scores,
    compre_pairs_w_perturbations,
    suff_pairs_w_perturbations,
    compre_pred_out,
    suff_pred_out,
)

comprehensiveness: 0.29097580909729004  sufficiency: 0.28717684745788574


# Shapley

## wrt Top Prediction with SUM

In [None]:
# Shapley attribution with forward scoring set to TOP_PREDICTION and SUM aggregation.
attr_config = AttributionConfig(
    AttributionMethods.Shapley,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.SUM,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Bind the fine-tuned NLI model to this configuration.
attribution = NLIAttribution(
    model_name=nli_model_name,
    config=attr_config,
)

In [None]:
# Run eraser_faithfulness.evaluate on the current `attribution` object and
# unpack its eight return values.
(
    comprehensiveness_score,
    sufficiency_score,
    all_compre_scores,
    all_suff_scores,
    compre_pairs_w_perturbations,
    suff_pairs_w_perturbations,
    compre_pred_out,
    suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=4,
    return_all_outputs=True,
)

In [None]:
# Show the two aggregate scores, then persist every output under the
# 'Shapley_tp_sum' file prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'Shapley_tp_sum',
    comprehensiveness_score,
    sufficiency_score,
    all_compre_scores,
    all_suff_scores,
    compre_pairs_w_perturbations,
    suff_pairs_w_perturbations,
    compre_pred_out,
    suff_pred_out,
)

## wrt Loss with SUM

In [None]:
# Shapley attribution with forward scoring set to LOSS and SUM aggregation.
attr_config = AttributionConfig(
    AttributionMethods.Shapley,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=False,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.SUM,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Bind the fine-tuned NLI model to this configuration.
attribution = NLIAttribution(
    model_name=nli_model_name,
    config=attr_config,
)

In [None]:
# Run eraser_faithfulness.evaluate on the current `attribution` object and
# unpack its eight return values.
(
    comprehensiveness_score,
    sufficiency_score,
    all_compre_scores,
    all_suff_scores,
    compre_pairs_w_perturbations,
    suff_pairs_w_perturbations,
    compre_pred_out,
    suff_pred_out,
) = eraser_faithfulness.evaluate(
    attribution,
    percentages=[0.01, 0.05, 0.1, 0.2, 0.5],
    batch_size=64,
    attr_batch_size=4,
    return_all_outputs=True,
)

In [None]:
# Show the two aggregate scores, then persist every output under the
# 'Shapley_loss_sum' file prefix.
print(f"comprehensiveness: {comprehensiveness_score}  sufficiency: {sufficiency_score}")
save_outputs(
    'Shapley_loss_sum',
    comprehensiveness_score,
    sufficiency_score,
    all_compre_scores,
    all_suff_scores,
    compre_pairs_w_perturbations,
    suff_pairs_w_perturbations,
    compre_pred_out,
    suff_pred_out,
)