In [None]:
# Move the kernel's working directory up one level; the imports and the relative
# CSV paths used below are resolved from there.
# NOTE(review): presumably the parent is the repository root containing `eval` and
# `explainli` — confirm. Re-running this cell moves up one more level each time.
%cd ../

# Download e-SNLI dev and test sets

In [None]:
!wget https://raw.githubusercontent.com/OanaMariaCamburu/e-SNLI/master/dataset/esnli_test.csv

In [None]:
!wget https://raw.githubusercontent.com/OanaMariaCamburu/e-SNLI/master/dataset/esnli_dev.csv

# Import and Initialization

In [None]:
import warnings

from eval import human_agreement
from explainli.config import AttributionMethods, AttributionConfig, AggregationMethods, ForwardScoringOptions
from explainli.explainli import NLIAttribution

# Model identifier handed to every NLIAttribution built in this notebook.
# NOTE(review): presumably a Hugging Face Hub checkpoint id — confirm; also confirm
# that the `label_names` order used below matches this checkpoint's output indices.
model_name = 'textattack/bert-base-uncased-snli'

# InputXGradient Human Agreement (HA)

## MEAN aggregation - wrt Top Prediction

In [None]:
# InputXGradient attributions, MEAN aggregation, forward scoring set to TOP_PREDICTION.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.InputXGradient,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.MEAN,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)


In [None]:
# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 192 and 0 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    192,
    0,
)
print(f"mAP: {mAP}, f1: {f1}")

## L2 aggregation - wrt Top Prediction

In [None]:
# InputXGradient attributions, L2 aggregation, forward scoring set to TOP_PREDICTION.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.InputXGradient,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.L2,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 192 and 0.5 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    192,
    0.5,
)
print(f"mAP: {mAP}, f1: {f1}")


## MEAN aggregation - wrt Loss

In [None]:
# InputXGradient attributions, MEAN aggregation, forward scoring set to LOSS.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.InputXGradient,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.MEAN,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)


In [None]:
# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 192 and 0 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    192,
    0,
)
print(f"mAP: {mAP}, f1: {f1}")


## L2 aggregation - wrt Loss

In [None]:
# InputXGradient attributions, L2 aggregation, forward scoring set to LOSS.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.InputXGradient,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.L2,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)


In [None]:
# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 192 and 0.5 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    192,
    0.5,
)
print(f"mAP: {mAP}, f1: {f1}")


# Saliency HA

## MEAN aggregation - wrt Top Prediction

In [None]:
# Saliency attributions, MEAN aggregation, forward scoring set to TOP_PREDICTION.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.Saliency,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.MEAN,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 192 and 0 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    192,
    0,
)
print(f"mAP: {mAP}, f1: {f1}")


## L2 aggregation - wrt Top Prediction

In [None]:
# Saliency attributions, L2 aggregation, forward scoring set to TOP_PREDICTION.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.Saliency,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.L2,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 192 and 0.5 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    192,
    0.5,
)
print(f"mAP: {mAP}, f1: {f1}")


## MEAN aggregation - wrt Loss

In [None]:
# Saliency attributions, MEAN aggregation, forward scoring set to LOSS.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.Saliency,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.MEAN,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 192 and 0 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    192,
    0,
)
print(f"mAP: {mAP}, f1: {f1}")


## L2 aggregation - wrt Loss

In [None]:
# Saliency attributions, L2 aggregation, forward scoring set to LOSS.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.Saliency,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.L2,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 192 and 0.5 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    192,
    0.5,
)
print(f"mAP: {mAP}, f1: {f1}")


# Activation HA

## MEAN aggregation - wrt Top Prediction

In [None]:
# Activation attributions, MEAN aggregation, forward scoring set to TOP_PREDICTION.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.Activation,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.MEAN,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 192 and 0 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    192,
    0,
)
print(f"mAP: {mAP}, f1: {f1}")

## L2 aggregation - wrt Top Prediction

In [None]:
# Activation attributions, L2 aggregation, forward scoring set to TOP_PREDICTION.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.Activation,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.L2,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 192 and 0.5 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    192,
    0.5,
)
print(f"mAP: {mAP}, f1: {f1}")

## MEAN aggregation - wrt Loss

In [None]:
# Activation attributions, MEAN aggregation, forward scoring set to LOSS.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.Activation,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.MEAN,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 192 and 0 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    192,
    0,
)
print(f"mAP: {mAP}, f1: {f1}")


## L2 aggregation - wrt Loss

In [None]:
# Activation attributions, L2 aggregation, forward scoring set to LOSS.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.Activation,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.L2,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 192 and 0.5 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    192,
    0.5,
)
print(f"mAP: {mAP}, f1: {f1}")

# Guided BP HA

## MEAN aggregation - wrt Top Prediction

In [None]:
# GuidedBackprop attributions, MEAN aggregation, forward scoring set to TOP_PREDICTION.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.GuidedBackprop,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.MEAN,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 192 and 0 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    192,
    0,
)
print(f"mAP: {mAP}, f1: {f1}")

## L2 aggregation - wrt Top Prediction

In [None]:
# GuidedBackprop attributions, L2 aggregation, forward scoring set to TOP_PREDICTION.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.GuidedBackprop,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.L2,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 192 and 0.5 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    192,
    0.5,
)
print(f"mAP: {mAP}, f1: {f1}")

## MEAN aggregation - wrt Loss

In [None]:
# GuidedBackprop attributions, MEAN aggregation, forward scoring set to LOSS.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.GuidedBackprop,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.MEAN,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 192 and 0 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    192,
    0,
)
print(f"mAP: {mAP}, f1: {f1}")

## L2 aggregation - wrt Loss

In [None]:
# GuidedBackprop attributions, L2 aggregation, forward scoring set to LOSS.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.GuidedBackprop,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.L2,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 192 and 0.5 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    192,
    0.5,
)
print(f"mAP: {mAP}, f1: {f1}")

# Occlusion HA

## SUM aggregation - wrt Top Prediction

In [None]:
# Occlusion attributions, SUM aggregation, forward scoring set to TOP_PREDICTION.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.Occlusion,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.SUM,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 192 and 0 are presumably batch size and score threshold, and
# the (1, 768) window presumably spans one token's full embedding — confirm both.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    192,
    0,
    sliding_window_shapes=(1, 768),
)
print(f"mAP: {mAP}, f1: {f1}")

## SUM aggregation - wrt Loss

In [None]:
# Occlusion attributions, SUM aggregation, forward scoring set to LOSS.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.Occlusion,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.SUM,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 1 and 0 are presumably batch size and score threshold, and
# the (1, 768) window presumably spans one token's full embedding — confirm both.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    1,
    0,
    sliding_window_shapes=(1, 768),
)
print(f"mAP: {mAP}, f1: {f1}")

# LIME HA

## SUM aggregation - wrt Top Prediction

In [None]:
# LIME attributions, SUM aggregation, forward scoring set to TOP_PREDICTION.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.LIME,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.SUM,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# Warnings are silenced only while the evaluation runs: catch_warnings() restores the
# previous filters on exit, so later cells still report their own warnings.
# NOTE(review): the positional 1 and 0 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    _, mAP, f1 = human_agreement.evaluate(attribution, 'esnli_test.csv', 1, 0)

print(f"mAP: {mAP}, f1: {f1}")

## SUM aggregation - wrt Loss

In [None]:
# LIME attributions, SUM aggregation, forward scoring set to LOSS.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.LIME,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.SUM,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# Warnings are silenced only while the evaluation runs: catch_warnings() restores the
# previous filters on exit, so later cells still report their own warnings.
# NOTE(review): the positional 1 and 0 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    _, mAP, f1 = human_agreement.evaluate(attribution, 'esnli_test.csv', 1, 0)

print(f"mAP: {mAP}, f1: {f1}")


# Shapley HA

## SUM aggregation - wrt Top Prediction

In [None]:
# Shapley attributions, SUM aggregation, forward scoring set to TOP_PREDICTION.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.Shapley,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.SUM,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 1 and 0 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    1,
    0,
)
print(f"mAP: {mAP}, f1: {f1}")

## SUM aggregation - wrt Loss

In [None]:
# Shapley attributions, SUM aggregation, forward scoring set to LOSS.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.Shapley,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.SUM,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 128 and 0 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    128,
    0,
)
print(f"mAP: {mAP}, f1: {f1}")

# Integrated Gradients HA

## MEAN aggregation - wrt Top Prediction

In [None]:
# IntegratedGradients attributions, MEAN aggregation, forward scoring set to TOP_PREDICTION.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.IntegratedGradients,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.MEAN,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Step count forwarded to the evaluation as `n_steps` and echoed in the printed summary.
num_steps = 75

# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 1 and 0 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    1,
    0,
    n_steps=num_steps,
)
print(f"mAP: {mAP}, f1: {f1} for n_steps={num_steps}")


## L2 aggregation - wrt Top Prediction

In [None]:
# IntegratedGradients attributions, L2 aggregation, forward scoring set to TOP_PREDICTION.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.IntegratedGradients,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
    aggregation_method=AggregationMethods.L2,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Step count forwarded to the evaluation as `n_steps` and echoed in the printed summary.
num_steps = 100

# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 1 and 0.5 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    1,
    0.5,
    n_steps=num_steps,
)
print(f"mAP: {mAP}, f1: {f1} for n_steps={num_steps}")


## MEAN aggregation - wrt Loss

In [None]:
# IntegratedGradients attributions, MEAN aggregation, forward scoring set to LOSS.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.IntegratedGradients,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.MEAN,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Step count forwarded to the evaluation as `n_steps` and echoed in the printed summary.
num_steps = 50

# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 3 and 0 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    3,
    0,
    n_steps=num_steps,
)
print(f"mAP: {mAP}, f1: {f1} for n_steps={num_steps}")

## L2 aggregation - wrt Loss

In [None]:
# IntegratedGradients attributions, L2 aggregation, forward scoring set to LOSS.
# The remaining flags ask for pad/CLS/SEP removal, subword joining and score normalization.
attr_config = AttributionConfig(
    AttributionMethods.IntegratedGradients,
    remove_pad_tokens=True,
    remove_cls_token=True,
    remove_sep_tokens=True,
    join_subwords=True,
    normalize_scores=True,
    forward_scoring=ForwardScoringOptions.LOSS,
    aggregation_method=AggregationMethods.L2,
    label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `model_name` with the configuration above.
attribution = NLIAttribution(model_name=model_name, config=attr_config)

In [None]:
# Step count forwarded to the evaluation as `n_steps` and echoed in the printed summary.
num_steps = 50

# Evaluate human agreement on the e-SNLI test CSV; the first returned value is discarded.
# NOTE(review): the positional 3 and 0.5 are presumably batch size and score threshold —
# confirm against eval.human_agreement.evaluate.
_, mAP, f1 = human_agreement.evaluate(
    attribution,
    'esnli_test.csv',
    3,
    0.5,
    n_steps=num_steps,
)
print(f"mAP: {mAP}, f1: {f1} for n_steps={num_steps}")