In [None]:
# Switch the working directory to the parent folder
# (presumably the repository root, so the `explainli` / `eval` imports below resolve — confirm).
# NOTE: the path is relative, so running this cell again moves up one more level each time.
%cd ../

# Additional functions

In [None]:
from explainli.config import AttributionMethods, AttributionConfig, AggregationMethods, ForwardScoringOptions
from explainli.explainli import NLIAttribution
from eval import crosslingual_faithfulness

from IPython.display import display
import pandas as pd
import numpy as np

# Placeholder locations: replace the `<...>` parts with real paths before running.
# `nli_model_name` is passed to NLIAttribution(model_name=...) in the attribution cells below.
nli_model_name = '<path_to_finetuned_model>/bert-base-multilingual-finetuned-mnli'
# `word_aligner` is passed to create_dataset_with_alignments(word_aligner=...).
word_aligner = '<path_to_word_aligner>/awesome-align-finetuned-wo-co'

In [None]:
def show_results(avg_correlation, avg_correlations_per_lang, avg_pval, avg_pvalues_per_lang,
                 top_langs=('es', 'fr', 'de', 'zh', 'vi')):
  """Display a per-language score table and print the overall and "top5" averages.

  Args:
    avg_correlation: overall average correlation; printed unchanged.
    avg_correlations_per_lang: mapping of language code -> average correlation.
    avg_pval: overall average p-value; printed unchanged.
    avg_pvalues_per_lang: mapping of language code -> average p-value. It must
      contain every non-'en' key of ``avg_correlations_per_lang``.
    top_langs: languages whose scores are averaged for the "(top5)" line.
      Defaults to the five languages that used to be hard-coded here.

  Raises:
    KeyError: if a language in ``top_langs``, or a non-'en' language of
      ``avg_correlations_per_lang``, is missing from one of the mappings.
  """
  # Materialise once so any iterable (including a generator) can be reused for both averages.
  top_langs = list(top_langs)
  avg_correlation_top5 = np.mean([avg_correlations_per_lang[lang] for lang in top_langs])
  avg_pval_top5 = np.mean([avg_pvalues_per_lang[lang] for lang in top_langs])

  # 'en' is left out of the table.
  rows = [(lang, avg_correlations_per_lang[lang], avg_pvalues_per_lang[lang])
          for lang in avg_correlations_per_lang if lang != 'en']
  # Passing `columns=` to the constructor also works when `rows` is empty,
  # whereas assigning `df.columns` afterwards raises on an empty frame.
  df = pd.DataFrame(rows, columns=['language', 'correlation', 'pval']).set_index('language')
  display(df)

  print(f"avg correlation: {avg_correlation}, avg pval: {avg_pval}")
  print(f"avg correlation (top5): {avg_correlation_top5}, avg pval (top5): {avg_pval_top5}")

In [None]:
import json

def save_dataset_with_alignments(dataset, out):
  """Write a ``(pairs, labels, alignments)`` dataset to the JSON file ``out``.

  Every non-``None`` alignment entry is turned into a list before dumping;
  ``None`` entries are stored as ``None`` (JSON ``null``).
  """
  pairs, labels, alignments = dataset
  serializable_alignments = [
    None if entry is None else list(entry)
    for entry in alignments
  ]
  payload = {
    'alignments': serializable_alignments,
    'pairs': pairs,
    'labels': labels,
  }

  with open(out, 'w') as handle:
    json.dump(payload, handle)

def load_dataset_with_alignments(fname):
  """Read a dataset JSON file and return ``(pairs, labels, alignments)``.

  Each non-``None`` alignment entry is rebuilt as a set of tuples;
  ``None`` entries stay ``None``.
  """
  with open(fname, 'r') as handle:
    payload = json.load(handle)

  alignments = [
    None if entry is None else {tuple(link) for link in entry}
    for entry in payload['alignments']
  ]
  return payload['pairs'], payload['labels'], alignments

## Create dataset with alignments, save and load

In [None]:
from datasets import load_dataset

# Load the XNLI test split for all languages and build the aligned dataset with `word_aligner`.
dataset = load_dataset('xnli', 'all_languages', split='test')
pairs, labels, alignments = crosslingual_faithfulness.create_dataset_with_alignments(
  dataset,
  word_aligner=word_aligner,
)
ds = (pairs, labels, alignments)

# Dump the result to JSON in the current working directory.
save_dataset_with_alignments(ds, 'data_w_alignments.json')

In [None]:
# Load the (pairs, labels, alignments) dataset from a JSON dump.
# NOTE(review): this reads an absolute path (it looks like a Colab Google Drive mount), while the
# previous cell writes 'data_w_alignments.json' relative to the working directory — confirm the
# file is actually available at this location before running.
ds = load_dataset_with_alignments('/content/drive/MyDrive/explaiNLI/data_w_alignments.json')

# InputXGradient

## wrt Top prediction with MEAN

In [None]:
# InputXGradient | forward scoring: TOP_PREDICTION | aggregation: MEAN
attr_config = AttributionConfig(
  AttributionMethods.InputXGradient,
  remove_pad_tokens=True,
  remove_cls_token=True,
  remove_sep_tokens=True,
  join_subwords=True,
  normalize_scores=True,
  forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
  aggregation_method=AggregationMethods.MEAN,
  label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `nli_model_name` with this config.
attribution = NLIAttribution(model_name=nli_model_name, config=attr_config)

In [None]:
# Run the cross-lingual faithfulness evaluation; `ds` is rebound to the dataset returned by `evaluate`.
(
  ds,
  avg_correlation,
  avg_correlations_per_lang,
  avg_pval,
  avg_pvalues_per_lang,
) = crosslingual_faithfulness.evaluate(attribution, 16, dataset_with_alignments=ds)

In [None]:
# Report the scores of the evaluation run above.
show_results(
  avg_correlation=avg_correlation,
  avg_correlations_per_lang=avg_correlations_per_lang,
  avg_pval=avg_pval,
  avg_pvalues_per_lang=avg_pvalues_per_lang,
)

## wrt Top prediction with L2

In [None]:
# InputXGradient | forward scoring: TOP_PREDICTION | aggregation: L2
attr_config = AttributionConfig(
  AttributionMethods.InputXGradient,
  remove_pad_tokens=True,
  remove_cls_token=True,
  remove_sep_tokens=True,
  join_subwords=True,
  normalize_scores=True,
  forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
  aggregation_method=AggregationMethods.L2,
  label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `nli_model_name` with this config.
attribution = NLIAttribution(model_name=nli_model_name, config=attr_config)

In [None]:
# Run the cross-lingual faithfulness evaluation; `ds` is rebound to the dataset returned by `evaluate`.
(
  ds,
  avg_correlation,
  avg_correlations_per_lang,
  avg_pval,
  avg_pvalues_per_lang,
) = crosslingual_faithfulness.evaluate(attribution, 16, dataset_with_alignments=ds)

In [None]:
# Report the scores of the evaluation run above.
show_results(
  avg_correlation=avg_correlation,
  avg_correlations_per_lang=avg_correlations_per_lang,
  avg_pval=avg_pval,
  avg_pvalues_per_lang=avg_pvalues_per_lang,
)

## wrt Loss with MEAN

In [None]:
# InputXGradient | forward scoring: LOSS | aggregation: MEAN
attr_config = AttributionConfig(
  AttributionMethods.InputXGradient,
  remove_pad_tokens=True,
  remove_cls_token=True,
  remove_sep_tokens=True,
  join_subwords=True,
  normalize_scores=True,
  forward_scoring=ForwardScoringOptions.LOSS,
  aggregation_method=AggregationMethods.MEAN,
  label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `nli_model_name` with this config.
attribution = NLIAttribution(model_name=nli_model_name, config=attr_config)

In [None]:
# Run the cross-lingual faithfulness evaluation; `ds` is rebound to the dataset returned by `evaluate`.
(
  ds,
  avg_correlation,
  avg_correlations_per_lang,
  avg_pval,
  avg_pvalues_per_lang,
) = crosslingual_faithfulness.evaluate(attribution, 16, dataset_with_alignments=ds)

In [None]:
# Report the scores of the evaluation run above.
show_results(
  avg_correlation=avg_correlation,
  avg_correlations_per_lang=avg_correlations_per_lang,
  avg_pval=avg_pval,
  avg_pvalues_per_lang=avg_pvalues_per_lang,
)

## wrt Loss with L2

In [None]:
# InputXGradient | forward scoring: LOSS | aggregation: L2
attr_config = AttributionConfig(
  AttributionMethods.InputXGradient,
  remove_pad_tokens=True,
  remove_cls_token=True,
  remove_sep_tokens=True,
  join_subwords=True,
  normalize_scores=True,
  forward_scoring=ForwardScoringOptions.LOSS,
  aggregation_method=AggregationMethods.L2,
  label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `nli_model_name` with this config.
attribution = NLIAttribution(model_name=nli_model_name, config=attr_config)

In [None]:
# Run the cross-lingual faithfulness evaluation; `ds` is rebound to the dataset returned by `evaluate`.
(
  ds,
  avg_correlation,
  avg_correlations_per_lang,
  avg_pval,
  avg_pvalues_per_lang,
) = crosslingual_faithfulness.evaluate(attribution, 16, dataset_with_alignments=ds)

In [None]:
# Report the scores of the evaluation run above.
show_results(
  avg_correlation=avg_correlation,
  avg_correlations_per_lang=avg_correlations_per_lang,
  avg_pval=avg_pval,
  avg_pvalues_per_lang=avg_pvalues_per_lang,
)

# Saliency

## wrt Top prediction with MEAN

In [None]:
# Saliency | forward scoring: TOP_PREDICTION | aggregation: MEAN
attr_config = AttributionConfig(
  AttributionMethods.Saliency,
  remove_pad_tokens=True,
  remove_cls_token=True,
  remove_sep_tokens=True,
  join_subwords=True,
  normalize_scores=True,
  forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
  aggregation_method=AggregationMethods.MEAN,
  label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `nli_model_name` with this config.
attribution = NLIAttribution(model_name=nli_model_name, config=attr_config)

In [None]:
# Run the cross-lingual faithfulness evaluation; `ds` is rebound to the dataset returned by `evaluate`.
(
  ds,
  avg_correlation,
  avg_correlations_per_lang,
  avg_pval,
  avg_pvalues_per_lang,
) = crosslingual_faithfulness.evaluate(attribution, 16, dataset_with_alignments=ds)

In [None]:
# Report the scores of the evaluation run above.
show_results(
  avg_correlation=avg_correlation,
  avg_correlations_per_lang=avg_correlations_per_lang,
  avg_pval=avg_pval,
  avg_pvalues_per_lang=avg_pvalues_per_lang,
)

## wrt Top prediction with L2

In [None]:
# Saliency | forward scoring: TOP_PREDICTION | aggregation: L2
attr_config = AttributionConfig(
  AttributionMethods.Saliency,
  remove_pad_tokens=True,
  remove_cls_token=True,
  remove_sep_tokens=True,
  join_subwords=True,
  normalize_scores=True,
  forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
  aggregation_method=AggregationMethods.L2,
  label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `nli_model_name` with this config.
attribution = NLIAttribution(model_name=nli_model_name, config=attr_config)

In [None]:
# Run the cross-lingual faithfulness evaluation; `ds` is rebound to the dataset returned by `evaluate`.
(
  ds,
  avg_correlation,
  avg_correlations_per_lang,
  avg_pval,
  avg_pvalues_per_lang,
) = crosslingual_faithfulness.evaluate(attribution, 16, dataset_with_alignments=ds)

In [None]:
# Report the scores of the evaluation run above.
show_results(
  avg_correlation=avg_correlation,
  avg_correlations_per_lang=avg_correlations_per_lang,
  avg_pval=avg_pval,
  avg_pvalues_per_lang=avg_pvalues_per_lang,
)

## wrt Loss with MEAN

In [None]:
# Saliency | forward scoring: LOSS | aggregation: MEAN
attr_config = AttributionConfig(
  AttributionMethods.Saliency,
  remove_pad_tokens=True,
  remove_cls_token=True,
  remove_sep_tokens=True,
  join_subwords=True,
  normalize_scores=True,
  forward_scoring=ForwardScoringOptions.LOSS,
  aggregation_method=AggregationMethods.MEAN,
  label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `nli_model_name` with this config.
attribution = NLIAttribution(model_name=nli_model_name, config=attr_config)

In [None]:
# Run the cross-lingual faithfulness evaluation; `ds` is rebound to the dataset returned by `evaluate`.
(
  ds,
  avg_correlation,
  avg_correlations_per_lang,
  avg_pval,
  avg_pvalues_per_lang,
) = crosslingual_faithfulness.evaluate(attribution, 16, dataset_with_alignments=ds)

In [None]:
# Report the scores of the evaluation run above.
show_results(
  avg_correlation=avg_correlation,
  avg_correlations_per_lang=avg_correlations_per_lang,
  avg_pval=avg_pval,
  avg_pvalues_per_lang=avg_pvalues_per_lang,
)

## wrt Loss with L2

In [None]:
# Saliency | forward scoring: LOSS | aggregation: L2
attr_config = AttributionConfig(
  AttributionMethods.Saliency,
  remove_pad_tokens=True,
  remove_cls_token=True,
  remove_sep_tokens=True,
  join_subwords=True,
  normalize_scores=True,
  forward_scoring=ForwardScoringOptions.LOSS,
  aggregation_method=AggregationMethods.L2,
  label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `nli_model_name` with this config.
attribution = NLIAttribution(model_name=nli_model_name, config=attr_config)

In [None]:
# Run the cross-lingual faithfulness evaluation; `ds` is rebound to the dataset returned by `evaluate`.
(
  ds,
  avg_correlation,
  avg_correlations_per_lang,
  avg_pval,
  avg_pvalues_per_lang,
) = crosslingual_faithfulness.evaluate(attribution, 16, dataset_with_alignments=ds)

In [None]:
# Report the scores of the evaluation run above.
show_results(
  avg_correlation=avg_correlation,
  avg_correlations_per_lang=avg_correlations_per_lang,
  avg_pval=avg_pval,
  avg_pvalues_per_lang=avg_pvalues_per_lang,
)

# Activation

## wrt Top prediction with MEAN

In [None]:
# Activation | forward scoring: TOP_PREDICTION | aggregation: MEAN
attr_config = AttributionConfig(
  AttributionMethods.Activation,
  remove_pad_tokens=True,
  remove_cls_token=True,
  remove_sep_tokens=True,
  join_subwords=True,
  normalize_scores=True,
  forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
  aggregation_method=AggregationMethods.MEAN,
  label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `nli_model_name` with this config.
attribution = NLIAttribution(model_name=nli_model_name, config=attr_config)

  "Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 "


In [None]:
# Run the cross-lingual faithfulness evaluation; `ds` is rebound to the dataset returned by `evaluate`.
(
  ds,
  avg_correlation,
  avg_correlations_per_lang,
  avg_pval,
  avg_pvalues_per_lang,
) = crosslingual_faithfulness.evaluate(attribution, 16, dataset_with_alignments=ds)

In [None]:
# Report the scores of the evaluation run above.
show_results(
  avg_correlation=avg_correlation,
  avg_correlations_per_lang=avg_correlations_per_lang,
  avg_pval=avg_pval,
  avg_pvalues_per_lang=avg_pvalues_per_lang,
)

## wrt Top prediction with L2

In [None]:
# Activation | forward scoring: TOP_PREDICTION | aggregation: L2
attr_config = AttributionConfig(
  AttributionMethods.Activation,
  remove_pad_tokens=True,
  remove_cls_token=True,
  remove_sep_tokens=True,
  join_subwords=True,
  normalize_scores=True,
  forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
  aggregation_method=AggregationMethods.L2,
  label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `nli_model_name` with this config.
attribution = NLIAttribution(model_name=nli_model_name, config=attr_config)

In [None]:
# Run the cross-lingual faithfulness evaluation; `ds` is rebound to the dataset returned by `evaluate`.
(
  ds,
  avg_correlation,
  avg_correlations_per_lang,
  avg_pval,
  avg_pvalues_per_lang,
) = crosslingual_faithfulness.evaluate(attribution, 16, dataset_with_alignments=ds)

In [None]:
# Report the scores of the evaluation run above.
show_results(
  avg_correlation=avg_correlation,
  avg_correlations_per_lang=avg_correlations_per_lang,
  avg_pval=avg_pval,
  avg_pvalues_per_lang=avg_pvalues_per_lang,
)

## wrt Loss with MEAN

In [None]:
# Activation | forward scoring: LOSS | aggregation: MEAN
attr_config = AttributionConfig(
  AttributionMethods.Activation,
  remove_pad_tokens=True,
  remove_cls_token=True,
  remove_sep_tokens=True,
  join_subwords=True,
  normalize_scores=True,
  forward_scoring=ForwardScoringOptions.LOSS,
  aggregation_method=AggregationMethods.MEAN,
  label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `nli_model_name` with this config.
attribution = NLIAttribution(model_name=nli_model_name, config=attr_config)

In [None]:
# Run the cross-lingual faithfulness evaluation; `ds` is rebound to the dataset returned by `evaluate`.
(
  ds,
  avg_correlation,
  avg_correlations_per_lang,
  avg_pval,
  avg_pvalues_per_lang,
) = crosslingual_faithfulness.evaluate(attribution, 16, dataset_with_alignments=ds)

In [None]:
# Report the scores of the evaluation run above.
show_results(
  avg_correlation=avg_correlation,
  avg_correlations_per_lang=avg_correlations_per_lang,
  avg_pval=avg_pval,
  avg_pvalues_per_lang=avg_pvalues_per_lang,
)

## wrt Loss with L2

In [None]:
# Activation | forward scoring: LOSS | aggregation: L2
attr_config = AttributionConfig(
  AttributionMethods.Activation,
  remove_pad_tokens=True,
  remove_cls_token=True,
  remove_sep_tokens=True,
  join_subwords=True,
  normalize_scores=True,
  forward_scoring=ForwardScoringOptions.LOSS,
  aggregation_method=AggregationMethods.L2,
  label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `nli_model_name` with this config.
attribution = NLIAttribution(model_name=nli_model_name, config=attr_config)

In [None]:
# Run the cross-lingual faithfulness evaluation; `ds` is rebound to the dataset returned by `evaluate`.
(
  ds,
  avg_correlation,
  avg_correlations_per_lang,
  avg_pval,
  avg_pvalues_per_lang,
) = crosslingual_faithfulness.evaluate(attribution, 16, dataset_with_alignments=ds)

In [None]:
# Report the scores of the evaluation run above.
show_results(
  avg_correlation=avg_correlation,
  avg_correlations_per_lang=avg_correlations_per_lang,
  avg_pval=avg_pval,
  avg_pvalues_per_lang=avg_pvalues_per_lang,
)

# Guided BP

## wrt Top prediction with MEAN

In [None]:
# GuidedBackprop | forward scoring: TOP_PREDICTION | aggregation: MEAN
attr_config = AttributionConfig(
  AttributionMethods.GuidedBackprop,
  remove_pad_tokens=True,
  remove_cls_token=True,
  remove_sep_tokens=True,
  join_subwords=True,
  normalize_scores=True,
  forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
  aggregation_method=AggregationMethods.MEAN,
  label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `nli_model_name` with this config.
attribution = NLIAttribution(model_name=nli_model_name, config=attr_config)

In [None]:
# Run the cross-lingual faithfulness evaluation; `ds` is rebound to the dataset returned by `evaluate`.
(
  ds,
  avg_correlation,
  avg_correlations_per_lang,
  avg_pval,
  avg_pvalues_per_lang,
) = crosslingual_faithfulness.evaluate(attribution, 16, dataset_with_alignments=ds)

In [None]:
# Report the scores of the evaluation run above.
show_results(
  avg_correlation=avg_correlation,
  avg_correlations_per_lang=avg_correlations_per_lang,
  avg_pval=avg_pval,
  avg_pvalues_per_lang=avg_pvalues_per_lang,
)

## wrt Top prediction with L2

In [None]:
# GuidedBackprop | forward scoring: TOP_PREDICTION | aggregation: L2
attr_config = AttributionConfig(
  AttributionMethods.GuidedBackprop,
  remove_pad_tokens=True,
  remove_cls_token=True,
  remove_sep_tokens=True,
  join_subwords=True,
  normalize_scores=True,
  forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
  aggregation_method=AggregationMethods.L2,
  label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `nli_model_name` with this config.
attribution = NLIAttribution(model_name=nli_model_name, config=attr_config)

In [None]:
# Run the cross-lingual faithfulness evaluation; `ds` is rebound to the dataset returned by `evaluate`.
(
  ds,
  avg_correlation,
  avg_correlations_per_lang,
  avg_pval,
  avg_pvalues_per_lang,
) = crosslingual_faithfulness.evaluate(attribution, 16, dataset_with_alignments=ds)

In [None]:
# Report the scores of the evaluation run above.
show_results(
  avg_correlation=avg_correlation,
  avg_correlations_per_lang=avg_correlations_per_lang,
  avg_pval=avg_pval,
  avg_pvalues_per_lang=avg_pvalues_per_lang,
)

## wrt Loss with MEAN

In [None]:
# GuidedBackprop | forward scoring: LOSS | aggregation: MEAN
attr_config = AttributionConfig(
  AttributionMethods.GuidedBackprop,
  remove_pad_tokens=True,
  remove_cls_token=True,
  remove_sep_tokens=True,
  join_subwords=True,
  normalize_scores=True,
  forward_scoring=ForwardScoringOptions.LOSS,
  aggregation_method=AggregationMethods.MEAN,
  label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `nli_model_name` with this config.
attribution = NLIAttribution(model_name=nli_model_name, config=attr_config)

In [None]:
# Run the cross-lingual faithfulness evaluation; `ds` is rebound to the dataset returned by `evaluate`.
(
  ds,
  avg_correlation,
  avg_correlations_per_lang,
  avg_pval,
  avg_pvalues_per_lang,
) = crosslingual_faithfulness.evaluate(attribution, 16, dataset_with_alignments=ds)

In [None]:
# Report the scores of the evaluation run above.
show_results(
  avg_correlation=avg_correlation,
  avg_correlations_per_lang=avg_correlations_per_lang,
  avg_pval=avg_pval,
  avg_pvalues_per_lang=avg_pvalues_per_lang,
)

## wrt Loss with L2

In [None]:
# GuidedBackprop | forward scoring: LOSS | aggregation: L2
attr_config = AttributionConfig(
  AttributionMethods.GuidedBackprop,
  remove_pad_tokens=True,
  remove_cls_token=True,
  remove_sep_tokens=True,
  join_subwords=True,
  normalize_scores=True,
  forward_scoring=ForwardScoringOptions.LOSS,
  aggregation_method=AggregationMethods.L2,
  label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `nli_model_name` with this config.
attribution = NLIAttribution(model_name=nli_model_name, config=attr_config)

In [None]:
# Run the cross-lingual faithfulness evaluation; `ds` is rebound to the dataset returned by `evaluate`.
(
  ds,
  avg_correlation,
  avg_correlations_per_lang,
  avg_pval,
  avg_pvalues_per_lang,
) = crosslingual_faithfulness.evaluate(attribution, 16, dataset_with_alignments=ds)

In [None]:
# Report the scores of the evaluation run above.
show_results(
  avg_correlation=avg_correlation,
  avg_correlations_per_lang=avg_correlations_per_lang,
  avg_pval=avg_pval,
  avg_pvalues_per_lang=avg_pvalues_per_lang,
)

# Occlusion

## General Summary

| Method | $\rho$ (p-val) | $\rho$ (p-val) [top-5] |
|--------|-----|----|
|__wrt Top prediction__|
|Occlusion ($\Sigma$) | 0.1514 (0.3381) | 0.2234 (0.2722) |
|__wrt Loss__|
|Occlusion ($\Sigma$) | in progress | in progress |

## wrt Top prediction with SUM

In [None]:
# Occlusion | forward scoring: TOP_PREDICTION | aggregation: SUM
attr_config = AttributionConfig(
  AttributionMethods.Occlusion,
  remove_pad_tokens=True,
  remove_cls_token=True,
  remove_sep_tokens=True,
  join_subwords=True,
  normalize_scores=True,
  forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
  aggregation_method=AggregationMethods.SUM,
  label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `nli_model_name` with this config.
attribution = NLIAttribution(model_name=nli_model_name, config=attr_config)

In [None]:
# Run the cross-lingual faithfulness evaluation; `ds` is rebound to the dataset returned by `evaluate`.
# The extra `sliding_window_shapes=(1, 768)` is only passed for Occlusion
# (768 presumably matches the model's hidden size — TODO confirm).
(
  ds,
  avg_correlation,
  avg_correlations_per_lang,
  avg_pval,
  avg_pvalues_per_lang,
) = crosslingual_faithfulness.evaluate(
  attribution,
  16,
  dataset_with_alignments=ds,
  sliding_window_shapes=(1, 768),
)

In [None]:
# Report the scores of the evaluation run above.
show_results(
  avg_correlation=avg_correlation,
  avg_correlations_per_lang=avg_correlations_per_lang,
  avg_pval=avg_pval,
  avg_pvalues_per_lang=avg_pvalues_per_lang,
)

## wrt Loss with SUM

In [None]:
# Occlusion | forward scoring: LOSS | aggregation: SUM
attr_config = AttributionConfig(
  AttributionMethods.Occlusion,
  remove_pad_tokens=True,
  remove_cls_token=True,
  remove_sep_tokens=True,
  join_subwords=True,
  normalize_scores=True,
  forward_scoring=ForwardScoringOptions.LOSS,
  aggregation_method=AggregationMethods.SUM,
  label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `nli_model_name` with this config.
attribution = NLIAttribution(model_name=nli_model_name, config=attr_config)

In [None]:
# Run the cross-lingual faithfulness evaluation; `ds` is rebound to the dataset returned by `evaluate`.
# The extra `sliding_window_shapes=(1, 768)` is only passed for Occlusion
# (768 presumably matches the model's hidden size — TODO confirm).
(
  ds,
  avg_correlation,
  avg_correlations_per_lang,
  avg_pval,
  avg_pvalues_per_lang,
) = crosslingual_faithfulness.evaluate(
  attribution,
  16,
  dataset_with_alignments=ds,
  sliding_window_shapes=(1, 768),
)

In [None]:
# Report the scores of the evaluation run above.
show_results(
  avg_correlation=avg_correlation,
  avg_correlations_per_lang=avg_correlations_per_lang,
  avg_pval=avg_pval,
  avg_pvalues_per_lang=avg_pvalues_per_lang,
)

# LIME

## wrt Top prediction with SUM

In [None]:
# LIME | forward scoring: TOP_PREDICTION | aggregation: SUM
attr_config = AttributionConfig(
  AttributionMethods.LIME,
  remove_pad_tokens=True,
  remove_cls_token=True,
  remove_sep_tokens=True,
  join_subwords=True,
  normalize_scores=True,
  forward_scoring=ForwardScoringOptions.TOP_PREDICTION,
  aggregation_method=AggregationMethods.SUM,
  label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `nli_model_name` with this config.
attribution = NLIAttribution(model_name=nli_model_name, config=attr_config)

In [None]:
# Install a blanket "ignore" filter: no category or module is given, so every Python warning
# is silenced for the rest of the session.
# NOTE(review): presumably added to quiet noisy output during the LIME runs below — consider
# narrowing the filter (category=/module=) so unrelated warnings stay visible.
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Run the cross-lingual faithfulness evaluation; `ds` is rebound to the dataset returned by `evaluate`.
# Note the second positional argument is 1 here, not the 16 used for the other methods.
(
  ds,
  avg_correlation,
  avg_correlations_per_lang,
  avg_pval,
  avg_pvalues_per_lang,
) = crosslingual_faithfulness.evaluate(attribution, 1, dataset_with_alignments=ds)

In [None]:
# Report the scores of the evaluation run above.
show_results(
  avg_correlation=avg_correlation,
  avg_correlations_per_lang=avg_correlations_per_lang,
  avg_pval=avg_pval,
  avg_pvalues_per_lang=avg_pvalues_per_lang,
)

## wrt Loss with SUM

In [None]:
# LIME | forward scoring: LOSS | aggregation: SUM
attr_config = AttributionConfig(
  AttributionMethods.LIME,
  remove_pad_tokens=True,
  remove_cls_token=True,
  remove_sep_tokens=True,
  join_subwords=True,
  normalize_scores=True,
  forward_scoring=ForwardScoringOptions.LOSS,
  aggregation_method=AggregationMethods.SUM,
  label_names=['entailment', 'neutral', 'contradiction'],
)

# Build the attribution object for `nli_model_name` with this config.
attribution = NLIAttribution(model_name=nli_model_name, config=attr_config)

In [None]:
# Run the cross-lingual faithfulness evaluation; `ds` is rebound to the dataset returned by `evaluate`.
# Note the second positional argument is 1 here, not the 16 used for the other methods.
(
  ds,
  avg_correlation,
  avg_correlations_per_lang,
  avg_pval,
  avg_pvalues_per_lang,
) = crosslingual_faithfulness.evaluate(attribution, 1, dataset_with_alignments=ds)

In [None]:
# Report the scores of the evaluation run above.
show_results(
  avg_correlation=avg_correlation,
  avg_correlations_per_lang=avg_correlations_per_lang,
  avg_pval=avg_pval,
  avg_pvalues_per_lang=avg_pvalues_per_lang,
)