In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the project modules imported below) are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
# Extend the module search path with two directories given relative to the
# kernel's working directory.
# NOTE(review): presumably "../" holds the `xbert` package and
# "../experiments/" holds the `mnli` module imported in the next cell — confirm.
sys.path.append("../")
sys.path.append("../experiments/")

In [3]:
import os

import dill
import pandas as pd
from pathlib import Path
from transformers import RobertaTokenizer, RobertaForSequenceClassification

from xbert.engine import calculate_correlation
from mnli import (read_mnli_dataset, dataset_to_input_instances, get_labels, predict,
                  MNLI_IDX2LABEL, MNLI_LABEL2IDX, OCCLUSION_STRATEGIES, GRAD_STRATEGIES, ALL_STRATEGIES)

In [4]:
from typing import List, Dict, Any


def experiment_load_relevances(experiment_dir: str,
                               relevance_filename: str = "relevances.pkl"):
    """Collect every serialized relevance file found below a directory.

    The directory tree rooted at ``experiment_dir`` is searched recursively
    for files called ``relevance_filename``. Each hit is deserialized with
    ``dill`` and stored under the name of the directory that directly
    contains it.

    Args:
        experiment_dir: Root directory to search.
        relevance_filename: File name to look for in every sub-directory.

    Returns:
        Dict mapping the containing directory's name to the loaded object.
        If two matching files live in directories with the same name, the one
        visited last replaces the earlier entry.
    """
    loaded = {}
    search_pattern = f"**/{relevance_filename}"

    for pickle_path in Path(experiment_dir).glob(search_pattern):
        # NOTE: dill, like pickle, can execute arbitrary code while loading;
        # only point this at result directories you trust.
        with pickle_path.open("rb") as handle:
            payload = dill.load(handle)
        loaded[pickle_path.parent.name] = payload

    return loaded


def experiment_relevance_correlation(relevances: Dict[str, Any],
                                     strategies: List[str] = None,
                                     strategy_name_map: Dict[str, str] = None):
    """Build a pairwise correlation table between relevance strategies.

    Args:
        relevances: Mapping from strategy name to that strategy's relevances,
            in whatever form ``calculate_correlation`` accepts.
        strategies: Strategy names to compare; their order determines the
            row and column order. Falls back to ``ALL_STRATEGIES`` when
            omitted or empty.
        strategy_name_map: Optional mapping from strategy name to the display
            name used for rows and columns. Names without an entry are used
            unchanged.

    Returns:
        A square ``pandas.DataFrame``. The index is named ``"Method"`` and
        holds the display names. The columns are a two-level ``MultiIndex``
        of ``(group, display name)``, where the group is ``"Occlusion"`` for
        members of ``OCCLUSION_STRATEGIES`` and ``"Gradient"`` otherwise.

    Raises:
        KeyError: If a requested strategy has no entry in ``relevances``.
    """
    strategies = list(strategies or ALL_STRATEGIES)
    strategy_name_map = strategy_name_map or {}

    display_names = [strategy_name_map.get(strategy, strategy) for strategy in strategies]

    # Resolve every strategy once, before any correlation is computed, so a
    # missing entry fails immediately instead of part-way through the run.
    selected = []
    missing = []
    for strategy in strategies:
        try:
            selected.append(relevances[strategy])
        except KeyError:
            missing.append(strategy)
    if missing:
        raise KeyError(f"no relevances available for strategies {missing}; "
                       f"available: {list(relevances)}")

    # Row i / column j holds the correlation of strategy i with strategy j.
    matrix = [[calculate_correlation(relevances_a, relevances_b)
               for relevances_b in selected]
              for relevances_a in selected]

    columns = pd.MultiIndex.from_tuples(
        [("Occlusion" if strategy in OCCLUSION_STRATEGIES else "Gradient", display_name)
         for strategy, display_name in zip(strategies, display_names)])

    # Labels are attached explicitly rather than derived from dict keys, so
    # repeated strategies or colliding display names cannot collapse columns
    # and break the column assignment.
    return pd.DataFrame(matrix,
                        index=pd.Index(display_names, name="Method"),
                        columns=columns)

In [6]:
# Device index used when loading the model below.
CUDA_DEVICE = 0 # or -1 if no GPU is available
# Identifier passed to `from_pretrained` for both the tokenizer and the model.
MODEL_NAME = "roberta-large-mnli"

# Directory containing the MNLI TSV files, relative to the kernel's working directory.
MNLI_DATASET_PATH = "../data/glue_data/MNLI/"
# Directory searched for per-strategy relevance files. The default is
# machine-specific; set the MNLI_RESULTS_PATH environment variable to point
# the notebook at another location without editing this cell.
MNLI_RESULTS_PATH = os.environ.get("MNLI_RESULTS_PATH", "/home/christoph/Downloads/mnli/")

In [6]:
# Load the pretrained tokenizer and sequence-classification model named by MODEL_NAME.
tokenizer = RobertaTokenizer.from_pretrained(MODEL_NAME)
# The config cell allows CUDA_DEVICE = -1 for "no GPU", but `Module.to` rejects
# negative device indices; translate the setting into an explicit device string.
model = RobertaForSequenceClassification.from_pretrained(MODEL_NAME).to(
    f"cuda:{CUDA_DEVICE}" if CUDA_DEVICE >= 0 else "cpu")

In [7]:
# Read the "dev_matched.tsv" file from MNLI_DATASET_PATH.
dataset = read_mnli_dataset(os.path.join(MNLI_DATASET_PATH, "dev_matched.tsv"))
# Derive the model input instances and the labels from the same dataset.
# In this notebook they are referenced only by the commented-out
# visualization cells at the bottom.
input_instances = dataset_to_input_instances(dataset)
labels = get_labels(dataset)

In [7]:
# Load every "relevances.pkl" found below MNLI_RESULTS_PATH, keyed by the name
# of the directory containing it. Those names are later used as strategy names.
experiment_relevances = experiment_load_relevances(MNLI_RESULTS_PATH)

In [10]:
# Display name per strategy; the key order fixes the row/column order of the table.
strategy_display_names = {
    "unk": "UNK",
    "resampling": "LM resampling",
    "resampling_std": "LM resampling std",
    "grad": "Grad.",
    "gradxinput": "Grad.*input",
    "saliency": "Saliency",
    "integratedgrad": "Integrated grad.",
}

correlation = experiment_relevance_correlation(
    experiment_relevances,
    strategies=list(strategy_display_names),
    strategy_name_map=strategy_display_names,
)

# Render the table as LaTeX: two decimals per cell, centred group headers.
latex_options = {"float_format": "{:0.2f}".format, "multicolumn_format": "c"}
print(correlation.to_latex(**latex_options))

\begin{tabular}{lrrrrrrr}
\toprule
{} & \multicolumn{3}{c}{Occlusion} & \multicolumn{4}{c}{Gradient} \\
{} &       UNK & LM resampling & LM resampling std &    Grad. & Grad.*input & Saliency & Integrated grad. \\
Method            &           &               &                   &          &             &          &                  \\
\midrule
UNK               &      1.00 &          0.58 &              0.32 &     0.00 &       -0.03 &     0.22 &             0.32 \\
LM resampling     &      0.58 &          1.00 &              0.61 &     0.00 &       -0.03 &     0.27 &             0.28 \\
LM resampling std &      0.32 &          0.61 &              1.00 &    -0.00 &       -0.01 &     0.35 &             0.20 \\
Grad.             &      0.00 &          0.00 &             -0.00 &     1.00 &       -0.00 &     0.00 &             0.00 \\
Grad.*input       &     -0.03 &         -0.03 &             -0.01 &    -0.00 &        1.00 &     0.03 &             0.00 \\
Saliency          &      0.22 &   

In [11]:
# Show the correlation table with the notebook's rich DataFrame display.
correlation

Unnamed: 0_level_0,Occlusion,Occlusion,Occlusion,Gradient,Gradient,Gradient,Gradient
Unnamed: 0_level_1,UNK,LM resampling,LM resampling std,Grad.,Grad.*input,Saliency,Integrated grad.
Method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
UNK,1.0,0.580645,0.316609,0.002294,-0.034539,0.22277,0.323415
LM resampling,0.580645,1.0,0.61044,0.003561,-0.027769,0.272469,0.275429
LM resampling std,0.316609,0.61044,1.0,-0.002372,-0.005132,0.346814,0.197998
Grad.,0.002294,0.003561,-0.002372,1.0,-0.000932,0.003702,0.002953
Grad.*input,-0.034539,-0.027769,-0.005132,-0.000932,1.0,0.029389,0.002813
Saliency,0.22277,0.272469,0.346814,0.003702,0.029389,1.0,0.169672
Integrated grad.,0.323415,0.275429,0.197998,0.002953,0.002813,0.169672,1.0


In [13]:
# NOTE(review): the recorded output of this cell has no "LM resampling std"
# row/column, unlike the seven-strategy call earlier in the notebook. It looks
# like it was produced by a different call — re-run the notebook to confirm.
correlation

Unnamed: 0_level_0,Occlusion,Occlusion,Gradient,Gradient,Gradient,Gradient
Unnamed: 0_level_1,UNK,LM resampling,Grad.,Grad.*input,Saliency,Integrated grad.
Method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
UNK,1.0,0.580645,0.002294,-0.034539,0.22277,0.323415
LM resampling,0.580645,1.0,0.003561,-0.027769,0.272469,0.275429
Grad.,0.002294,0.003561,1.0,-0.000932,0.003702,0.002953
Grad.*input,-0.034539,-0.027769,-0.000932,1.0,0.029389,0.002813
Saliency,0.22277,0.272469,0.003702,0.029389,1.0,0.169672
Integrated grad.,0.323415,0.275429,0.002953,0.002813,0.169672,1.0


In [34]:
# from IPython.core.display import HTML
# from xbert.visualization import visualize_relevances

In [42]:
# n = 5
# labels_true = labels[:n]
# labels_pred = [MNLI_IDX2LABEL[predict(instance, model, tokenizer, CUDA_DEVICE)[0].argmax().item()] for instance in input_instances[:n]]

In [43]:
# viz = []
# for strategy in ALL_STRATEGIES:
#     print(strategy)
#     viz.append(visualize_relevances(input_instances[:n], experiment_relevances[strategy], labels_true, labels_pred))
# HTML("</br></br>".join(viz))

unk
resampling
grad
gradxinput
saliency
integratedgrad
