In [1]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.6.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
import shap
import numpy as np
import time
import pandas as pd
from tqdm import tqdm

In [None]:
# Configuration
access_token = ""  # Replace with your token
model_name = "meta-llama/Llama-3.1-8B-Instruct"
max_length = 256  # Reduced sequence length for stability

# Label mapping
label_map = {
    0: 'entailment',
    1: 'neutral',
    2: 'contradiction'
}

# Load model and tokenizer
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(model_name, token=access_token)
model = AutoModelForCausalLM.from_pretrained(model_name, token=access_token,
                                           torch_dtype=torch.float16,
                                           device_map="auto")

# Fix tokenizer settings
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

model.eval()

Loading model...


tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/843 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 2048)
    (layers): ModuleList(
      (0-15): 16 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=512, bias=False)
          (v_proj): Linear(in_features=2048, out_features=512, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (up_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((2048,), eps=1e-05)
    (rotary_emb):

In [None]:
# Load dataset
num_samples = 500
print("Loading dataset...")
dataset = load_dataset("esnli", split=f"validation[:{num_samples}]")

Loading dataset...


In [None]:
# Improved prediction function
def safe_predict(texts):
    try:
        if isinstance(texts, str):
            texts = [texts]
        elif isinstance(texts, np.ndarray):
            texts = texts.tolist()

        # Ensure we have a list of strings
        if not all(isinstance(t, str) for t in texts):
            texts = [str(t) for t in texts]

        inputs = tokenizer(texts, return_tensors="pt",
                         padding=True, truncation=True,
                         max_length=max_length).to(model.device)

        with torch.no_grad():
            outputs = model(**inputs)

        # Return logits for the last token only
        return outputs.logits[:, -1, :3].cpu().numpy()  # Only first 3 dimensions for our labels

    except Exception as e:
        print(f"\nPrediction error: {str(e)}")
        return np.zeros((len(texts), 3))  # Return neutral predictions on error


In [None]:
# Robust SHAP explainer
def get_shap_values(text):
    try:
        # Create explainer with safety checks
        masker = shap.maskers.Text(tokenizer, mask_token=tokenizer.eos_token)
        explainer = shap.Explainer(
            safe_predict,
            masker,
            algorithm="permutation",
            max_evals=200,  # Reduced for speed
            output_names=list(label_map.values()))

        with torch.no_grad():
            shap_values = explainer([text])
        return shap_values
    except Exception as e:
        print(f"\nSHAP error: {str(e)}")
        return None


In [None]:
# Robust explanation generator
def generate_explanation(prompt, max_new_tokens=100):
    try:
        inputs = tokenizer(prompt, return_tensors="pt",
                         max_length=max_length,
                         truncation=True).to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                pad_token_id=tokenizer.eos_token_id,
                do_sample=True,
                temperature=0.7,
                top_p=0.9
            )

        full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Extract only the new generated part
        input_length = inputs.input_ids.shape[1]
        return full_text[input_length:].strip()
    except Exception as e:
        print(f"\nGeneration error: {str(e)}")
        return "Error generating explanation"

In [None]:
# Main processing loop
results_c = []
results_ic = []
print(f"\nProcessing {num_samples} samples...")
start_time = time.time()

for i, example in enumerate(tqdm(dataset, desc="Processing samples")):
    if i >= num_samples:
        break

    premise = example['premise']
    hypothesis = example['hypothesis']
    text = f"Premise: {premise}\nHypothesis: {hypothesis}"
    gt_label = label_map[example['label']]

    # Get prediction
    pred_prompt = f"""Classify this as entailment, neutral, or contradiction:

Premise: {premise}
Hypothesis: {hypothesis}
Answer:"""

    pred_label = generate_explanation(pred_prompt, max_new_tokens=10)
    pred_label = pred_label.lower().strip()

    # Clean and validate prediction
    pred_label = pred_label.replace('"', '').replace("'", "")
    if "entail" in pred_label:
        pred_label = "entailment"
    elif "neutral" in pred_label:
        pred_label = "neutral"
    elif "contradict" in pred_label:
        pred_label = "contradiction"
    else:
        pred_label = "neutral"  # Default fallback

    print(f"{gt_label} --- {pred_label}")

    # Get SHAP values
    shap_values = get_shap_values(text)
    shap_str = str([(token, values.tolist()) for token, values in zip(shap_values.data[0], shap_values.values[0])])

    if pred_label == gt_label:
      # Store results
      results_c.append({
        'premise': premise,
        'hypothesis': hypothesis,
        'gt_label': gt_label,
        'pred_label': pred_label,
        'shap_value': shap_str,
        'human_explanation': example.get('explanation_1', '')
      })

    else:
      # Store results
      results_ic.append({
        'premise': premise,
        'hypothesis': hypothesis,
        'gt_label': gt_label,
        'pred_label': pred_label,
        'shap_value': shap_str,
        'human_explanation': example.get('explanation_1', '')
      })



Processing 500 samples...


Processing samples:   0%|          | 0/500 [00:00<?, ?it/s]

neutral --- entailment


Processing samples:   0%|          | 1/500 [00:00<08:11,  1.01it/s]

entailment --- contradiction


Processing samples:   0%|          | 2/500 [00:02<10:21,  1.25s/it]

contradiction --- contradiction


Processing samples:   1%|          | 3/500 [00:03<10:48,  1.31s/it]

entailment --- entailment


Processing samples:   1%|          | 4/500 [00:04<09:26,  1.14s/it]

neutral --- contradiction


Processing samples:   1%|          | 5/500 [00:05<08:44,  1.06s/it]

contradiction --- entailment


Processing samples:   1%|          | 6/500 [00:06<08:12,  1.00it/s]

contradiction --- neutral


Processing samples:   1%|▏         | 7/500 [00:07<08:23,  1.02s/it]

neutral --- contradiction


Processing samples:   2%|▏         | 8/500 [00:09<10:26,  1.27s/it]

entailment --- entailment


Processing samples:   2%|▏         | 9/500 [00:10<10:39,  1.30s/it]

entailment --- entailment


Processing samples:   2%|▏         | 10/500 [00:12<11:09,  1.37s/it]

contradiction --- contradiction


Processing samples:   2%|▏         | 11/500 [00:13<12:05,  1.48s/it]

neutral --- entailment


Processing samples:   2%|▏         | 12/500 [00:16<14:32,  1.79s/it]

neutral --- entailment


Processing samples:   3%|▎         | 13/500 [00:17<13:22,  1.65s/it]

entailment --- entailment


Processing samples:   3%|▎         | 14/500 [00:18<11:58,  1.48s/it]

contradiction --- contradiction


Processing samples:   3%|▎         | 15/500 [00:20<11:15,  1.39s/it]

entailment --- entailment


Processing samples:   3%|▎         | 16/500 [00:21<12:09,  1.51s/it]

neutral --- neutral


Processing samples:   3%|▎         | 17/500 [00:23<12:42,  1.58s/it]

contradiction --- entailment


Processing samples:   4%|▎         | 18/500 [00:25<12:47,  1.59s/it]

entailment --- entailment


Processing samples:   4%|▍         | 19/500 [00:26<12:35,  1.57s/it]

contradiction --- contradiction


Processing samples:   4%|▍         | 20/500 [00:28<12:46,  1.60s/it]

contradiction --- contradiction


Processing samples:   4%|▍         | 21/500 [00:29<11:51,  1.49s/it]

entailment --- entailment


Processing samples:   4%|▍         | 22/500 [00:30<10:56,  1.37s/it]

neutral --- entailment


Processing samples:   5%|▍         | 23/500 [00:31<10:33,  1.33s/it]

contradiction --- entailment


Processing samples:   5%|▍         | 24/500 [00:33<10:13,  1.29s/it]

neutral --- entailment


Processing samples:   5%|▌         | 25/500 [00:34<09:56,  1.26s/it]

entailment --- entailment


Processing samples:   5%|▌         | 26/500 [00:35<09:35,  1.21s/it]

contradiction --- entailment


Processing samples:   5%|▌         | 27/500 [00:36<09:17,  1.18s/it]

contradiction --- entailment


Processing samples:   6%|▌         | 28/500 [00:37<09:00,  1.14s/it]

contradiction --- contradiction


Processing samples:   6%|▌         | 29/500 [00:38<09:23,  1.20s/it]

contradiction --- contradiction


Processing samples:   6%|▌         | 30/500 [00:40<11:16,  1.44s/it]

entailment --- entailment


Processing samples:   6%|▌         | 31/500 [00:42<11:45,  1.50s/it]

contradiction --- neutral


Processing samples:   6%|▋         | 32/500 [00:44<11:42,  1.50s/it]

neutral --- entailment


Processing samples:   7%|▋         | 33/500 [00:45<12:15,  1.57s/it]

entailment --- entailment


Processing samples:   7%|▋         | 34/500 [00:47<11:49,  1.52s/it]

contradiction --- entailment


Processing samples:   7%|▋         | 35/500 [00:48<11:28,  1.48s/it]

neutral --- contradiction


Processing samples:   7%|▋         | 36/500 [00:50<11:30,  1.49s/it]

entailment --- neutral


Processing samples:   7%|▋         | 37/500 [00:52<12:35,  1.63s/it]

contradiction --- entailment


Processing samples:   8%|▊         | 38/500 [00:54<14:25,  1.87s/it]

neutral --- neutral


Processing samples:   8%|▊         | 39/500 [00:56<15:10,  1.97s/it]

contradiction --- contradiction


Processing samples:   8%|▊         | 40/500 [00:58<14:01,  1.83s/it]

neutral --- neutral


Processing samples:   8%|▊         | 41/500 [00:59<13:16,  1.74s/it]

neutral --- entailment


Processing samples:   8%|▊         | 42/500 [01:01<13:03,  1.71s/it]

contradiction --- entailment


Processing samples:   9%|▊         | 43/500 [01:02<11:54,  1.56s/it]

entailment --- neutral


Processing samples:   9%|▉         | 44/500 [01:03<11:03,  1.46s/it]

neutral --- entailment


Processing samples:   9%|▉         | 45/500 [01:07<16:58,  2.24s/it]

contradiction --- entailment


Processing samples:   9%|▉         | 46/500 [01:11<19:47,  2.61s/it]

entailment --- entailment


Processing samples:   9%|▉         | 47/500 [01:13<18:46,  2.49s/it]

neutral --- contradiction


Processing samples:  10%|▉         | 48/500 [01:15<16:43,  2.22s/it]

neutral --- entailment


Processing samples:  10%|▉         | 49/500 [01:16<15:47,  2.10s/it]

contradiction --- entailment


Processing samples:  10%|█         | 50/500 [01:19<16:08,  2.15s/it]

entailment --- contradiction


Processing samples:  10%|█         | 51/500 [01:20<14:53,  1.99s/it]

neutral --- neutral


Processing samples:  10%|█         | 52/500 [01:22<13:59,  1.87s/it]

contradiction --- neutral


Processing samples:  11%|█         | 53/500 [01:24<13:26,  1.80s/it]

entailment --- neutral


Processing samples:  11%|█         | 54/500 [01:25<13:10,  1.77s/it]

neutral --- entailment


Processing samples:  11%|█         | 55/500 [01:27<13:24,  1.81s/it]

entailment --- entailment


Processing samples:  11%|█         | 56/500 [01:28<12:08,  1.64s/it]

contradiction --- contradiction


Processing samples:  11%|█▏        | 57/500 [01:30<11:43,  1.59s/it]

entailment --- neutral


Processing samples:  12%|█▏        | 58/500 [01:31<11:35,  1.57s/it]

contradiction --- entailment


Processing samples:  12%|█▏        | 59/500 [01:33<11:01,  1.50s/it]

neutral --- contradiction


Processing samples:  12%|█▏        | 60/500 [01:34<10:48,  1.47s/it]

contradiction --- entailment


Processing samples:  12%|█▏        | 61/500 [01:35<09:53,  1.35s/it]

neutral --- entailment


Processing samples:  12%|█▏        | 62/500 [01:36<09:24,  1.29s/it]

entailment --- entailment


Processing samples:  13%|█▎        | 63/500 [01:38<09:22,  1.29s/it]

contradiction --- neutral


Processing samples:  13%|█▎        | 64/500 [01:39<09:15,  1.28s/it]

neutral --- entailment


Processing samples:  13%|█▎        | 65/500 [01:40<08:37,  1.19s/it]

entailment --- entailment


Processing samples:  13%|█▎        | 66/500 [01:41<08:10,  1.13s/it]

contradiction --- entailment


Processing samples:  13%|█▎        | 67/500 [01:42<08:37,  1.19s/it]

neutral --- entailment


Processing samples:  14%|█▎        | 68/500 [01:43<08:36,  1.20s/it]

contradiction --- contradiction


Processing samples:  14%|█▍        | 69/500 [01:45<08:49,  1.23s/it]

entailment --- entailment


Processing samples:  14%|█▍        | 70/500 [01:46<08:43,  1.22s/it]

neutral --- neutral


Processing samples:  14%|█▍        | 71/500 [01:47<09:11,  1.29s/it]

contradiction --- entailment


Processing samples:  14%|█▍        | 72/500 [01:48<08:30,  1.19s/it]

entailment --- neutral


Processing samples:  15%|█▍        | 73/500 [01:49<08:00,  1.12s/it]

neutral --- entailment


Processing samples:  15%|█▍        | 74/500 [01:50<07:39,  1.08s/it]

contradiction --- neutral


Processing samples:  15%|█▌        | 75/500 [01:51<07:43,  1.09s/it]

entailment --- neutral


Processing samples:  15%|█▌        | 76/500 [01:53<07:45,  1.10s/it]

neutral --- entailment


Processing samples:  15%|█▌        | 77/500 [01:54<08:14,  1.17s/it]

neutral --- entailment


Processing samples:  16%|█▌        | 78/500 [01:55<08:56,  1.27s/it]

contradiction --- entailment


Processing samples:  16%|█▌        | 79/500 [01:56<08:17,  1.18s/it]

entailment --- neutral


Processing samples:  16%|█▌        | 80/500 [01:57<07:50,  1.12s/it]

neutral --- entailment


Processing samples:  16%|█▌        | 81/500 [01:58<07:30,  1.07s/it]

entailment --- neutral


Processing samples:  16%|█▋        | 82/500 [01:59<07:22,  1.06s/it]

contradiction --- entailment


Processing samples:  17%|█▋        | 83/500 [02:01<07:47,  1.12s/it]

contradiction --- neutral


Processing samples:  17%|█▋        | 84/500 [02:02<07:44,  1.12s/it]

entailment --- contradiction


Processing samples:  17%|█▋        | 85/500 [02:03<07:37,  1.10s/it]

neutral --- neutral


Processing samples:  17%|█▋        | 86/500 [02:04<07:44,  1.12s/it]

entailment --- contradiction


Processing samples:  17%|█▋        | 87/500 [02:05<07:55,  1.15s/it]

entailment --- entailment


Processing samples:  18%|█▊        | 88/500 [02:07<08:43,  1.27s/it]

contradiction --- neutral


Processing samples:  18%|█▊        | 89/500 [02:08<08:40,  1.27s/it]

neutral --- entailment


Processing samples:  18%|█▊        | 90/500 [02:09<08:24,  1.23s/it]

contradiction --- entailment


Processing samples:  18%|█▊        | 91/500 [02:10<08:02,  1.18s/it]

entailment --- entailment


Processing samples:  18%|█▊        | 92/500 [02:11<07:45,  1.14s/it]

neutral --- entailment


Processing samples:  19%|█▊        | 93/500 [02:12<07:53,  1.16s/it]

contradiction --- entailment


Processing samples:  19%|█▉        | 94/500 [02:14<07:55,  1.17s/it]

entailment --- entailment


Processing samples:  19%|█▉        | 95/500 [02:15<07:57,  1.18s/it]

entailment --- entailment


Processing samples:  19%|█▉        | 96/500 [02:16<07:39,  1.14s/it]

contradiction --- entailment


Processing samples:  19%|█▉        | 97/500 [02:17<08:02,  1.20s/it]

entailment --- entailment


Processing samples:  20%|█▉        | 98/500 [02:19<08:17,  1.24s/it]

neutral --- neutral


Processing samples:  20%|█▉        | 99/500 [02:20<08:13,  1.23s/it]

contradiction --- entailment


Processing samples:  20%|██        | 100/500 [02:21<07:56,  1.19s/it]

entailment --- neutral


Processing samples:  20%|██        | 101/500 [02:22<07:43,  1.16s/it]

neutral --- entailment


Processing samples:  20%|██        | 102/500 [02:23<07:45,  1.17s/it]

contradiction --- entailment


Processing samples:  21%|██        | 103/500 [02:24<07:34,  1.14s/it]

entailment --- entailment


Processing samples:  21%|██        | 104/500 [02:25<07:37,  1.16s/it]

entailment --- neutral


Processing samples:  21%|██        | 105/500 [02:26<07:13,  1.10s/it]

neutral --- entailment


Processing samples:  21%|██        | 106/500 [02:27<06:57,  1.06s/it]

contradiction --- neutral


Processing samples:  21%|██▏       | 107/500 [02:28<06:44,  1.03s/it]

neutral --- neutral


Processing samples:  22%|██▏       | 108/500 [02:29<06:42,  1.03s/it]

contradiction --- entailment


Processing samples:  22%|██▏       | 109/500 [02:31<07:45,  1.19s/it]

entailment --- contradiction


Processing samples:  22%|██▏       | 110/500 [02:32<08:02,  1.24s/it]

entailment --- neutral


Processing samples:  22%|██▏       | 111/500 [02:33<07:40,  1.19s/it]

neutral --- entailment


Processing samples:  22%|██▏       | 112/500 [02:34<07:25,  1.15s/it]

contradiction --- entailment


Processing samples:  23%|██▎       | 113/500 [02:35<07:23,  1.15s/it]

contradiction --- entailment


Processing samples:  23%|██▎       | 114/500 [02:37<07:35,  1.18s/it]

entailment --- entailment


Processing samples:  23%|██▎       | 115/500 [02:38<07:30,  1.17s/it]

neutral --- entailment


Processing samples:  23%|██▎       | 116/500 [02:39<07:36,  1.19s/it]

entailment --- entailment


Processing samples:  23%|██▎       | 117/500 [02:40<07:35,  1.19s/it]

entailment --- entailment


Processing samples:  24%|██▎       | 118/500 [02:42<07:52,  1.24s/it]

contradiction --- contradiction


Processing samples:  24%|██▍       | 119/500 [02:43<08:36,  1.36s/it]

contradiction --- neutral


Processing samples:  24%|██▍       | 120/500 [02:44<08:01,  1.27s/it]

contradiction --- entailment


Processing samples:  24%|██▍       | 121/500 [02:45<07:36,  1.20s/it]

entailment --- entailment


Processing samples:  24%|██▍       | 122/500 [02:46<07:18,  1.16s/it]

entailment --- entailment


Processing samples:  25%|██▍       | 123/500 [02:48<07:30,  1.20s/it]

neutral --- entailment


Processing samples:  25%|██▍       | 124/500 [02:49<07:24,  1.18s/it]

contradiction --- neutral


Processing samples:  25%|██▌       | 125/500 [02:50<07:24,  1.19s/it]

neutral --- neutral


Processing samples:  25%|██▌       | 126/500 [02:52<08:29,  1.36s/it]

contradiction --- neutral


Processing samples:  25%|██▌       | 127/500 [02:53<08:18,  1.34s/it]

entailment --- neutral


Processing samples:  26%|██▌       | 128/500 [02:55<08:35,  1.39s/it]

entailment --- entailment


Processing samples:  26%|██▌       | 129/500 [02:56<08:22,  1.36s/it]

contradiction --- entailment


Processing samples:  26%|██▌       | 130/500 [02:57<07:58,  1.29s/it]

neutral --- neutral


Processing samples:  26%|██▌       | 131/500 [02:58<07:49,  1.27s/it]

entailment --- contradiction


Processing samples:  26%|██▋       | 132/500 [02:59<07:33,  1.23s/it]

contradiction --- entailment


Processing samples:  27%|██▋       | 133/500 [03:01<07:23,  1.21s/it]

neutral --- neutral


Processing samples:  27%|██▋       | 134/500 [03:02<07:14,  1.19s/it]

neutral --- entailment


Processing samples:  27%|██▋       | 135/500 [03:03<07:09,  1.18s/it]

contradiction --- entailment


Processing samples:  27%|██▋       | 136/500 [03:04<06:51,  1.13s/it]

neutral --- neutral


Processing samples:  27%|██▋       | 137/500 [03:05<06:55,  1.14s/it]

entailment --- neutral


Processing samples:  28%|██▊       | 138/500 [03:06<07:18,  1.21s/it]

contradiction --- entailment


Processing samples:  28%|██▊       | 139/500 [03:08<07:46,  1.29s/it]

neutral --- contradiction


Processing samples:  28%|██▊       | 140/500 [03:09<07:54,  1.32s/it]

neutral --- entailment


Processing samples:  28%|██▊       | 141/500 [03:11<08:18,  1.39s/it]

neutral --- entailment


Processing samples:  28%|██▊       | 142/500 [03:12<08:25,  1.41s/it]

neutral --- entailment


Processing samples:  29%|██▊       | 143/500 [03:14<08:21,  1.40s/it]

neutral --- entailment


Processing samples:  29%|██▉       | 144/500 [03:15<07:59,  1.35s/it]

entailment --- neutral


Processing samples:  29%|██▉       | 145/500 [03:16<07:42,  1.30s/it]

contradiction --- entailment


Processing samples:  29%|██▉       | 146/500 [03:17<07:09,  1.21s/it]

entailment --- entailment


Processing samples:  29%|██▉       | 147/500 [03:19<07:44,  1.31s/it]

contradiction --- neutral


Processing samples:  30%|██▉       | 148/500 [03:20<07:39,  1.31s/it]

entailment --- neutral


Processing samples:  30%|██▉       | 149/500 [03:21<07:28,  1.28s/it]

contradiction --- contradiction


Processing samples:  30%|███       | 150/500 [03:22<07:19,  1.25s/it]

entailment --- entailment


Processing samples:  30%|███       | 151/500 [03:23<07:00,  1.20s/it]

neutral --- entailment


Processing samples:  30%|███       | 152/500 [03:25<06:57,  1.20s/it]

contradiction --- entailment


Processing samples:  31%|███       | 153/500 [03:26<06:40,  1.15s/it]

neutral --- neutral


Processing samples:  31%|███       | 154/500 [03:27<06:40,  1.16s/it]

entailment --- entailment


Processing samples:  31%|███       | 155/500 [03:28<06:39,  1.16s/it]

neutral --- neutral


Processing samples:  31%|███       | 156/500 [03:29<06:30,  1.14s/it]

neutral --- entailment


Processing samples:  31%|███▏      | 157/500 [03:30<06:51,  1.20s/it]

entailment --- entailment


Processing samples:  32%|███▏      | 158/500 [03:32<07:00,  1.23s/it]

contradiction --- contradiction


Processing samples:  32%|███▏      | 159/500 [03:33<07:00,  1.23s/it]

entailment --- contradiction


Processing samples:  32%|███▏      | 160/500 [03:34<06:42,  1.18s/it]

neutral --- entailment


Processing samples:  32%|███▏      | 161/500 [03:35<06:27,  1.14s/it]

neutral --- entailment


Processing samples:  32%|███▏      | 162/500 [03:36<06:20,  1.13s/it]

entailment --- entailment


Processing samples:  33%|███▎      | 163/500 [03:37<06:28,  1.15s/it]

contradiction --- entailment


Processing samples:  33%|███▎      | 164/500 [03:38<06:15,  1.12s/it]

neutral --- contradiction


Processing samples:  33%|███▎      | 165/500 [03:40<06:13,  1.11s/it]

entailment --- entailment


Processing samples:  33%|███▎      | 166/500 [03:41<06:21,  1.14s/it]

contradiction --- entailment


Processing samples:  33%|███▎      | 167/500 [03:42<06:26,  1.16s/it]

neutral --- entailment


Processing samples:  34%|███▎      | 168/500 [03:43<06:44,  1.22s/it]

entailment --- entailment


Processing samples:  34%|███▍      | 169/500 [03:45<07:03,  1.28s/it]

contradiction --- neutral


Processing samples:  34%|███▍      | 170/500 [03:46<06:42,  1.22s/it]

neutral --- contradiction


Processing samples:  34%|███▍      | 171/500 [03:47<06:46,  1.23s/it]

contradiction --- entailment


Processing samples:  34%|███▍      | 172/500 [03:48<06:41,  1.22s/it]

entailment --- neutral


Processing samples:  35%|███▍      | 173/500 [03:49<06:33,  1.20s/it]

contradiction --- contradiction


Processing samples:  35%|███▍      | 174/500 [03:51<06:33,  1.21s/it]

neutral --- entailment


Processing samples:  35%|███▌      | 175/500 [03:52<06:22,  1.18s/it]

entailment --- entailment


Processing samples:  35%|███▌      | 176/500 [03:53<06:24,  1.19s/it]

entailment --- entailment


Processing samples:  35%|███▌      | 177/500 [03:54<06:28,  1.20s/it]

neutral --- entailment


Processing samples:  36%|███▌      | 178/500 [03:56<07:10,  1.34s/it]

contradiction --- contradiction


Processing samples:  36%|███▌      | 179/500 [03:57<06:55,  1.29s/it]

contradiction --- contradiction


Processing samples:  36%|███▌      | 180/500 [03:58<06:40,  1.25s/it]

neutral --- neutral


Processing samples:  36%|███▌      | 181/500 [03:59<06:29,  1.22s/it]

entailment --- neutral


Processing samples:  36%|███▋      | 182/500 [04:00<06:13,  1.17s/it]

contradiction --- entailment


Processing samples:  37%|███▋      | 183/500 [04:02<06:11,  1.17s/it]

entailment --- entailment


Processing samples:  37%|███▋      | 184/500 [04:03<06:08,  1.17s/it]

neutral --- neutral


Processing samples:  37%|███▋      | 185/500 [04:04<06:06,  1.16s/it]

contradiction --- entailment


Processing samples:  37%|███▋      | 186/500 [04:05<06:08,  1.17s/it]

neutral --- contradiction


Processing samples:  37%|███▋      | 187/500 [04:06<05:55,  1.14s/it]

entailment --- neutral


Processing samples:  38%|███▊      | 188/500 [04:08<06:23,  1.23s/it]

contradiction --- entailment


Processing samples:  38%|███▊      | 189/500 [04:09<06:20,  1.22s/it]

neutral --- entailment


Processing samples:  38%|███▊      | 190/500 [04:10<06:12,  1.20s/it]

entailment --- entailment


Processing samples:  38%|███▊      | 191/500 [04:11<05:56,  1.15s/it]

entailment --- entailment


Processing samples:  38%|███▊      | 192/500 [04:12<05:55,  1.16s/it]

neutral --- neutral


Processing samples:  39%|███▊      | 193/500 [04:13<06:06,  1.19s/it]

contradiction --- entailment


Processing samples:  39%|███▉      | 194/500 [04:15<06:04,  1.19s/it]

contradiction --- entailment


Processing samples:  39%|███▉      | 195/500 [04:16<05:44,  1.13s/it]

entailment --- entailment


Processing samples:  39%|███▉      | 196/500 [04:17<05:38,  1.11s/it]

neutral --- neutral


Processing samples:  39%|███▉      | 197/500 [04:18<05:43,  1.13s/it]

contradiction --- entailment


Processing samples:  40%|███▉      | 198/500 [04:19<06:18,  1.25s/it]

entailment --- entailment


Processing samples:  40%|███▉      | 199/500 [04:21<06:22,  1.27s/it]

contradiction --- entailment


Processing samples:  40%|████      | 200/500 [04:22<06:03,  1.21s/it]

neutral --- entailment


Processing samples:  40%|████      | 201/500 [04:23<05:48,  1.17s/it]

contradiction --- entailment


Processing samples:  40%|████      | 202/500 [04:24<05:38,  1.13s/it]

contradiction --- entailment


Processing samples:  41%|████      | 203/500 [04:25<05:32,  1.12s/it]

entailment --- entailment


Processing samples:  41%|████      | 204/500 [04:26<05:58,  1.21s/it]

neutral --- entailment


Processing samples:  41%|████      | 205/500 [04:28<06:47,  1.38s/it]

contradiction --- contradiction


Processing samples:  41%|████      | 206/500 [04:29<06:22,  1.30s/it]

entailment --- entailment


Processing samples:  41%|████▏     | 207/500 [04:30<05:59,  1.23s/it]

neutral --- entailment


Processing samples:  42%|████▏     | 208/500 [04:32<06:25,  1.32s/it]

contradiction --- entailment


Processing samples:  42%|████▏     | 209/500 [04:33<06:03,  1.25s/it]

neutral --- entailment


Processing samples:  42%|████▏     | 210/500 [04:35<06:32,  1.35s/it]

entailment --- entailment


Processing samples:  42%|████▏     | 211/500 [04:36<06:32,  1.36s/it]

contradiction --- contradiction


Processing samples:  42%|████▏     | 212/500 [04:38<07:11,  1.50s/it]

neutral --- neutral


Processing samples:  43%|████▎     | 213/500 [04:39<07:07,  1.49s/it]

neutral --- contradiction


Processing samples:  43%|████▎     | 214/500 [04:41<07:55,  1.66s/it]

contradiction --- entailment


Processing samples:  43%|████▎     | 215/500 [04:43<07:19,  1.54s/it]

neutral --- entailment


Processing samples:  43%|████▎     | 216/500 [04:44<07:02,  1.49s/it]

entailment --- neutral


Processing samples:  43%|████▎     | 217/500 [04:45<06:51,  1.45s/it]

neutral --- contradiction


Processing samples:  44%|████▎     | 218/500 [04:46<06:16,  1.34s/it]

neutral --- entailment


Processing samples:  44%|████▍     | 219/500 [04:48<06:06,  1.30s/it]

entailment --- entailment


Processing samples:  44%|████▍     | 220/500 [04:49<05:51,  1.25s/it]

neutral --- entailment


Processing samples:  44%|████▍     | 221/500 [04:50<05:44,  1.23s/it]

contradiction --- neutral


Processing samples:  44%|████▍     | 222/500 [04:51<05:41,  1.23s/it]

entailment --- neutral


Processing samples:  45%|████▍     | 223/500 [04:52<05:30,  1.19s/it]

neutral --- entailment


Processing samples:  45%|████▍     | 224/500 [04:53<05:19,  1.16s/it]

contradiction --- entailment


Processing samples:  45%|████▌     | 225/500 [04:54<05:09,  1.13s/it]

contradiction --- neutral


Processing samples:  45%|████▌     | 226/500 [04:57<06:45,  1.48s/it]

neutral --- contradiction


Processing samples:  45%|████▌     | 227/500 [04:58<06:20,  1.39s/it]

entailment --- entailment


Processing samples:  46%|████▌     | 228/500 [04:59<06:04,  1.34s/it]

entailment --- neutral


Processing samples:  46%|████▌     | 229/500 [05:00<05:56,  1.32s/it]

neutral --- contradiction


Processing samples:  46%|████▌     | 230/500 [05:01<05:34,  1.24s/it]

contradiction --- contradiction


Processing samples:  46%|████▌     | 231/500 [05:02<05:17,  1.18s/it]

contradiction --- entailment


Processing samples:  46%|████▋     | 232/500 [05:03<04:59,  1.12s/it]

entailment --- entailment


Processing samples:  47%|████▋     | 233/500 [05:04<04:47,  1.08s/it]

contradiction --- neutral


Processing samples:  47%|████▋     | 234/500 [05:06<05:00,  1.13s/it]

neutral --- entailment


Processing samples:  47%|████▋     | 235/500 [05:07<04:52,  1.10s/it]

neutral --- contradiction


Processing samples:  47%|████▋     | 236/500 [05:08<05:28,  1.25s/it]

contradiction --- entailment


Processing samples:  47%|████▋     | 237/500 [05:10<05:41,  1.30s/it]

contradiction --- neutral


Processing samples:  48%|████▊     | 238/500 [05:11<05:38,  1.29s/it]

entailment --- neutral


Processing samples:  48%|████▊     | 239/500 [05:12<05:42,  1.31s/it]

neutral --- neutral


Processing samples:  48%|████▊     | 240/500 [05:13<05:15,  1.21s/it]

neutral --- entailment


Processing samples:  48%|████▊     | 241/500 [05:14<05:01,  1.17s/it]

contradiction --- entailment


Processing samples:  48%|████▊     | 242/500 [05:15<04:51,  1.13s/it]

entailment --- contradiction


Processing samples:  49%|████▊     | 243/500 [05:17<05:01,  1.17s/it]

contradiction --- contradiction


Processing samples:  49%|████▉     | 244/500 [05:18<04:53,  1.15s/it]

entailment --- entailment


Processing samples:  49%|████▉     | 245/500 [05:19<04:57,  1.17s/it]

neutral --- entailment


Processing samples:  49%|████▉     | 246/500 [05:21<05:22,  1.27s/it]

entailment --- entailment


Processing samples:  49%|████▉     | 247/500 [05:22<05:29,  1.30s/it]

entailment --- entailment


Processing samples:  50%|████▉     | 248/500 [05:23<05:25,  1.29s/it]

contradiction --- entailment


Processing samples:  50%|████▉     | 249/500 [05:24<05:14,  1.25s/it]

neutral --- neutral


Processing samples:  50%|█████     | 250/500 [05:26<05:12,  1.25s/it]

contradiction --- contradiction


Processing samples:  50%|█████     | 251/500 [05:27<04:58,  1.20s/it]

entailment --- entailment


Processing samples:  50%|█████     | 252/500 [05:28<04:57,  1.20s/it]

contradiction --- entailment


Processing samples:  51%|█████     | 253/500 [05:29<04:47,  1.16s/it]

neutral --- entailment


Processing samples:  51%|█████     | 254/500 [05:30<04:40,  1.14s/it]

neutral --- entailment


Processing samples:  51%|█████     | 255/500 [05:31<04:44,  1.16s/it]

contradiction --- entailment


Processing samples:  51%|█████     | 256/500 [05:33<05:15,  1.29s/it]

neutral --- entailment


Processing samples:  51%|█████▏    | 257/500 [05:34<05:11,  1.28s/it]

entailment --- entailment


Processing samples:  52%|█████▏    | 258/500 [05:35<04:53,  1.21s/it]

neutral --- entailment


Processing samples:  52%|█████▏    | 259/500 [05:36<04:51,  1.21s/it]

entailment --- entailment


Processing samples:  52%|█████▏    | 260/500 [05:37<04:41,  1.17s/it]

contradiction --- entailment


Processing samples:  52%|█████▏    | 261/500 [05:39<04:44,  1.19s/it]

entailment --- entailment


Processing samples:  52%|█████▏    | 262/500 [05:40<04:27,  1.13s/it]

entailment --- neutral


Processing samples:  53%|█████▎    | 263/500 [05:41<04:20,  1.10s/it]

contradiction --- neutral


Processing samples:  53%|█████▎    | 264/500 [05:42<04:12,  1.07s/it]

neutral --- entailment


Processing samples:  53%|█████▎    | 265/500 [05:43<04:24,  1.13s/it]

contradiction --- neutral


Processing samples:  53%|█████▎    | 266/500 [05:44<04:54,  1.26s/it]

entailment --- neutral


Processing samples:  53%|█████▎    | 267/500 [05:46<05:06,  1.32s/it]

entailment --- entailment


Processing samples:  54%|█████▎    | 268/500 [05:47<05:10,  1.34s/it]

contradiction --- entailment


Processing samples:  54%|█████▍    | 269/500 [05:49<05:12,  1.35s/it]

entailment --- entailment


Processing samples:  54%|█████▍    | 270/500 [05:50<05:18,  1.38s/it]

contradiction --- neutral


Processing samples:  54%|█████▍    | 271/500 [05:51<04:55,  1.29s/it]

entailment --- entailment


Processing samples:  54%|█████▍    | 272/500 [05:52<04:38,  1.22s/it]

neutral --- entailment


Processing samples:  55%|█████▍    | 273/500 [05:53<04:29,  1.19s/it]

neutral --- entailment


Processing samples:  55%|█████▍    | 274/500 [05:54<04:18,  1.15s/it]

entailment --- contradiction


Processing samples:  55%|█████▌    | 275/500 [05:56<04:22,  1.17s/it]

contradiction --- entailment


Processing samples:  55%|█████▌    | 276/500 [05:57<04:44,  1.27s/it]

entailment --- entailment


Processing samples:  55%|█████▌    | 277/500 [05:58<04:33,  1.23s/it]

neutral --- entailment


Processing samples:  56%|█████▌    | 278/500 [05:59<04:30,  1.22s/it]

contradiction --- entailment


Processing samples:  56%|█████▌    | 279/500 [06:01<04:25,  1.20s/it]

neutral --- entailment


Processing samples:  56%|█████▌    | 280/500 [06:02<04:13,  1.15s/it]

contradiction --- neutral


Processing samples:  56%|█████▌    | 281/500 [06:03<04:17,  1.18s/it]

neutral --- entailment


Processing samples:  56%|█████▋    | 282/500 [06:04<04:08,  1.14s/it]

entailment --- neutral


Processing samples:  57%|█████▋    | 283/500 [06:05<04:16,  1.18s/it]

contradiction --- entailment


Processing samples:  57%|█████▋    | 284/500 [06:07<04:20,  1.21s/it]

entailment --- entailment


Processing samples:  57%|█████▋    | 285/500 [06:08<04:21,  1.21s/it]

contradiction --- neutral


Processing samples:  57%|█████▋    | 286/500 [06:09<04:44,  1.33s/it]

neutral --- neutral


Processing samples:  57%|█████▋    | 287/500 [06:10<04:30,  1.27s/it]

entailment --- entailment


Processing samples:  58%|█████▊    | 288/500 [06:12<04:25,  1.25s/it]

neutral --- contradiction


Processing samples:  58%|█████▊    | 289/500 [06:13<04:13,  1.20s/it]

contradiction --- entailment


Processing samples:  58%|█████▊    | 290/500 [06:14<04:14,  1.21s/it]

entailment --- entailment


Processing samples:  58%|█████▊    | 291/500 [06:15<04:03,  1.17s/it]

contradiction --- contradiction


Processing samples:  58%|█████▊    | 292/500 [06:16<03:56,  1.14s/it]

neutral --- entailment


Processing samples:  59%|█████▊    | 293/500 [06:17<03:56,  1.14s/it]

entailment --- entailment


Processing samples:  59%|█████▉    | 294/500 [06:18<03:55,  1.15s/it]

neutral --- entailment


Processing samples:  59%|█████▉    | 295/500 [06:20<03:58,  1.16s/it]

contradiction --- entailment


Processing samples:  59%|█████▉    | 296/500 [06:21<04:08,  1.22s/it]

entailment --- neutral


Processing samples:  59%|█████▉    | 297/500 [06:22<04:19,  1.28s/it]

neutral --- entailment


Processing samples:  60%|█████▉    | 298/500 [06:24<04:17,  1.28s/it]

contradiction --- entailment


Processing samples:  60%|█████▉    | 299/500 [06:25<03:57,  1.18s/it]

contradiction --- contradiction


Processing samples:  60%|██████    | 300/500 [06:26<03:54,  1.17s/it]

entailment --- neutral


Processing samples:  60%|██████    | 301/500 [06:27<03:51,  1.16s/it]

contradiction --- entailment


Processing samples:  60%|██████    | 302/500 [06:28<03:48,  1.15s/it]

neutral --- contradiction


Processing samples:  61%|██████    | 303/500 [06:29<03:44,  1.14s/it]

entailment --- neutral


Processing samples:  61%|██████    | 304/500 [06:30<03:46,  1.16s/it]

contradiction --- entailment


Processing samples:  61%|██████    | 305/500 [06:31<03:42,  1.14s/it]

contradiction --- entailment


Processing samples:  61%|██████    | 306/500 [06:33<04:00,  1.24s/it]

neutral --- contradiction


Processing samples:  61%|██████▏   | 307/500 [06:34<04:07,  1.28s/it]

entailment --- neutral


Processing samples:  62%|██████▏   | 308/500 [06:35<03:54,  1.22s/it]

neutral --- entailment


Processing samples:  62%|██████▏   | 309/500 [06:37<03:55,  1.23s/it]

contradiction --- neutral


Processing samples:  62%|██████▏   | 310/500 [06:38<03:56,  1.24s/it]

entailment --- entailment


Processing samples:  62%|██████▏   | 311/500 [06:39<03:55,  1.24s/it]

neutral --- entailment


Processing samples:  62%|██████▏   | 312/500 [06:40<03:52,  1.24s/it]

contradiction --- neutral


Processing samples:  63%|██████▎   | 313/500 [06:41<03:40,  1.18s/it]

entailment --- entailment


Processing samples:  63%|██████▎   | 314/500 [06:43<03:41,  1.19s/it]

entailment --- entailment


Processing samples:  63%|██████▎   | 315/500 [06:44<03:45,  1.22s/it]

contradiction --- entailment


Processing samples:  63%|██████▎   | 316/500 [06:46<04:07,  1.35s/it]

neutral --- entailment


Processing samples:  63%|██████▎   | 317/500 [06:47<03:58,  1.31s/it]

neutral --- entailment


Processing samples:  64%|██████▎   | 318/500 [06:48<03:39,  1.20s/it]

contradiction --- entailment


Processing samples:  64%|██████▍   | 319/500 [06:49<03:39,  1.21s/it]

entailment --- neutral


Processing samples:  64%|██████▍   | 320/500 [06:50<03:36,  1.20s/it]

entailment --- neutral


Processing samples:  64%|██████▍   | 321/500 [06:51<03:35,  1.20s/it]

neutral --- contradiction


Processing samples:  64%|██████▍   | 322/500 [06:53<03:29,  1.17s/it]

contradiction --- entailment


Processing samples:  65%|██████▍   | 323/500 [06:54<03:25,  1.16s/it]

entailment --- neutral


Processing samples:  65%|██████▍   | 324/500 [06:55<03:25,  1.17s/it]

neutral --- neutral


Processing samples:  65%|██████▌   | 325/500 [06:56<03:17,  1.13s/it]

contradiction --- entailment


Processing samples:  65%|██████▌   | 326/500 [06:57<03:34,  1.23s/it]

neutral --- neutral


Processing samples:  65%|██████▌   | 327/500 [06:59<03:30,  1.22s/it]

entailment --- entailment


Processing samples:  66%|██████▌   | 328/500 [07:00<03:29,  1.22s/it]

contradiction --- entailment


Processing samples:  66%|██████▌   | 329/500 [07:01<03:21,  1.18s/it]

neutral --- contradiction


Processing samples:  66%|██████▌   | 330/500 [07:02<03:23,  1.20s/it]

entailment --- contradiction


Processing samples:  66%|██████▌   | 331/500 [07:03<03:14,  1.15s/it]

contradiction --- entailment


Processing samples:  66%|██████▋   | 332/500 [07:04<03:18,  1.18s/it]

neutral --- neutral


Processing samples:  67%|██████▋   | 333/500 [07:06<03:18,  1.19s/it]

neutral --- entailment


Processing samples:  67%|██████▋   | 334/500 [07:07<03:22,  1.22s/it]

entailment --- entailment


Processing samples:  67%|██████▋   | 335/500 [07:08<03:20,  1.21s/it]

contradiction --- neutral


Processing samples:  67%|██████▋   | 336/500 [07:10<03:30,  1.29s/it]

neutral --- neutral


Processing samples:  67%|██████▋   | 337/500 [07:11<03:34,  1.32s/it]

neutral --- entailment


Processing samples:  68%|██████▊   | 338/500 [07:12<03:22,  1.25s/it]

entailment --- entailment


Processing samples:  68%|██████▊   | 339/500 [07:13<03:13,  1.20s/it]

contradiction --- entailment


Processing samples:  68%|██████▊   | 340/500 [07:14<03:14,  1.22s/it]

neutral --- neutral


Processing samples:  68%|██████▊   | 341/500 [07:16<03:11,  1.21s/it]

entailment --- contradiction


Processing samples:  68%|██████▊   | 342/500 [07:17<03:12,  1.22s/it]

contradiction --- entailment


Processing samples:  69%|██████▊   | 343/500 [07:18<03:10,  1.21s/it]

entailment --- entailment


Processing samples:  69%|██████▉   | 344/500 [07:19<03:14,  1.25s/it]

contradiction --- contradiction


Processing samples:  69%|██████▉   | 345/500 [07:21<03:25,  1.33s/it]

neutral --- entailment


Processing samples:  69%|██████▉   | 346/500 [07:23<03:42,  1.44s/it]

entailment --- entailment


Processing samples:  69%|██████▉   | 347/500 [07:24<03:23,  1.33s/it]

neutral --- entailment


Processing samples:  70%|██████▉   | 348/500 [07:25<03:10,  1.25s/it]

contradiction --- contradiction


Processing samples:  70%|██████▉   | 349/500 [07:26<02:59,  1.19s/it]

entailment --- entailment


Processing samples:  70%|███████   | 350/500 [07:27<02:54,  1.16s/it]

entailment --- contradiction


Processing samples:  70%|███████   | 351/500 [07:28<02:49,  1.14s/it]

contradiction --- entailment


Processing samples:  70%|███████   | 352/500 [07:30<03:38,  1.47s/it]

contradiction --- entailment


Processing samples:  71%|███████   | 353/500 [07:33<04:26,  1.82s/it]

entailment --- contradiction


Processing samples:  71%|███████   | 354/500 [07:36<05:25,  2.23s/it]

entailment --- contradiction


Processing samples:  71%|███████   | 355/500 [07:37<04:51,  2.01s/it]

entailment --- contradiction


Processing samples:  71%|███████   | 356/500 [07:39<04:39,  1.94s/it]

neutral --- neutral


Processing samples:  71%|███████▏  | 357/500 [07:41<04:23,  1.84s/it]

neutral --- entailment


Processing samples:  72%|███████▏  | 358/500 [07:42<04:07,  1.74s/it]

neutral --- neutral


Processing samples:  72%|███████▏  | 359/500 [07:43<03:37,  1.54s/it]

contradiction --- entailment


Processing samples:  72%|███████▏  | 360/500 [07:44<03:16,  1.40s/it]

neutral --- entailment


Processing samples:  72%|███████▏  | 361/500 [07:46<03:12,  1.39s/it]

entailment --- entailment


Processing samples:  72%|███████▏  | 362/500 [07:47<03:12,  1.40s/it]

neutral --- entailment


Processing samples:  73%|███████▎  | 363/500 [07:48<02:54,  1.27s/it]

contradiction --- contradiction


Processing samples:  73%|███████▎  | 364/500 [07:49<02:51,  1.26s/it]

entailment --- entailment


Processing samples:  73%|███████▎  | 365/500 [07:51<02:41,  1.20s/it]

neutral --- neutral


Processing samples:  73%|███████▎  | 366/500 [07:51<02:29,  1.11s/it]

contradiction --- contradiction


Processing samples:  73%|███████▎  | 367/500 [07:52<02:19,  1.05s/it]

contradiction --- neutral


Processing samples:  74%|███████▎  | 368/500 [07:53<02:19,  1.06s/it]

contradiction --- contradiction


Processing samples:  74%|███████▍  | 369/500 [07:55<02:20,  1.07s/it]

neutral --- entailment


Processing samples:  74%|███████▍  | 370/500 [07:56<02:20,  1.08s/it]

entailment --- entailment


Processing samples:  74%|███████▍  | 371/500 [07:57<02:20,  1.09s/it]

entailment --- neutral


Processing samples:  74%|███████▍  | 372/500 [07:58<02:28,  1.16s/it]

contradiction --- contradiction


Processing samples:  75%|███████▍  | 373/500 [07:59<02:33,  1.21s/it]

neutral --- entailment


Processing samples:  75%|███████▍  | 374/500 [08:01<02:33,  1.22s/it]

neutral --- neutral


Processing samples:  75%|███████▌  | 375/500 [08:02<02:34,  1.23s/it]

entailment --- entailment


Processing samples:  75%|███████▌  | 376/500 [08:03<02:27,  1.19s/it]

contradiction --- entailment


Processing samples:  75%|███████▌  | 377/500 [08:04<02:25,  1.18s/it]

neutral --- neutral


Processing samples:  76%|███████▌  | 378/500 [08:05<02:13,  1.10s/it]

entailment --- entailment


Processing samples:  76%|███████▌  | 379/500 [08:06<02:19,  1.15s/it]

contradiction --- entailment


Processing samples:  76%|███████▌  | 380/500 [08:08<02:26,  1.22s/it]

contradiction --- neutral


Processing samples:  76%|███████▌  | 381/500 [08:09<02:28,  1.25s/it]

neutral --- entailment


Processing samples:  76%|███████▋  | 382/500 [08:10<02:25,  1.24s/it]

entailment --- neutral


Processing samples:  77%|███████▋  | 383/500 [08:12<02:34,  1.32s/it]

entailment --- contradiction


Processing samples:  77%|███████▋  | 384/500 [08:13<02:26,  1.26s/it]

neutral --- neutral


Processing samples:  77%|███████▋  | 385/500 [08:14<02:23,  1.24s/it]

contradiction --- neutral


Processing samples:  77%|███████▋  | 386/500 [08:15<02:15,  1.19s/it]

contradiction --- entailment


Processing samples:  77%|███████▋  | 387/500 [08:16<02:11,  1.17s/it]

neutral --- contradiction


Processing samples:  78%|███████▊  | 388/500 [08:17<02:12,  1.19s/it]

entailment --- contradiction


Processing samples:  78%|███████▊  | 389/500 [08:19<02:09,  1.16s/it]

contradiction --- entailment


Processing samples:  78%|███████▊  | 390/500 [08:20<02:11,  1.19s/it]

neutral --- entailment


Processing samples:  78%|███████▊  | 391/500 [08:21<02:03,  1.13s/it]

entailment --- entailment


Processing samples:  78%|███████▊  | 392/500 [08:22<02:07,  1.18s/it]

entailment --- neutral


Processing samples:  79%|███████▊  | 393/500 [08:23<02:09,  1.21s/it]

contradiction --- neutral


Processing samples:  79%|███████▉  | 394/500 [08:25<02:04,  1.17s/it]

neutral --- neutral


Processing samples:  79%|███████▉  | 395/500 [08:26<02:02,  1.17s/it]

contradiction --- entailment


Processing samples:  79%|███████▉  | 396/500 [08:27<01:59,  1.14s/it]

neutral --- contradiction


Processing samples:  79%|███████▉  | 397/500 [08:28<01:56,  1.14s/it]

entailment --- entailment


Processing samples:  80%|███████▉  | 398/500 [08:29<01:54,  1.12s/it]

contradiction --- neutral


Processing samples:  80%|███████▉  | 399/500 [08:30<01:54,  1.13s/it]

neutral --- neutral


Processing samples:  80%|████████  | 400/500 [08:31<01:54,  1.14s/it]

entailment --- contradiction


Processing samples:  80%|████████  | 401/500 [08:32<01:51,  1.13s/it]

neutral --- entailment


Processing samples:  80%|████████  | 402/500 [08:33<01:48,  1.11s/it]

contradiction --- entailment


Processing samples:  81%|████████  | 403/500 [08:35<02:00,  1.24s/it]

entailment --- entailment


Processing samples:  81%|████████  | 404/500 [08:36<02:02,  1.27s/it]

entailment --- neutral


Processing samples:  81%|████████  | 405/500 [08:38<01:59,  1.26s/it]

contradiction --- entailment


Processing samples:  81%|████████  | 406/500 [08:39<01:57,  1.24s/it]

neutral --- neutral


Processing samples:  81%|████████▏ | 407/500 [08:40<01:51,  1.20s/it]

contradiction --- neutral


Processing samples:  82%|████████▏ | 408/500 [08:41<01:49,  1.19s/it]

neutral --- entailment


Processing samples:  82%|████████▏ | 409/500 [08:42<01:47,  1.19s/it]

entailment --- neutral


Processing samples:  82%|████████▏ | 410/500 [08:43<01:49,  1.21s/it]

contradiction --- entailment


Processing samples:  82%|████████▏ | 411/500 [08:45<01:52,  1.26s/it]

entailment --- entailment


Processing samples:  82%|████████▏ | 412/500 [08:46<01:53,  1.29s/it]

neutral --- entailment


Processing samples:  83%|████████▎ | 413/500 [08:48<01:59,  1.38s/it]

contradiction --- neutral


Processing samples:  83%|████████▎ | 414/500 [08:49<01:55,  1.34s/it]

entailment --- entailment


Processing samples:  83%|████████▎ | 415/500 [08:50<01:50,  1.29s/it]

neutral --- neutral


Processing samples:  83%|████████▎ | 416/500 [08:51<01:47,  1.29s/it]

entailment --- neutral


Processing samples:  83%|████████▎ | 417/500 [08:52<01:39,  1.19s/it]

contradiction --- entailment


Processing samples:  84%|████████▎ | 418/500 [08:54<01:34,  1.15s/it]

neutral --- neutral


Processing samples:  84%|████████▍ | 419/500 [08:55<01:33,  1.16s/it]

entailment --- contradiction


Processing samples:  84%|████████▍ | 420/500 [08:56<01:34,  1.18s/it]

contradiction --- entailment


Processing samples:  84%|████████▍ | 421/500 [08:57<01:35,  1.21s/it]

neutral --- neutral


Processing samples:  84%|████████▍ | 422/500 [08:59<01:54,  1.46s/it]

neutral --- entailment


Processing samples:  85%|████████▍ | 423/500 [09:01<02:01,  1.57s/it]

contradiction --- neutral


Processing samples:  85%|████████▍ | 424/500 [09:02<01:53,  1.49s/it]

entailment --- entailment


Processing samples:  85%|████████▌ | 425/500 [09:04<01:43,  1.39s/it]

contradiction --- entailment


Processing samples:  85%|████████▌ | 426/500 [09:05<01:35,  1.29s/it]

entailment --- contradiction


Processing samples:  85%|████████▌ | 427/500 [09:06<01:29,  1.22s/it]

entailment --- entailment


Processing samples:  86%|████████▌ | 428/500 [09:07<01:25,  1.18s/it]

contradiction --- neutral


Processing samples:  86%|████████▌ | 429/500 [09:08<01:23,  1.18s/it]

entailment --- neutral


Processing samples:  86%|████████▌ | 430/500 [09:09<01:21,  1.16s/it]

contradiction --- neutral


Processing samples:  86%|████████▌ | 431/500 [09:10<01:19,  1.16s/it]

neutral --- entailment


Processing samples:  86%|████████▋ | 432/500 [09:12<01:28,  1.30s/it]

entailment --- entailment


Processing samples:  87%|████████▋ | 433/500 [09:13<01:26,  1.28s/it]

neutral --- neutral


Processing samples:  87%|████████▋ | 434/500 [09:14<01:23,  1.27s/it]

contradiction --- neutral


Processing samples:  87%|████████▋ | 435/500 [09:15<01:21,  1.25s/it]

entailment --- entailment


Processing samples:  87%|████████▋ | 436/500 [09:17<01:16,  1.20s/it]

contradiction --- entailment


Processing samples:  87%|████████▋ | 437/500 [09:18<01:15,  1.19s/it]

neutral --- entailment


Processing samples:  88%|████████▊ | 438/500 [09:19<01:15,  1.21s/it]

neutral --- entailment


Processing samples:  88%|████████▊ | 439/500 [09:20<01:14,  1.22s/it]

contradiction --- entailment


Processing samples:  88%|████████▊ | 440/500 [09:21<01:10,  1.18s/it]

entailment --- entailment


Processing samples:  88%|████████▊ | 441/500 [09:23<01:13,  1.24s/it]

contradiction --- neutral


Processing samples:  88%|████████▊ | 442/500 [09:24<01:16,  1.31s/it]

neutral --- entailment


Processing samples:  89%|████████▊ | 443/500 [09:25<01:11,  1.26s/it]

entailment --- neutral


Processing samples:  89%|████████▉ | 444/500 [09:27<01:11,  1.27s/it]

entailment --- neutral


Processing samples:  89%|████████▉ | 445/500 [09:28<01:10,  1.28s/it]

contradiction --- neutral


Processing samples:  89%|████████▉ | 446/500 [09:29<01:10,  1.30s/it]

contradiction --- entailment


Processing samples:  89%|████████▉ | 447/500 [09:31<01:08,  1.30s/it]

entailment --- entailment


Processing samples:  90%|████████▉ | 448/500 [09:32<01:07,  1.29s/it]

neutral --- entailment


Processing samples:  90%|████████▉ | 449/500 [09:33<01:05,  1.28s/it]

contradiction --- entailment


Processing samples:  90%|█████████ | 450/500 [09:34<01:04,  1.29s/it]

entailment --- contradiction


Processing samples:  90%|█████████ | 451/500 [09:36<01:05,  1.34s/it]

neutral --- neutral


Processing samples:  90%|█████████ | 452/500 [09:37<01:01,  1.28s/it]

entailment --- entailment


Processing samples:  91%|█████████ | 453/500 [09:38<01:02,  1.33s/it]

contradiction --- entailment


Processing samples:  91%|█████████ | 454/500 [09:40<01:02,  1.36s/it]

neutral --- entailment


Processing samples:  91%|█████████ | 455/500 [09:41<00:54,  1.21s/it]

entailment --- entailment


Processing samples:  91%|█████████ | 456/500 [09:42<00:54,  1.23s/it]

entailment --- entailment


Processing samples:  91%|█████████▏| 457/500 [09:43<00:53,  1.25s/it]

contradiction --- entailment


Processing samples:  92%|█████████▏| 458/500 [09:44<00:48,  1.17s/it]

contradiction --- entailment


Processing samples:  92%|█████████▏| 459/500 [09:46<00:50,  1.23s/it]

neutral --- entailment


Processing samples:  92%|█████████▏| 460/500 [09:47<00:46,  1.16s/it]

entailment --- entailment


Processing samples:  92%|█████████▏| 461/500 [09:48<00:48,  1.24s/it]

neutral --- contradiction


Processing samples:  92%|█████████▏| 462/500 [09:49<00:48,  1.27s/it]

entailment --- entailment


Processing samples:  93%|█████████▎| 463/500 [09:50<00:43,  1.18s/it]

contradiction --- entailment


Processing samples:  93%|█████████▎| 464/500 [09:51<00:41,  1.14s/it]

contradiction --- entailment


Processing samples:  93%|█████████▎| 465/500 [09:52<00:38,  1.10s/it]

entailment --- entailment


Processing samples:  93%|█████████▎| 466/500 [09:54<00:38,  1.13s/it]

neutral --- neutral


Processing samples:  93%|█████████▎| 467/500 [09:55<00:36,  1.11s/it]

neutral --- contradiction


Processing samples:  94%|█████████▎| 468/500 [09:56<00:36,  1.13s/it]

contradiction --- entailment


Processing samples:  94%|█████████▍| 469/500 [09:57<00:35,  1.15s/it]

entailment --- entailment


Processing samples:  94%|█████████▍| 470/500 [09:58<00:34,  1.15s/it]

entailment --- neutral


Processing samples:  94%|█████████▍| 471/500 [10:00<00:37,  1.29s/it]

contradiction --- entailment


Processing samples:  94%|█████████▍| 472/500 [10:01<00:37,  1.34s/it]

entailment --- neutral


Processing samples:  95%|█████████▍| 473/500 [10:03<00:35,  1.32s/it]

contradiction --- contradiction


Processing samples:  95%|█████████▍| 474/500 [10:04<00:32,  1.27s/it]

entailment --- entailment


Processing samples:  95%|█████████▌| 475/500 [10:05<00:31,  1.26s/it]

neutral --- entailment


Processing samples:  95%|█████████▌| 476/500 [10:06<00:29,  1.25s/it]

entailment --- entailment


Processing samples:  95%|█████████▌| 477/500 [10:07<00:27,  1.19s/it]

neutral --- entailment


Processing samples:  96%|█████████▌| 478/500 [10:08<00:26,  1.18s/it]

contradiction --- entailment


Processing samples:  96%|█████████▌| 479/500 [10:10<00:24,  1.16s/it]

contradiction --- neutral


Processing samples:  96%|█████████▌| 480/500 [10:11<00:23,  1.15s/it]

entailment --- contradiction


Processing samples:  96%|█████████▌| 481/500 [10:12<00:23,  1.24s/it]

entailment --- entailment


Processing samples:  96%|█████████▋| 482/500 [10:13<00:22,  1.28s/it]

contradiction --- entailment


Processing samples:  97%|█████████▋| 483/500 [10:14<00:20,  1.19s/it]

neutral --- neutral


Processing samples:  97%|█████████▋| 484/500 [10:15<00:18,  1.14s/it]

entailment --- neutral


Processing samples:  97%|█████████▋| 485/500 [10:17<00:17,  1.18s/it]

neutral --- entailment


Processing samples:  97%|█████████▋| 486/500 [10:18<00:16,  1.18s/it]

entailment --- entailment


Processing samples:  97%|█████████▋| 487/500 [10:19<00:15,  1.18s/it]

contradiction --- contradiction


Processing samples:  98%|█████████▊| 488/500 [10:20<00:13,  1.14s/it]

entailment --- entailment


Processing samples:  98%|█████████▊| 489/500 [10:21<00:12,  1.12s/it]

contradiction --- entailment


Processing samples:  98%|█████████▊| 490/500 [10:23<00:11,  1.17s/it]

neutral --- contradiction


Processing samples:  98%|█████████▊| 491/500 [10:24<00:11,  1.25s/it]

contradiction --- contradiction


Processing samples:  98%|█████████▊| 492/500 [10:25<00:10,  1.31s/it]

neutral --- entailment


Processing samples:  99%|█████████▊| 493/500 [10:27<00:09,  1.31s/it]

entailment --- entailment


Processing samples:  99%|█████████▉| 494/500 [10:28<00:07,  1.28s/it]

entailment --- entailment


Processing samples:  99%|█████████▉| 495/500 [10:29<00:06,  1.27s/it]

neutral --- contradiction


Processing samples:  99%|█████████▉| 496/500 [10:30<00:05,  1.26s/it]

contradiction --- contradiction


Processing samples:  99%|█████████▉| 497/500 [10:32<00:03,  1.25s/it]

contradiction --- entailment


Processing samples: 100%|█████████▉| 498/500 [10:33<00:02,  1.23s/it]

entailment --- entailment


Processing samples: 100%|█████████▉| 499/500 [10:34<00:01,  1.23s/it]

entailment --- entailment


Processing samples: 100%|██████████| 500/500 [10:35<00:00,  1.27s/it]


In [None]:
df_c = pd.DataFrame(results_c)
df_ic = pd.DataFrame(results_ic)

In [None]:
# Save results
output_file_c  = "llama3_nli_analysis_500_shap_correct.csv"
output_file_ic = "llama3_nli_analysis_500_shap_incorrect.csv"

df_c.to_csv(output_file_c, index=False)
df_ic.to_csv(output_file_ic, index=False)

In [None]:
df_c.shape

(179, 6)