In [19]:
import polars as pl

# Evaluation table, read from a CSV in the notebook's working directory.
# The evaluation cell further down reads the columns 'ID', 'PREMISE',
# 'NEW HYPOTHESIS' and 'LABEL' from every row of this frame.
ADVERSARIAL_CSV = "fever_test.adversarial.csv"
df = pl.read_csv(ADVERSARIAL_CSV)

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hugging Face Hub checkpoint names of the NLI classifiers under comparison.
model_name_base = "MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"
model_name_large = "MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli"
model_name_large_2 = "Joelzhang/deberta-v3-large-snli_mnli_fever_anli_R1_R2_R3-nli"

# Short model id -> checkpoint name. Insertion order is the loading order.
MAP = dict(
    base=model_name_base,
    large1=model_name_large,
    large2=model_name_large_2,
)

# Both registries are keyed by the full checkpoint name (the values of MAP),
# not by the short id.
TOKENIZERS = {}
MODELS = {}

# Load each tokenizer first, then its classifier, moving the classifier to
# `device` right away.
for model_name in MAP.values():
    print(f"> loading {model_name}")
    TOKENIZERS[model_name] = AutoTokenizer.from_pretrained(model_name)
    MODELS[model_name] = (
        AutoModelForSequenceClassification
        .from_pretrained(model_name)
        .to(device)
    )



def inference(model_name, premise, hypothesis):
    """Classify one (premise, hypothesis) pair with a loaded NLI model.

    Args:
        model_name: Key into the module-level ``TOKENIZERS`` / ``MODELS``
            registries (the full checkpoint name).
        premise: Premise text, passed to the tokenizer as the first segment.
        hypothesis: Hypothesis text, passed as the second segment.

    Returns:
        dict mapping "ENTAILMENT", "NEUTRAL" and "CONTRADICTION" to the
        softmax probability expressed as a percentage rounded to one decimal.

    Raises:
        KeyError: if ``model_name`` was never loaded into the registries.
    """
    tokenizer = TOKENIZERS[model_name]
    model = MODELS[model_name]
    default_labels = ["ENTAILMENT", "NEUTRAL", "CONTRADICTION"]

    # Forward every tensor the tokenizer produced (attention mask included),
    # not just the input ids, and move them all to the model's device.
    # truncation=False is kept from the original: long premises are not cut.
    encoded = tokenizer(premise, hypothesis, truncation=False, return_tensors="pt").to(device)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        output = model(**encoded)
    probabilities = torch.softmax(output["logits"][0], -1).tolist()

    # Take the class order from the checkpoint's own config when it names
    # exactly the three expected classes; index -> label order is not
    # guaranteed to be the same across checkpoints. Otherwise (generic
    # LABEL_i names, missing config, different classes) keep the historical
    # hard-coded order.
    id2label = getattr(getattr(model, "config", None), "id2label", None) or {}
    config_labels = [
        str(id2label.get(idx, id2label.get(str(idx), ""))).upper()
        for idx in range(len(probabilities))
    ]
    if sorted(config_labels) == sorted(default_labels):
        label_names = config_labels
    else:
        label_names = default_labels

    return {name: round(float(prob) * 100, 1) for prob, name in zip(probabilities, label_names)}

> loading MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli


tokenizer_config.json:   0%|          | 0.00/1.28k [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/8.66M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/23.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/286 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.09k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/369M [00:00<?, ?B/s]

> loading MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli


model.safetensors:   0%|          | 0.00/870M [00:00<?, ?B/s]

> loading Joelzhang/deberta-v3-large-snli_mnli_fever_anli_R1_R2_R3-nli


tokenizer_config.json:   0%|          | 0.00/310 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/18.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/1.74G [00:00<?, ?B/s]

In [21]:
import os

# Run every loaded model over every row of `df`, echoing each prediction and
# writing one 0/1 line per row to "<model_id>-model_scores.log".
for model_id, model_name in MAP.items():
    score_file = f"{model_id}-model_scores.log"
    print(f"> using model {model_id} :: {model_name}")
    # Open the log once per model. Mode 'w' discards any log left by a
    # previous run (replacing the remove-then-append dance), and buffering=1
    # is line-buffered so each score reaches the file as soon as it is written.
    with open(score_file, 'w', buffering=1) as fout:
        for elem in df.iter_rows(named=True):
            prediction = inference(model_name, elem['PREMISE'], elem['NEW HYPOTHESIS'])
            # Label with the highest percentage; on a tie max() keeps the
            # first key in the dict's order.
            predicted = max(prediction, key=prediction.get)
            # NOTE: score is 1 when the prediction DIFFERS from the gold
            # label and 0 when it matches, i.e. it is an error indicator.
            score = int(predicted != elem['LABEL'])
            print(f"{elem['ID']}. label: {elem['LABEL']} - predicted: {predicted} >>> {score}")
            print(f"{score}", file=fout)

> using model base :: MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli
0. label: NEUTRAL - predicted: ENTAILMENT >>> 1
1. label: ENTAILMENT - predicted: NEUTRAL >>> 1
2. label: CONTRADICTION - predicted: ENTAILMENT >>> 1
3. label: CONTRADICTION - predicted: ENTAILMENT >>> 1
4. label: CONTRADICTION - predicted: ENTAILMENT >>> 1
5. label: CONTRADICTION - predicted: ENTAILMENT >>> 1
6. label: ENTAILMENT - predicted: CONTRADICTION >>> 1
7. label: CONTRADICTION - predicted: ENTAILMENT >>> 1
8. label: CONTRADICTION - predicted: ENTAILMENT >>> 1
9. label: NEUTRAL - predicted: NEUTRAL >>> 0
10. label: NEUTRAL - predicted: NEUTRAL >>> 0
11. label: NEUTRAL - predicted: NEUTRAL >>> 0
12. label: CONTRADICTION - predicted: ENTAILMENT >>> 1
13. label: CONTRADICTION - predicted: NEUTRAL >>> 1
14. label: NEUTRAL - predicted: ENTAILMENT >>> 1
15. label: ENTAILMENT - predicted: CONTRADICTION >>> 1
16. label: CONTRADICTION - predicted: ENTAILMENT >>> 1
17. label: ENTAILMENT - predicted: CONTRADICTION >>> 1
1